# Source code for dgl.sampling.negative

```"""Negative sampling APIs"""

from numpy.polynomial import polynomial

from .. import backend as F, utils
from .._ffi.function import _init_api
from ..heterograph import DGLGraph

__all__ = ["global_uniform_negative_sampling"]

def _calc_redundancy(
k_hat, num_edges, num_pairs, r=3
):  # pylint: disable=invalid-name
# pylint: disable=invalid-name
# Calculates the number of samples required based on a lower-bound
# of the expected number of negative samples, based on N draws from
# a binomial distribution.  Solves the following equation for N:
#
# k_hat = N*p_k - r * np.sqrt(N*p_k*(1-p_k))
#
# where p_k is the probability that a node pairing is a negative edge
# and r is the number of standard deviations to construct the lower bound
#
# Credits to @zjost
p_m = num_edges / num_pairs
p_k = 1 - p_m

a = p_k**2
b = -p_k * (2 * k_hat + r**2 * p_m)
c = k_hat**2

poly = polynomial.Polynomial([c, b, a])
N = poly.roots()[-1]
redundancy = N / k_hat - 1.0
return redundancy

[docs]def global_uniform_negative_sampling(
g,
num_samples,
exclude_self_loops=True,
replace=False,
etype=None,
redundancy=None,
):
"""Performs negative sampling, which generate source-destination pairs such that
edges with the given type do not exist.

Specifically, this function takes in an edge type and a number of samples.  It
returns two tensors ``src`` and ``dst``, the former in the range of ``[0, num_src)``
and the latter in the range of ``[0, num_dst)``, where ``num_src`` and ``num_dst``
represents the number of nodes with the source and destination node type respectively.
It guarantees that no edge will exist between the corresponding pairs of ``src``
with the source node type and ``dst`` with the destination node type.

.. note::

This negative sampler will try to generate as many negative samples as possible, but
it may rarely return less than :attr:`num_samples` negative samples.
This is more likely to happen when a graph is so small or dense that not many
unique negative samples exist.

Parameters
----------
g : DGLGraph
The graph.
num_samples : int
The number of desired negative samples to generate.
exclude_self_loops : bool, optional
Whether to exclude self-loops from the negative samples.  Only impacts the
edge types whose source and destination node types are the same.

Default: True.
replace : bool, optional
Whether to sample with replacement.  Setting it to True will make things
faster.  (Default: False)
etype : str or tuple of str, optional
The edge type.  Can be omitted if the graph only has one edge type.
redundancy : float, optional
Indicates how much more negative samples to actually generate during rejection sampling
before finding the unique pairs.

Increasing it will increase the likelihood of getting :attr:`num_samples` negative
samples, but will also take more time and memory.

(Default: automatically determined by the density of graph)

Returns
-------
tuple[Tensor, Tensor]
The source and destination pairs.

Examples
--------
>>> g = dgl.graph(([0, 1, 2], [1, 2, 3]))
>>> dgl.sampling.global_uniform_negative_sampling(g, 3)
(tensor([0, 1, 3]), tensor([2, 0, 2]))
"""
if etype is None:
etype = g.etypes
utype, _, vtype = g.to_canonical_etype(etype)
exclude_self_loops = exclude_self_loops and (utype == vtype)

redundancy = _calc_redundancy(
num_samples, g.num_edges(etype), g.num_nodes(utype) * g.num_nodes(vtype)
)

etype_id = g.get_etype_id(etype)
src, dst = _CAPI_DGLGlobalUniformNegativeSampling(
g._graph,
etype_id,
num_samples,
3,
exclude_self_loops,
replace,
redundancy,
)
return F.from_dgl_nd(src), F.from_dgl_nd(dst)

DGLGraph.global_uniform_negative_sampling = utils.alias_func(
global_uniform_negative_sampling
)

_init_api("dgl.sampling.negative", __name__)
```