# Source code for evalne.evaluation.edge_embeddings

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author: Mara Alexandru Cristian
# Contact: alexandru.mara@ugent.be
# Date: 18/12/2018

# This file provides implementations of several operators for computing node-pair embeddings from node feature vectors.

import numpy as np


def average(X, ebunch):
    """
    Computes the embedding of each node pair (u, v) in ebunch as the element-wise average of the embeddings of
    nodes u and v.

    Parameters
    ----------
    X : dict
        A dictionary of {`nodeID`: embed_vect, `nodeID`: embed_vect, ...}. Dictionary keys are expected to be of type
        string and values array_like (ndarrays, lists or tuples of numbers), all of the same length.
    ebunch : iterable
        An iterable of node pairs (u,v) for which the embeddings must be computed. Any iterable is accepted,
        including generators; each pair must support indexing with 0 and 1.

    Returns
    -------
    emb : ndarray
        A 2-D array of shape (number of node pairs, embedding dimension) containing the node-pair embeddings as rows.
        In the same order as ebunch.

    Raises
    ------
    KeyError
        If a nodeID in ebunch (after conversion to string) is not a key of X.
    IndexError
        If X is empty, since the embedding dimension can not be determined.

    Notes
    -----
    Formally, if we use x(u) to denote the embedding corresponding to node u and x(v) to denote the embedding
    corresponding to node v, and if we use i to refer to the ith position in these vectors, then, the embedding of the
    pair (u, v) can be computed element-wise as: :math:`x(u, v)_i = \\frac{x(u)_i + x(v)_i}{2}`.
    Also note that all nodeID's in ebunch must exist in X, otherwise, the method will fail.

    Examples
    --------
    Simple example of function use and input parameters:

    >>> X = {'1': np.array([0, 0, 0, 0]), '2': np.array([2, 2, 2, 2]), '3': np.array([1, 1, -1, -1])}
    >>> ebunch = ((2, 1), (1, 1), (2, 2), (1, 3), (3, 1), (2, 3), (3, 2))
    >>> average(X, ebunch)
    array([[ 1. ,  1. ,  1. ,  1. ],
           [ 0. ,  0. ,  0. ,  0. ],
           [ 2. ,  2. ,  2. ,  2. ],
           [ 0.5,  0.5, -0.5, -0.5],
           [ 0.5,  0.5, -0.5, -0.5],
           [ 1.5,  1.5,  0.5,  0.5],
           [ 1.5,  1.5,  0.5,  0.5]])

    """
    # Materialise ebunch so that generators and other iterables without len() are supported.
    pairs = list(ebunch)
    # The embedding dimension is read from the first entry of X.
    dim = len(list(X.values())[0])
    edge_embeds = np.zeros((len(pairs), dim))
    for i, edge in enumerate(pairs):
        # np.asarray makes plain lists/tuples usable; with raw lists `+` would concatenate instead of add.
        src = np.asarray(X[str(edge[0])])
        dst = np.asarray(X[str(edge[1])])
        edge_embeds[i] = (src + dst) / 2.0
    return edge_embeds


def hadamard(X, ebunch):
    """
    Computes the embedding of each node pair (u, v) in ebunch as the element-wise product between the
    embeddings of nodes u and v.

    Parameters
    ----------
    X : dict
        A dictionary of {`nodeID`: embed_vect, `nodeID`: embed_vect, ...}. Dictionary keys are expected to be of type
        string and values array_like (ndarrays, lists or tuples of numbers), all of the same length.
    ebunch : iterable
        An iterable of node pairs (u,v) for which the embeddings must be computed. Any iterable is accepted,
        including generators; each pair must support indexing with 0 and 1.

    Returns
    -------
    emb : ndarray
        A 2-D array of shape (number of node pairs, embedding dimension) containing the node-pair embeddings as rows.
        In the same order as ebunch.

    Raises
    ------
    KeyError
        If a nodeID in ebunch (after conversion to string) is not a key of X.
    IndexError
        If X is empty, since the embedding dimension can not be determined.

    Notes
    -----
    Formally, if we use x(u) to denote the embedding corresponding to node u and x(v) to denote the embedding
    corresponding to node v, and if we use i to refer to the ith position in these vectors, then, the embedding of the
    pair (u, v) can be computed element-wise as: :math:`x(u, v)_i = x(u)_i * x(v)_i`.
    Also note that all nodeID's in ebunch must exist in X, otherwise, the method will fail.

    Examples
    --------
    Simple example of function use and input parameters:

    >>> X = {'1': np.array([0, 0, 0, 0]), '2': np.array([2, 2, 2, 2]), '3': np.array([1, 1, -1, -1])}
    >>> ebunch = ((2, 1), (1, 1), (2, 2), (1, 3), (3, 1), (2, 3), (3, 2))
    >>> hadamard(X, ebunch)
    array([[ 0.,  0.,  0.,  0.],
           [ 0.,  0.,  0.,  0.],
           [ 4.,  4.,  4.,  4.],
           [ 0.,  0.,  0.,  0.],
           [ 0.,  0.,  0.,  0.],
           [ 2.,  2., -2., -2.],
           [ 2.,  2., -2., -2.]])

    """
    # Materialise ebunch so that generators and other iterables without len() are supported.
    pairs = list(ebunch)
    # The embedding dimension is read from the first entry of X.
    dim = len(list(X.values())[0])
    edge_embeds = np.zeros((len(pairs), dim))
    for i, edge in enumerate(pairs):
        # np.asarray makes plain lists/tuples usable; raw lists do not support element-wise `*`.
        src = np.asarray(X[str(edge[0])])
        dst = np.asarray(X[str(edge[1])])
        edge_embeds[i] = src * dst
    return edge_embeds


def weighted_l1(X, ebunch):
    """
    Computes the embedding of each node pair (u, v) in ebunch as the element-wise weighted L1 distance between the
    embeddings of nodes u and v.

    Parameters
    ----------
    X : dict
        A dictionary of {`nodeID`: embed_vect, `nodeID`: embed_vect, ...}. Dictionary keys are expected to be of type
        string and values array_like (ndarrays, lists or tuples of numbers), all of the same length.
    ebunch : iterable
        An iterable of node pairs (u,v) for which the embeddings must be computed. Any iterable is accepted,
        including generators; each pair must support indexing with 0 and 1.

    Returns
    -------
    emb : ndarray
        A 2-D array of shape (number of node pairs, embedding dimension) containing the node-pair embeddings as rows.
        In the same order as ebunch.

    Raises
    ------
    KeyError
        If a nodeID in ebunch (after conversion to string) is not a key of X.
    IndexError
        If X is empty, since the embedding dimension can not be determined.

    Notes
    -----
    Formally, if we use x(u) to denote the embedding corresponding to node u and x(v) to denote the embedding
    corresponding to node v, and if we use i to refer to the ith position in these vectors, then, the embedding of the
    pair (u, v) can be computed element-wise as: :math:`x(u, v)_i = |x(u)_i - x(v)_i|`.
    Also note that all nodeID's in ebunch must exist in X, otherwise, the method will fail.

    Examples
    --------
    Simple example of function use and input parameters:

    >>> X = {'1': np.array([0, 0, 0, 0]), '2': np.array([2, 2, 2, 2]), '3': np.array([1, 1, -1, -1])}
    >>> ebunch = ((2, 1), (1, 1), (2, 2), (1, 3), (3, 1), (2, 3), (3, 2))
    >>> weighted_l1(X, ebunch)
    array([[2., 2., 2., 2.],
           [0., 0., 0., 0.],
           [0., 0., 0., 0.],
           [1., 1., 1., 1.],
           [1., 1., 1., 1.],
           [1., 1., 3., 3.],
           [1., 1., 3., 3.]])

    """
    # Materialise ebunch so that generators and other iterables without len() are supported.
    pairs = list(ebunch)
    # The embedding dimension is read from the first entry of X.
    dim = len(list(X.values())[0])
    edge_embeds = np.zeros((len(pairs), dim))
    for i, edge in enumerate(pairs):
        # np.asarray makes plain lists/tuples usable; raw lists do not support element-wise `-`.
        src = np.asarray(X[str(edge[0])])
        dst = np.asarray(X[str(edge[1])])
        edge_embeds[i] = np.abs(src - dst)
    return edge_embeds


def weighted_l2(X, ebunch):
    """
    Computes the embedding of each node pair (u, v) in ebunch as the element-wise weighted L2 distance between the
    embeddings of nodes u and v.

    Parameters
    ----------
    X : dict
        A dictionary of {`nodeID`: embed_vect, `nodeID`: embed_vect, ...}. Dictionary keys are expected to be of type
        string and values array_like (ndarrays, lists or tuples of numbers), all of the same length.
    ebunch : iterable
        An iterable of node pairs (u,v) for which the embeddings must be computed. Any iterable is accepted,
        including generators; each pair must support indexing with 0 and 1.

    Returns
    -------
    emb : ndarray
        A 2-D array of shape (number of node pairs, embedding dimension) containing the node-pair embeddings as rows.
        In the same order as ebunch.

    Raises
    ------
    KeyError
        If a nodeID in ebunch (after conversion to string) is not a key of X.
    IndexError
        If X is empty, since the embedding dimension can not be determined.

    Notes
    -----
    Formally, if we use x(u) to denote the embedding corresponding to node u and x(v) to denote the embedding
    corresponding to node v, and if we use i to refer to the ith position in these vectors, then, the embedding of the
    pair (u, v) can be computed element-wise as: :math:`x(u, v)_i = (x(u)_i - x(v)_i)^2`.
    Also note that all nodeID's in ebunch must exist in X, otherwise, the method will fail.

    Examples
    --------
    Simple example of function use and input parameters:

    >>> X = {'1': np.array([0, 0, 0, 0]), '2': np.array([2, 2, 2, 2]), '3': np.array([1, 1, -1, -1])}
    >>> ebunch = ((2, 1), (1, 1), (2, 2), (1, 3), (3, 1), (2, 3), (3, 2))
    >>> weighted_l2(X, ebunch)
    array([[4., 4., 4., 4.],
           [0., 0., 0., 0.],
           [0., 0., 0., 0.],
           [1., 1., 1., 1.],
           [1., 1., 1., 1.],
           [1., 1., 9., 9.],
           [1., 1., 9., 9.]])

    """
    # Materialise ebunch so that generators and other iterables without len() are supported.
    pairs = list(ebunch)
    # The embedding dimension is read from the first entry of X.
    dim = len(list(X.values())[0])
    edge_embeds = np.zeros((len(pairs), dim))
    for i, edge in enumerate(pairs):
        # np.asarray makes plain lists/tuples usable; raw lists do not support element-wise `-`.
        src = np.asarray(X[str(edge[0])])
        dst = np.asarray(X[str(edge[1])])
        edge_embeds[i] = np.power(src - dst, 2)
    return edge_embeds


def compute_edge_embeddings(X, ebunch, method='hadamard'):
    """
    Computes the embedding of each node pair (u, v) in ebunch as an element-wise operation on the embeddings of the end
    nodes u and v. The operator used is determined by the `method` parameter.

    Parameters
    ----------
    X : dict
        A dictionary of {`nodeID`: embed_vect, `nodeID`: embed_vect, ...}. Dictionary keys are expected to be of type
        string and values array_like.
    ebunch : iterable
        An iterable of node pairs (u,v) for which the embeddings must be computed.
    method : string, optional
        The operator to be used for computing the node-pair embeddings. Options are: `average`, `hadamard`,
        `weighted_l1` or `weighted_l2`. Default is `hadamard`.

    Returns
    -------
    emb : ndarray
        The array returned by the selected operator: node-pair embeddings as rows, in the same order as ebunch.

    Raises
    ------
    ValueError
        If `method` is not one of the supported operator names.

    Examples
    --------
    Simple example of function use and input parameters:

    >>> X = {'1': np.array([0, 0, 0, 0]), '2': np.array([2, 2, 2, 2]), '3': np.array([1, 1, -1, -1])}
    >>> ebunch = ((2, 1), (1, 1), (2, 2), (1, 3), (3, 1), (2, 3), (3, 2))
    >>> compute_edge_embeddings(X, ebunch, 'average')
    array([[ 1. ,  1. ,  1. ,  1. ],
           [ 0. ,  0. ,  0. ,  0. ],
           [ 2. ,  2. ,  2. ,  2. ],
           [ 0.5,  0.5, -0.5, -0.5],
           [ 0.5,  0.5, -0.5, -0.5],
           [ 1.5,  1.5,  0.5,  0.5],
           [ 1.5,  1.5,  0.5,  0.5]])

    """
    # Guard clauses: each recognised name delegates to the operator of the same name in this module.
    if method == 'hadamard':
        return hadamard(X, ebunch)
    if method == 'average':
        return average(X, ebunch)
    if method == 'weighted_l1':
        return weighted_l1(X, ebunch)
    if method == 'weighted_l2':
        return weighted_l2(X, ebunch)
    # Anything else is rejected explicitly rather than silently falling back to a default operator.
    raise ValueError("Unknown method!")