#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author: Mara Alexandru Cristian
# Contact: alexandru.mara@ugent.be
# Date: 18/12/2018
# This file provides two different implementations of the katz score. The first, computes the exact score for the
# complete graph using the adjacency matrix. The second, computes the approximated Katz score for each input node-pair.
# Only undirected Graphs and Digraphs are supported.
import networkx as nx
import numpy as np
__all__ = ['Katz', 'KatzApprox']
[docs]class Katz(object):
"""
Computes the exact katz similarity based on paths between nodes in the graph. Shorter paths will contribute more
than longer ones. This contribution depends of the damping factor 'beta'. The exact katz score is computed using
the adj matrix of the full graph.
Parameters
----------
G : graph
A NetworkX graph or digraph with nodes being consecutive integers starting at 0.
beta = float, optional
The damping factor for the model. Default is 0.005.
Notes
-----
The execution is based on dense matrices, so it may run out of memory.
"""
def __init__(self, G, beta=0.005):
self._G = G
self.beta = beta
self.sim = self._fit()
def _fit(self):
# Version using sparse matrices
# adj = nx.adjacency_matrix(self._G, nodelist=range(len(self._G.nodes)))
# ident = sparse.identity(len(self._G.nodes)).tocsc()
# sim = inv(ident - adj.multiply(self.beta).T) - ident
# adj = nx.adjacency_matrix(self._G, nodelist=range(len(self._G.nodes)))
# aux = adj.multiply(-self.beta).T
# aux.setdiag(1+aux.diagonal(), k=0)
# sim = inv(aux)
# sim.setdiag(sim.diagonal()-1)
# print(sim.nnz)
# print(adj.nnz)
# Version using dense matrices
adj = nx.adjacency_matrix(self._G, nodelist=range(len(self._G.nodes)))
aux = adj.T.multiply(-self.beta).todense()
np.fill_diagonal(aux, 1+aux.diagonal())
sim = np.linalg.inv(aux)
np.fill_diagonal(sim, sim.diagonal()-1)
return sim
[docs] def predict(self, ebunch):
"""
Computes the katz score for all node-pairs in ebunch.
Parameters
----------
ebunch : iterable
An iterable of node-pairs for which to compute the katz score.
Returns
-------
ndarray
An array containing the similarity scores.
"""
ebunch = np.array(ebunch)
return np.array(self.sim[ebunch[:, 0], ebunch[:, 1]]).flatten()
[docs] def save_sim_matrix(self, filename):
"""
Stores the similarity matrix to a file with the given name.
Parameters
----------
filename : string
The name and path of the file where the similarity matrix should be stored.
"""
np.savetxt(filename, self.sim, delimiter=',', fmt='%d')
[docs] def get_params(self):
"""
Returns a dictionary of model parameters.
Returns
-------
params : dict
A dictionary of model parameters and their values.
"""
params = {'beta': self.beta}
return params
[docs]class KatzApprox(object):
"""
Computes the approximated katz similarity based on paths between nodes in the graph. Shorter paths will contribute
more than longer ones. This contribution depends of the damping factor 'beta'. The approximated score is computed
using all paths between nodes of length at most 'path_len'.
Parameters
----------
G : graph
A NetworkX graph or digraph.
beta : float, optional
The damping factor for the model. Default is 0.005.
path_len : int, optional
The maximum path length to consider between each pair of nodes. Default is 3.
Notes
-----
The implementation follows the indication in [1]. It becomes extremely slow for large dense graphs.
References
----------
.. [1] R. Laishram "Link Prediction in Dynamic Weighted and Directed Social Network using Supervised Learning"
Dissertations - ALL. 355. 2015
"""
def __init__(self, G, beta=0.005, path_len=3):
self._G = G
self.beta = beta
self.path_len = path_len
[docs] def fit_predict(self, ebunch):
"""
Computes the katz score for all node-pairs in ebunch.
Parameters
----------
ebunch : iterable
An iterable of node-pairs for which to compute the katz score.
Returns
-------
ndarray
An array containing the similarity scores.
"""
res = list()
betas = np.zeros(self.path_len)
for i in range(len(betas)):
betas[i] = np.power(self.beta, i+1)
for u, v in ebunch:
paths = np.zeros(self.path_len)
for path in nx.all_simple_paths(self._G, source=u, target=v, cutoff=self.path_len):
paths[len(path)-2] += 1 # Simple paths output at most path_len+1, plus -1 because indexing at 0
res.append(np.sum(betas * paths))
return np.array(res).reshape(-1, 1)
[docs] def get_params(self):
"""
Returns a dictionary of model parameters.
Returns
-------
params : dict
A dictionary of model parameters and their values.
"""
params = {'beta': self.beta, 'path_len': self.path_len}
return params