#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author: Mara Alexandru Cristian
# Contact: alexandru.mara@ugent.be
# Date: 18/12/2018
# This file contains methods and classes that simplify the management and splitting of edges in sets of train and test
# or train and validation.
# TODO v0.4.0: Change naming from train_edges/test_edges to train_data/test_data.
# TODO v0.4.0: Change naming from train_E/train_E_false to train_pos/train_neg.
import numpy as np
import networkx as nx
from abc import abstractmethod
from evalne.utils import preprocess as pp
from evalne.utils import split_train_test as stt
class BaseEvalSplit(object):
    """
    Base class that provides a high level interface for managing/computing sets of train and test edges and non-edges
    for LP, SP and NR tasks. The class exposes the train edges and non-edges through the `train_edges` property and
    the test edges and non-edges through the `test_edges` property. Parameters used to compute these sets are also made
    available.
    """

    def __init__(self):
        # Node pairs and labels; all of them stay None until `_set_splits` is called
        self._train_edges = None
        self._test_edges = None
        self._train_labels = None
        self._test_labels = None
        self._TG = None
        # Data related statistics
        self._train_frac = None
        self._split_alg = None
        self._nw_name = None
        self._split_id = None

    @property
    def train_edges(self):
        """The set of training node pairs."""
        return self._train_edges

    @property
    def test_edges(self):
        """The set of test node pairs."""
        return self._test_edges

    @property
    def train_labels(self):
        """A list of train node-pair labels. Labels can be either 0 or 1 and denote non-edges and edges,
        respectively (for SP they denote negative and positive links, respectively)."""
        return self._train_labels

    @property
    def test_labels(self):
        """A list of test node-pair labels. Labels can be either 0 or 1 and denote non-edges and edges,
        respectively (for SP they denote negative and positive links, respectively)."""
        return self._test_labels

    @property
    def TG(self):
        """A NetworkX graph or digraph to be used for training the embedding methods. For LP this should be the graph
        spanned by all train edges, for SP the graph spanned by the positive and negative train edges (with signs as
        edge weights) and for NR the entire graph being evaluated."""
        return self._TG

    @property
    def train_frac(self):
        """A float indicating the fraction of train edges out of all train and test edges."""
        return self._train_frac

    @property
    def split_alg(self):
        """A string indicating the algorithm used to split edges in train and test sets."""
        return self._split_alg

    @property
    def nw_name(self):
        """A string indicating the name of the dataset used to generate the sets of edges."""
        return self._nw_name

    @property
    def split_id(self):
        """An int used as an ID for this particular train/test split."""
        return self._split_id

    @staticmethod
    def _stack_pairs(pos, neg=None):
        """
        Joins edges and non-edges in a single array and builds the matching label vector.

        Parameters
        ----------
        pos : iterable
            The node pairs labelled 1 (edges).
        neg : iterable, optional
            The node pairs labelled 0 (non-edges). None and empty collections are both accepted. Default is None.

        Returns
        -------
        pairs : ndarray
            The `pos` node pairs followed by the `neg` node pairs.
        labels : ndarray
            A vector with one 1 per element in `pos` followed by one 0 per element in `neg`.
        """
        pos = list(pos)
        neg = [] if neg is None else list(neg)
        labels = np.hstack((np.ones(len(pos)), np.zeros(len(neg))))
        if pos and neg:
            pairs = np.vstack((pos, neg))
        else:
            # np.vstack can not join a block of pairs with an empty one (the shapes do not match), so when at most
            # one of the two collections has elements that collection alone becomes the result.
            pairs = np.array(pos or neg)
        return pairs, labels

    def _set_splits(self, train_E, train_E_false=None, test_E=None, test_E_false=None, directed=False, nw_name='test',
                    TG=None, split_id=0, split_alg='spanning_tree', verbose=False):
        """
        Sets the class attributes to the provided input values. The input train edges and non-edges as well as the
        test edges and non-edges are respectively joined to form the `train_edges` and `test_edges` class attributes.
        Train and test labels are also inferred from the input data. Empty collections of non-edges are treated
        exactly like missing ones.

        Parameters
        ----------
        train_E : set
            Set of train edges.
        train_E_false : set, optional
            Set of train non-edges. Default is None.
        test_E : set, optional
            Set of test edges. If None, `test_edges` and `test_labels` are set to empty lists and `train_frac` to 1.
            Default is None.
        test_E_false : set, optional
            Set of test non-edges. Default is None.
        directed : bool, optional
            True if the splits correspond to a directed graph, false otherwise. Only used when `TG` is None.
            Default is False.
        nw_name : string, optional
            A string indicating the name of the dataset from which this split was generated.
            This is required in order to keep track of the evaluation results. Default is `test`.
        TG : graph, optional
            A NetworkX graph or digraph to be used for training the embedding methods. A copy of it is stored.
            If None, the graph will be generated from the set of train edges. Default is None.
        split_id : int, optional
            An ID that identifies this particular train/test split. Default is 0.
        split_alg : string, optional
            A string indicating the algorithm used to generate the train/test splits. Options are `spanning_tree`,
            `random`, `naive`, `fast`, `timestamp` and `random_edge_sample`. Default is `spanning_tree`.
        verbose : bool, optional
            If True prints progress info. Default is False.

        Raises
        ------
        ValueError
            If the train edge set is not provided (None or empty).
        """
        # Validate before touching any attribute. `len` is used (instead of truthiness) so numpy arrays are accepted.
        if train_E is None or len(train_E) == 0:
            raise ValueError("Train edges are always required!")

        # Train node pairs with labels 1 for edges and 0 for non-edges
        self._train_edges, self._train_labels = self._stack_pairs(train_E, train_E_false)

        # Test node pairs, labels and the resulting fraction of train edges
        if test_E is not None:
            self._test_edges, self._test_labels = self._stack_pairs(test_E, test_E_false)
            self._train_frac = np.around(len(train_E) / (len(train_E) + len(test_E)), 4)
        else:
            self._test_edges = []
            self._test_labels = []
            self._train_frac = 1

        # Initialize the training graph
        if TG is None:
            self._TG = nx.DiGraph() if directed else nx.Graph()
            self._TG.add_edges_from(train_E)
        else:
            # Store a copy so later changes to the caller's graph do not affect this split
            self._TG = TG.copy()

        # Set the remaining class attributes to the new values
        self._split_alg = split_alg
        self._split_id = split_id
        self._nw_name = nw_name

        # Print the process
        if verbose:
            print("Edge splits computed using {} alg. ready.".format(self.split_alg))

    def get_parameters(self):
        """
        Returns the class properties except the sets of train and test node pairs, labels and train graph.

        Returns
        -------
        parameters : dict
            The parameters used when computing this split as a dictionary of parameters and values. The keys are
            `train_frac`, `split_alg`, `nw_name` and `split_id`.
        """
        params = {"train_frac": self.train_frac, "split_alg": self.split_alg,
                  "nw_name": self.nw_name, "split_id": self.split_id}
        return params

    def get_data(self):
        """
        Returns the sets of train and test node pairs and label vectors.

        Returns
        -------
        train_edges : set
            Set of all train edges and non-edges.
        train_labels : list
            A list of labels indicating if each train node-pair is an edge or non-edge (1 or 0).
        test_edges : set
            Set of all test edges and non-edges.
        test_labels : list
            A list of labels indicating if each test node-pair is an edge or non-edge (1 or 0).
        """
        return self.train_edges, self.train_labels, self.test_edges, self.test_labels

    def save_tr_graph(self, output_path, delimiter=',', write_stats=False, write_weights=False, write_dir=True):
        """
        Saves the TG graph to a file.

        Parameters
        ----------
        output_path : file or string
            File or filename to write. If a file is provided, it must be opened in 'wb' mode.
        delimiter : string, optional
            The string used to separate values. Default is ','.
        write_stats : bool, optional
            Adds basic graph statistics to the file as a header or not. Default is False.
        write_weights : bool, optional
            If True data will be stored as weighted edgelist i.e. triplets (src, dst, weight), otherwise, as regular
            (src, dst) pairs. For unweighted graphs, setting this parameter to True will add weight 1 to all edges.
            Default is False.
        write_dir : bool, optional
            This parameter is only relevant for undirected graphs. If True, it forces the method to write both edge
            directions in the file i.e. (src, dst) and (dst, src). If False, only one direction is stored.
            Default is True.

        See also
        --------
        evalne.utils.preprocess.save_graph
        """
        pp.save_graph(self._TG, output_path=output_path, delimiter=delimiter, write_stats=write_stats,
                      write_weights=write_weights, write_dir=write_dir)

    def store_edgelists(self, train_path, test_path):
        """
        Writes the sets of train and test node pairs to files with the specified names.

        Parameters
        ----------
        train_path : string
            Indicates the path where the train data will be stored.
        test_path : string
            Indicates the path where the test data will be stored.

        See also
        --------
        evalne.utils.split_train_test.store_edgelists
        """
        stt.store_edgelists(train_path, test_path, self.train_edges, self.test_edges)
class NREvalSplit(BaseEvalSplit):
    """
    Class that provides a high level interface for managing/computing sets of train edges and non-edges
    for NR tasks. The class exposes the train edges and non-edges through the `train_edges` property. Test edges
    are not used for NR and therefore the `test_edges` property will be left empty. Parameters used to compute
    these sets are also made available.

    Notes
    -----
    In network reconstruction the aim is to assess how well an embedding method captures the structure of a given
    graph. The embedding methods are trained on a complete input graph. Hyperparameter tuning is performed directly on
    this graph (overfitting is, in this case, expected and desired). The embeddings obtained are used to perform link
    predictions and their quality is evaluated. Checking the link predictions for all node pairs is generally
    unfeasible, therefore a subset of all node pairs in the input graph are selected for evaluation.
    """

    def __init__(self):
        # The NR specific attribute is created first, the shared ones are created by the parent class
        self._samp_frac = None
        super(NREvalSplit, self).__init__()

    @property
    def samp_frac(self):
        """A float indicating the fraction of node pairs out of all possible ones sampled for NR evaluation."""
        return self._samp_frac

    def set_splits(self, TG, train_E, train_E_false=None, samp_frac=None, directed=False, nw_name='test',
                   split_id=0, verbose=False):
        """
        Stores the given train edges and non-edges, which are joined to form the `train_edges` class attribute.
        The train labels are inferred from the input data. No test data is stored for NR.

        Parameters
        ----------
        TG : graph
            A NetworkX graph or digraph, the complete network from which train_E and train_E_false were sampled.
        train_E : set
            Set of train edges.
        train_E_false : set, optional
            Set of train non-edges. Default is None.
        samp_frac : float, optional
            The fraction of node-pairs out of all possible ones sampled for NR evaluation. Default is None.
        directed : bool, optional
            True if the splits correspond to a directed graph, false otherwise. Default is False.
        nw_name : string, optional
            A string indicating the name of the dataset from which this split was generated.
            This is required in order to keep track of the evaluation results. Default is `test`.
        split_id : int, optional
            An ID that identifies this particular train/test split. Default is 0.
        verbose : bool, optional
            If True prints progress info. Default is False.

        Raises
        ------
        ValueError
            If the train edge set is not provided.
        """
        # Remember the sampling fraction, the only NR specific value
        self._samp_frac = samp_frac

        # Everything else is handled by the parent class. NR has no test data, so both test sets are None and the
        # split algorithm is always reported as 'random_edge_sample'.
        super(NREvalSplit, self)._set_splits(train_E, train_E_false, test_E=None, test_E_false=None,
                                             TG=TG, directed=directed, nw_name=nw_name, split_id=split_id,
                                             split_alg='random_edge_sample', verbose=verbose)

    def compute_splits(self, G, nw_name='test', samp_frac=0.01, split_id=0, verbose=False):
        """
        Computes sets of train edges and non-edges by randomly sampling elements from the adjacency matrix of G and
        initializes the class attributes.

        Parameters
        ----------
        G : graph
            A NetworkX graph or digraph to sample node pairs from. The adjacency matrix is built with node list
            0..N-1, so the nodes are expected to carry those labels.
        nw_name : string, optional
            A string indicating the name of the dataset from which this split was generated.
            This is required in order to keep track of the evaluation results. Default is 'test'.
        samp_frac : float, optional
            The fraction of node-pairs out of all possible ones to sample for NR evaluation. Default is 0.01 (1%).
        split_id : int, optional
            The id to be assigned to the train/test splits generated. Default is 0.
        verbose : bool, optional
            If True print progress info. Default is False.

        Returns
        -------
        train_E : set
            The set of train edges.
        train_false_E : set
            The set of train non-edges.

        Raises
        ------
        ValueError
            If no edges were selected while sampling (the sampling fraction is too low for this network).
        """
        # Adjacency matrix with rows/columns ordered by node id
        adj = nx.adjacency_matrix(G, nodelist=range(len(G.nodes)))
        is_dir = nx.is_directed(G)

        # Sample the required number of node pairs from the graph
        sampled_pos, sampled_neg = stt.random_edge_sample(adj, samp_frac, is_dir)

        # Both edges and non-edges are required, so sampling no edges at all is an error
        if len(sampled_pos) == 0:
            raise ValueError("Sampling fraction {} on {} network is too low, no edges were selected.".format(samp_frac,
                                                                                                             nw_name))

        # Set class attributes to new values
        self.set_splits(TG=G, train_E=sampled_pos, train_E_false=sampled_neg, samp_frac=samp_frac,
                        directed=is_dir, nw_name=nw_name, split_id=split_id, verbose=verbose)
        return sampled_pos, sampled_neg

    def get_parameters(self):
        """
        Returns the class properties except the sets of train and test node pairs, labels and train graph.

        Returns
        -------
        parameters : dict
            The parameters used when computing this split as a dictionary of parameters and values.
        """
        # Start from the parameters of the parent class and add the NR specific one
        params = super(NREvalSplit, self).get_parameters()
        params["samp_frac"] = self._samp_frac
        return params
class SPEvalSplit(BaseEvalSplit):
    """
    Class that provides a high level interface for managing/computing sets of train and test positive and negative edges
    for SP tasks. The class exposes the train positive and negative edges through the `train_edges` property and
    the test positive and negative edges through the `test_edges` property. Parameters used to compute these sets are
    also made available.

    Notes
    -----
    In sign prediction the aim is to predict the sign (positive or negative) of given edges. The existence of the edges
    is assumed (i.e. we do not predict the sign of unconnected node pairs). Therefore, sign prediction is also a binary
    classification task similar to link prediction where, instead of predicting the existence of edges or not, we
    predict the signs for edges we know exist. Unlike for link prediction, in this case we do not need to perform
    negative sampling, since we already have both classes (the positively and the negatively connected node pairs).
    """

    def __init__(self):
        super(SPEvalSplit, self).__init__()

    def set_splits(self, train_E, train_E_false=None, test_E=None, test_E_false=None, directed=False, nw_name='test',
                   TG=None, split_id=0, split_alg='spanning_tree', verbose=False):
        """
        Sets the class attributes to the provided input values. The input train positive and negative edges as well as
        the test positive and negative edges are respectively joined to form the `train_edges` and `test_edges` class
        attributes. Train and test labels (0 or 1 representing negative and positive edges, respectively) are also
        inferred from the input data.

        Parameters
        ----------
        train_E : set
            Set of positive train edges.
        train_E_false : set, optional
            Set of negative train edges. Default is None.
        test_E : set, optional
            Set of positive test edges. Default is None.
        test_E_false : set, optional
            Set of negative test edges. Default is None.
        directed : bool, optional
            True if the splits correspond to a directed graph, false otherwise. Default is False.
        nw_name : string, optional
            A string indicating the name of the dataset from which this split was generated.
            This is required in order to keep track of the evaluation results. Default is `test`.
        TG : graph, optional
            A NetworkX graph or digraph containing all the train edges (positive and negative). If None, the graph will
            be generated from the sets of positive and negative train edges, which are stored with edge weights +1
            and -1, respectively. Default is None.
        split_id : int, optional
            An ID that identifies this particular train/test split. Default is 0.
        split_alg : string, optional
            A string indicating the algorithm used to generate the train/test splits. Options are `spanning_tree`,
            `random`, `naive`, `fast` and `timestamp`. Default is `spanning_tree`.
        verbose : bool, optional
            If True prints progress info. Default is False.

        Raises
        ------
        ValueError
            If the train edge set is not provided.
        """
        # Fail with the documented error before trying to build a graph out of missing train edges
        if train_E is None or len(train_E) == 0:
            raise ValueError("Train edges are always required!")

        # Initialize the training graph
        if TG is None:
            TG = nx.DiGraph() if directed else nx.Graph()
            # Keep the signs as edge weights, consistent with the train graph built in `compute_splits`
            TG.add_edges_from(train_E, weight=1)
            # The negative train edges are optional (None is the default value)
            if train_E_false is not None:
                TG.add_edges_from(train_E_false, weight=-1)

        # Set the parameters by calling the parent class private set method
        super(SPEvalSplit, self)._set_splits(train_E=train_E, train_E_false=train_E_false, test_E=test_E,
                                             test_E_false=test_E_false, directed=directed, nw_name=nw_name,
                                             TG=TG, split_id=split_id, split_alg=split_alg, verbose=verbose)

    def compute_splits(self, G, nw_name='test', train_frac=0.51, split_alg='spanning_tree', split_id=0, verbose=False):
        """
        Computes sets of train and test positive and negative edges according to the given input parameters and
        initializes the class attributes.

        Parameters
        ----------
        G : graph
            A NetworkX graph or digraph to compute the train test split from. Edge signs are read from its adjacency
            matrix (entries equal to 1 are positive edges and entries equal to -1 negative ones), which is built with
            node list 0..N-1.
        nw_name : string, optional
            A string indicating the name of the dataset from which this split was generated.
            This is required in order to keep track of the evaluation results. Default is 'test'.
        train_frac : float, optional
            The proportion of train edges w.r.t. the total number of edges in the input graph (range (0.0, 1.0]).
            Default is 0.51.
        split_alg : string, optional
            A string indicating the algorithm to use for generating the train/test splits. Options are `spanning_tree`,
            `random`, `naive`, `fast` and `timestamp`. Default is `spanning_tree`.
        split_id : int, optional
            The id to be assigned to the train/test splits generated. Default is 0.
        verbose : bool, optional
            If True print progress info. Default is False.

        Returns
        -------
        train_E : set
            The set of train positive edges.
        train_false_E : set
            The set of train negative edges.
        test_E : set
            The set of test positive edges.
        test_false_E : set
            The set of test negative edges.

        Raises
        ------
        ValueError
            If the edge split algorithm is unknown.
        """
        # Compute train/test split
        if split_alg == 'random':
            tr_E, te_E = stt.rand_split_train_test(G, train_frac)
            # NOTE(review): G is rebound to the graph returned by relabel_nodes; confirm that graph keeps the
            # +1 / -1 edge weights that are read from the adjacency matrix below.
            train_E, test_E, G, _ = pp.relabel_nodes(tr_E, te_E, G.is_directed())
        elif split_alg == 'naive':
            train_E, test_E = stt.naive_split_train_test(G, train_frac)
        elif split_alg == 'spanning_tree':
            train_E, test_E = stt.split_train_test(G, train_frac)
        elif split_alg == 'fast':
            train_E, test_E = stt.quick_split(G, train_frac)
        elif split_alg == 'timestamp':
            train_E, test_E, _ = stt.timestamp_split(G, train_frac)
        else:
            raise ValueError('Split alg. {} unknown!'.format(split_alg))

        # Make sure the edges are numpy arrays
        train_E = np.array(list(train_E))
        test_E = np.array(list(test_E))

        # Get the signs of the train and test edges from the adjacency matrix
        a = nx.adjacency_matrix(G, nodelist=range(len(G.nodes)))
        tr_labels = np.ravel(a[train_E[:, 0], train_E[:, 1]])
        te_labels = np.ravel(a[test_E[:, 0], test_E[:, 1]])

        # Split train and test edges in those with positive and negative signs
        pos_tr_e = train_E[np.where(tr_labels == 1)[0], :]
        neg_tr_e = train_E[np.where(tr_labels == -1)[0], :]
        pos_te_e = test_E[np.where(te_labels == 1)[0], :]
        neg_te_e = test_E[np.where(te_labels == -1)[0], :]

        # The train graph is the input graph without the test edges
        H = G.copy()
        H.remove_edges_from(test_E)

        # Set class attributes to new values
        self.set_splits(train_E=pos_tr_e, train_E_false=neg_tr_e, test_E=pos_te_e, test_E_false=neg_te_e,
                        directed=G.is_directed(), nw_name=nw_name, TG=H, split_id=split_id,
                        split_alg=split_alg, verbose=verbose)
        return pos_tr_e, neg_tr_e, pos_te_e, neg_te_e
class LPEvalSplit(BaseEvalSplit):
    """
    Class that provides a high level interface for managing/computing sets of train and test edges and non-edges
    for LP tasks. The class exposes the train edges and non-edges through the `train_edges` property and
    the test edges and non-edges through the `test_edges` property. Parameters used to compute these sets are
    also made available.

    Notes
    -----
    In link prediction the aim is to predict, given a set of node pairs, if they should be connected or not. This is
    generally solved as a binary classification task. For training the binary classifier, we sample a set of edges as
    well as a set of unconnected node pairs. We then compute the node-pair embeddings of this training data. We use
    the node-pair embeddings together with the corresponding labels (0 for non-edges and 1 for edges) to train the
    classifier. Finally, the performance is evaluated on the test data (the remaining edges not used in training plus
    another set of randomly selected non-edges).
    """

    def __init__(self):
        # The LP specific attributes are created first, the shared ones are created by the parent class
        self._owa = None
        self._fe_ratio = None
        super(LPEvalSplit, self).__init__()

    @property
    def owa(self):
        """A bool parameter indicating if the non-edges have been generated using the OWA (otherwise CWA)."""
        return self._owa

    @property
    def fe_ratio(self):
        """A float indicating the ratio of non-edges to edges."""
        return self._fe_ratio

    def set_splits(self, train_E, train_E_false=None, test_E=None, test_E_false=None, directed=False, nw_name='test',
                   TG=None, split_id=0, split_alg='spanning_tree', owa=True, verbose=False):
        """
        Sets the class attributes to the provided input values. The input train edges and non-edges as well as the
        test edges and non-edges are respectively joined to form the `train_edges` and `test_edges` class attributes.
        Train and test labels are also inferred from the input data.

        Parameters
        ----------
        train_E : set
            Set of train edges.
        train_E_false : set, optional
            Set of train non-edges. If None, `fe_ratio` is set to 1. Default is None.
        test_E : set, optional
            Set of test edges. Default is None.
        test_E_false : set, optional
            Set of test non-edges. Default is None.
        directed : bool, optional
            True if the splits correspond to a directed graph, false otherwise. Default is False.
        nw_name : string, optional
            A string indicating the name of the dataset from which this split was generated.
            This is required in order to keep track of the evaluation results. Default is `test`.
        TG : graph, optional
            A NetworkX graph or digraph containing all the train edges. If None, the graph will be generated from the
            set of train edges. Default is None.
        split_id : int, optional
            An ID that identifies this particular train/test split. Default is 0.
        split_alg : string, optional
            A string indicating the algorithm used to generate the train/test splits. Options are `spanning_tree`,
            `random`, `naive`, `fast` and `timestamp`. Default is `spanning_tree`.
        owa : bool, optional
            Encodes the belief that the network respects or not the open world assumption. Default is True.
            If owa=True, train non-edges are sampled from the train graph only and can overlap with test edges.
            If owa=False, train non-edges are sampled from the full graph and cannot overlap with test edges.
        verbose : bool, optional
            If True prints progress info. Default is False.

        Raises
        ------
        ValueError
            If the train edge set is not provided.
        """
        # Validate first: the ratio below divides by len(train_E), and no attribute should change on bad input
        if train_E is None or len(train_E) == 0:
            raise ValueError("Train edges are always required!")

        # Set the LP specific parameters
        self._owa = owa
        if train_E_false is not None:
            self._fe_ratio = np.around(len(train_E_false) / len(train_E), 4)
        else:
            self._fe_ratio = 1

        # Set the remaining parameters by calling the parent class private set method
        super(LPEvalSplit, self)._set_splits(train_E=train_E, train_E_false=train_E_false, test_E=test_E,
                                             test_E_false=test_E_false, directed=directed, nw_name=nw_name,
                                             TG=TG, split_id=split_id, split_alg=split_alg, verbose=verbose)

    def compute_splits(self, G, nw_name='test', train_frac=0.51, split_alg='spanning_tree', owa=True, fe_ratio=1,
                       split_id=0, verbose=False):
        """
        Computes sets of train and test edges and non-edges according to the given input parameters and initializes
        the class attributes.

        Parameters
        ----------
        G : graph
            A NetworkX graph or digraph to compute the train test split from.
        nw_name : string, optional
            A string indicating the name of the dataset from which this split was generated.
            This is required in order to keep track of the evaluation results. Default is 'test'.
        train_frac : float, optional
            The proportion of train edges w.r.t. the total number of edges in the input graph (range (0.0, 1.0]).
            Default is 0.51.
        split_alg : string, optional
            A string indicating the algorithm to use for generating the train/test splits. Options are `spanning_tree`,
            `random`, `naive`, `fast` and `timestamp`. Default is `spanning_tree`.
        owa : bool, optional
            Encodes the belief that the network should respect or not the open world assumption. Default is True.
            If owa=True, train non-edges are sampled from the train graph only and can overlap with test edges.
            If owa=False, train non-edges are sampled from the full graph and cannot overlap with test edges.
            For split_alg=`fast` the non-edges come from a dedicated sampler and this value is only recorded.
        fe_ratio : float, optional
            The ratio of non-edges to edges to sample. For fe_ratio > 0 and < 1 less non-edges than edges will be
            generated. For fe_ratio > 1 more non-edges than edges will be generated. Default 1, same amounts.
        split_id : int, optional
            The id to be assigned to the train/test splits generated. Default is 0.
        verbose : bool, optional
            If True print progress info. Default is False.

        Returns
        -------
        train_E : set
            The set of train edges
        train_false_E : set
            The set of train non-edges
        test_E : set
            The set of test edges
        test_false_E : set
            The set of test non-edges

        Raises
        ------
        ValueError
            If the edge split algorithm is unknown.
        """
        # Compute train/test split
        if split_alg == 'random':
            tr_E, te_E = stt.rand_split_train_test(G, train_frac)
            # From here on G is the graph returned by relabel_nodes
            train_E, test_E, G, _ = pp.relabel_nodes(tr_E, te_E, G.is_directed())
        elif split_alg == 'naive':
            train_E, test_E = stt.naive_split_train_test(G, train_frac)
        elif split_alg == 'spanning_tree':
            train_E, test_E = stt.split_train_test(G, train_frac)
        elif split_alg == 'fast':
            # The fast algorithm comes with its own non-edge sampler
            train_E, test_E = stt.quick_split(G, train_frac)
            train_E_false, test_E_false = stt.quick_nonedges(G, train_frac, fe_ratio)
        elif split_alg == 'timestamp':
            train_E, test_E, G = stt.timestamp_split(G, train_frac)
            # Turn the two-column edge arrays into sets of (src, dst) tuples
            train_E = set(zip(train_E[:, 0], train_E[:, 1]))
            test_E = set(zip(test_E[:, 0], test_E[:, 1]))
        else:
            raise ValueError('Split alg. {} unknown!'.format(split_alg))

        # Compute non-edges (already available for the fast algorithm)
        if split_alg != 'fast':
            num_fe_train = len(train_E) * fe_ratio
            num_fe_test = len(test_E) * fe_ratio
            if owa:
                train_E_false, test_E_false = stt.generate_false_edges_owa(G, train_E, test_E,
                                                                           num_fe_train, num_fe_test)
            else:
                train_E_false, test_E_false = stt.generate_false_edges_cwa(G, train_E, test_E,
                                                                           num_fe_train, num_fe_test)

        # Set class attributes to new values
        self.set_splits(train_E, train_E_false, test_E, test_E_false, directed=G.is_directed(), nw_name=nw_name,
                        split_id=split_id, split_alg=split_alg, owa=owa, verbose=verbose)
        return train_E, train_E_false, test_E, test_E_false

    def get_parameters(self):
        """
        Returns the class properties except the sets of train and test node pairs, labels and train graph.

        Returns
        -------
        parameters : dict
            The parameters used when computing this split as a dictionary of parameters and values.
        """
        # Get the parameters from the parent class
        params = super(LPEvalSplit, self).get_parameters()
        # Add the LP specific parameters
        params.update({"owa": self._owa, "fe_ratio": self._fe_ratio})
        return params
class EvalSplit(LPEvalSplit):
    """
    Deprecated and will be removed in v0.4.0. Use LPEvalSplit instead.
    """

    def __init__(self):
        # Start the MRO lookup at this class so that LPEvalSplit.__init__ also runs. Naming LPEvalSplit here would
        # skip it and leave the `_owa` and `_fe_ratio` attributes (read by `owa`, `fe_ratio` and `get_parameters`)
        # undefined on new instances.
        super(EvalSplit, self).__init__()

    def read_splits(self, filename, split_id, directed=False, nw_name='test', verbose=False):
        """
        Reads the train and test edges and non-edges from files and initializes the class attributes.

        Parameters
        ----------
        filename : string
            The filename shared by all edge splits as given by the 'store_train_test_splits' method
        split_id : int
            The ID of the edge splits to read. As provided by the 'store_train_test_splits' method
        directed : bool, optional
            True if the splits correspond to a directed graph, false otherwise. Default is False.
        nw_name : string, optional
            A string indicating the name of the dataset from which this split was generated.
            This is required in order to keep track of the evaluation results. Default is `test`.
        verbose : bool, optional
            If True print progress info. Default is False.

        See also
        --------
        evalne.utils.preprocess.read_train_test :
            The low level function used for reading the sets of edges and non-edges.
        evalne.utils.split_train_test.store_train_test_splits :
            The files in the provided input path are expected to follow the naming convention of this function.
        """
        # Read edge sets from file
        train_E, train_E_false, test_E, test_E_false = pp.read_train_test(filename, split_id)

        # Set class attributes to new values (split_alg and owa keep the defaults of `set_splits`)
        self.set_splits(train_E, train_E_false, test_E, test_E_false, directed=directed, nw_name=nw_name,
                        split_id=split_id, verbose=verbose)