Source code for evalne.evaluation.split

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author: Mara Alexandru Cristian
# Contact: alexandru.mara@ugent.be
# Date: 18/12/2018

# This file contains methods and classes that simplify the management and splitting of edges into train and test
# (or train and validation) sets.
# TODO v0.4.0: Change naming from train_edges/test_edges to train_data/test_data.
# TODO v0.4.0: Change naming from train_E/train_E_false to train_pos/train_neg.

import numpy as np
import networkx as nx

from abc import abstractmethod

from evalne.utils import preprocess as pp
from evalne.utils import split_train_test as stt


class BaseEvalSplit(object):
    """
    Base class that provides a high level interface for managing/computing sets of train and test edges and non-edges
    for LP, SP and NR tasks. The class exposes the train edges and non-edges through the `train_edges` property and
    the test edges and non-edges through the `test_edges` property. Parameters used to compute these sets are also
    made available.
    """

    def __init__(self):
        # Node pairs, labels and train graph; all remain None until `_set_splits` is called
        self._train_edges = None
        self._test_edges = None
        self._train_labels = None
        self._test_labels = None
        self._TG = None

        # Data related statistics
        self._train_frac = None
        self._split_alg = None
        self._nw_name = None
        self._split_id = None

    @property
    def train_edges(self):
        """The set of training node pairs."""
        return self._train_edges

    @property
    def test_edges(self):
        """The set of test node pairs."""
        return self._test_edges

    @property
    def train_labels(self):
        """A list of train node-pair labels. Labels can be either 0 or 1 and denote non-edges and edges, respectively
        (for SP they denote negative and positive links, respectively)."""
        return self._train_labels

    @property
    def test_labels(self):
        """A list of test node-pair labels. Labels can be either 0 or 1 and denote non-edges and edges, respectively
        (for SP they denote negative and positive links, respectively)."""
        return self._test_labels

    @property
    def TG(self):
        """A NetworkX graph or digraph to be used for training the embedding methods.
        For LP this should be the graph spanned by all train edges, for SP the graph spanned by the positive and
        negative train edges (with signs as edge weights) and for NR the entire graph being evaluated."""
        return self._TG

    @property
    def train_frac(self):
        """A float indicating the fraction of train edges out of all train and test edges."""
        return self._train_frac

    @property
    def split_alg(self):
        """A string indicating the algorithm used to split edges in train and test sets."""
        return self._split_alg

    @property
    def nw_name(self):
        """A string indicating the name of the dataset used to generate the sets of edges."""
        return self._nw_name

    @property
    def split_id(self):
        """An int used as an ID for this particular train/test split."""
        return self._split_id

    def _set_splits(self, train_E, train_E_false=None, test_E=None, test_E_false=None, directed=False, nw_name='test',
                    TG=None, split_id=0, split_alg='spanning_tree', verbose=False):
        """
        Sets the class attributes to the provided input values. The input train edges and non-edges as well as the
        test edges and non-edges are respectively joined to form the `train_edges` and `test_edges` class attributes.
        Train and test labels are also inferred from the input data.

        Parameters
        ----------
        train_E : set
            Set of train edges.
        train_E_false : set, optional
            Set of train non-edges. Default is None.
        test_E : set, optional
            Set of test edges. Default is None.
        test_E_false : set, optional
            Set of test non-edges. Default is None.
        directed : bool, optional
            True if the splits correspond to a directed graph, false otherwise. Only used when `TG` is None, to
            decide the type of graph built from `train_E`. Default is False.
        nw_name : string, optional
            A string indicating the name of the dataset from which this split was generated.
            This is required in order to keep track of the evaluation results. Default is `test`.
        TG : graph, optional
            A NetworkX graph or digraph to be used for training the embedding methods. A copy of it is stored.
            If None, the graph will be generated from the set of train edges. Default is None.
        split_id : int, optional
            An ID that identifies this particular train/test split. Default is 0.
        split_alg : string, optional
            A string indicating the algorithm used to generate the train/test splits.
            Options are `spanning_tree`, `random`, `naive`, `fast`, `timestamp` and `random_edge_sample`.
            Default is `spanning_tree`.
        verbose : bool, optional
            If True prints progress info. Default is False.

        Raises
        ------
        ValueError
            If the train edge set is not provided (None) or is empty.
        """
        # Validate up front so that a missing train set surfaces as the documented ValueError and not as a
        # TypeError coming from len(None). No attribute is modified when the input is invalid.
        if train_E is None or len(train_E) == 0:
            raise ValueError("Train edges are always required!")

        if train_E_false is not None:
            # Stack the edges and non-edges together; edges come first
            self._train_edges = np.vstack((list(train_E), list(train_E_false)))
            # Create labels vector with 1s for edges and 0s for non-edges
            self._train_labels = np.hstack((np.ones(len(train_E)), np.zeros(len(train_E_false))))
        else:
            # We only have train edges (no train non-edges)
            self._train_edges = np.array(list(train_E))
            self._train_labels = np.ones(len(train_E))

        if test_E is not None:
            if test_E_false is not None:
                # Stack the edges and non-edges together; edges come first
                self._test_edges = np.vstack((list(test_E), list(test_E_false)))
                # Create labels vector with 1s for edges and 0s for non-edges
                self._test_labels = np.hstack((np.ones(len(test_E)), np.zeros(len(test_E_false))))
            else:
                # We only have test edges (no test non-edges)
                self._test_edges = np.array(list(test_E))
                self._test_labels = np.ones(len(test_E))
        else:
            # No test data at all: keep empty lists (not arrays)
            self._test_edges = []
            self._test_labels = []

        # Initialize the training graph
        if TG is None:
            self._TG = nx.DiGraph() if directed else nx.Graph()
            self._TG.add_edges_from(train_E)
        else:
            # Store a copy so later changes to the caller's graph do not affect this split
            self._TG = TG.copy()

        # Set class attributes to new values
        if test_E is not None:
            self._train_frac = np.around(len(train_E) / (len(train_E) + len(test_E)), 4)
        else:
            self._train_frac = 1
        self._split_alg = split_alg
        self._split_id = split_id
        self._nw_name = nw_name

        # Print the process
        if verbose:
            print("Edge splits computed using {} alg. ready.".format(self.split_alg))

    def get_parameters(self):
        """
        Returns the class properties except the sets of train and test node pairs, labels and train graph.

        Returns
        -------
        parameters : dict
            The parameters used when computing this split as a dictionary with keys `train_frac`, `split_alg`,
            `nw_name` and `split_id`.
        """
        params = {"train_frac": self.train_frac, "split_alg": self.split_alg, "nw_name": self._nw_name,
                  "split_id": self.split_id}
        return params

    def get_data(self):
        """
        Returns the sets of train and test node pairs and label vectors. Note the order of the returned values:
        train pairs, train labels, test pairs, test labels.

        Returns
        -------
        train_edges : ndarray
            All train edges and non-edges.
        train_labels : ndarray
            Labels indicating if each train node-pair is an edge or non-edge (1 or 0).
        test_edges : ndarray or list
            All test edges and non-edges. An empty list if no test edges were provided.
        test_labels : ndarray or list
            Labels indicating if each test node-pair is an edge or non-edge (1 or 0). An empty list if no test
            edges were provided.
        """
        return self.train_edges, self.train_labels, self.test_edges, self.test_labels

    def save_tr_graph(self, output_path, delimiter, write_stats=False, write_weights=False, write_dir=True):
        """
        Saves the TG graph to a file.

        Parameters
        ----------
        output_path : file or string
            File or filename to write. If a file is provided, it must be opened in 'wb' mode.
        delimiter : string
            The string used to separate values. This argument is required (it has no default).
        write_stats : bool, optional
            Adds basic graph statistics to the file as a header or not. Default is False.
        write_weights : bool, optional
            If True data will be stored as weighted edgelist i.e. triplets (src, dst, weight), otherwise, as regular
            (src, dst) pairs. For unweighted graphs, setting this parameter to True will add weight 1 to all edges.
            Default is False.
        write_dir : bool, optional
            This parameter is only relevant for undirected graphs. If True, it forces the method to write both edge
            directions in the file i.e. (src, dst) and (dst, src). If False, only one direction is stored.
            Default is True.

        See also
        --------
        evalne.utils.preprocess.save_graph
        """
        pp.save_graph(self._TG, output_path=output_path, delimiter=delimiter, write_stats=write_stats,
                      write_weights=write_weights, write_dir=write_dir)

    def store_edgelists(self, train_path, test_path):
        """
        Writes the sets of train and test node pairs to files with the specified names.

        Parameters
        ----------
        train_path : string
            Indicates the path where the train data will be stored.
        test_path : string
            Indicates the path where the test data will be stored.

        See also
        --------
        evalne.utils.split_train_test.store_edgelists
        """
        stt.store_edgelists(train_path, test_path, self.train_edges, self.test_edges)
class NREvalSplit(BaseEvalSplit):
    """
    High level interface for managing/computing the sets of train edges and non-edges used in NR tasks. The train
    edges and non-edges are exposed through the `train_edges` property. Test edges are not used for NR and therefore
    the `test_edges` property will be left empty. Parameters used to compute these sets are also made available.

    Notes
    -----
    In network reconstruction the aim is to assess how well an embedding method captures the structure of a given
    graph. The embedding methods are trained on a complete input graph. Hyperparameter tuning is performed directly
    on this graph (overfitting is, in this case, expected and desired). The embeddings obtained are used to perform
    link predictions and their quality is evaluated. Checking the link predictions for all node pairs is generally
    unfeasible, therefore a subset of all node pairs in the input graph are selected for evaluation.
    """

    def __init__(self):
        super(NREvalSplit, self).__init__()
        # NR specific parameter, filled in by `set_splits`
        self._samp_frac = None

    @property
    def samp_frac(self):
        """A float indicating the fraction of node pairs out of all possible ones sampled for NR evaluation."""
        return self._samp_frac

    def set_splits(self, TG, train_E, train_E_false=None, samp_frac=None, directed=False, nw_name='test', split_id=0,
                   verbose=False):
        """
        Stores the given values as class attributes. Train edges and non-edges are joined to form the `train_edges`
        attribute and the train labels are derived from them. The split algorithm is always recorded as
        `random_edge_sample`.

        Parameters
        ----------
        TG : graph
            A NetworkX graph or digraph, the complete network from which train_E and train_E_false were sampled.
        train_E : set
            Set of train edges.
        train_E_false : set, optional
            Set of train non-edges. Default is None.
        samp_frac : float, optional
            The fraction of node-pairs out of all possible ones sampled for NR evaluation. Default is None.
        directed : bool, optional
            True if the splits correspond to a directed graph, false otherwise. Default is False.
        nw_name : string, optional
            A string indicating the name of the dataset from which this split was generated.
            This is required in order to keep track of the evaluation results. Default is `test`.
        split_id : int, optional
            An ID that identifies this particular train/test split. Default is 0.
        verbose : bool, optional
            If True prints progress info. Default is False.

        Raises
        ------
        ValueError
            If the train edge set is not provided.
        """
        # NR specific state first
        self._samp_frac = samp_frac

        # Everything else is handled by the parent; NR has no test data, so both test sets are passed as None
        parent = super(NREvalSplit, self)
        parent._set_splits(train_E=train_E,
                           train_E_false=train_E_false,
                           test_E=None,
                           test_E_false=None,
                           directed=directed,
                           nw_name=nw_name,
                           TG=TG,
                           split_id=split_id,
                           split_alg='random_edge_sample',
                           verbose=verbose)

    def compute_splits(self, G, nw_name='test', samp_frac=0.01, split_id=0, verbose=False):
        """
        Randomly samples entries of the adjacency matrix of G to obtain sets of train edges and non-edges, and
        initializes the class attributes with them.

        Parameters
        ----------
        G : graph
            A NetworkX graph or digraph to sample node pairs from. The adjacency matrix is built with
            `nodelist=range(len(G.nodes))`, so node IDs are expected to be the integers 0..N-1.
        nw_name : string, optional
            A string indicating the name of the dataset from which this split was generated.
            This is required in order to keep track of the evaluation results. Default is 'test'.
        samp_frac : float, optional
            The fraction of node-pairs out of all possible ones to sample for NR evaluation. Default is 0.01 (1%).
        split_id : int, optional
            The id to be assigned to the train/test splits generated. Default is 0.
        verbose : bool, optional
            If True print progress info. Default is False.

        Returns
        -------
        train_E : set
            The set of train edges.
        train_false_E : set
            The set of train non-edges.

        Raises
        ------
        ValueError
            If the sampling did not select any edge.
        """
        # Sample the required number of node pairs from the adjacency matrix of the graph
        adj = nx.adjacency_matrix(G, nodelist=range(len(G.nodes)))
        is_directed = nx.is_directed(G)
        train_E, train_E_false = stt.random_edge_sample(adj, samp_frac, is_directed)

        # Both edges and non-edges are required, so an empty edge sample is an error
        if len(train_E) == 0:
            msg = "Sampling fraction {} on {} network is too low, no edges were selected."
            raise ValueError(msg.format(samp_frac, nw_name))

        # Set class attributes to new values
        self.set_splits(TG=G, train_E=train_E, train_E_false=train_E_false, samp_frac=samp_frac,
                        directed=is_directed, nw_name=nw_name, split_id=split_id, verbose=verbose)

        return train_E, train_E_false

    def get_parameters(self):
        """
        Returns the class properties except the sets of train and test node pairs, labels and train graph.

        Returns
        -------
        parameters : dict
            The parameters used when computing this split as a dictionary of parameters and values. Contains the
            parent class parameters plus `samp_frac`.
        """
        # Start from the parameters of the parent class and add the NR specific one
        params = super(NREvalSplit, self).get_parameters()
        params["samp_frac"] = self._samp_frac
        return params
class SPEvalSplit(BaseEvalSplit):
    """
    Class that provides a high level interface for managing/computing sets of train and test positive and negative
    edges for SP tasks. The class exposes the train positive and negative edges through the `train_edges` property
    and the test positive and negative edges through the `test_edges` property. Parameters used to compute these
    sets are also made available.

    Notes
    -----
    In sign prediction the aim is to predict the sign (positive or negative) of given edges. The existence of the
    edges is assumed (i.e. we do not predict the sign of unconnected node pairs). Therefore, sign prediction is also
    a binary classification task similar to link prediction where, instead of predicting the existence of edges or
    not, we predict the signs for edges we know exist. Unlike for link prediction, in this case we do not need to
    perform negative sampling, since we already have both classes (the positively and the negatively connected node
    pairs).
    """

    def __init__(self):
        super(SPEvalSplit, self).__init__()

    def set_splits(self, train_E, train_E_false=None, test_E=None, test_E_false=None, directed=False, nw_name='test',
                   TG=None, split_id=0, split_alg='spanning_tree', verbose=False):
        """
        Sets the class attributes to the provided input values. The input train positive and negative edges as well
        as the test positive and negative edges are respectively joined to form the `train_edges` and `test_edges`
        class attributes. Train and test labels (0 or 1 representing negative and positive edges, respectively) are
        also inferred from the input data.

        Parameters
        ----------
        train_E : set
            Set of positive train edges.
        train_E_false : set, optional
            Set of negative train edges. Default is None.
        test_E : set, optional
            Set of positive test edges. Default is None.
        test_E_false : set, optional
            Set of negative test edges. Default is None.
        directed : bool, optional
            True if the splits correspond to a directed graph, false otherwise. Default is False.
        nw_name : string, optional
            A string indicating the name of the dataset from which this split was generated.
            This is required in order to keep track of the evaluation results. Default is `test`.
        TG : graph, optional
            A NetworkX graph or digraph containing all the train edges (positive and negative). If None, the graph
            will be generated from the sets of positive and negative train edges. Default is None.
        split_id : int, optional
            An ID that identifies this particular train/test split. Default is 0.
        split_alg : string, optional
            A string indicating the algorithm used to generate the train/test splits.
            Options are `spanning_tree`, `random`, `naive`, `fast` and `timestamp`. Default is `spanning_tree`.
        verbose : bool, optional
            If True prints progress info. Default is False.

        Raises
        ------
        ValueError
            If the train edge set is not provided (None) or is empty.
        """
        # Fail with the documented error before trying to build a graph from a missing train set
        if train_E is None or len(train_E) == 0:
            raise ValueError("Train edges are always required!")

        # Initialize the training graph
        if TG is None:
            TG = nx.DiGraph() if directed else nx.Graph()
            TG.add_edges_from(train_E)
            # Negative train edges are optional (default None); only add them when given
            if train_E_false is not None:
                TG.add_edges_from(train_E_false)

        # Set the parameters by calling the parent class private set method
        super(SPEvalSplit, self)._set_splits(train_E=train_E, train_E_false=train_E_false, test_E=test_E,
                                             test_E_false=test_E_false, directed=directed, nw_name=nw_name, TG=TG,
                                             split_id=split_id, split_alg=split_alg, verbose=verbose)

    def compute_splits(self, G, nw_name='test', train_frac=0.51, split_alg='spanning_tree', split_id=0, verbose=False):
        """
        Computes sets of train and test positive and negative edges according to the given input parameters and
        initializes the class attributes.

        Parameters
        ----------
        G : graph
            A NetworkX graph or digraph to compute the train test split from. Edge signs are read from its adjacency
            matrix, built with `nodelist=range(len(G.nodes))`, so node IDs are expected to be the integers 0..N-1.
        nw_name : string, optional
            A string indicating the name of the dataset from which this split was generated.
            This is required in order to keep track of the evaluation results. Default is 'test'.
        train_frac : float, optional
            The proportion of train edges w.r.t. the total number of edges in the input graph (range (0.0, 1.0]).
            Default is 0.51.
        split_alg : string, optional
            A string indicating the algorithm to use for generating the train/test splits.
            Options are `spanning_tree`, `random`, `naive`, `fast` and `timestamp`. Default is `spanning_tree`.
        split_id : int, optional
            The id to be assigned to the train/test splits generated. Default is 0.
        verbose : bool, optional
            If True print progress info. Default is False.

        Returns
        -------
        train_E : ndarray
            The train positive edges.
        train_false_E : ndarray
            The train negative edges.
        test_E : ndarray
            The test positive edges.
        test_false_E : ndarray
            The test negative edges.

        Raises
        ------
        ValueError
            If the edge split algorithm is unknown.
        """
        # Compute train/test split
        if split_alg == 'random':
            tr_E, te_E = stt.rand_split_train_test(G, train_frac)
            # NOTE(review): G is replaced by the graph returned by relabel_nodes and the edge signs below are read
            # from it -- confirm that this graph preserves the +1/-1 edge weights.
            train_E, test_E, G, _ = pp.relabel_nodes(tr_E, te_E, G.is_directed())
        elif split_alg == 'naive':
            train_E, test_E = stt.naive_split_train_test(G, train_frac)
        elif split_alg == 'spanning_tree':
            train_E, test_E = stt.split_train_test(G, train_frac)
        elif split_alg == 'fast':
            train_E, test_E = stt.quick_split(G, train_frac)
        elif split_alg == 'timestamp':
            train_E, test_E, _ = stt.timestamp_split(G, train_frac)
        else:
            raise ValueError('Split alg. {} unknown!'.format(split_alg))

        # Make sure the edges are numpy arrays
        train_E = np.array(list(train_E))
        test_E = np.array(list(test_E))

        # Get the labels (signs) of train and test edges from the adjacency matrix
        a = nx.adjacency_matrix(G, nodelist=range(len(G.nodes)))
        tr_labels = np.ravel(a[train_E[:, 0], train_E[:, 1]])
        te_labels = np.ravel(a[test_E[:, 0], test_E[:, 1]])

        # Split train and test edges in those with positive and negative signs.
        # Edges whose adjacency entry is neither 1 nor -1 end up in neither set.
        pos_tr_e = train_E[np.where(tr_labels == 1)[0], :]
        neg_tr_e = train_E[np.where(tr_labels == -1)[0], :]
        pos_te_e = test_E[np.where(te_labels == 1)[0], :]
        neg_te_e = test_E[np.where(te_labels == -1)[0], :]

        # The train graph is the input graph (keeping its edge data) without the test edges
        H = G.copy()
        H.remove_edges_from(test_E)

        # Set class attributes to new values
        self.set_splits(train_E=pos_tr_e, train_E_false=neg_tr_e, test_E=pos_te_e, test_E_false=neg_te_e,
                        directed=G.is_directed(), nw_name=nw_name, TG=H, split_id=split_id, split_alg=split_alg,
                        verbose=verbose)

        return pos_tr_e, neg_tr_e, pos_te_e, neg_te_e
class LPEvalSplit(BaseEvalSplit):
    """
    Class that provides a high level interface for managing/computing sets of train and test edges and non-edges for
    LP tasks. The class exposes the train edges and non-edges through the `train_edges` property and the test edges
    and non-edges through the `test_edges` property. Parameters used to compute these sets are also made available.

    Notes
    -----
    In link prediction the aim is to predict, given a set of node pairs, if they should be connected or not. This is
    generally solved as a binary classification task. For training the binary classifier, we sample a set of edges
    as well as a set of unconnected node pairs. We then compute the node-pair embeddings of this training data. We
    use the node-pair embeddings together with the corresponding labels (0 for non-edges and 1 for edges) to train
    the classifier. Finally, the performance is evaluated on the test data (the remaining edges not used in training
    plus another set of randomly selected non-edges).
    """

    def __init__(self):
        # LP specific parameters, filled in by `set_splits`
        self._owa = None
        self._fe_ratio = None
        super(LPEvalSplit, self).__init__()

    @property
    def owa(self):
        """A bool parameter indicating if the non-edges have been generated using the OWA (otherwise CWA)."""
        return self._owa

    @property
    def fe_ratio(self):
        """A float indicating the ratio of non-edges to edges."""
        return self._fe_ratio

    def set_splits(self, train_E, train_E_false=None, test_E=None, test_E_false=None, directed=False, nw_name='test',
                   TG=None, split_id=0, split_alg='spanning_tree', owa=True, verbose=False):
        """
        Sets the class attributes to the provided input values. The input train edges and non-edges as well as the
        test edges and non-edges are respectively joined to form the `train_edges` and `test_edges` class attributes.
        Train and test labels are also inferred from the input data.

        Parameters
        ----------
        train_E : set
            Set of train edges.
        train_E_false : set, optional
            Set of train non-edges. Default is None.
        test_E : set, optional
            Set of test edges. Default is None.
        test_E_false : set, optional
            Set of test non-edges. Default is None.
        directed : bool, optional
            True if the splits correspond to a directed graph, false otherwise. Default is False.
        nw_name : string, optional
            A string indicating the name of the dataset from which this split was generated.
            This is required in order to keep track of the evaluation results. Default is `test`.
        TG : graph, optional
            A NetworkX graph or digraph containing all the train edges. If None, the graph will be generated
            from the set of train edges. Default is None.
        split_id : int, optional
            An ID that identifies this particular train/test split. Default is 0.
        split_alg : string, optional
            A string indicating the algorithm used to generate the train/test splits.
            Options are `spanning_tree`, `random`, `naive`, `fast` and `timestamp`. Default is `spanning_tree`.
        owa : bool, optional
            Encodes the belief that the network respects or not the open world assumption. Default is True.
            If owa=True, train non-edges are sampled from the train graph only and can overlap with test edges.
            If owa=False, train non-edges are sampled from the full graph and cannot overlap with test edges.
        verbose : bool, optional
            If True prints progress info. Default is False.

        Raises
        ------
        ValueError
            If the train edge set is not provided (None) or is empty.
        """
        # Validate before computing the ratio below: dividing by len(train_E) would otherwise raise a
        # ZeroDivisionError (empty set) or TypeError (None) instead of the documented ValueError.
        if train_E is None or len(train_E) == 0:
            raise ValueError("Train edges are always required!")

        # Set the LP specific parameters
        self._owa = owa
        if train_E_false is not None:
            self._fe_ratio = np.around(len(train_E_false) / len(train_E), 4)
        else:
            # No non-edges given: the ratio is recorded as 1
            self._fe_ratio = 1

        # Set the remaining parameters by calling the parent class private set method
        super(LPEvalSplit, self)._set_splits(train_E=train_E, train_E_false=train_E_false, test_E=test_E,
                                             test_E_false=test_E_false, directed=directed, nw_name=nw_name, TG=TG,
                                             split_id=split_id, split_alg=split_alg, verbose=verbose)

    def compute_splits(self, G, nw_name='test', train_frac=0.51, split_alg='spanning_tree', owa=True, fe_ratio=1,
                       split_id=0, verbose=False):
        """
        Computes sets of train and test edges and non-edges according to the given input parameters and initializes
        the class attributes.

        Parameters
        ----------
        G : graph
            A NetworkX graph or digraph to compute the train test split from.
        nw_name : string, optional
            A string indicating the name of the dataset from which this split was generated.
            This is required in order to keep track of the evaluation results. Default is 'test'.
        train_frac : float, optional
            The proportion of train edges w.r.t. the total number of edges in the input graph (range (0.0, 1.0]).
            Default is 0.51.
        split_alg : string, optional
            A string indicating the algorithm to use for generating the train/test splits.
            Options are `spanning_tree`, `random`, `naive`, `fast` and `timestamp`. Default is `spanning_tree`.
        owa : bool, optional
            Encodes the belief that the network should respect or not the open world assumption. Default is True.
            If owa=True, train non-edges are sampled from the train graph only and can overlap with test edges.
            If owa=False, train non-edges are sampled from the full graph and cannot overlap with test edges.
            For the `fast` algorithm the non-edges come from `quick_nonedges`, which does not receive this flag;
            the value is then only recorded.
        fe_ratio : float, optional
            The ratio of non-edges to edges to sample. Values between 0 and 1 generate fewer non-edges than edges,
            values above 1 generate more non-edges than edges. Default 1, same amounts.
        split_id : int, optional
            The id to be assigned to the train/test splits generated. Default is 0.
        verbose : bool, optional
            If True print progress info. Default is False.

        Returns
        -------
        train_E : set
            The set of train edges
        train_false_E : set
            The set of train non-edges
        test_E : set
            The set of test edges
        test_false_E : set
            The set of test non-edges

        Raises
        ------
        ValueError
            If the edge split algorithm is unknown.
        """
        # Compute train/test split
        if split_alg == 'random':
            tr_E, te_E = stt.rand_split_train_test(G, train_frac)
            # From here on G is the graph returned by relabel_nodes
            train_E, test_E, G, _ = pp.relabel_nodes(tr_E, te_E, G.is_directed())
        elif split_alg == 'naive':
            train_E, test_E = stt.naive_split_train_test(G, train_frac)
        elif split_alg == 'spanning_tree':
            train_E, test_E = stt.split_train_test(G, train_frac)
        elif split_alg == 'fast':
            # The fast algorithm comes with its own non-edge generation
            train_E, test_E = stt.quick_split(G, train_frac)
            train_E_false, test_E_false = stt.quick_nonedges(G, train_frac, fe_ratio)
        elif split_alg == 'timestamp':
            train_E, test_E, G = stt.timestamp_split(G, train_frac)
            # Turn the two-column edge arrays into sets of (src, dst) tuples
            train_E = set(zip(train_E[:, 0], train_E[:, 1]))
            test_E = set(zip(test_E[:, 0], test_E[:, 1]))
        else:
            raise ValueError('Split alg. {} unknown!'.format(split_alg))

        # Compute non-edges (already done above for the fast algorithm)
        if split_alg != 'fast':
            num_fe_train = len(train_E) * fe_ratio
            num_fe_test = len(test_E) * fe_ratio
            if owa:
                train_E_false, test_E_false = stt.generate_false_edges_owa(G, train_E, test_E,
                                                                           num_fe_train, num_fe_test)
            else:
                train_E_false, test_E_false = stt.generate_false_edges_cwa(G, train_E, test_E,
                                                                           num_fe_train, num_fe_test)

        # Set class attributes to new values; TG is not passed, so the train graph is built from train_E
        self.set_splits(train_E, train_E_false, test_E, test_E_false, directed=G.is_directed(), nw_name=nw_name,
                        split_id=split_id, split_alg=split_alg, owa=owa, verbose=verbose)

        return train_E, train_E_false, test_E, test_E_false

    def get_parameters(self):
        """
        Returns the class properties except the sets of train and test node pairs, labels and train graph.

        Returns
        -------
        parameters : dict
            The parameters used when computing this split as a dictionary of parameters and values. Contains the
            parent class parameters plus `owa` and `fe_ratio`.
        """
        # Get the parameters from the parent class
        params = super(LPEvalSplit, self).get_parameters()

        # Add the LP specific parameters
        params.update({"owa": self._owa, "fe_ratio": self._fe_ratio})
        return params
class EvalSplit(LPEvalSplit):
    """
    Deprecated and will be removed in v0.4.0. Use LPEvalSplit instead.
    """

    def __init__(self):
        # Start the MRO lookup after EvalSplit itself so that LPEvalSplit.__init__ runs and creates the
        # `_owa` / `_fe_ratio` attributes. Using super(LPEvalSplit, self) here would skip it and leave the
        # `owa` / `fe_ratio` properties and `get_parameters` raising AttributeError on a fresh instance.
        super(EvalSplit, self).__init__()

    def read_splits(self, filename, split_id, directed=False, nw_name='test', verbose=False):
        """
        Reads the train and test edges and non-edges from files and initializes the class attributes.
        The split algorithm and `owa` are not passed to `set_splits`, so they are recorded with their defaults
        (`spanning_tree` and True), whatever was used to generate the files.

        Parameters
        ----------
        filename : string
            The filename shared by all edge splits as given by the 'store_train_test_splits' method
        split_id : int
            The ID of the edge splits to read. As provided by the 'store_train_test_splits' method
        directed : bool, optional
            True if the splits correspond to a directed graph, false otherwise. Default is False.
        nw_name : string, optional
            A string indicating the name of the dataset from which this split was generated.
            This is required in order to keep track of the evaluation results. Default is `test`.
        verbose : bool, optional
            If True print progress info. Default is False.

        See also
        --------
        evalne.utils.preprocess.read_train_test : The low level function used for reading the sets of edges and
            non-edges.
        evalne.utils.split_train_test.store_train_test_splits : The files in the provided input path are expected
            to follow the naming convention of this function.
        """
        # Read edge sets from file
        train_E, train_E_false, test_E, test_E_false = pp.read_train_test(filename, split_id)

        # Set class attributes to new values
        self.set_splits(train_E, train_E_false, test_E, test_E_false, directed=directed, nw_name=nw_name,
                        split_id=split_id, verbose=verbose)