# Source code for evalne.tests.test_baselines

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author: Mara Alexandru Cristian
# Contact: alexandru.mara@ugent.be
# Date: 18/12/2018

import random
import numpy as np

from time import time

from evalne.evaluation import evaluator
from evalne.evaluation import score
from evalne.evaluation import split
from evalne.methods import katz
from evalne.utils import preprocess as pp


# TODO: There are big differences between the exact and approximate Katz results; the exact implementation is probably wrong.


def test_katz(nee):
    """
    Evaluate the exact Katz implementation on the split held by the evaluator.

    A `katz.Katz` instance is built from `nee.traintest_split.TG`, used to score the train and
    test edges of that split, and the scores are wrapped in a `score.Results` object whose test
    results are printed.

    Parameters
    ----------
    nee : object
        Evaluator exposing a `traintest_split` attribute that provides `TG`, `train_edges`,
        `test_edges`, `train_labels` and `test_labels` (`run_test` passes an
        `evaluator.LPEvaluator`).
    """
    # All inputs come from the same split object, so look it up once.
    data = nee.traintest_split

    # Evaluate exact katz implementation
    exact = katz.Katz(data.TG)
    train_pred = exact.predict(data.train_edges)
    test_pred = exact.predict(data.test_edges)
    ms = score.Results(method='Katz', params=exact.get_params(),
                       train_pred=train_pred, train_labels=data.train_labels,
                       test_pred=test_pred, test_labels=data.test_labels)
    ms.pretty_print(results='test', precatk_vals=[2, 4, 6, 10, 100, 1000])
    # ms.plot()

    # NOTE: the approximate Katz evaluation below is intentionally disabled; it is kept for
    # reference while the exact/approx discrepancy is investigated.
    # # Evaluate approx katz implementation
    # approx = katz.KatzApprox(nee.traintest_split.TG)
    # train_pred = approx.fit_predict(nee.traintest_split.train_edges)
    # test_pred = approx.fit_predict(nee.traintest_split.test_edges)
    # ms = score.Results(method='Katz', params=approx.get_params(),
    #                    train_pred=train_pred, train_labels=nee.traintest_split.train_labels,
    #                    test_pred=test_pred, test_labels=nee.traintest_split.test_labels)
    # ms.pretty_print(results='test', precatk_vals=[2, 4, 6, 10, 100, 1000])
    # # ms.plot()


def test_baselines(nee, directed):
    """
    Experiment to test the baselines.

    Every baseline method is evaluated through `nee.evaluate_baseline`. For directed input each
    method is evaluated twice, first with the "in" and then with the "out" neighbourhood. All
    results are printed and the predictions of the first result are saved to 'predictions.txt'.

    Parameters
    ----------
    nee : object
        Evaluator exposing `evaluate_baseline(method=..., neighbourhood=...)` (`run_test` passes
        an `evaluator.LPEvaluator`).
    directed : bool
        Whether the evaluated graph is directed.
    """
    print('Evaluating baselines...')

    # Set the baselines
    methods = ['random_prediction', 'common_neighbours', 'jaccard_coefficient', 'adamic_adar_index',
               'preferential_attachment', 'resource_allocation_index']

    # Results list
    results = []

    # Evaluate baseline methods
    for method in methods:
        if directed:
            # Directed graphs distinguish in- and out-neighbourhoods; evaluate both, "in" first.
            for neighbourhood in ("in", "out"):
                results.append(nee.evaluate_baseline(method=method, neighbourhood=neighbourhood))
        else:
            results.append(nee.evaluate_baseline(method=method))

    for result in results:
        result.pretty_print()

    # Only the predictions of the first evaluated baseline are written to disk.
    results[0].save_predictions('predictions.txt')


def run_test():
    """
    Run the baseline and Katz experiments on the bundled test network.

    Seeds the `random` and `numpy` generators, loads and preprocesses the graph stored in
    "./data/network.edgelist", computes one train/test split, and then times the baseline and
    Katz evaluations, printing the elapsed seconds of each stage.
    """
    # Fix both random generators so repeated runs use the same seeds.
    random.seed(42)
    np.random.seed(42)

    # Set some variables
    filename = "./data/network.edgelist"
    directed = False

    # Load the test graph
    G = pp.load_graph(filename, delimiter=",", comments='#', directed=directed)
    # The second value returned by prep_graph is not needed here.
    G, _ = pp.prep_graph(G)

    # Print some stats about the graph
    pp.get_stats(G)

    # Generate one train/test split (train_frac=0.9)
    start = time()
    traintest_split = split.EvalSplit()
    # traintest_split.read_splits('./', 0)
    traintest_split.compute_splits(G, train_frac=0.9)
    elapsed = time() - start
    print("\nSplits computed in {} sec".format(elapsed))

    # Create an evaluator
    nee = evaluator.LPEvaluator(traintest_split)

    # Test baselines
    start = time()
    test_baselines(nee, directed)
    elapsed = time() - start
    print("\nBaselines computed in {} sec".format(elapsed))

    # Test Katz
    start = time()
    test_katz(nee)
    elapsed = time() - start
    print("\nKatz computed in {} sec".format(elapsed))


# Run the full baseline and Katz experiment when this module is executed as a script.
if __name__ == "__main__":
    run_test()