"""
Module for running Louvain community detection algorithm.
"""
from __future__ import division, print_function, absolute_import
import six
range = six.moves.range
zip = six.moves.zip
import numpy as np
import subprocess
import tempfile
import threading
import os
import scipy.sparse as sp
from contextlib import closing
from . import qualityfuncs
def _edge_lines_iter(conn_mx, is_sparse):
"""
Iterate over Pajek edge lines
"""
if is_sparse:
for ndx in range(conn_mx.shape[0]):
conns = conn_mx.rows[ndx]
weights = conn_mx.data[ndx]
if not len(conns):
yield ('%d %d %0.5f 0\n' % (ndx, 0, 0))
else:
for c, w in zip(conns, weights):
yield ('%d %d %0.5f 0\n' % (ndx, c, w))
else:
for ndx in range(conn_mx.shape[0]):
r = conn_mx[ndx, :]
conns = np.flatnonzero(r)
weights = r[conns]
if not len(conns):
yield ('%d %d %0.5f 0\n' % (ndx, 0, 0))
else:
for c, w in zip(conns, weights):
yield ('%d %d %0.5f 0\n' % (ndx, c, w))
[docs]
def optimize_modularity(conn_mx, rand_init=True, num_runs=1, debug=False, errortol=1e-2):
"""
Optimize directed, weighted Newman's modularity
using the Louvain algorithm. This uses C++ implementation from
https://github.com/raldecoa/SurpriseMe/tree/master/src/CPM
For example:
>>> import networkx as nx
>>> from graphy.louvain import optimize_modularity
>>> G = nx.karate_club_graph()
>>> best_membership, q = optimize_modularity(nx.to_numpy_array(G))
>>> print(best_membership, q) # doctest: +SKIP
Parameters
----------
conn_mx : 2-dimensional np.array or scipy.sparse matrix
Connectivity matrix.
rand_init : bool (default True)
Whether to randomly shuffle order of nodes (makes results
non-deterministic)
num_runs : int (default 1)
How many runs to perform (highest quality run returned). Only allow if
rand_init is True.
debug : bool (default False)
If True, prints various debugging information.
Returns
-------
np.array
Optimal membership vector indicating community membership of each node.
float
Modularity value corresponding to the optimal membership vector. Notice
that because the modularity value is computed by adding up increments
over many moves, this may only be accurate to a few decimal places.
"""
# TODO: Implement multithreading? Code seems to support it already
# check inputs
if num_runs > 1 and not rand_init:
raise ValueError('Multiple runs only makes sense when initial order of'
'nodes is randomized')
# check is sparse or not
is_sparse = sp.isspmatrix(conn_mx)
if is_sparse:
# transform to list of lists
conn_mx = conn_mx.tolil()
else:
conn_mx = np.asarray(conn_mx)
# check is square matrix
if conn_mx.shape[0] != conn_mx.shape[1]:
raise ValueError('conn mx should be square')
qObj = qualityfuncs.DirectedModularity(conn_mx)
# create files
DIR = tempfile.gettempdir()
pfx = '%d_%d_' % (os.getpid(), hash(threading.current_thread().name))
NETWORK_FILE = os.path.join(DIR, pfx+'net.txt')
OUTPUT_FILE = os.path.join(DIR, pfx+'net.bin')
NODEMAP_FILE = os.path.join(DIR, pfx+'nmap.bin')
CONF_FILE = os.path.join(DIR, pfx+'conf.bin')
with closing(open(NETWORK_FILE, 'w')) as f:
f.write('>\n')
N_nodes = conn_mx.shape[0]
node_lines = ('%d %d\n' % (ndx, ndx) for ndx in range(N_nodes))
f.writelines(node_lines)
f.write('>\n0 0\n>\n')
edges_lines = _edge_lines_iter(conn_mx, is_sparse)
f.writelines(edges_lines)
bin_dir = os.path.join(os.path.dirname(__file__), 'external',
'SurpriseMeCPM', 'bin')
subprocess.call([os.path.join(bin_dir, 'slicer'),
'-i', NETWORK_FILE,
'-o', OUTPUT_FILE,
'-n', NODEMAP_FILE,
'-c', CONF_FILE], stderr=subprocess.PIPE)
call_opts = [os.path.join(bin_dir, 'community'), ]
if rand_init:
call_opts.append('-r')
call_opts.append(OUTPUT_FILE)
call_opts.append(CONF_FILE)
best_membership, best_q = None, None
for run_ndx in range(num_runs):
rand_seed_opt = [] # ['-s', str(1+run_ndx)]
res = subprocess.Popen(call_opts + rand_seed_opt, stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
output, errors = map(lambda s: s.decode('ascii'), res.communicate())
all_files = [NETWORK_FILE, OUTPUT_FILE, NODEMAP_FILE, CONF_FILE]
if debug:
print("******* RUN %d *******" % run_ndx)
print("**** OUTPUT: ****")
print(output)
print()
print("**** STDERR: ****")
print(errors)
print("*****************")
print("Leaving files in place:")
print(all_files)
returned_q = float(errors.strip().split("\n")[-1].split(" ")[0])
membership = np.zeros(conn_mx.shape[0], dtype='int')
ndxs, vals = zip(*[map(int, l.strip().split("\t"))
for l in output.strip().split("\n")])
membership[list(ndxs)] = vals
accurate_q = qObj.quality(membership)
if (np.abs(returned_q - accurate_q) > errortol):
raise Exception('Quality returned by Louvain (%0.5f) does not match explicitly computed quality (%0.5f)' %
(returned_q, accurate_q))
if best_q is None or accurate_q > best_q:
best_membership, best_q = membership, accurate_q
if not debug:
for fname in all_files:
os.remove(fname)
return best_membership, best_q