In [1]:
import pandas as pd
import graph_tool as gt
import graph_tool.draw
import graph_tool.community
import itertools
import collections
import matplotlib
import math
In [2]:
# Load the tab-separated product/vendor table.
# NOTE(review): this is an absolute, machine-specific path; consider reading
# it from a configurable data directory so the notebook runs elsewhere.
df = pd.read_csv('/home/aahu/Downloads/evolution/evolution/products_vendors.tsv',sep='\t')

# Discard rows whose category is one of the listed meta-categories.
meta_cats = ['Other','Drugs','Guides & Tutorials','Fraud Related',
             'Services','Digital Goods','Electronics', 'Custom Listings']
# isin() is the vectorised membership test; .copy() detaches the filtered
# frame from the original so that adding columns to it later (e.g. df['id'])
# does not raise SettingWithCopyWarning.
df = df[~df['category'].isin(meta_cats)].copy()
In [3]:
# Build graph-tool ids: give every distinct category a dense integer id, in
# order of first appearance, and keep the inverse lookup (id -> category).
unique_categories = df['category'].drop_duplicates()
node_lbs = {category: idx for idx, category in enumerate(unique_categories)}
rev_node_lbs = {idx: category for category, idx in node_lbs.items()}

# Tag each row with the integer id of its category.
df['id'] = df['category'].map(node_lbs.__getitem__)
In [4]:
# Build the category co-occurrence edge list: for each vendor, emit one
# (id, id) pair for every pair of distinct category ids that vendor has rows
# under. Each pair is sorted so (a, b) and (b, a) count as the same edge.
edge_list = []
dfg = df.groupby('vendor')
for name, group in dfg:
    vendor_ids = group['id'].drop_duplicates()
    edge_list.extend(tuple(sorted(pair))
                     for pair in itertools.combinations(vendor_ids, 2))

#filter edges by num shared vendors
MIN_SHARED_VENDORS=1
# c maps each sorted id pair to the number of vendors that produced it.
c = collections.Counter(edge_list)
edge_list = [pair for pair, n_vendors in c.items()
             if n_vendors >= MIN_SHARED_VENDORS]

#build graph
g = gt.Graph(directed=False)
g.add_edge_list(edge_list)
g.vertex_properties['label'] = g.new_vertex_property('string')
for v in g.vertices():
    g.vertex_properties['label'][v] = rev_node_lbs[g.vertex_index[v]]
print('g vert/edges: ',g.num_vertices(), g.num_edges())

#add edge weight property
g.edge_properties['weight'] = g.new_edge_property('double')
g.edge_properties['color'] = g.new_edge_property('vector<double>')
# Loop-invariant, so computed once; default=1 keeps an empty Counter from
# raising (there are then no edges to iterate over anyway).
max_shared = max(c.values(), default=1)
for e in g.edges():
    # Look the pair up by plain integer vertex indices, matching the int
    # tuples used as Counter keys. Keying on Vertex descriptors would rely on
    # them hashing/comparing equal to ints, and a miss is silent because
    # Counter returns 0 for unknown keys.
    endpoints = sorted((int(g.vertex_index[e.source()]),
                        int(g.vertex_index[e.target()])))
    w = c[tuple(endpoints)]
    g.edge_properties['weight'][e] = w
    # Opacity scales with the shared-vendor count; the small offset keeps the
    # weakest edges faintly visible, and the cap keeps alpha within [0, 1].
    alpha = min(1.0, (float(w)/max_shared) + .025)
    g.edge_properties['color'][e] = [103/255.0,134/255.0,239/255.0,alpha]
g vert/edges:  73 2219
In [10]:
# Run graph-tool's nested block model minimisation on g, using the 'weight'
# edge property as edge weights, then derive the hierarchy tree from the
# resulting block stack.
state = gt.community.minimize_nested_blockmodel_dl(g,deg_corr=False,
                                                   eweight=g.ep['weight'])
bstack = state.get_bstack()
t = gt.community.get_hierarchy_tree(bstack)[0]
# Radial layout of the hierarchy tree, rooted at its last vertex.
tpos = gt.draw.radial_tree_layout(t, t.vertex(t.num_vertices() - 1), weighted=True)
cts = gt.draw.get_hierarchy_control_points(g, t, tpos,beta=.87)
# Re-own the tree positions as a property map of g for drawing.
pos = g.own_property(tpos)
# 'b' vertex property of the first level of the block stack; used below for
# vertex fill and outline colour (presumably block membership - confirm).
b = bstack[0].vp["b"]

#text rotation
# Rotate each label to the polar angle of its vertex. atan2 handles x == 0,
# where atan(y / x) would divide by zero.
text_rot = g.new_vertex_property('double')
g.vertex_properties['text_rot'] = text_rot
for v in g.vertices():
    x, y = pos[v][0], pos[v][1]
    angle = math.atan2(y, x)
    # Shift the lower-left quadrant by a full turn so angles stay in the
    # same (-pi/2, 3*pi/2) range the atan-based formula produced.
    if angle < -math.pi / 2:
        angle += 2 * math.pi
    text_rot[v] = angle

print('saving to disk...')
gt.draw.graph_draw(g, pos=pos, vertex_fill_color=b,
            edge_control_points=cts,
            vertex_size=20,
            vertex_text=g.vertex_properties['label'],
            vertex_text_rotation=g.vertex_properties['text_rot'],
            vertex_text_position=1,
            vertex_font_size=20,
            vertex_font_family='mono',
            vertex_anchor=0,
            vertex_color=b,
            vcmap=matplotlib.cm.Spectral,
            ecmap=matplotlib.cm.Spectral,
            edge_color=g.edge_properties['color'],
            bg_color=[0,0,0,1],
            output_size=[1024*2,1024*2],
            output='/home/aahu/Desktop/evo_nvends={0}.png'.format(MIN_SHARED_VENDORS))
saving to disk...
Out[10]:
<PropertyMap object with key type 'Vertex' and value type 'vector<double>', for Graph 0x7fb31b86a9b0, at 0x7fb320810c88>
In [ ]: