This repository has been archived by the owner on Jun 24, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathinw_sample.py
53 lines (47 loc) · 1.54 KB
/
inw_sample.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import networkx as nx
import csv
from itertools import combinations
from random import sample
# Recipes grouped by cuisine: the first CSV column is the cuisine name and
# the remaining columns of the row are kept as that recipe's ingredient list.
R = {}
# newline='' lets the csv module do its own line-ending handling, as its
# documentation asks for file objects passed to csv.reader.
with open('data/srep00196-s3.csv', newline='') as f:
    for row in csv.reader(f):
        # discard blank lines and comments; a blank line parses to an empty
        # row, so emptiness has to be checked before touching row[0]
        if not row or row[0].startswith('#'):
            continue
        cuisine = row[0]
        R.setdefault(cuisine, []).append(row[1:])
# Number of recipes drawn from each cuisine.  Cuisines holding fewer recipes
# than this are skipped, so every sampled cuisine ends up with the same count.
SAMPLE_SIZE = 2512
# random sample of recipes (drawn without replacement), keyed by cuisine
S = {}
for cuisine, recipes in R.items():
    if len(recipes) >= SAMPLE_SIZE:
        S[cuisine] = sample(recipes, SAMPLE_SIZE)
# One weighted graph per sampled cuisine: nodes are ingredients and an edge's
# weight counts how many sampled recipes contain both of its endpoints.
G = {}
for cuisine, recipes in S.items():
    # each cuisine gets its own graph, created the first time it is seen
    if cuisine not in G:
        G[cuisine] = nx.Graph()
    graph = G[cuisine]
    for recipe in recipes:
        # every unordered pair of entries in the recipe is one co-occurrence
        for first, second in combinations(recipe, 2):
            existing = graph.get_edge_data(first, second)
            # a pair seen before bumps its weight; a new pair starts at 1
            weight = existing['weight'] + 1 if existing else 1
            graph.add_edge(first, second, weight=weight)
for cuisine, graph in G.items():
    # write a comma-delimited weighted edgelist by cuisine
    nx.write_weighted_edgelist(graph,
                               'data/sampled/%s_edgelist.csv' % cuisine,
                               comments='#',
                               delimiter=',',
                               encoding='utf-8')
    # write out unique ingredients, one per line
    ingredients = set(graph.nodes())
    # UTF-8 to match the edgelist written just above rather than whatever the
    # platform default happens to be; sorted because set order is arbitrary
    # and would otherwise make the file differ from run to run
    with open('data/sampled/ingredients_%s.csv' % cuisine, 'w',
              encoding='utf-8') as o:
        o.writelines('%s\n' % i for i in sorted(ingredients))