forked from LuoUndergradXJTU/TwiBot-22
-
Notifications
You must be signed in to change notification settings - Fork 0
/
dataload3.py
86 lines (76 loc) · 2.83 KB
/
dataload3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import argparse
import os
import os.path as osp
import time
import numpy as np
import torch
import torch.nn.functional as F
from sklearn.metrics import f1_score
import pandas
import json
import torch_geometric.transforms as T
from torch_geometric.nn import ChebConv, GCNConv # noqa
from tqdm import tqdm
# Command-line interface: one optional boolean flag, parsed as soon as the
# script starts. The flag is stored in `args` but nothing in the visible
# script reads it afterwards.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--use_gdc',
    action='store_true',
    help='Use GDC preprocessing.',
)
args = parser.parse_args()

# Dataset directories, each resolved against the shared datasets root.
path = '../../datasets'
dataset1 = 'Twibot-20'
dataset2 = 'cresci-2015'
dataset3 = 'cresci-2017'
path1 = osp.join(path, dataset1)
path2 = osp.join(path, dataset2)
path3 = osp.join(path, dataset3)

# Every input below is read from path3 (the cresci-2017 directory); path1 and
# path2 are computed but not read from here.
with open(osp.join(path3, 'node.json'), mode='r', encoding='UTF-8') as f:
    node1 = json.load(f)
edge1 = pandas.read_csv(osp.join(path3, 'edge.csv'))
label1 = pandas.read_csv(osp.join(path3, 'label.csv'))
split1 = pandas.read_csv(osp.join(path3, 'split.csv'))
# Containers and counters shared by the rest of the script.
source_node_index, target_node_index = [], []
age = []
account_length_name = []
userid = []
id_map = {}
i = 0  # number of user nodes seen so far; also the next dense index to hand out
v = 0  # secondary counter, starts at zero

# Single pass over the node records: keep only entries whose id begins with
# 'u', record the length of their display name, and assign each one a dense
# index in order of appearance.
for node in tqdm(node1):
    node_id = node['id']
    if node_id[0] != 'u':
        continue
    # NOTE(review): str() is applied before len(), so a name of None would be
    # measured as the 4-character string 'None' -- confirm that is intended.
    account_length_name.append(len(str(node['name'])))
    userid.append(str(node_id))
    id_map[node_id] = i
    i += 1
# Per-user accumulators, indexed by the dense position stored in id_map
# (i is the number of user nodes counted while id_map was built).
statuses_count = np.zeros(i)
followers_count = np.zeros(i)
friends_count = np.zeros(i)

# Walk the three edge columns in lockstep so that each relation is always
# paired with the source/target of its own row. A hand-maintained row counter
# that is only advanced inside the known branches would stall on the first
# row carrying any other relation and every later edge would be ignored;
# zipping the columns makes such rows a simple no-op instead.
#
#   'post'   -> one more status for the source user (no graph edge)
#   'follow' -> one more follower for the target user, plus a graph edge
#   'friend' -> one more friend for the source user, plus a graph edge
#
# Any id looked up here that is missing from id_map raises KeyError.
# total= is passed because tqdm cannot infer a length from a zip object.
for node, relation, target in tqdm(
    zip(edge1['source_id'], edge1['relation'], edge1['target_id']),
    total=len(edge1),
):
    if relation == 'post':
        statuses_count[id_map[node]] += 1
    elif relation == 'follow':
        followers_count[id_map[target]] += 1
        source_node_index.append(id_map[node])
        target_node_index.append(id_map[target])
    elif relation == 'friend':
        friends_count[id_map[node]] += 1
        source_node_index.append(id_map[node])
        target_node_index.append(id_map[target])
# Feature matrix: one row per user, columns in the order
# [statuses, name length, followers, friends].
feature_rows = [
    statuses_count,
    np.array(account_length_name),
    followers_count,
    friends_count,
]
stacked_features = np.vstack(feature_rows)
X_matrix = stacked_features.T

# Edge list as a 2 x E array: first row holds source indices, second row
# holds target indices.
edge_index = np.vstack([np.array(source_node_index), np.array(target_node_index)])

# Persist both arrays as CSV files (no index column) in the working directory.
X = pandas.DataFrame(X_matrix)
X.to_csv('X_matrix3.csv', index=False)
edge_Index = pandas.DataFrame(edge_index)
edge_Index.to_csv('edge_index3.csv', index=False)