-
Notifications
You must be signed in to change notification settings - Fork 1
/
build_graph.py
133 lines (112 loc) · 3.74 KB
/
build_graph.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import os
import shutil
from pathlib import Path
import kuzu
from codetiming import Timer
from kuzu import Connection
# ``data`` sits one level above the directory that contains this script.
DATA_PATH = Path(__file__).resolve().parents[1].joinpath("data")
# Input directories for the node files and the edge files, respectively.
NODES_PATH = DATA_PATH.joinpath("output", "nodes")
EDGES_PATH = DATA_PATH.joinpath("output", "edges")
def create_person_node_table(conn: Connection) -> None:
    """Define the ``Person`` node table through ``conn``.

    Columns: ``id`` (INT64, primary key), ``name``, ``gender`` (STRING),
    ``birthday`` (DATE), ``age`` (INT64) and ``isMarried`` (BOOLEAN).
    """
    # The statement is written flush-left on purpose: any leading whitespace
    # inside the triple quotes would become part of the string that is sent.
    person_ddl = """
CREATE NODE TABLE
Person(
id INT64,
name STRING,
gender STRING,
birthday DATE,
age INT64,
isMarried BOOLEAN,
PRIMARY KEY (id)
)
"""
    conn.execute(person_ddl)
def create_city_node_table(conn: Connection) -> None:
    """Define the ``City`` node table through ``conn``.

    Columns: ``id`` (INT64, primary key), ``city``, ``state``, ``country``
    (STRING), ``lat``, ``lon`` (DOUBLE) and ``population`` (INT32).
    """
    # The statement is written flush-left on purpose: any leading whitespace
    # inside the triple quotes would become part of the string that is sent.
    city_ddl = """
CREATE NODE TABLE
City(
id INT64,
city STRING,
state STRING,
country STRING,
lat DOUBLE,
lon DOUBLE,
population INT32,
PRIMARY KEY (id)
)
"""
    conn.execute(city_ddl)
def create_state_node_table(conn: Connection) -> None:
    """Define the ``State`` node table through ``conn``.

    Columns: ``id`` (INT64, primary key), ``state`` and ``country`` (STRING).
    """
    # The statement is written flush-left on purpose: any leading whitespace
    # inside the triple quotes would become part of the string that is sent.
    state_ddl = """
CREATE NODE TABLE
State(
id INT64,
state STRING,
country STRING,
PRIMARY KEY (id)
)
"""
    conn.execute(state_ddl)
def create_country_node_table(conn: Connection) -> None:
    """Define the ``Country`` node table through ``conn``.

    Columns: ``id`` (INT64, primary key) and ``country`` (STRING).
    """
    # The statement is written flush-left on purpose: any leading whitespace
    # inside the triple quotes would become part of the string that is sent.
    country_ddl = """
CREATE NODE TABLE
Country(
id INT64,
country STRING,
PRIMARY KEY (id)
)
"""
    conn.execute(country_ddl)
def create_interest_node_table(conn: Connection) -> None:
    """Define the ``Interest`` node table through ``conn``.

    Columns: ``id`` (INT64, primary key) and ``interest`` (STRING).
    """
    # The statement is written flush-left on purpose: any leading whitespace
    # inside the triple quotes would become part of the string that is sent.
    interest_ddl = """
CREATE NODE TABLE
Interest(
id INT64,
interest STRING,
PRIMARY KEY (id)
)
"""
    conn.execute(interest_ddl)
def create_edge_tables(conn: Connection) -> None:
    """Define the five relationship tables through ``conn``.

    The tables are created in this order: ``Follows`` (Person -> Person),
    ``LivesIn`` (Person -> City), ``HasInterest`` (Person -> Interest),
    ``CityIn`` (City -> State) and ``StateIn`` (State -> Country).
    """
    rel_table_statements = (
        "CREATE REL TABLE Follows(FROM Person TO Person)",
        "CREATE REL TABLE LivesIn(FROM Person TO City)",
        "CREATE REL TABLE HasInterest(FROM Person TO Interest)",
        "CREATE REL TABLE CityIn(FROM City TO State)",
        "CREATE REL TABLE StateIn(FROM State TO Country)",
    )
    for statement in rel_table_statements:
        conn.execute(statement)
def main(conn: Connection) -> None:
    """Create the graph schema and bulk-load nodes, then edges, via ``conn``.

    Node tables are declared and filled from the Parquet files under
    ``NODES_PATH``; relationship tables are then declared and filled from the
    Parquet files under ``EDGES_PATH``. Each phase runs inside its own
    ``Timer`` configured with an elapsed-time message, and a confirmation
    line is printed once both phases have finished.
    """
    node_table_builders = (
        create_person_node_table,
        create_city_node_table,
        create_state_node_table,
        create_country_node_table,
        create_interest_node_table,
    )

    with Timer(name="nodes", text="Nodes loaded in {:.4f}s"):
        # All node tables are declared before any node data is copied in.
        for build_table in node_table_builders:
            build_table(conn)

        node_copy_statements = (
            f"COPY Person FROM '{NODES_PATH}/persons.parquet';",
            f"COPY City FROM '{NODES_PATH}/cities.parquet';",
            f"COPY State FROM '{NODES_PATH}/states.parquet';",
            f"COPY Country FROM '{NODES_PATH}/countries.parquet';",
            f"COPY Interest FROM '{NODES_PATH}/interests.parquet';",
        )
        for statement in node_copy_statements:
            conn.execute(statement)

    with Timer(name="edges", text="Edges loaded in {:.4f}s"):
        # Relationship tables reference the node tables created above.
        create_edge_tables(conn)

        edge_copy_statements = (
            f"COPY Follows FROM '{EDGES_PATH}/follows.parquet';",
            f"COPY LivesIn FROM '{EDGES_PATH}/lives_in.parquet';",
            f"COPY HasInterest FROM '{EDGES_PATH}/interested_in.parquet';",
            f"COPY CityIn FROM '{EDGES_PATH}/city_in.parquet';",
            f"COPY StateIn FROM '{EDGES_PATH}/state_in.parquet';",
        )
        for statement in edge_copy_statements:
            conn.execute(statement)

    print("Successfully loaded nodes and edges into KùzuDB!")
if __name__ == "__main__":
    DB_NAME = "social_network"
    # Start from an empty database on every run until MERGE FROM is available
    # in kuzu. The on-disk database may be a directory or (presumably, with
    # newer kuzu releases -- confirm against the installed version) a single
    # file; shutil.rmtree raises NotADirectoryError on a regular file, so each
    # case gets the matching removal call.
    if os.path.isdir(DB_NAME):
        shutil.rmtree(DB_NAME)
    elif os.path.exists(DB_NAME):
        os.remove(DB_NAME)
    # Create the database relative to the current working directory and load
    # the graph through a single connection.
    db = kuzu.Database(f"./{DB_NAME}")
    CONNECTION = kuzu.Connection(db)
    main(CONNECTION)