#%%
# Populate MySQL, MongoDB, and Neo4j from final_road_network.csv
import csv
import mysql.connector
from pymongo import MongoClient
from neo4j import GraphDatabase
from tqdm import tqdm
#%%
connection = mysql.connector.connect(
    host="localhost",
    user="root"
)
print("\n\nConnected to MySQL successfully!")
cursor = connection.cursor()
# Create the database if it does not already exist
create_database_query = "CREATE DATABASE IF NOT EXISTS final_road_network"
cursor.execute(create_database_query)
print("Created 'final_road_network' database")
cursor.close()
connection.close()
#%%
connection = mysql.connector.connect(
    host="localhost",
    user="root",
    database="final_road_network",
)
print("\n\nConnected to MySQL successfully!")
connection.autocommit = False  # commit once after the bulk insert for better performance
cursor = connection.cursor()
# Create the roads table if it does not already exist
create_table_query = """
CREATE TABLE IF NOT EXISTS roads (
    fromCity VARCHAR(255),
    toCity VARCHAR(255),
    distance INT
)
"""
cursor.execute(create_table_query)
print("Using 'roads' table in 'final_road_network' database")
# Read data from the CSV and insert into MySQL in batches
with open('final_road_network.csv', 'r') as file:
    csv_reader = csv.reader(file)
    next(csv_reader)  # skip header row
    batch_size = 25000
    batch_data = []
    for row in csv_reader:
        batch_data.append(row)
        if len(batch_data) >= batch_size:
            cursor.executemany("INSERT INTO roads (fromCity, toCity, distance) VALUES (%s, %s, %s)", batch_data)
            batch_data = []
    if batch_data:
        cursor.executemany("INSERT INTO roads (fromCity, toCity, distance) VALUES (%s, %s, %s)", batch_data)
connection.commit()
print("Populated 250,000 rows into MySQL successfully!")
cursor.close()
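#%%
# Optional sanity check (a minimal sketch): count the rows that were just
# inserted. Assumes the connection from the previous cell is still open.
cursor = connection.cursor()
cursor.execute("SELECT COUNT(*) FROM roads")
print(f"MySQL row count: {cursor.fetchone()[0]}")
cursor.close()
connection.close()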
#%%
# Connect to MongoDB
client = MongoClient('mongodb://localhost:27017/')
print("\n\nConnected to MongoDB successfully!")
db = client['final_road_network']
collection = db['roads']
# Read data from the CSV and insert into MongoDB
with open('final_road_network.csv', 'r') as file:
    reader = csv.DictReader(file)
    roads = []
    for row in reader:
        road = {
            'fromCity': row['FromCity'],
            'toCity': row['ToCity'],
            'distance': int(row['Distance'])
        }
        roads.append(road)
    collection.insert_many(roads)
print("Populated 250,000 documents into MongoDB successfully!")
#%%
# Connect to Neo4j
uri = "neo4j://localhost:7687"
driver = GraphDatabase.driver(uri, auth=("neo4j", "12345678"))
driver.verify_connectivity()
print("\n\nConnected to Neo4j successfully!")
def populate_neo4j(tx, roads):
    # MERGE keeps cities and roads unique, so re-running the script is idempotent
    tx.run("UNWIND $roads AS road "
           "MERGE (from:City {name: road.fromCity}) "
           "MERGE (to:City {name: road.toCity}) "
           "MERGE (from)-[:ROAD {distance: road.distance}]->(to)",
           roads=roads)
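# Optional: an index on :City(name) speeds up the MERGE lookups below.
# A minimal sketch; the CREATE INDEX ... IF NOT EXISTS syntax assumes Neo4j 4.x or later.
with driver.session() as session:
    session.run("CREATE INDEX city_name IF NOT EXISTS FOR (c:City) ON (c.name)")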
# Read data from the CSV and populate Neo4j in batches
batch_size = 10000
roads = []
with open('final_road_network.csv', 'r') as file:
    reader = csv.DictReader(file)
    with driver.session() as session:
        for row in tqdm(reader):
            road = {
                'fromCity': row['FromCity'],
                'toCity': row['ToCity'],
                'distance': int(row['Distance'])
            }
            roads.append(road)
            if len(roads) >= batch_size:
                session.execute_write(populate_neo4j, roads)
                roads = []
        if roads:
            session.execute_write(populate_neo4j, roads)
driver.close()
print("Populated 500 nodes and 250,000 relationships into Neo4j successfully!")
# %%
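# Optional sanity check (a minimal sketch): re-open a driver and count nodes
# and relationships. Uses the same connection credentials assumed above.
driver = GraphDatabase.driver(uri, auth=("neo4j", "12345678"))
with driver.session() as session:
    nodes = session.run("MATCH (c:City) RETURN count(c) AS n").single()["n"]
    rels = session.run("MATCH ()-[r:ROAD]->() RETURN count(r) AS n").single()["n"]
    print(f"Neo4j: {nodes} nodes, {rels} relationships")
driver.close()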