-
Notifications
You must be signed in to change notification settings - Fork 0
/
dataset_generator.py
50 lines (45 loc) · 1.19 KB
/
dataset_generator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import csv
import os
import random

from faker import Faker
# Shared Faker instance configured with the Italian and US English locales.
fake = Faker(locale=["it_IT", "en_US"])

# Pool of disease labels; order is kept exactly as originally declared.
diseases = [
    "Cancer", "Diabetes", "Alzheimer's Disease", "Parkinson's Disease",
    "HIV/AIDS", "Tuberculosis", "Malaria", "Ebola",
    "Zika Virus", "Influenza", "Common Cold", "Coronavirus",
    "Asthma", "Hepatitis", "Dengue Fever", "Cholera",
    "Lyme Disease", "Rabies", "Measles", "Chickenpox",
]
# Write a small synthetic patient dataset to data/dataset.csv.
#
# Each row has a sequential id, a Faker-generated name, a random age in
# [20, 40], a random sex ("M"/"F"), a random five-digit zip code and a disease
# picked uniformly from `diseases`.
output_path = "data/dataset.csv"

# Create the target directory up front so the script does not fail with
# FileNotFoundError when "data/" is missing.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# newline="" lets the csv module control line endings; the explicit UTF-8
# encoding keeps accented names (e.g. from the it_IT locale) from depending on
# the platform's default encoding.
with open(output_path, "w", newline="", encoding="utf-8") as csvfile:
    fieldnames = ["id", "name", "age", "sex", "zip_code", "disease"]
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    # Generate and write data for 10 entries
    for i in range(10):
        writer.writerow(
            {
                "id": i,
                "name": fake.name(),
                "age": str(random.randint(20, 40)),
                "sex": random.choice(["M", "F"]),
                "zip_code": str(random.randint(10000, 99999)),
                "disease": random.choice(diseases),
            }
        )