-
Notifications
You must be signed in to change notification settings - Fork 0
/
datacleaning.py
72 lines (48 loc) · 1.53 KB
/
datacleaning.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# -*- coding: utf-8 -*-
"""DataCleaning.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1PsQP75ZWwhK8FM-L4XbnucURbVHVhS8A
"""
import pandas as pd
import numpy as np
# --- Missing-value exploration on the COVID-19 treatments dataset ---------
# The path below is the location the notebook read the uploaded CSV from.
df = pd.read_csv('/content/COVID-19_Treatments_20240818.csv')

# DataFrame.info() writes its summary to stdout itself and returns None,
# so it is called directly; wrapping it in print() only adds a stray "None".
df.info()

# Cell-by-cell mask: True wherever a value is missing.
a = df.isna()
print(a)

# Per-column summary: True for every column holding at least one missing value.
a = df.isna().any()
print(a)

# Rows whose "Home Delivery URL" is missing. isna() already yields booleans,
# so no comparison against True is needed.
print(df[df["Home Delivery URL"].isna()])

# Strategy 1: drop every row that contains a missing value.
# dropna() returns a new frame by default; df itself is left untouched.
df1 = df.dropna()
print(df1)
print()
check = df1.isna().any()
print(check)

# Strategy 2: replace every missing value with the constant 2.
# fillna() also returns a new frame by default; pass inplace=True instead
# if the original df should be modified.
check = df.isna().any()
print(check)
print()
df1 = df.fillna(2)
print(df1)
print()
check = df1.isna().any()
print(check)

# Strategy 3: fill a single column with a placeholder value.
# df1 is the filled "Public Website" column (a Series); df is not modified.
df1 = df["Public Website"].fillna("Www.google.com")
print(df1)
print()

# Show the rows that received the placeholder. The lookup has to run on a
# frame that carries the filled column: df still holds the missing values,
# so filtering df directly would never match the rows that were just filled.
filled_df = df.assign(**{"Public Website": df1})
print(filled_df[filled_df["Public Website"] == "Www.google.com"])
# --- Correlation analysis on the sales dataset ------------------------------
# DataFrame.corr() works on numeric data; string or other non-numeric columns
# can make it raise, so only the numeric columns are kept before computing it.
# NOTE: df1 is rebound here to the sales data (it previously held results
# derived from the COVID-19 dataset).
df1 = pd.read_csv("/content/sales.csv")
numeric_df = df1.select_dtypes(include=[np.number])
corr_matrix = numeric_df.corr()
print(corr_matrix)

# Reading the correlation coefficient r:
#   r =  1: perfect positive correlation - as one variable increases, the
#           other increases in a perfectly linear fashion.
#   r = -1: perfect negative correlation - as one variable increases, the
#           other decreases in a perfectly linear fashion.
#   r =  0: no linear correlation - there is no linear relationship between
#           the two variables.
import seaborn as sns

# Reuse the matrix computed above instead of recomputing numeric_df.corr().
sns.heatmap(corr_matrix, annot=True)