# datacleaning.py

# -*- coding: utf-8 -*-
"""DataCleaning.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1PsQP75ZWwhK8FM-L4XbnucURbVHVhS8A
"""

import pandas as pd
import numpy as np

# Load the COVID-19 treatments dataset. The path points into /content, so this
# presumably expects the CSV to be present in the Colab session -- confirm
# before running elsewhere.
df = pd.read_csv('/content/COVID-19_Treatments_20240818.csv')

# DataFrame.info() prints its summary itself and returns None, so call it
# directly; wrapping the return value in print() only adds a stray "None" line.
df.info()

# Element-wise mask: True wherever a cell is missing.
a = df.isna()
print(a)

# Per-column flag: True if the column holds at least one missing value.
a = df.isna().any()
print(a)

# Rows whose "Home Delivery URL" is missing. isna() already yields a boolean
# mask, so it is used as-is instead of being compared with True.
print(df[df["Home Delivery URL"].isna()])

# Build a new frame without any row that has a missing value; dropna returns
# a copy by default, so df keeps all of its rows.
df1 = df.dropna()
print(df1, end="\n\n")

# Per-column check that the cleaned copy no longer contains missing values.
check = df1.isna().any()
print(check)

# Replace missing values with 2. By default fillna returns a new object;
# pass inplace=True if you want to modify the main df itself.

# Per-column flag of missing values in the original frame, before filling.
check = df.isna().any()
print(check, end="\n\n")

# Fill every missing cell with the placeholder value 2 in a new frame; df is
# left unchanged because the result is assigned to df1.
df1 = df.fillna(value=2)
print(df1, end="\n\n")

# Re-run the per-column check on the filled frame.
check = df1.isna().any()
print(check)

# Replace missing "Public Website" entries with a placeholder URL. fillna
# returns a new Series here; df itself is not modified.
df1 = df["Public Website"].fillna("Www.google.com")
print(df1)
print()

# Show the rows that received the placeholder. The filter has to run on a
# frame that carries the filled column: filtering df directly compares the
# unfilled column and therefore never matches the rows that were just filled.
filled_df = df.assign(**{"Public Website": df1})
print(filled_df[filled_df["Public Website"] == "Www.google.com"])

# If your DataFrame (df) contains strings or other non-numeric data,
# pandas cannot compute the correlation, resulting in an error.

# Load the sales dataset (df1 is rebound to it from here on).
df1 = pd.read_csv("/content/sales.csv")

# Keep only the numeric columns, since correlation is computed on numbers.
numeric_df = df1.select_dtypes(include="number")

# Pairwise correlation between the numeric columns (pandas' default method).
corr_matrix = numeric_df.corr()
print(corr_matrix)

# r = 1: Perfect positive correlation.
# As one variable increases, the other variable also increases in a perfectly linear fashion.

# r = -1: Perfect negative correlation.
# As one variable increases, the other variable decreases in a perfectly linear fashion.

# r = 0: No linear correlation.
# There is no linear relationship between the two variables.

import seaborn as sns

# Draw the correlation matrix computed above as a heatmap, writing each
# coefficient into its cell (annot=True).
sns.heatmap(data=corr_matrix, annot=True)