Skip to content

Commit

Permalink
fix: Fix usage of missing data type in univariate analysis (#71)
Browse files (browse the repository at this point in the history)
  • Loading branch information
mbelak-dtml committed Sep 6, 2023
1 parent 7dafe1e commit 7e9cd46
Showing 1 changed file with 6 additions and 12 deletions.
18 changes: 6 additions & 12 deletions edvart/report_sections/univariate_analysis.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -263,10 +263,6 @@ def univariate_analysis(df: pd.DataFrame, columns: Optional[List[str]] = None) -
df = df[columns]

for col in df.columns:
if df[col].isnull().all():
display(Markdown(f"## *{col} - NULL*"))
display(Markdown("The column contains only null values."))
continue
data_type = infer_data_type(df[col])
data_type_name = str(data_type)
display(Markdown(f"## *{col} - {data_type_name}*"))
Expand All @@ -275,6 +271,8 @@ def univariate_analysis(df: pd.DataFrame, columns: Optional[List[str]] = None) -
UnivariateAnalysis.bar_plot(df[col])
elif data_type == DataType.UNIQUE:
display(Markdown("Each value in the column is unique."))
elif data_type == DataType.MISSING:
display(Markdown("The column contains only missing values."))
else:
UnivariateAnalysis.numeric_statistics(df[col])
UnivariateAnalysis.histogram(df[col])
Expand Down Expand Up @@ -371,10 +369,6 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None:
cells.append(code_cell)
else:
for col in self.df.columns:
if self.df[col].isnull().all():
display(Markdown(f"## *{col} - NULL*"))
display(Markdown("The column contains only null values."))
continue
data_type = infer_data_type(self.df[col])
data_type_name = str(data_type)
column_header = nbfv4.new_markdown_cell(f"## *{col} - {data_type_name}*")
Expand All @@ -389,6 +383,8 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None:
)
elif data_type == DataType.UNIQUE:
cell = nbfv4.new_markdown_cell("Each value in the column is unique.")
elif data_type == DataType.MISSING:
cell = nbfv4.new_markdown_cell("The column contains only missing values.")
else:
if self.verbosity == Verbosity.MEDIUM:
cell = nbfv4.new_code_cell(
Expand Down Expand Up @@ -425,10 +421,6 @@ def show(self, df: pd.DataFrame) -> None:

display(Markdown(self.get_title(section_level=1)))
for col in df.columns:
if df[col].isnull().all():
display(Markdown(f"## *{col} - NULL*"))
display(Markdown("The column contains only null values."))
continue
data_type = infer_data_type(df[col])
data_type_name = str(data_type)
display(Markdown(f"## *{col} - {data_type_name}*"))
Expand All @@ -437,6 +429,8 @@ def show(self, df: pd.DataFrame) -> None:
UnivariateAnalysis.bar_plot(df[col])
elif data_type == DataType.UNIQUE:
display(Markdown("Each value in the column is unique."))
elif data_type == DataType.MISSING:
display(Markdown("The column contains only missing values."))
else:
UnivariateAnalysis.numeric_statistics(df[col])
UnivariateAnalysis.histogram(df[col])
Expand Down

0 comments on commit 7e9cd46

Please sign in to comment.