Skip to content

Commit

Permalink
fix: Fix usage of missing type in univariate analysis
Browse files: browse the repository at this point in the history
  • Loading branch information
mbelak-dtml committed Aug 9, 2023
1 parent 53e46fe commit 045fef2
Showing 1 changed file with 6 additions and 12 deletions.
18 changes: 6 additions & 12 deletions edvart/report_sections/univariate_analysis.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -268,10 +268,6 @@ def univariate_analysis(df: pd.DataFrame, columns: Optional[List[str]] = None) -
df = df[columns]

for col in df.columns:
if df[col].isnull().all():
display(Markdown(f"## *{col} - NULL*"))
display(Markdown("The column contains only null values."))
continue
data_type_name = infer_data_type(df[col], string_representation=True)
data_type = infer_data_type(df[col])
display(Markdown(f"## *{col} - {data_type_name}*"))
Expand All @@ -280,6 +276,8 @@ def univariate_analysis(df: pd.DataFrame, columns: Optional[List[str]] = None) -
UnivariateAnalysis.bar_plot(df[col])
elif data_type == DataType.UNIQUE:
display(Markdown("Each value in the column is unique."))
elif data_type == DataType.MISSING:
display(Markdown("The column contains only missing values."))
else:
UnivariateAnalysis.numeric_statistics(df[col])
UnivariateAnalysis.histogram(df[col])
Expand Down Expand Up @@ -377,10 +375,6 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None:
cells.append(code_cell)
else:
for col in self.df.columns:
if self.df[col].isnull().all():
display(Markdown(f"## *{col} - NULL*"))
display(Markdown("The column contains only null values."))
continue
data_type_name = infer_data_type(self.df[col], string_representation=True)
data_type = infer_data_type(self.df[col])
column_header = nbfv4.new_markdown_cell(f"## *{col} - {data_type_name}*")
Expand All @@ -395,6 +389,8 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None:
)
elif data_type == DataType.UNIQUE:
cell = nbfv4.new_markdown_cell("Each value in the column is unique.")
elif data_type == DataType.MISSING:
cell = nbfv4.new_markdown_cell("The column contains only missing values.")
else:
if self.verbosity == 1:
cell = nbfv4.new_code_cell(
Expand Down Expand Up @@ -431,10 +427,6 @@ def show(self, df: pd.DataFrame) -> None:

display(Markdown(self.get_title(section_level=1)))
for col in df.columns:
if df[col].isnull().all():
display(Markdown(f"## *{col} - NULL*"))
display(Markdown("The column contains only null values."))
continue
data_type_name = infer_data_type(df[col], string_representation=True)
data_type = infer_data_type(df[col])
display(Markdown(f"## *{col} - {data_type_name}*"))
Expand All @@ -443,6 +435,8 @@ def show(self, df: pd.DataFrame) -> None:
UnivariateAnalysis.bar_plot(df[col])
elif data_type == DataType.UNIQUE:
display(Markdown("Each value in the column is unique."))
elif data_type == DataType.MISSING:
display(Markdown("The column contains only missing values."))
else:
UnivariateAnalysis.numeric_statistics(df[col])
UnivariateAnalysis.histogram(df[col])
Expand Down

0 comments on commit 045fef2

Please sign in to comment.