From 555b2cec04132975c8a80d94ac75b582b04e7f63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Luk=C3=A1ny?= Date: Mon, 7 Aug 2023 17:36:03 +0200 Subject: [PATCH 1/3] refactor: use Enum for verbosity --- api-example.ipynb | 3 +- docs/advanced.rst | 15 +- edvart/__init__.py | 1 + edvart/report.py | 271 +++++++----------- edvart/report_sections/bivariate_analysis.py | 62 ++-- edvart/report_sections/dataset_overview.py | 110 ++++--- edvart/report_sections/group_analysis.py | 30 +- .../report_sections/multivariate_analysis.py | 70 +++-- edvart/report_sections/section_base.py | 41 ++- edvart/report_sections/table_of_contents.py | 4 +- .../timeseries_analysis/autocorrelation.py | 14 +- .../timeseries_analysis/boxplots_over_time.py | 10 +- .../timeseries_analysis/fourier_transform.py | 10 +- .../timeseries_analysis/rolling_statistics.py | 10 +- .../seasonal_decomposition.py | 10 +- .../timeseries_analysis/short_time_ft.py | 10 +- .../timeseries_analysis/stationarity_tests.py | 8 +- .../timeseries_analysis/time_analysis_plot.py | 10 +- .../timeseries_analysis.py | 62 ++-- edvart/report_sections/umap.py | 10 +- edvart/report_sections/univariate_analysis.py | 36 +-- tests/test_bivariate_analysis.py | 98 ++++--- tests/test_group_analysis.py | 15 +- tests/test_multivariate_analysis.py | 99 ++++--- tests/test_overview_section.py | 95 +++--- tests/test_report.py | 17 +- tests/test_timeseries_analysis.py | 97 ++++--- tests/test_univariate_analysis_section.py | 17 +- 28 files changed, 602 insertions(+), 633 deletions(-) diff --git a/api-example.ipynb b/api-example.ipynb index 8958af3..cb99d8d 100644 --- a/api-example.ipynb +++ b/api-example.ipynb @@ -38,6 +38,7 @@ ], "source": [ "import edvart\n", + "from edvart import Verbosity\n", "\n", "import plotly.offline as py\n", "py.init_notebook_mode()" @@ -223,7 +224,7 @@ "source": [ "report = edvart.DefaultReport(\n", " dataset,\n", - " verbosity=0,\n", + " verbosity=Verbosity.LOW,\n", " columns_overview=['Name', 'Survived'],\n", " 
columns_univariate_analysis=['Name', 'Age', 'Pclass'],\n", " groupby='Survived',\n", diff --git a/docs/advanced.rst b/docs/advanced.rst index 1a351bc..3d51b70 100644 --- a/docs/advanced.rst +++ b/docs/advanced.rst @@ -163,11 +163,11 @@ The verbosity helps us to generate a code with a specific level of detail. edvart supports three levels of verbosity: -- verbosity 0 +- LOW - High level functions for whole sections are generated. User can modify the markdown description. -- verbosity 1 +- MEDIUM - edvart functions are generated. User can modify parameters of these functions. -- verbosity 2 +- HIGH - Raw code is generated. User can do very advanced modification such as changing visualisations style. The verbosity can be set to whole report or to each section separately. @@ -176,17 +176,18 @@ Examples: .. code-block:: python - # Set default verbosity for all sections to 1 + # Set default verbosity for all sections to Verbosity.MEDIUM import edvart + from edvart import Verbosity df = edvart.example_datasets.dataset_titanic() - edvart.DefaultReport(df, verbosity=1).export_notebook("test-export.ipynb") + edvart.DefaultReport(df, verbosity=Verbosity.MEDIUM).export_notebook("test-export.ipynb") .. 
code-block:: python - # Set default verbosity to 1 but use verbosity 2 for univariate analysis + # Set default verbosity to Verbosity.MEDIUM but use Verbosity.HIGH for univariate analysis import edvart df = edvart.example_datasets.dataset_titanic() - edvart.DefaultReport(df, verbosity=1, verbosity_univariate_analysis=2).export_notebook("test-export.ipynb") + edvart.DefaultReport(df, verbosity=edvart.Verbosity.MEDIUM, verbosity_univariate_analysis=edvart.Verbosity.HIGH).export_notebook("test-export.ipynb") diff --git a/edvart/__init__.py b/edvart/__init__.py index 38a411a..2f15273 100644 --- a/edvart/__init__.py +++ b/edvart/__init__.py @@ -6,6 +6,7 @@ from edvart import example_datasets from edvart.report import DefaultReport, DefaultTimeseriesReport, Report, TimeseriesReport from edvart.report_sections.dataset_overview import Overview +from edvart.report_sections.section_base import Verbosity logging.basicConfig(level=logging.INFO) diff --git a/edvart/report.py b/edvart/report.py index a058560..59676a6 100755 --- a/edvart/report.py +++ b/edvart/report.py @@ -18,6 +18,7 @@ from edvart.report_sections.dataset_overview import Overview from edvart.report_sections.group_analysis import GroupAnalysis from edvart.report_sections.multivariate_analysis import MultivariateAnalysis +from edvart.report_sections.section_base import Verbosity from edvart.report_sections.table_of_contents import TableOfContents from edvart.report_sections.timeseries_analysis import TimeseriesAnalysis from edvart.report_sections.univariate_analysis import UnivariateAnalysis @@ -32,28 +33,19 @@ class ReportBase(ABC): ---------- dataframe : pd.DataFrame Data from which to generate the report. - verbosity : int (default = 0) - The default verbosity for the exported code of the entire report, has to be one of - [0, 1, 2], by default 0. - - Raises - ------ - ValueError - If verbosity is not one of [0, 1, 2]. 
+ verbosity : Verbosity (default = Verbosity.LOW) + The default verbosity for the exported code of the entire report, by default Verbosity.LOW. """ def __init__( self, dataframe: pd.DataFrame, - verbosity: int = 0, + verbosity: Verbosity = Verbosity.LOW, ): self._class_logger = logging.getLogger(__name__).getChild(self.__class__.__name__) self.df = dataframe self.sections = [] - # Check for global verbosity validity - if verbosity not in [0, 1, 2]: - raise ValueError(f"Verbosity has to be one of [0, 1, 2], not {verbosity}.") - self.verbosity = verbosity + self.verbosity = Verbosity(verbosity) def show(self) -> None: """Renders the report in the calling notebook.""" @@ -331,14 +323,14 @@ def add_overview( use_columns: Optional[List[str]] = None, omit_columns: Optional[List[str]] = None, subsections: Optional[List[Overview.OverviewSubsection]] = None, - verbosity: Optional[int] = None, - verbosity_quick_info: Optional[int] = None, - verbosity_data_types: Optional[int] = None, - verbosity_data_preview: Optional[int] = None, - verbosity_missing_values: Optional[int] = None, - verbosity_rows_with_missing_value: Optional[int] = None, - verbosity_constant_occurrence: Optional[int] = None, - verbosity_duplicate_rows: Optional[int] = None, + verbosity: Optional[Verbosity] = None, + verbosity_quick_info: Optional[Verbosity] = None, + verbosity_data_types: Optional[Verbosity] = None, + verbosity_data_preview: Optional[Verbosity] = None, + verbosity_missing_values: Optional[Verbosity] = None, + verbosity_rows_with_missing_value: Optional[Verbosity] = None, + verbosity_constant_occurrence: Optional[Verbosity] = None, + verbosity_duplicate_rows: Optional[Verbosity] = None, ) -> "ReportBase": """ Adds a dataset overview section to the report. @@ -354,31 +346,22 @@ def add_overview( subsections : List[Overview.OverviewSubsection], optional List of sub-sections to include into the Overview section. If None, all subsections are added. 
- verbosity : int, optional - Generated code verbosity global to the Overview sections, must be on of [0, 1, 2]. - - 0 - A single cell which generates the overview section is exported. - 1 - Parameterizable function calls for each subsection of the overview section are - exported. - 2 - Similar to 1, but in addition function definitions are also exported. - + verbosity : Verbosity, optional + Generated code verbosity global to the Overview sections. If subsection verbosities are None, then they will be overridden by this parameter. - verbosity_quick_info : int, optional + verbosity_quick_info : Verbosity, optional Quick info sub-section code verbosity. - verbosity_data_types : int, optional + verbosity_data_types : Verbosity, optional Data types sub-section code verbosity. - verbosity_data_preview : int, optional + verbosity_data_preview : Verbosity, optional Data preview sub-section code verbosity. - verbosity_missing_values : int, optional + verbosity_missing_values : Verbosity, optional Missing values sub-section code verbosity. - verbosity_rows_with_missing_value : int, optional + verbosity_rows_with_missing_value : Verbosity, optional Rows with missing value sub-section code verbosity. - verbosity_constant_occurrence : int, optional + verbosity_constant_occurrence : Verbosity, optional Constant values occurrence sub-section code verbosity. - verbosity_duplicate_rows : int, optional + verbosity_duplicate_rows : Verbosity, optional Duplicate rows sub-section code verbosity. """ @@ -403,7 +386,7 @@ def add_univariate_analysis( self, use_columns: Optional[List[str]] = None, omit_columns: Optional[List[str]] = None, - verbosity: Optional[int] = None, + verbosity: Optional[Verbosity] = None, ) -> "ReportBase": """Adds univariate section to the report. @@ -415,19 +398,8 @@ def add_univariate_analysis( omit_columns : List[str], optional Columns to exclude from analysis. If None, use_columns dictates column selection. 
- verbosity : int - The verbosity of the code generated in the exported notebook, - must be one of [0, 1, 2]. - - 0 - A single function call generates the entire univariate analysis section. - 1 - Function calls to parameterizable functions are generated for each column separately - in separate cells. - 2 - Similar to 1, but in addition, function definitions are generated, column - data type inference and default statistics become customizable. - + verbosity : Verbosity + The verbosity of the code generated in the exported notebook. """ self.sections.append( UnivariateAnalysis( @@ -446,10 +418,10 @@ def add_bivariate_analysis( columns_y: Optional[List[str]] = None, columns_pairs: Optional[List[Tuple[str, str]]] = None, subsections: Optional[List[BivariateAnalysis.BivariateAnalysisSubsection]] = None, - verbosity: Optional[int] = None, - verbosity_correlations: Optional[int] = None, - verbosity_pairplot: Optional[int] = None, - verbosity_contingency_table: Optional[int] = None, + verbosity: Optional[Verbosity] = None, + verbosity_correlations: Optional[Verbosity] = None, + verbosity_pairplot: Optional[Verbosity] = None, + verbosity_contingency_table: Optional[Verbosity] = None, color_col: Optional[str] = None, ) -> "ReportBase": """Adds bivariate analysis section to the report. @@ -481,24 +453,13 @@ def add_bivariate_analysis( subsections : List[BivariateAnalysis.BivariateAnalysisSubsection], optional List of sub-sections to include into the BivariateAnalysis section. If None, all subsections are added. - verbosity : int, optional - The verbosity of the code generated in the exported notebook, - must be one of [0, 1, 2]. - - 0 - A single function call generates the entire bivariate analysis section. - 1 - Function calls to parameterizable functions are generated for each column separately - in separate cells. - 2 - Similar to 1, but in addition, function definitions are generated, column - data type inference and default statistics become customizable. 
- - verbosity_correlations : int, optional + verbosity : Verbosity, optional + The verbosity of the code generated in the exported notebook. + verbosity_correlations : Verbosity, optional Correlation plots subsection code verbosity. - verbosity_pairplot : int, optional + verbosity_pairplot : Verbosity, optional Pairplot subsection code verbosity. - verbosity_contingency_table : int, optional + verbosity_contingency_table : Verbosity, optional Contingency table code verbosity. color_col : str, optional Name of column according to use for coloring of the multivariate analysis subsections. @@ -526,11 +487,11 @@ def add_multivariate_analysis( use_columns: Optional[List[str]] = None, omit_columns: Optional[List[str]] = None, subsections: Optional[List[MultivariateAnalysis.MultivariateAnalysisSubsection]] = None, - verbosity: Optional[int] = None, - verbosity_pca: Optional[int] = None, - verbosity_umap: Optional[int] = None, - verbosity_parallel_coordinates: Optional[int] = None, - verbosity_parallel_categories: Optional[int] = None, + verbosity: Optional[Verbosity] = None, + verbosity_pca: Optional[Verbosity] = None, + verbosity_umap: Optional[Verbosity] = None, + verbosity_parallel_coordinates: Optional[Verbosity] = None, + verbosity_parallel_categories: Optional[Verbosity] = None, color_col: Optional[str] = None, ) -> "ReportBase": """Add multivariate analysis section to the report. @@ -546,27 +507,15 @@ def add_multivariate_analysis( subsections : List[MultivariateAnalysis.MultivariateAnalysisSubsection], optional List of sub-sections to include into the BivariateAnalysis section. If None, all subsections are added. - verbosity : int, optional - verbosity : int - The verbosity of the code generated in the exported notebook, - must be one of [0, 1, 2]. - - 0 - A single function call generates the entire univariate analysis section. - 1 - Function calls to parameterizable functions are generated for each column separately - in separate cells. 
- 2 - Similar to 1, but in addition, function definitions are generated, column - data type inference and default statistics become customizable. - - verbosity_pca : int, optional + verbosity : Verbosity, optional + The verbosity of the code generated in the exported notebook. + verbosity_pca : Verbosity, optional Principal component analysis subsection code verbosity. - verbosity_umap : int, optional + verbosity_umap : Verbosity, optional UMAP subsection code verbosity. - verbosity_parallel_coordinates: int, optional + verbosity_parallel_coordinates: Verbosity, optional Parallel coordinates subsection code verbosity. - verbosity_parallel_categories: int, optional + verbosity_parallel_categories: Verbosity, optional Parallel categories subsection code verbosity. color_col : str, optional Name of column to use for coloring of the multivariate analysis subsections. @@ -593,7 +542,7 @@ def add_group_analysis( groupby: Union[str, List[str]], use_columns: Optional[List[str]] = None, omit_columns: Optional[List[str]] = None, - verbosity: Optional[int] = None, + verbosity: Optional[Verbosity] = None, show_within_group_statistics: bool = True, show_group_missing_values: bool = True, show_group_distribution_plots: bool = True, @@ -610,19 +559,8 @@ def add_group_analysis( omit_columns : List[str], optional Columns to exclude from analysis. If None, use_columns dictates column selection. - verbosity : int, optional - verbosity : int - The verbosity of the code generated in the exported notebook, - must be one of [0, 1, 2]. - - 0 - A single function call generates the entire univariate analysis section. - 1 - Function calls to parameterizable functions are generated for each column separately - in separate cells. - 2 - Similar to 1, but in addition, function definitions are generated, column - data type inference and default statistics become customizable. + verbosity : Verbosity, optional + The verbosity of the code generated in the exported notebook. 
show_within_group_statistics : bool (default = True) Whether to show per-group statistics. show_group_missing_values : bool (default = True) @@ -669,12 +607,15 @@ class Report(ReportBase): ---------- dataframe : pd.DataFrame Data from which to generate the report. - verbosity : int (default = 0) - Verbosity of the exported code of the entire report, has to be one of - [0, 1, 2], by default 0. + verbosity : Verbosity (default = Verbosity.LOW) + Verbosity of the exported code of the entire report. """ - def __init__(self, dataframe: pd.DataFrame, verbosity: int = 0): + def __init__( + self, + dataframe: pd.DataFrame, + verbosity: Verbosity = Verbosity.LOW, + ): super().__init__(dataframe=dataframe, verbosity=verbosity) @@ -692,16 +633,15 @@ class DefaultReport(Report): ---------- dataframe : pd.DataFrame Data from which to generate the report. - verbosity : int (default = 0) - The default verbosity for the exported code of the entire report, has to be one of - [0, 1, 2], by default 0. - verbosity_overview : int, optional + verbosity : Verbosity (default = Verbosity.LOW) + The default verbosity for the exported code of the entire report. + verbosity_overview : Verbosity, optional Verbosity of the overview section - verbosity_univariate_analysis : int, optional + verbosity_univariate_analysis : Verbosity, optional Verbosity of the univariate analysis section - verbosity_bivariate_analysis : int, optiona + verbosity_bivariate_analysis : Verbosity, optional Verbosity of the bivariate analysis section. 
- verbosity_multivariate_analysis: int, optional + verbosity_multivariate_analysis: Verbosity, optional Verbosity of the multivariate analysis section columns_overview : List[str], optional Subset of columns to use in overview section @@ -725,12 +665,12 @@ class DefaultReport(Report): def __init__( self, dataframe: pd.DataFrame, - verbosity: int = 0, - verbosity_overview: Optional[int] = None, - verbosity_univariate_analysis: Optional[int] = None, - verbosity_bivariate_analysis: Optional[int] = None, - verbosity_multivariate_analysis: Optional[int] = None, - verbosity_group_analysis: Optional[int] = None, + verbosity: Verbosity = Verbosity.LOW, + verbosity_overview: Optional[Verbosity] = None, + verbosity_univariate_analysis: Optional[Verbosity] = None, + verbosity_bivariate_analysis: Optional[Verbosity] = None, + verbosity_multivariate_analysis: Optional[Verbosity] = None, + verbosity_group_analysis: Optional[Verbosity] = None, columns_overview: Optional[List[str]] = None, columns_univariate_analysis: Optional[List[str]] = None, columns_bivariate_analysis: Optional[List[str]] = None, @@ -794,7 +734,11 @@ class TimeseriesReport(ReportBase): If the input dataframe is not indexed by time. 
""" - def __init__(self, dataframe: pd.DataFrame, verbosity: int = 0): + def __init__( + self, + dataframe: pd.DataFrame, + verbosity: Verbosity = Verbosity.LOW, + ): super().__init__(dataframe, verbosity) if not is_date(dataframe.index): raise ValueError( @@ -813,17 +757,17 @@ def add_timeseries_analysis( use_columns: Optional[List[str]] = None, omit_columns: Optional[List[str]] = None, subsections: Optional[List[TimeseriesAnalysis.TimeseriesAnalysisSubsection]] = None, - verbosity: Optional[int] = None, - verbosity_time_analysis_plot: Optional[int] = None, - verbosity_rolling_statistics: Optional[int] = None, - verbosity_boxplots_over_time: Optional[int] = None, - verbosity_seasonal_decomposition: Optional[int] = None, - verbosity_autocorrelation: Optional[int] = None, - verbosity_stationarity_tests: Optional[int] = None, - verbosity_fourier_transform: Optional[int] = None, - verbosity_short_time_ft: Optional[int] = None, - sampling_rate: Optional[int] = None, - stft_window_size: Optional[int] = None, + verbosity: Optional[Verbosity] = None, + verbosity_time_analysis_plot: Optional[Verbosity] = None, + verbosity_rolling_statistics: Optional[Verbosity] = None, + verbosity_boxplots_over_time: Optional[Verbosity] = None, + verbosity_seasonal_decomposition: Optional[Verbosity] = None, + verbosity_autocorrelation: Optional[Verbosity] = None, + verbosity_stationarity_tests: Optional[Verbosity] = None, + verbosity_fourier_transform: Optional[Verbosity] = None, + verbosity_short_time_ft: Optional[Verbosity] = None, + sampling_rate: Optional[int] = None, + stft_window_size: Optional[int] = None, ) -> "TimeseriesReport": """Add timeseries analysis section to the report. @@ -838,10 +782,8 @@ def add_timeseries_analysis( subsections : List[TimeseriesAnalysis.TimeseriesAnalysisSubsection], optional List of sub-sections to include into the BivariateAnalysis section. If None, all subsections are added. 
- verbosity : int, optional - The verbosity of the code generated in the exported notebook, - must be one of [0, 1, 2]. - + verbosity : Verbosity, optional + The verbosity of the code generated in the exported notebook. 0 A single function call generates the entire bivariate analysis section. 1 @@ -851,26 +793,26 @@ def add_timeseries_analysis( Similar to 1, but in addition, function definitions are generated, column data type inference and default statistics become customizable. - verbosity_time_analysis_plot : int, optional + verbosity_time_analysis_plot : Verbosity, optional Time analysis interactive plot subsection code verbosity. - verbosity_rolling_statistics : int, optional + verbosity_rolling_statistics : Verbosity, optional Rolling statistics interactive plot subsection code verbosity. - verbosity_boxplots_over_time : int, optional + verbosity_boxplots_over_time : Verbosity, optional Boxplots grouped over time intervals plot subsection code verbosity. - verbosity_seasonal_decomposition : int, optional + verbosity_seasonal_decomposition : Verbosity, optional Decomposition into trend, seasonal and residual components code verbosity. - verbosity_autocorrelation : int, optional + verbosity_autocorrelation : Verbosity, optional Autocorrelation and partial autocorrelation vs. lag code verbosity. - verbosity_stationarity_tests : int, optional + verbosity_stationarity_tests : Verbosity, optional Stationarity tests code verbosity. - verbosity_fourier_transform: int, optional + verbosity_fourier_transform: Verbosity, optional Fourier transform and short-time Fourier transform code verbosity. - verbosity_short_time_ft: int, optional + verbosity_short_time_ft: Verbosity, optional Short-time Fourier transform transform spectrogram code verbosity. - sampling_rate: int, optional + sampling_rate: int, optional Sampling rate for Fourier transform and Short-time Fourier transform subsections. Needs to be set in order for these two subs to be included. 
- stft_window_size : int, optional + stft_window_size : int, optional Window size for Short-time Fourier transform. Needs to be set in order for the STFT subsection to be included. """ @@ -909,14 +851,13 @@ class DefaultTimeseriesReport(TimeseriesReport): Data from which to generate the report. Data needs to be indexed by time: pd.DateTimeIndex or pd.PeriodIndex. The data is assumed to be sorted according to the time index in ascending order. - verbosity : int (default = 0) - The default verbosity for the exported code of the entire report, has to be one of - [0, 1, 2], by default 0. - verbosity_overview : int, optional + verbosity : Verbosity (default = Verbosity.LOW) + The default verbosity for the exported code of the entire report. + verbosity_overview : Verbosity, optional Verbosity of the overview section - verbosity_univariate_analysis : int, optional + verbosity_univariate_analysis : Verbosity, optional Verbosity of the univariate analysis section - verbosity_timeseries_analysis : int, optional + verbosity_timeseries_analysis : Verbosity, optional Verbosity of the timeseries analysis section columns_overview : List[str], optional Subset of columns to use in overview section @@ -936,15 +877,15 @@ class DefaultTimeseriesReport(TimeseriesReport): def __init__( self, dataframe: pd.DataFrame, - verbosity: int = 0, - verbosity_overview: Optional[int] = None, - verbosity_univariate_analysis: Optional[int] = None, - verbosity_timeseries_analysis: Optional[int] = None, + verbosity: Verbosity = Verbosity.LOW, + verbosity_overview: Optional[Verbosity] = None, + verbosity_univariate_analysis: Optional[Verbosity] = None, + verbosity_timeseries_analysis: Optional[Verbosity] = None, columns_overview: Optional[List[str]] = None, columns_univariate_analysis: Optional[List[str]] = None, columns_timeseries_analysis: Optional[List[str]] = None, - sampling_rate: Optional[int] = None, - stft_window_size: Optional[int] = None, + sampling_rate: Optional[int] = None, + 
stft_window_size: Optional[int] = None, ): super().__init__(dataframe, verbosity) diff --git a/edvart/report_sections/bivariate_analysis.py b/edvart/report_sections/bivariate_analysis.py index 894191f..4959f88 100644 --- a/edvart/report_sections/bivariate_analysis.py +++ b/edvart/report_sections/bivariate_analysis.py @@ -13,7 +13,7 @@ from edvart import utils from edvart.data_types import is_boolean, is_categorical, is_numeric from edvart.report_sections.code_string_formatting import get_code, total_dedent -from edvart.report_sections.section_base import ReportSection, Section +from edvart.report_sections.section_base import ReportSection, Section, Verbosity class BivariateAnalysis(ReportSection): @@ -26,16 +26,8 @@ class BivariateAnalysis(ReportSection): subsections : List[BivariateAnalysisSubsection], optional List of subsections to include. All subsection in BivariateAnalysisSubsection are included by default. - verbosity : int (default = 0) - Generated code verbosity global to the Bivariate analysis sections, must be on of [0, 1, 2]. - - 0 - A single cell which generates the bivariate analysis section is exported. - 1 - Parameterizable function calls for each subsection are exported. - 2 - Similar to 1, but in addition function definitions are also exported. - + verbosity : Verbosity (default = Verbosity.LOW) + Generated code verbosity global to the Bivariate analysis sections. If subsection verbosities are None, then they will be overridden by this parameter. columns : List[str], optional Columns on which to do bivariate analysis. @@ -60,11 +52,11 @@ class BivariateAnalysis(ReportSection): `columns`, `columns_x`, `columns_y` is specified. In that case, the first elements of each pair are treated as `columns_x` and the second elements as `columns_y` in pairplots and correlations. - verbosity_correlations : int, optional + verbosity_correlations : Verbosity, optional Correlation plots subsection code verbosity. 
- verbosity_pairplot: int, optional + verbosity_pairplot: Verbosity, optional Pairplot subsection code verbosity. - verbosity_contingency_table: int, optional + verbosity_contingency_table: Verbosity, optional Contingency table subsection code verbosity. color_col : str, optional Name of column according to use for coloring of the bivariate analysis subsections. @@ -90,14 +82,14 @@ def __str__(self): def __init__( self, subsections: Optional[List[BivariateAnalysisSubsection]] = None, - verbosity: int = 0, + verbosity: Verbosity = Verbosity.LOW, columns: Optional[List[str]] = None, columns_x: Optional[List[str]] = None, columns_y: Optional[List[str]] = None, columns_pairs: Optional[List[Tuple[str, str]]] = None, - verbosity_correlations: Optional[int] = None, - verbosity_pairplot: Optional[int] = None, - verbosity_contingency_table: Optional[int] = None, + verbosity_correlations: Optional[Verbosity] = None, + verbosity_pairplot: Optional[Verbosity] = None, + verbosity_contingency_table: Optional[Verbosity] = None, color_col: Optional[str] = None, ): verbosity_correlations = ( @@ -214,7 +206,7 @@ def bivariate_analysis( """ bivariate_analysis = BivariateAnalysis( subsections=subsections, - verbosity=0, + verbosity=Verbosity.LOW, columns=columns, columns_x=columns_x, columns_y=columns_y, @@ -237,7 +229,7 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None: """ section_header = nbfv4.new_markdown_cell(self.get_title(section_level=1)) cells.append(section_header) - if self.verbosity == 0: + if self.verbosity == Verbosity.LOW: code = "bivariate_analysis(df=df" if self.subsections_0 is not None: arg_subsections_names = [ @@ -258,7 +250,7 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None: code += ")" cells.append(nbfv4.new_code_cell(code)) for sub in self.subsections: - if sub.verbosity > 0: + if sub.verbosity > Verbosity.LOW: sub.add_cells(cells) else: super().add_cells(cells) @@ -272,7 +264,7 @@ def required_imports(self) -> List[str]: List of import 
strings to be added at the top of the generated notebook, e.g. ['import pandas as pd', 'import numpy as np'] """ - if self.verbosity != 0: + if self.verbosity != Verbosity.LOW: return super().required_imports() imports = { @@ -280,7 +272,7 @@ def required_imports(self) -> List[str]: "bivariate_analysis = BivariateAnalysis.bivariate_analysis" } for subsec in self.subsections: - if subsec.verbosity > 0: + if subsec.verbosity > Verbosity.LOW: imports.update(subsec.required_imports()) return list(imports) @@ -302,7 +294,7 @@ class CorrelationPlot(Section): Parameters ---------- - verbosity : int (default = 0) + verbosity : Verbosity (default = Verbosity.LOW) Verbosity of the code generated in the exported notebook. columns : List[str], optional Columns on which to plot pair-wise correlation plot. @@ -326,7 +318,7 @@ class CorrelationPlot(Section): def __init__( self, - verbosity: int = 0, + verbosity: Verbosity = Verbosity.LOW, columns: Optional[List[str]] = None, columns_x: Optional[List[str]] = None, columns_y: Optional[List[str]] = None, @@ -507,7 +499,7 @@ def required_imports(self) -> List[str]: List of import strings to be added at the top of the generated notebook, e.g. ['import pandas as pd', 'import numpy as np']. """ - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: return [ total_dedent( """ @@ -546,7 +538,7 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None: default_call += ")" - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: code = default_call else: code = ( @@ -582,7 +574,7 @@ class PairPlot(Section): Parameters ---------- - verbosity : int (default = 0) + verbosity : Verbosity (default = Verbosity.LOW) Verbosity of the code generated in the exported notebook. columns : List[str], optional Columns on which to plot the pairplot. 
@@ -608,7 +600,7 @@ class PairPlot(Section): def __init__( self, - verbosity: int = 0, + verbosity: Verbosity = Verbosity.LOW, columns: Optional[List[str]] = None, columns_x: Optional[List[str]] = None, columns_y: Optional[List[str]] = None, @@ -689,7 +681,7 @@ def required_imports(self) -> List[str]: List of import strings to be added at the top of the generated notebook, e.g. ['import pandas as pd', 'import numpy as np']. """ - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: return [ total_dedent( """ @@ -725,7 +717,7 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None: default_call += f", color_col='{self.color_col}'" default_call += ")" - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: code = default_call else: code = get_code(PairPlot.plot_pairplot) + "\n\n" + default_call @@ -755,7 +747,7 @@ class ContingencyTable(Section): Parameters ---------- - verbosity : int (default = 0) + verbosity : Verbosity (default = Verbosity.LOW) Verbosity of the code generated in the exported notebook. columns : List[str], optional Columns on which to show contingency tables. @@ -782,7 +774,7 @@ class ContingencyTable(Section): def __init__( self, - verbosity: int = 0, + verbosity: Verbosity = Verbosity.LOW, columns: Optional[List[str]] = None, columns_x: Optional[List[str]] = None, columns_y: Optional[List[str]] = None, @@ -946,7 +938,7 @@ def required_imports(self) -> List[str]: List of import strings to be added at the top of the generated notebook, e.g. ['import pandas as pd', 'import numpy as np']. 
""" - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: return [ total_dedent( """ @@ -983,7 +975,7 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None: default_call += f", columns={self.columns}" default_call += ")" - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: code = default_call else: code = ( diff --git a/edvart/report_sections/dataset_overview.py b/edvart/report_sections/dataset_overview.py index 7166ecc..11140b7 100644 --- a/edvart/report_sections/dataset_overview.py +++ b/edvart/report_sections/dataset_overview.py @@ -16,7 +16,7 @@ ) from edvart.pandas_formatting import hide_index, render_dictionary, series_to_frame from edvart.report_sections.code_string_formatting import get_code, total_dedent -from edvart.report_sections.section_base import ReportSection, Section +from edvart.report_sections.section_base import ReportSection, Section, Verbosity class Overview(ReportSection): @@ -29,32 +29,24 @@ class Overview(ReportSection): subsections : List[OverviewSubsection], optional List of subsections to inlcude into the Overview section. All subsections in OverviewSubsection are used by default. - verbosity : int - Generated code verbosity global to the Overview sections, must be one of [0, 1, 2]. - - 0 - A single cell which generates the overview section is exported. - 1 - Parameterizable function calls for each subsection of the overview section are exported. - 2 - Similar to 1, but in addition function definitions are also exported. - + verbosity : Verbosity + Generated code verbosity global to the Overview sections If subsection verbosities are None, then they will be overridden by this parameter. columns : List[str], optional Columns on which to do overview analysis. All columns are used by default. - verbosity_quick_info : int, optional + verbosity_quick_info : Verbosity, optional Quick info subsection code verbosity. 
- verbosity_data_types : int, optional + verbosity_data_types : Verbosity, optional Data types subsection code verbosity. - verbosity_data_preview : int, optional + verbosity_data_preview : Verbosity, optional Data preview subsection code verbosity. - verbosity_missing_values : int, optional + verbosity_missing_values : Verbosity, optional Missing values subsection code verbosity. - verbosity_rows_with_missing_value : int, optional + verbosity_rows_with_missing_value : Verbosity, optional Rows with missing value subsection code verbosity. - verbosity_constant_occurence : int, optional + verbosity_constant_occurence : Verbosity, optional Constant values subsection code verbosity. - verbosity_duplicate_rows : int, optional + verbosity_duplicate_rows : Verbosity, optional Duplicate rows subsection code verbosity. """ @@ -78,15 +70,15 @@ def __str__(self): def __init__( self, subsections: Optional[List[OverviewSubsection]] = None, - verbosity: int = 0, + verbosity: Verbosity = Verbosity.LOW, columns: Optional[List[str]] = None, - verbosity_quick_info: Optional[int] = None, - verbosity_data_types: Optional[int] = None, - verbosity_data_preview: Optional[int] = None, - verbosity_missing_values: Optional[int] = None, - verbosity_rows_with_missing_value: Optional[int] = None, - verbosity_constant_occurence: Optional[int] = None, - verbosity_duplicate_rows: Optional[int] = None, + verbosity_quick_info: Optional[Verbosity] = None, + verbosity_data_types: Optional[Verbosity] = None, + verbosity_data_preview: Optional[Verbosity] = None, + verbosity_missing_values: Optional[Verbosity] = None, + verbosity_rows_with_missing_value: Optional[Verbosity] = None, + verbosity_constant_occurence: Optional[Verbosity] = None, + verbosity_duplicate_rows: Optional[Verbosity] = None, ): # Propagate global verbosity to subsection verbosities verbosity_quick_info = ( @@ -180,7 +172,7 @@ def overview_analysis( """ if columns is not None: df = df[columns] - overview = 
Overview(subsections=subsections, verbosity=0, columns=columns) + overview = Overview(subsections=subsections, verbosity=Verbosity.LOW, columns=columns) for sub in overview.subsections: sub.show(df) @@ -193,13 +185,13 @@ def required_imports(self) -> List[str]: List of import strings to be added at the top of the generated notebook, e.g. ['import pandas as pd', 'import numpy as np'] """ - if self.verbosity == 0: + if self.verbosity == Verbosity.LOW: imports = { "from edvart.report_sections.dataset_overview import Overview\n" "overview_analysis = Overview.overview_analysis" } for subsec in self.subsections: - if subsec.verbosity > 0: + if subsec.verbosity > Verbosity.LOW: imports.update(subsec.required_imports()) return list(imports) @@ -218,7 +210,7 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None: section_header = nbfv4.new_markdown_cell(self.get_title(section_level=1)) cells.append(section_header) - if self.verbosity == 0: + if self.verbosity == Verbosity.LOW: code = "overview_analysis(df=df" if self.subsections_0 is not None: arg_subsections_names = [ @@ -230,7 +222,7 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None: code += ")" cells.append(nbfv4.new_code_cell(code)) for subsec in self.subsections: - if subsec.verbosity > 0: + if subsec.verbosity > Verbosity.LOW: subsec.add_cells(cells) else: super().add_cells(cells) @@ -252,7 +244,7 @@ class QuickInfo(Section): Parameters ---------- - verbosity : int + verbosity : Verbosity Verbosity of the code generated in the exported notebook. columns : List[str], optional List of columns to consider in quick info. @@ -313,7 +305,7 @@ def required_imports(self) -> List[str]: List of import strings to be added at the top of the generated notebook, e.g. ['import pandas as pd', 'import numpy as np']. 
""" - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: return [ total_dedent( """ @@ -340,9 +332,9 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None: else: default_call = f"quick_info(df=df, columns={self.columns})" - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: code = default_call - elif self.verbosity == 2: + elif self.verbosity == Verbosity.HIGH: code = ( get_code(render_dictionary) + 2 * "\n" @@ -370,7 +362,7 @@ class DataTypes(Section): Parameters ---------- - verbosity : int + verbosity : Verbosity Verbosity of the code generated in the exported notebook. columns : List[str], optional List of columns for which to infer data type. @@ -418,7 +410,7 @@ def required_imports(self) -> List[str]: e.g. ['import pandas as pd', 'import numpy as np']. """ base_imports = ["from edvart.pandas_formatting import hide_index"] - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: return base_imports + [ total_dedent( """ @@ -449,9 +441,9 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None: else: default_call = f"data_types(df=df, columns={self.columns})" - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: code = default_call - elif self.verbosity == 2: + elif self.verbosity == Verbosity.HIGH: code = ( get_code(series_to_frame) + 2 * "\n" @@ -491,7 +483,7 @@ class DataPreview(Section): Parameters ---------- - verbosity : int + verbosity : Verbosity Verbosity of the code generated in the exported notebook. columns : List[str], optional List of columns to preview. @@ -546,7 +538,7 @@ def required_imports(self) -> List[str]: List of import strings to be added at the top of the generated notebook, e.g. ['import pandas as pd', 'import numpy as np']. 
""" - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: return [ total_dedent( """ @@ -576,9 +568,9 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None: else: default_call = f"data_preview(df=df, columns={self.columns})" - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: code = default_call - elif self.verbosity == 2: + elif self.verbosity == Verbosity.HIGH: code = get_code(DataPreview.data_preview) + 2 * "\n" + default_call cells.append(nbfv4.new_code_cell(code)) @@ -600,7 +592,7 @@ class MissingValues(Section): Parameters ---------- - verbosity : int + verbosity : Verbosity Verbosity of the code generated in the exported notebook. columns : List[str], optional List of columns for which to count missing values. If None, all columns are used. @@ -700,7 +692,7 @@ def required_imports(self) -> List[str]: e.g. ['import pandas as pd', 'import numpy as np'] """ base_imports = ["from edvart.pandas_formatting import hide_index"] - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: return base_imports + [ total_dedent( """ @@ -730,9 +722,9 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None: else: default_call = f"missing_values(df=df, columns={self.columns})" - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: code = default_call - elif self.verbosity == 2: + elif self.verbosity == Verbosity.HIGH: code = ( get_code(series_to_frame) + 2 * "\n" @@ -760,7 +752,7 @@ class ConstantOccurence(Section): Parameters ---------- - verbosity : int + verbosity : Verbosity Verbosity of the code generated in the exported notebook. columns : List[str], optional List of columns to count constant occurence in. If None, all columns are used. @@ -830,7 +822,7 @@ def required_imports(self) -> List[str]: e.g. 
['import pandas as pd', 'import numpy as np'] """ base_imports = ["from edvart.pandas_formatting import hide_index"] - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: return base_imports + [ total_dedent( """ @@ -857,9 +849,9 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None: else: default_call = f"constant_occurence(df=df, columns={self.columns})" - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: code = default_call - elif self.verbosity == 2: + elif self.verbosity == Verbosity.HIGH: code = ( get_code(series_to_frame) + 2 * "\n" @@ -887,7 +879,7 @@ class RowsWithMissingValue(Section): Parameters ---------- - verbosity : int + verbosity : Verbosity Verbosity of the code generated in the exported notebook. columns : List[str], optional List of columns to consider when counting. If None, all columns are used. @@ -939,7 +931,7 @@ def required_imports(self) -> List[str]: List of import strings to be added at the top of the generated notebook, e.g. ['import pandas as pd', 'import numpy as np'] """ - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: return [ total_dedent( """ @@ -966,9 +958,9 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None: else: default_call = f"missing_value_row_count(df=df, columns={self.columns})" - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: code = default_call - elif self.verbosity == 2: + elif self.verbosity == Verbosity.HIGH: code = ( get_code(render_dictionary) + 2 * "\n" @@ -996,7 +988,7 @@ class DuplicateRows(Section): Parameters ---------- - verbosity : int + verbosity : Verbosity Verbosity of the code generated in the exported notebook. columns : List[str], optional List of columns to consider when counting. If None, all columns are used. @@ -1048,7 +1040,7 @@ def required_imports(self) -> List[str]: List of import strings to be added at the top of the generated notebook, e.g. 
['import pandas as pd', 'import numpy as np'] """ - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: return [ total_dedent( """ @@ -1075,9 +1067,9 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None: else: default_call = f"duplicate_row_count(df=df, columns={self.columns})" - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: code = default_call - elif self.verbosity == 2: + elif self.verbosity == Verbosity.HIGH: code = ( get_code(render_dictionary) + 2 * "\n" diff --git a/edvart/report_sections/group_analysis.py b/edvart/report_sections/group_analysis.py index cbc14a5..7d7a9bb 100644 --- a/edvart/report_sections/group_analysis.py +++ b/edvart/report_sections/group_analysis.py @@ -16,7 +16,7 @@ from edvart import utils from edvart.data_types import DataType, infer_data_type from edvart.report_sections.code_string_formatting import code_dedent, get_code, total_dedent -from edvart.report_sections.section_base import Section +from edvart.report_sections.section_base import Section, Verbosity class GroupAnalysis(Section): @@ -28,16 +28,8 @@ class GroupAnalysis(Section): Data for which to perform analysis. groupby : Union[str, List[str]] Name of column or list of columns names to group by. - verbosity : int (default = 0) - Generated code verbosity global to the Group analysis sections, must be on of [0, 1, 2]. - - 0 - A single cell which generates the group analysis section is exported. - 1 - Parameterizable function calls for each subsection are exported. - 2 - Similar to 1, but in addition function definitions are also exported. - + verbosity : Verbosity (default = Verbosity.LOW) + Generated code verbosity global to the Group analysis sections. If subsection verbosities are None, then they will be overridden by this parameter. columns : List[str], optional Columns on which to do group analysis. All columns are used by default. 
@@ -58,7 +50,7 @@ def __init__( self, df: pd.DataFrame, groupby: Union[str, List[str]], - verbosity: int = 0, + verbosity: Verbosity = Verbosity.LOW, columns: Optional[List[str]] = None, show_within_group_statistics: bool = True, show_group_missing_values: bool = True, @@ -553,12 +545,12 @@ def required_imports(self) -> List[str]: e.g. ["import pandas as pd", "import numpy as np"] """ ga = "GroupAnalysis" # pylint:disable=invalid-name - if self.verbosity == 0: + if self.verbosity == Verbosity.LOW: return [ f"from edvart.report_sections.group_analysis import {ga}\n" f"group_analysis = {ga}.group_analysis" ] - if self.verbosity == 1: + if self.verbosity == Verbosity.MEDIUM: return [ total_dedent( f""" @@ -574,7 +566,7 @@ def required_imports(self) -> List[str]: """ ) ] - # verbosity 2 + # verbosity HIGH return [ "import colorlover as cl", "import matplotlib.pyplot as plt", @@ -621,7 +613,7 @@ def _add_function_defs(self, cells: List[Dict[str, Any]]): cells.append(nbfv4.new_code_cell(code)) def _add_cells_numeric_col(self, cells: List[Dict[str, Any]], column_name: str): - """Add code cells for a numeric column at verbosity 1 or 2. + """Add code cells for a numeric column at verbosity MEDIUM or HIGH. 
Parameters ---------- @@ -632,7 +624,7 @@ def _add_cells_numeric_col(self, cells: List[Dict[str, Any]], column_name: str): """ code = "" if self.show_statistics: - if self.verbosity == 1: + if self.verbosity == Verbosity.MEDIUM: code += ( f"within_group_stats(df=df, groupby={self.groupby}, column='{column_name}')\n" ) @@ -672,7 +664,7 @@ def add_cells(self, cells: List[Dict[str, Any]]): section_header = nbfv4.new_markdown_cell(self.get_title(section_level=1)) cells.append(section_header) - if self.verbosity == 0: + if self.verbosity == Verbosity.LOW: if self.columns is None: code = f"group_analysis(df=df, groupby={self.groupby})" else: @@ -681,7 +673,7 @@ def add_cells(self, cells: List[Dict[str, Any]]): cells.append(code_cell) return - if self.verbosity == 2: + if self.verbosity == Verbosity.HIGH: self._add_function_defs(cells) if self.show_missing_vals: diff --git a/edvart/report_sections/multivariate_analysis.py b/edvart/report_sections/multivariate_analysis.py index 63e458f..7c20bcd 100644 --- a/edvart/report_sections/multivariate_analysis.py +++ b/edvart/report_sections/multivariate_analysis.py @@ -15,7 +15,7 @@ from edvart.data_types import is_numeric from edvart.plots import scatter_plot_2d from edvart.report_sections.code_string_formatting import get_code, total_dedent -from edvart.report_sections.section_base import ReportSection, Section +from edvart.report_sections.section_base import ReportSection, Section, Verbosity from edvart.utils import discrete_colorscale, is_categorical try: @@ -38,27 +38,19 @@ class MultivariateAnalysis(ReportSection): subsections : List[MultivariateAnalysisSubsection], optional List of subsections to include. All subsection in MultivariateAnalysisSubsection are included by default. - verbosity : int - Generated code verbosity global to the Multivariate sections, must be one of [0, 1, 2]. - - 0 - A single cell which generates the multivariate section is exported. 
- 1 - Parameterizable function calls for each subsection are exported. - 2 - Similar to 1, but in addition function definitions are also exported. - + verbosity : Verbosity + Generated code verbosity global to the Multivariate sections. If subsection verbosities are None, then they will be overridden by this parameter. columns : List[str], optional Columns on which to do multivariate analysis. All columns of df will be used by default. - verbosity_pca : int, optional + verbosity_pca : Verbosity, optional Principal component analysis subsection code verbosity. - verbosity_umap : int, optional + verbosity_umap : Verbosity, optional UMAP subsection code verbosity. - verbosity_parallel_coordinates : int, optional + verbosity_parallel_coordinates : Verbosity, optional Parallel coordinates subsection code verbosity. - verbosity_parallel_categories : int, optional + verbosity_parallel_categories : Verbosity, optional Parallel categories subsection code verbosity. color_col : str, optional Name of the column according to which to color points in the sections. 
@@ -82,12 +74,12 @@ def __init__( self, df: pd.DataFrame, subsections: Optional[List[MultivariateAnalysisSubsection]] = None, - verbosity: int = 0, + verbosity: Verbosity = Verbosity.LOW, columns: Optional[List[str]] = None, - verbosity_pca: Optional[int] = None, - verbosity_umap: Optional[int] = None, - verbosity_parallel_coordinates: Optional[int] = None, - verbosity_parallel_categories: Optional[int] = None, + verbosity_pca: Optional[Verbosity] = None, + verbosity_umap: Optional[Verbosity] = None, + verbosity_parallel_coordinates: Optional[Verbosity] = None, + verbosity_parallel_categories: Optional[Verbosity] = None, color_col: Optional[str] = None, ): verbosity_pca = verbosity_pca if verbosity_pca is not None else verbosity @@ -173,7 +165,11 @@ def multivariate_analysis( df = df[columns] multivariate_analysis = MultivariateAnalysis( - df=df, subsections=subsections, verbosity=0, columns=columns, color_col=color_col + df=df, + subsections=subsections, + verbosity=Verbosity.LOW, + columns=columns, + color_col=color_col, ) for sub in multivariate_analysis.subsections: @@ -188,13 +184,13 @@ def required_imports(self) -> List[str]: List of import strings to be added at the top of the generated notebook, e.g. 
['import pandas as pd', 'import numpy as np'] """ - if self.verbosity == 0: + if self.verbosity == Verbosity.LOW: imports = { "from edvart.report_sections.multivariate_analysis import MultivariateAnalysis\n" "multivariate_analysis = MultivariateAnalysis.multivariate_analysis" } for subsec in self.subsections: - if subsec.verbosity > 0: + if subsec.verbosity > Verbosity.LOW: imports.update(subsec.required_imports()) return list(imports) @@ -212,7 +208,7 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None: """ section_header = nbfv4.new_markdown_cell(self.get_title(section_level=1)) cells.append(section_header) - if self.verbosity == 0: + if self.verbosity == Verbosity.LOW: code = "multivariate_analysis(df=df" if self.subsections_0 is not None: arg_subsections_names = [ @@ -227,7 +223,7 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None: code += ")" cells.append(nbfv4.new_code_cell(code)) for sub in self.subsections: - if sub.verbosity > 0: + if sub.verbosity > Verbosity.LOW: sub.add_cells(cells) else: super().add_cells(cells) @@ -251,7 +247,7 @@ class PCA(Section): ---------- df : pd.DataFrame Data on which to perform PCA. - verbosity : int (default = 0) + verbosity : Verbosity (default = Verbosity.LOW) Verbosity of the code generated in the exported notebook. columns : List[str], optional Columns on which to perform PCA. Only numeric columns can be used. @@ -269,7 +265,7 @@ class PCA(Section): def __init__( self, df: pd.DataFrame, - verbosity: int = 0, + verbosity: Verbosity = Verbosity.LOW, columns: Optional[List[str]] = None, color_col: Optional[str] = None, standardize: bool = True, @@ -416,7 +412,7 @@ def required_imports(self) -> List[str]: List of import strings to be added at the top of the generated notebook, e.g. ['import pandas as pd', 'import numpy as np']. 
""" - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: return [ total_dedent( """ @@ -466,7 +462,7 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None: first_vs_second_call += ")" explained_variance_call += ")" - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: cells.append(nbfv4.new_code_cell(first_vs_second_call)) cells.append(explained_variance_header) cells.append(nbfv4.new_code_cell(explained_variance_call)) @@ -508,7 +504,7 @@ class ParallelCoordinates(Section): ---------- df : pd.DataFrame Data for which to generate the parallel coordinates plot. - verbosity : int (default = 0) + verbosity : Verbosity (default = Verbosity.LOW) Verbosity of the code generated in the exported notebook. columns : List[str], optional Columns for which to generate parallel coordinates. All columns which are either numeric or @@ -524,7 +520,7 @@ class ParallelCoordinates(Section): def __init__( self, df: pd.DataFrame, - verbosity: int = 0, + verbosity: Verbosity = Verbosity.LOW, columns: Optional[List[str]] = None, nunique_max: int = 20, color_col: Optional[str] = None, @@ -646,7 +642,7 @@ def required_imports(self) -> List[str]: List of import strings to be added at the top of the generated notebook, e.g. ['import pandas as pd', 'import numpy as np']. """ - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: return [ total_dedent( """ @@ -680,7 +676,7 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None: default_call += f", color_col='{self.color_col}'" default_call += ")" - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: code = default_call else: code = ( @@ -715,7 +711,7 @@ class ParallelCategories(Section): ---------- df : pd.DataFrame Data for which to generate the parallel coordinates plot. - verbosity : int (default = 0) + verbosity : Verbosity (default = Verbosity.LOW) Verbosity of the code generated in the exported notebook. 
columns : List[str], optional Columns for which to generate parallel coordinates. @@ -731,7 +727,7 @@ class ParallelCategories(Section): def __init__( self, df: pd.DataFrame, - verbosity: int = 0, + verbosity: Verbosity = Verbosity.LOW, columns: Optional[List[str]] = None, nunique_max: int = 20, color_col: Optional[str] = None, @@ -834,7 +830,7 @@ def required_imports(self) -> List[str]: List of import strings to be added at the top of the generated notebook, e.g. ['import pandas as pd', 'import numpy as np']. """ - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: return [ total_dedent( """ @@ -868,7 +864,7 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None: default_call += f", color_col='{self.color_col}'" default_call += ")" - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: code = default_call else: code = ( diff --git a/edvart/report_sections/section_base.py b/edvart/report_sections/section_base.py index f507f52..eebe80d 100644 --- a/edvart/report_sections/section_base.py +++ b/edvart/report_sections/section_base.py @@ -1,18 +1,36 @@ import uuid from abc import ABC, abstractmethod +from enum import IntEnum from typing import Any, Dict, List, Optional import pandas as pd +class Verbosity(IntEnum): + """ + Verbosity of the exported code. + LOW + A single function call generates the entire section. + MEDIUM + Parameterizable function calls are generated separately for each part of the + section (e.g. each subsection or column). + HIGH + Similar to MEDIUM, but in addition, the function definitions are exported, + so the generated code itself can be customized. + """ + + LOW: int = 0 + MEDIUM: int = 1 + HIGH: int = 2 + + class Section(ABC): """Base class for report sections and subsections. Parameters ----------- - verbosity : int + verbosity : Verbosity The verbosity of the code generated in the exported notebook. - Must be one of [0, 1, 2].
columns : List[str], optional List of columns that are considered in the analysis of the section. All columns are used by default. @@ -23,11 +41,7 @@ class Section(ABC): * `__init__` initializes your object and accepts `verbosity` and `columns` (in addition to any other section specific parameters). - - `verbosity` is an integer representing the detail level of the exported code. - The value can be either `0`, `1`, or `2`. - * `0` exports a single function call that generates the entire section - * `1` exports a function call for each of the subsection the subsection - * `2` exports the full code of the analysis + - `verbosity` is an enum representing the detail level of the exported code. - `columns` is a list of names of columns which will be used in the analysis. * `required_imports` returns a list of lines of code that import the packages required by the analysis which will get added to a cell at the top of the exported notebook. @@ -41,10 +55,8 @@ class Section(ABC): * `show` renders the analysis in place in the calling notebook. """ - def __init__(self, verbosity: int = 0, columns: Optional[List[str]] = None): - if verbosity not in [0, 1, 2]: - raise ValueError(f"Verbosity must be one of [0, 1, 2], not {verbosity}") - self.verbosity = verbosity + def __init__(self, verbosity: Verbosity = Verbosity.LOW, columns: Optional[List[str]] = None): + self.verbosity = Verbosity(verbosity) self.columns = columns self._section_id: str = str(uuid.uuid4()) @@ -138,9 +150,8 @@ class ReportSection(Section): ---------- subsections : List[Section] List of subsections that should be contained in this top level section - verbosity : int - The verbosity of the code generated in the exported notebook, - must be one of [0, 1, 2]. + verbosity : Verbosity + The verbosity of the code generated in the exported notebook. 
columns : List[str], optional List of columns that are considered in the analysis of the section, all columns are used by default @@ -149,7 +160,7 @@ class ReportSection(Section): def __init__( self, subsections: List[Section], - verbosity: int = 0, + verbosity: Verbosity = Verbosity.LOW, columns: Optional[List[str]] = None, ): super().__init__(verbosity, columns) diff --git a/edvart/report_sections/table_of_contents.py b/edvart/report_sections/table_of_contents.py index 69b70dc..cf8aeb5 100644 --- a/edvart/report_sections/table_of_contents.py +++ b/edvart/report_sections/table_of_contents.py @@ -4,7 +4,7 @@ import nbformat.v4 as nbfv4 from IPython.display import Markdown, display -from edvart.report_sections.section_base import ReportSection, Section +from edvart.report_sections.section_base import ReportSection, Section, Verbosity class TableOfContents(Section): @@ -20,7 +20,7 @@ class TableOfContents(Section): def __init__(self, include_subsections: bool): self._include_subsections = include_subsections - super().__init__(verbosity=0, columns=None) + super().__init__(verbosity=Verbosity.LOW, columns=None) def required_imports(self) -> List[str]: return [] diff --git a/edvart/report_sections/timeseries_analysis/autocorrelation.py b/edvart/report_sections/timeseries_analysis/autocorrelation.py index 506060f..dac6d01 100644 --- a/edvart/report_sections/timeseries_analysis/autocorrelation.py +++ b/edvart/report_sections/timeseries_analysis/autocorrelation.py @@ -12,7 +12,7 @@ from edvart.data_types import is_numeric from edvart.decorators import check_index_time_ascending from edvart.report_sections.code_string_formatting import get_code, total_dedent -from edvart.report_sections.section_base import Section +from edvart.report_sections.section_base import Section, Verbosity class Autocorrelation(Section): @@ -20,7 +20,7 @@ class Autocorrelation(Section): Parameters ---------- - verbosity : int (default = 0) + verbosity : Verbosity (default = Verbosity.LOW) Verbosity 
of the generated code in the exported notebook. columns : List[str], optional List of columns to analyze. Only numeric column can be analyzed. @@ -154,7 +154,7 @@ def required_imports(self) -> List[str]: List of import strings to be added at the top of the generated notebook, e.g. ["import pandas as pd", "import numpy as np"]. """ - if self.verbosity == 0: + if self.verbosity == Verbosity.LOW: return [ total_dedent( """ @@ -163,7 +163,7 @@ def required_imports(self) -> List[str]: """ ) ] - if self.verbosity == 1: + if self.verbosity == Verbosity.MEDIUM: return [ total_dedent( """ @@ -173,7 +173,7 @@ def required_imports(self) -> List[str]: """ ) ] - # verbosity 2 + # Verbosity.HIGH return [ "import functools", "import matplotlib.pyplot as plt", @@ -192,7 +192,7 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None: cells : List[Dict[str, Any]] List of generated notebook cells which are represented as dictionaries. """ - if self.verbosity == 0: + if self.verbosity == Verbosity.LOW: section_header = nbfv4.new_markdown_cell(self.get_title(section_level=2)) default_call = "autocorrelation(df=df" if self.columns is not None: @@ -214,7 +214,7 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None: default_call_pacf += f", columns={self.columns}" default_call_pacf += ")" - if self.verbosity == 1: + if self.verbosity == Verbosity.MEDIUM: code_acf = default_call_acf code_pacf = default_call_pacf else: diff --git a/edvart/report_sections/timeseries_analysis/boxplots_over_time.py b/edvart/report_sections/timeseries_analysis/boxplots_over_time.py index 3d74c0a..7638227 100644 --- a/edvart/report_sections/timeseries_analysis/boxplots_over_time.py +++ b/edvart/report_sections/timeseries_analysis/boxplots_over_time.py @@ -13,7 +13,7 @@ from edvart.data_types import is_numeric from edvart.decorators import check_index_time_ascending from edvart.report_sections.code_string_formatting import get_code, total_dedent -from edvart.report_sections.section_base import Section 
+from edvart.report_sections.section_base import Section, Verbosity class BoxplotsOverTime(Section): @@ -24,7 +24,7 @@ class BoxplotsOverTime(Section): Parameters ---------- - verbosity : int (default = 0) + verbosity : Verbosity (default = Verbosity.LOW) Verbosity of the generated code in the exported notebook. columns : List[str], optional List of columns to analyze. Only numeric column can be analyzed. @@ -46,7 +46,7 @@ class BoxplotsOverTime(Section): def __init__( self, - verbosity: int = 0, + verbosity: Verbosity = Verbosity.LOW, columns: Optional[List[str]] = None, grouping_function: Callable[[Any], str] = None, grouping_name: Optional[str] = None, @@ -196,7 +196,7 @@ def required_imports(self) -> List[str]: List of import strings to be added at the top of the generated notebook, e.g. ["import pandas as pd", "import numpy as np"]. """ - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: return [ total_dedent( """ @@ -248,7 +248,7 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None: default_call += f", grouping_name='{self.grouping_name}'" default_call += ")" - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: code = default_call else: if self.grouping_function is None: diff --git a/edvart/report_sections/timeseries_analysis/fourier_transform.py b/edvart/report_sections/timeseries_analysis/fourier_transform.py index 9dad882..6d80704 100644 --- a/edvart/report_sections/timeseries_analysis/fourier_transform.py +++ b/edvart/report_sections/timeseries_analysis/fourier_transform.py @@ -11,7 +11,7 @@ from edvart.data_types import is_numeric # noqa:I100 from edvart.decorators import check_index_time_ascending from edvart.report_sections.code_string_formatting import get_code, total_dedent -from edvart.report_sections.section_base import Section +from edvart.report_sections.section_base import Section, Verbosity class FourierTransform(Section): @@ -22,7 +22,7 @@ class FourierTransform(Section): sampling_rate : int The time series 
will be considered as samples from a lower-frequency at this rate, i.e. frequencies in multiples of (1 / sampling rate) will be analyzed. - verbosity : int (default = 0) + verbosity : Verbosity (default = Verbosity.LOW) Verbosity of the generated code in the exported notebook. columns : List[str], optional List of columns to analyze. Only numeric column can be analyzed. @@ -32,7 +32,7 @@ class FourierTransform(Section): def __init__( self, sampling_rate: int, - verbosity: int = 0, + verbosity: Verbosity = Verbosity.LOW, columns: Optional[List[str]] = None, ): if sampling_rate <= 0: @@ -121,7 +121,7 @@ def required_imports(self) -> List[str]: List of import strings to be added at the top of the generated notebook, e.g. ["import pandas as pd", "import numpy as np"]. """ - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: return [ total_dedent( """ @@ -153,7 +153,7 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None: default_call += f", columns={self.columns}" default_call += ")" - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: code = default_call else: code = get_code(FourierTransform.fourier_transform) + "\n\n" + default_call diff --git a/edvart/report_sections/timeseries_analysis/rolling_statistics.py b/edvart/report_sections/timeseries_analysis/rolling_statistics.py index b4e7bc6..367d3ea 100644 --- a/edvart/report_sections/timeseries_analysis/rolling_statistics.py +++ b/edvart/report_sections/timeseries_analysis/rolling_statistics.py @@ -11,7 +11,7 @@ from edvart.data_types import is_numeric from edvart.decorators import check_index_time_ascending from edvart.report_sections.code_string_formatting import get_code, total_dedent -from edvart.report_sections.section_base import Section +from edvart.report_sections.section_base import Section, Verbosity class RollingStatistics(Section): @@ -19,7 +19,7 @@ class RollingStatistics(Section): Parameters ---------- - verbosity : int (default = 0) + verbosity : Verbosity (default = 
Verbosity.LOW) Verbosity of the generated code in the exported notebook. columns : List[str], optional List of columns to analyze. Only numeric column can be analyzed. @@ -30,7 +30,7 @@ class RollingStatistics(Section): def __init__( self, - verbosity: int = 0, + verbosity: Verbosity = Verbosity.LOW, columns: Optional[List[str]] = None, window_size: int = 20, ): @@ -171,7 +171,7 @@ def required_imports(self) -> List[str]: List of import strings to be added at the top of the generated notebook, e.g. ["import pandas as pd", "import numpy as np"]. """ - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: return [ total_dedent( """ @@ -206,7 +206,7 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None: default_call += f", columns={self.columns}" default_call += ")" - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: code = default_call else: code = get_code(RollingStatistics.rolling_statistics) + "\n\n" + default_call diff --git a/edvart/report_sections/timeseries_analysis/seasonal_decomposition.py b/edvart/report_sections/timeseries_analysis/seasonal_decomposition.py index 3a38424..c35de55 100644 --- a/edvart/report_sections/timeseries_analysis/seasonal_decomposition.py +++ b/edvart/report_sections/timeseries_analysis/seasonal_decomposition.py @@ -12,7 +12,7 @@ from edvart.data_types import is_numeric from edvart.decorators import check_index_time_ascending from edvart.report_sections.code_string_formatting import get_code, total_dedent -from edvart.report_sections.section_base import Section +from edvart.report_sections.section_base import Section, Verbosity class SeasonalDecomposition(Section): @@ -25,7 +25,7 @@ class SeasonalDecomposition(Section): Parameters ---------- - verbosity : int (default = 0) + verbosity : Verbosity (default = Verbosity.LOW) Verbosity of the generated code in the exported notebook. columns : List[str], optional List of columns to analyze. Only numeric column can be analyzed. 
@@ -42,7 +42,7 @@ class SeasonalDecomposition(Section): def __init__( self, - verbosity: int = 0, + verbosity: Verbosity = Verbosity.LOW, columns: Optional[List[str]] = None, period: Optional[int] = None, model: str = "additive", @@ -120,7 +120,7 @@ def required_imports(self) -> List[str]: List of import strings to be added at the top of the generated notebook, e.g. ["import pandas as pd", "import numpy as np"]. """ - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: return [ total_dedent( """ @@ -155,7 +155,7 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None: default_call += f", period={self.period}" default_call += f", model='{self.model}')" - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: code = default_call else: code = get_code(SeasonalDecomposition.seasonal_decomposition) + "\n\n" + default_call diff --git a/edvart/report_sections/timeseries_analysis/short_time_ft.py b/edvart/report_sections/timeseries_analysis/short_time_ft.py index 505f945..e722bde 100644 --- a/edvart/report_sections/timeseries_analysis/short_time_ft.py +++ b/edvart/report_sections/timeseries_analysis/short_time_ft.py @@ -12,7 +12,7 @@ from edvart.data_types import is_numeric from edvart.decorators import check_index_time_ascending from edvart.report_sections.code_string_formatting import get_code, total_dedent -from edvart.report_sections.section_base import Section +from edvart.report_sections.section_base import Section, Verbosity class ShortTimeFT(Section): @@ -25,7 +25,7 @@ class ShortTimeFT(Section): frequencies in multiples of (1 / sampling rate) will be analyzed. window_size : int Size of window to perform DFT on to obtain Short-time Fourier transform. - verbosity : int (default = 0) + verbosity : Verbosity (default = Verbosity.LOW) Verbosity of the generated code in the exported notebook. columns : List[str], optional List of columns to analyze. Only numeric column can be analyzed. 
@@ -36,7 +36,7 @@ def __init__( self, sampling_rate: int, window_size: int, - verbosity: int = 0, + verbosity: Verbosity = Verbosity.LOW, columns: Optional[List[str]] = None, ): if sampling_rate <= 0: @@ -155,7 +155,7 @@ def required_imports(self) -> List[str]: List of import strings to be added at the top of the generated notebook, e.g. ["import pandas as pd", "import numpy as np"]. """ - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: return [ total_dedent( """ @@ -191,7 +191,7 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None: default_call += f", columns={self.columns}" default_call += ")" - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: code = default_call else: code = get_code(ShortTimeFT.short_time_ft) + "\n\n" + default_call diff --git a/edvart/report_sections/timeseries_analysis/stationarity_tests.py b/edvart/report_sections/timeseries_analysis/stationarity_tests.py index 4cf9a55..8a7ab08 100644 --- a/edvart/report_sections/timeseries_analysis/stationarity_tests.py +++ b/edvart/report_sections/timeseries_analysis/stationarity_tests.py @@ -13,7 +13,7 @@ from edvart.decorators import check_index_time_ascending from edvart.pandas_formatting import format_number from edvart.report_sections.code_string_formatting import get_code, total_dedent -from edvart.report_sections.section_base import Section +from edvart.report_sections.section_base import Section, Verbosity class StationarityTests(Section): @@ -21,7 +21,7 @@ class StationarityTests(Section): Parameters ---------- - verbosity : int (default = 0) + verbosity : Verbosity (default = Verbosity.LOW) Verbosity of the generated code in the exported notebook. columns : List[str], optional List of columns to analyze. Only numeric column can be analyzed. @@ -128,7 +128,7 @@ def required_imports(self) -> List[str]: List of import strings to be added at the top of the generated notebook, e.g. ["import pandas as pd", "import numpy as np"]. 
""" - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: return [ total_dedent( """ @@ -162,7 +162,7 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None: default_call += f", columns={self.columns}" default_call += ")" - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: code = default_call else: code = ( diff --git a/edvart/report_sections/timeseries_analysis/time_analysis_plot.py b/edvart/report_sections/timeseries_analysis/time_analysis_plot.py index 274a3cb..ea1bc9c 100644 --- a/edvart/report_sections/timeseries_analysis/time_analysis_plot.py +++ b/edvart/report_sections/timeseries_analysis/time_analysis_plot.py @@ -12,7 +12,7 @@ from edvart.data_types import is_numeric from edvart.decorators import check_index_time_ascending from edvart.report_sections.code_string_formatting import get_code, total_dedent -from edvart.report_sections.section_base import Section +from edvart.report_sections.section_base import Section, Verbosity class TimeAnalysisPlot(Section): @@ -20,7 +20,7 @@ class TimeAnalysisPlot(Section): Parameters ---------- - verbosity : int (default = 0) + verbosity : Verbosity (default = Verbosity.LOW) Verbosity of the code generated in the exported notebook. columns : List[str], optional List of columns to analyze. Only numeric column can be analyzed. @@ -36,7 +36,7 @@ class TimeAnalysisPlot(Section): def __init__( self, - verbosity: int = 0, + verbosity: Verbosity = Verbosity.LOW, columns: Optional[List[str]] = None, separate_plots: bool = False, color_col: Optional[str] = None, @@ -144,7 +144,7 @@ def required_imports(self) -> List[str]: List of import strings to be added at the top of the generated notebook, e.g. ["import pandas as pd", "import numpy as np"]. 
""" - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: return [ total_dedent( """ @@ -184,7 +184,7 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None: default_call += ", separate_plots=True" default_call += ")" - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: code = default_call else: code = ( diff --git a/edvart/report_sections/timeseries_analysis/timeseries_analysis.py b/edvart/report_sections/timeseries_analysis/timeseries_analysis.py index a6d671b..dac793e 100644 --- a/edvart/report_sections/timeseries_analysis/timeseries_analysis.py +++ b/edvart/report_sections/timeseries_analysis/timeseries_analysis.py @@ -8,7 +8,7 @@ from IPython.display import Markdown, display from edvart.decorators import check_index_time_ascending -from edvart.report_sections.section_base import ReportSection +from edvart.report_sections.section_base import ReportSection, Verbosity from edvart.report_sections.timeseries_analysis import ( Autocorrelation, BoxplotsOverTime, @@ -33,35 +33,27 @@ class TimeseriesAnalysis(ReportSection): All subsection in TimeseriesAnalysisSubsection are included by default, except for FourierTransform, which is only included if `sampling_rate` is set and ShortTimeFT, which is only included if `sampling_rate` and `stft_window_size` are both set. - verbosity : int - Generated code verbosity global to the Overview sections, must be one of [0, 1, 2]. - - 0 - A single cell which generates the timeseries analysis section is exported. - 1 - Parameterizable function calls for each subsection are exported. - 2 - Similar to 1, but in addition function definitions are also exported. - + verbosity : Verbosity + Generated code verbosity global to the Overview sections. If subsection verbosities are None, then they will be overridden by this parameter. columns : List[str], optional Columns to include in timeseries analysis. Each column is treated as a separate time series. All columns are used by default. 
- verbosity_time_analysis_plot : int, optional + verbosity_time_analysis_plot : Verbosity, optional Time analysis interactive plot subsection code verbosity. - verbosity_rolling_statistics: int, optional + verbosity_rolling_statistics: Verbosity, optional Rolling statistics interactive plot subsection code verbosity. - verbosity_boxplots_over_time: int, optional + verbosity_boxplots_over_time: Verbosity, optional Boxplots grouped over time intervals subsection code verbosity. - verbosity_seasonal_decomposition: int, optional + verbosity_seasonal_decomposition: Verbosity, optional Seasonal decomposition subsection code verbosity. - verbosity_stationarity_tests: int, optional + verbosity_stationarity_tests: Verbosity, optional Stationarity tests subsection code verbosity. - verbosity_autocorrelation: int, optional + verbosity_autocorrelation: Verbosity, optional Autocorrelation and partial autocorrelation plot subsection code verbosity. - verbosity_fourier_transform: int, optional + verbosity_fourier_transform: Verbosity, optional Discrete Fourier transform plot subsection code verbosity. - verbosity_short_time_ft: int, optional + verbosity_short_time_ft: Verbosity, optional Short-time discrete Fourier transform plot subsection code verbosity. sampling_rate: int, optional Sampling rate of the time-series, i.e., how many samples form one period. 
For example, @@ -91,16 +83,16 @@ def __str__(self): def __init__( self, subsections: Optional[List[TimeseriesAnalysisSubsection]] = None, - verbosity: int = 0, + verbosity: Verbosity = Verbosity.LOW, columns: Optional[List[str]] = None, - verbosity_time_analysis_plot: Optional[int] = None, - verbosity_rolling_statistics: Optional[int] = None, - verbosity_boxplots_over_time: Optional[int] = None, - verbosity_seasonal_decomposition: Optional[int] = None, - verbosity_stationarity_tests: Optional[int] = None, - verbosity_autocorrelation: Optional[int] = None, - verbosity_fourier_transform: Optional[int] = None, - verbosity_short_time_ft: Optional[int] = None, + verbosity_time_analysis_plot: Optional[Verbosity] = None, + verbosity_rolling_statistics: Optional[Verbosity] = None, + verbosity_boxplots_over_time: Optional[Verbosity] = None, + verbosity_seasonal_decomposition: Optional[Verbosity] = None, + verbosity_stationarity_tests: Optional[Verbosity] = None, + verbosity_autocorrelation: Optional[Verbosity] = None, + verbosity_fourier_transform: Optional[Verbosity] = None, + verbosity_short_time_ft: Optional[Verbosity] = None, sampling_rate: Optional[int] = None, stft_window_size: Optional[int] = None, ): @@ -173,8 +165,8 @@ def __init__( else: subsections_all = subsections - # Store subsections with 0 verbosity - self.subsections_0 = [sub for sub in subsections_all if verbosities[sub] == 0] + # Store subsections with Verbosity.LOW + self.subsections_0 = [sub for sub in subsections_all if verbosities[sub] == Verbosity.LOW] if len(self.subsections_0) == len(subsections_all) and subsections is None: self.subsections_0 = None @@ -240,7 +232,7 @@ def timeseries_analysis( timeseries_analysis = TimeseriesAnalysis( subsections=subsections, - verbosity=0, + verbosity=Verbosity.LOW, columns=columns, sampling_rate=sampling_rate, stft_window_size=stft_window_size, @@ -262,7 +254,7 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None: section_header = 
nbfv4.new_markdown_cell(self.get_title(section_level=1)) cells.append(section_header) - if self.verbosity == 0: + if self.verbosity == Verbosity.LOW: subsec = TimeseriesAnalysis.TimeseriesAnalysisSubsection code = "timeseries_analysis(df=df" @@ -290,7 +282,7 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None: cells.append(nbfv4.new_code_cell(code)) for sub in self.subsections: - if sub.verbosity > 0: + if sub.verbosity > Verbosity.LOW: sub.add_cells(cells) else: super().add_cells(cells) @@ -304,13 +296,13 @@ def required_imports(self) -> List[str]: List of import strings to be added at the top of the generated notebook, e.g. ["import pandas as pd", "import numpy as np"] """ - if self.verbosity == 0: + if self.verbosity == Verbosity.LOW: imports = { "from edvart.report_sections.timeseries_analysis import TimeseriesAnalysis\n" "timeseries_analysis = TimeseriesAnalysis.timeseries_analysis" } for sub in self.subsections: - if sub.verbosity > 0: + if sub.verbosity > Verbosity.LOW: imports.update(sub.required_imports()) return list(imports) diff --git a/edvart/report_sections/umap.py b/edvart/report_sections/umap.py index 1d687af..ee6523b 100644 --- a/edvart/report_sections/umap.py +++ b/edvart/report_sections/umap.py @@ -8,7 +8,7 @@ from edvart.data_types import is_numeric from edvart.plots import scatter_plot_2d from edvart.report_sections.code_string_formatting import code_dedent, get_code, total_dedent -from edvart.report_sections.section_base import Section +from edvart.report_sections.section_base import Section, Verbosity try: with warnings.catch_warnings(): @@ -27,7 +27,7 @@ class UMAP(Section): ---------- df : pd.DataFrame Data to analyze. - verbosity : int (default = 0) + verbosity : Verbosity (default = Verbosity.LOW) Verbosity of the code generated in the exported notebook. columns : List[str], optional Columns to use in computing in the UMAP embedding. Only numeric columns can be used. 
@@ -55,7 +55,7 @@ class UMAP(Section): def __init__( self, df: pd.DataFrame, - verbosity: int = 0, + verbosity: Verbosity = Verbosity.LOW, columns: Optional[List[str]] = None, color_col: Optional[str] = None, interactive: bool = True, @@ -181,7 +181,7 @@ def required_imports(self) -> List[str]: List of import strings to be added at the top of the generated notebook, e.g. ['import pandas as pd', 'import numpy as np']. """ - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: return [ total_dedent( """ @@ -227,7 +227,7 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None: default_call += " interactive=False," default_call += "\n)" - if self.verbosity <= 1: + if self.verbosity <= Verbosity.MEDIUM: code = default_call else: code = get_code(UMAP.plot_umap) + "\n\n" + default_call diff --git a/edvart/report_sections/univariate_analysis.py b/edvart/report_sections/univariate_analysis.py index d3ccc6b..ab98771 100644 --- a/edvart/report_sections/univariate_analysis.py +++ b/edvart/report_sections/univariate_analysis.py @@ -12,7 +12,7 @@ from edvart.data_types import DataType, infer_data_type from edvart.pandas_formatting import add_html_heading, dict_to_html, format_number, subcells_html from edvart.report_sections.code_string_formatting import code_dedent, get_code -from edvart.report_sections.section_base import Section +from edvart.report_sections.section_base import Section, Verbosity class UnivariateAnalysis(Section): @@ -22,25 +22,19 @@ class UnivariateAnalysis(Section): ----------- df : pd.DataFrame Dataframe to be analyzed - verbosity : int - The verbosity of the code generated in the exported notebook, - must be one of [0, 1, 2]. - - 0 - A single function call generates the entire univariate analysis section. - 1 - Function calls to parameterizable functions are generated for each column separately - in separate cells. 
- 2 - Similar to 1, but in addition, function definitions are generated, column - data type inference and default statistics become customizable. - + verbosity : Verbosity + The verbosity of the code generated in the exported notebook. columns : List[str], optional List of columns for which to do univariate analysis, all columns are used by default """ - def __init__(self, df: pd.DataFrame, verbosity: int = 0, columns: Optional[List[str]] = None): + def __init__( + self, + df: pd.DataFrame, + verbosity: Verbosity = Verbosity.LOW, + columns: Optional[List[str]] = None, + ): self.df = df super().__init__(verbosity, columns) @@ -290,12 +284,12 @@ def required_imports(self) -> List[str]: List[str] List of import strings to be added at the top of the generated notebook. """ - if self.verbosity == 0: + if self.verbosity == Verbosity.LOW: return [ "from edvart.report_sections.univariate_analysis import UnivariateAnalysis\n" "univariate_analysis = UnivariateAnalysis.univariate_analysis" ] - if self.verbosity == 1: + if self.verbosity == Verbosity.MEDIUM: return [ "from edvart.report_sections.univariate_analysis import UnivariateAnalysis\n" "top_most_frequent = UnivariateAnalysis.top_most_frequent\n" @@ -304,7 +298,7 @@ def required_imports(self) -> List[str]: "histogram = UnivariateAnalysis.histogram", "from edvart import utils", ] - # verbosity 2 + # Verbosity.HIGH return [ "from edvart import utils", "from IPython.display import display", @@ -329,7 +323,7 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None: section_header = nbfv4.new_markdown_cell(self.get_title(section_level=1)) cells.append(section_header) - if self.verbosity == 2: + if self.verbosity == Verbosity.HIGH: # Add cell with default stats dictionaries default_stats_dicts = nbfv4.new_code_cell( "# Default statistics dictionaries" @@ -366,7 +360,7 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None: ) cells.append(frame_rendering) - if self.verbosity == 0: + if self.verbosity == Verbosity.LOW: 
if self.columns is None: code = "univariate_analysis(df=df)" else: @@ -389,7 +383,7 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None: top_most_frequent(df['{col}']) bar_plot(df['{col}'])""" ) - elif self.verbosity == 1: + elif self.verbosity == Verbosity.MEDIUM: code = code_dedent( f""" numeric_statistics(df['{col}']) diff --git a/tests/test_bivariate_analysis.py b/tests/test_bivariate_analysis.py index b75bc03..cf1bdd8 100644 --- a/tests/test_bivariate_analysis.py +++ b/tests/test_bivariate_analysis.py @@ -7,6 +7,7 @@ from edvart.report_sections import bivariate_analysis from edvart.report_sections.bivariate_analysis import BivariateAnalysis from edvart.report_sections.code_string_formatting import get_code +from edvart.report_sections.section_base import Verbosity def get_test_df() -> pd.DataFrame: @@ -17,9 +18,9 @@ def get_test_df() -> pd.DataFrame: def test_default_config_verbosity(): bivariate_section = bivariate_analysis.BivariateAnalysis() - assert bivariate_section.verbosity == 0, "Verbosity should be 0" + assert bivariate_section.verbosity == Verbosity.LOW, "Verbosity should be Verbosity.LOW" for s in bivariate_section.subsections: - assert s.verbosity == 0, "Verbosity should be 0" + assert s.verbosity == Verbosity.LOW, "Verbosity should be Verbosity.LOW" def test_high_verobisities(): @@ -35,32 +36,47 @@ def test_high_verobisities(): def test_global_verbosity_overriding(): bivariate_section = bivariate_analysis.BivariateAnalysis( - verbosity=0, verbosity_pairplot=1, verbosity_correlations=2, verbosity_contingency_table=1 + verbosity=Verbosity.LOW, + verbosity_pairplot=Verbosity.MEDIUM, + verbosity_correlations=Verbosity.HIGH, + verbosity_contingency_table=Verbosity.MEDIUM, ) - assert bivariate_section.verbosity == 0 + assert bivariate_section.verbosity == Verbosity.LOW for subsec in bivariate_section.subsections: if isinstance(subsec, bivariate_analysis.PairPlot): - assert subsec.verbosity == 1, "Verbosity of pairplot should be 1" + assert ( + 
subsec.verbosity == Verbosity.MEDIUM + ), "Verbosity of pairplot should be Verbosity.MEDIUM" elif isinstance(subsec, bivariate_analysis.CorrelationPlot): - assert subsec.verbosity == 2, "Verbosity of correlation plot should be 2" + assert ( + subsec.verbosity == Verbosity.HIGH + ), "Verbosity of correlation plot should be Verbosity.HIGH" elif isinstance(subsec, bivariate_analysis.ContingencyTable): - assert subsec.verbosity == 1, "Verbosity of contingency table should be 1" + assert ( + subsec.verbosity == Verbosity.MEDIUM + ), "Verbosity of contingency table should be Verbosity.MEDIUM" else: pytest.fail("Unexpected subsection type.") def test_verbosity_propagation(): - bivariate_section = bivariate_analysis.BivariateAnalysis(verbosity=2) - assert bivariate_section.verbosity == 2, "Bivariate analysis global verbosity should be 2." + bivariate_section = bivariate_analysis.BivariateAnalysis(verbosity=Verbosity.HIGH) + assert ( + bivariate_section.verbosity == Verbosity.HIGH + ), "Bivariate analysis global verbosity should be Verbosity.HIGH." for subsec in bivariate_section.subsections: if isinstance(subsec, bivariate_analysis.PairPlot): - assert subsec.verbosity == 2, "PairPlot verbosity should be 2" + assert subsec.verbosity == Verbosity.HIGH, "PairPlot verbosity should be Verbosity.HIGH" elif isinstance(subsec, bivariate_analysis.ContingencyTable): - assert subsec.verbosity == 2, "ContingencyTable verbosity should be 2." + assert ( + subsec.verbosity == Verbosity.HIGH + ), "ContingencyTable verbosity should be Verbosity.HIGH." elif isinstance(subsec, bivariate_analysis.CorrelationPlot): - assert subsec.verbosity == 2, "Correlation plot verbosity should be 2." + assert ( + subsec.verbosity == Verbosity.HIGH + ), "Correlation plot verbosity should be Verbosity.HIGH." 
else: pytest.fail("Unexpected subsection type") @@ -103,8 +119,8 @@ def test_section_adding(): ), "Subsection should be ContingencyTable" -def test_code_export_verbosity_0(): - bivariate_section = bivariate_analysis.BivariateAnalysis(verbosity=0) +def test_code_export_verbosity_low(): + bivariate_section = bivariate_analysis.BivariateAnalysis(verbosity=Verbosity.LOW) # Export code exported_cells = [] bivariate_section.add_cells(exported_cells) @@ -117,13 +133,13 @@ def test_code_export_verbosity_0(): assert exported_code[0] == expected_code[0], "Exported code mismatch" -def test_code_export_verbosity_0_with_subsections(): +def test_code_export_verbosity_low_with_subsections(): bivariate_section = bivariate_analysis.BivariateAnalysis( subsections=[ bivariate_analysis.BivariateAnalysis.BivariateAnalysisSubsection.ContingencyTable, bivariate_analysis.BivariateAnalysis.BivariateAnalysisSubsection.PairPlot, ], - verbosity=0, + verbosity=Verbosity.LOW, ) # Export code exported_cells = [] @@ -141,7 +157,7 @@ def test_code_export_verbosity_0_with_subsections(): assert exported_code[0] == expected_code[0], "Exported code mismatch" -def test_generated_code_verbosity_0_columns(): +def test_generated_code_verbosity_low_columns(): columns = [f"col{i}" for i in range(5)] columns_x = [f"col_x{i}" for i in range(6)] columns_y = [f"col_y{i}" for i in range(4)] @@ -151,7 +167,7 @@ def test_generated_code_verbosity_0_columns(): columns_x=columns_x, columns_y=columns_y, columns_pairs=columns_pairs, - verbosity=0, + verbosity=Verbosity.LOW, color_col="col3", ) # Export code @@ -169,9 +185,9 @@ def test_generated_code_verbosity_0_columns(): assert exported_code[0] == expected_code[0], "Exported code mismatch" -def test_generated_code_verobsity_1(): +def test_generated_code_verobsity_medium(): bivariate_section = bivariate_analysis.BivariateAnalysis( - verbosity=1, + verbosity=Verbosity.MEDIUM, subsections=[ bivariate_analysis.BivariateAnalysis.BivariateAnalysisSubsection.PairPlot, 
bivariate_analysis.BivariateAnalysis.BivariateAnalysisSubsection.CorrelationPlot, @@ -194,11 +210,11 @@ def test_generated_code_verobsity_1(): assert expected_line == exported_line, "Exported code mismatch" -def test_generated_code_verbosity_1_columns_x_y(): +def test_generated_code_verbosity_medium_columns_x_y(): columns_x = ["a", "b"] columns_y = ["c", "d"] bivariate_section = bivariate_analysis.BivariateAnalysis( - verbosity=1, + verbosity=Verbosity.MEDIUM, columns_x=columns_x, columns_y=columns_y, subsections=[ @@ -224,12 +240,12 @@ def test_generated_code_verbosity_1_columns_x_y(): assert expected_line == exported_line, "Exported code mismatch" -def test_generated_code_verbosity_1_columns_pairs(): +def test_generated_code_verbosity_medium_columns_pairs(): columns_pairs = [("a", "b"), ("c", "d")] columns_x_correct = ["a", "c"] columns_y_correct = ["b", "d"] bivariate_section = bivariate_analysis.BivariateAnalysis( - verbosity=1, + verbosity=Verbosity.MEDIUM, columns_pairs=columns_pairs, subsections=[ bivariate_analysis.BivariateAnalysis.BivariateAnalysisSubsection.PairPlot, @@ -253,9 +269,9 @@ def test_generated_code_verbosity_1_columns_pairs(): assert expected_line == exported_line, "Exported code mismatch" -def test_generated_code_verbosity_2(): +def test_generated_code_verbosity_high(): bivariate_section = bivariate_analysis.BivariateAnalysis( - verbosity=2, + verbosity=Verbosity.HIGH, subsections=[ bivariate_analysis.BivariateAnalysis.BivariateAnalysisSubsection.PairPlot, bivariate_analysis.BivariateAnalysis.BivariateAnalysisSubsection.CorrelationPlot, @@ -298,17 +314,17 @@ def test_generated_code_verbosity_2(): assert expected_line == exported_line, "Exported code mismatch" -def test_verbosity_0_different_subsection_verbosities(): +def test_verbosity_low_different_subsection_verbosities(): bivariate_section = BivariateAnalysis( - verbosity=0, + verbosity=Verbosity.LOW, subsections=[ BivariateAnalysis.BivariateAnalysisSubsection.PairPlot, 
BivariateAnalysis.BivariateAnalysisSubsection.ContingencyTable, BivariateAnalysis.BivariateAnalysisSubsection.PairPlot, BivariateAnalysis.BivariateAnalysisSubsection.CorrelationPlot, ], - verbosity_pairplot=2, - verbosity_correlations=1, + verbosity_pairplot=Verbosity.HIGH, + verbosity_correlations=Verbosity.MEDIUM, ) bivariate_cells = [] @@ -328,8 +344,8 @@ def test_verbosity_0_different_subsection_verbosities(): assert expected_line == exported_line, "Exported code mismatch" -def test_imports_verbosity_0(): - bivariate_section = BivariateAnalysis(verbosity=0) +def test_imports_verbosity_low(): + bivariate_section = BivariateAnalysis(verbosity=Verbosity.LOW) exported_imports = bivariate_section.required_imports() expected_imports = [ @@ -343,8 +359,8 @@ def test_imports_verbosity_0(): assert expected_import == exported_import, "Exported import mismatch" -def test_imports_verbosity_1(): - bivariate_section = BivariateAnalysis(verbosity=1) +def test_imports_verbosity_medium(): + bivariate_section = BivariateAnalysis(verbosity=Verbosity.MEDIUM) exported_imports = bivariate_section.required_imports() expected_imports = list( @@ -357,8 +373,8 @@ def test_imports_verbosity_1(): assert expected_import == exported_import, "Exported import mismatch" -def test_imports_verbosity_2(): - bivariate_section = BivariateAnalysis(verbosity=2) +def test_imports_verbosity_high(): + bivariate_section = BivariateAnalysis(verbosity=Verbosity.HIGH) exported_imports = bivariate_section.required_imports() expected_imports = list( @@ -371,17 +387,17 @@ def test_imports_verbosity_2(): assert expected_import == exported_import, "Exported import mismatch" -def test_imports_verbosity_0_different_subsection_verbosities(): +def test_imports_verbosity_low_different_subsection_verbosities(): bivariate_section = BivariateAnalysis( - verbosity=0, + verbosity=Verbosity.LOW, subsections=[ BivariateAnalysis.BivariateAnalysisSubsection.PairPlot, 
BivariateAnalysis.BivariateAnalysisSubsection.ContingencyTable, BivariateAnalysis.BivariateAnalysisSubsection.PairPlot, BivariateAnalysis.BivariateAnalysisSubsection.CorrelationPlot, ], - verbosity_pairplot=2, - verbosity_correlations=1, + verbosity_pairplot=Verbosity.HIGH, + verbosity_correlations=Verbosity.MEDIUM, ) exported_imports = bivariate_section.required_imports() @@ -391,7 +407,7 @@ def test_imports_verbosity_0_different_subsection_verbosities(): "bivariate_analysis = BivariateAnalysis.bivariate_analysis" } for s in bivariate_section.subsections: - if s.verbosity > 0: + if s.verbosity > Verbosity.LOW: expected_imports.update(s.required_imports()) assert isinstance(exported_imports, list) diff --git a/tests/test_group_analysis.py b/tests/test_group_analysis.py index be1ba75..a5394b9 100644 --- a/tests/test_group_analysis.py +++ b/tests/test_group_analysis.py @@ -8,6 +8,7 @@ from edvart.report_sections.code_string_formatting import code_dedent, get_code, total_dedent from edvart.report_sections.group_analysis import GroupAnalysis +from edvart.report_sections.section_base import Verbosity # Workaround to prevent multiple browser tabs opening with figures plotly.io.renderers.default = "json" @@ -25,7 +26,7 @@ def get_test_df(): def test_default_config_verbosity(): group_section = GroupAnalysis(df=pd.DataFrame(), groupby=[]) - assert group_section.verbosity == 0, "Verbosity should be 0" + assert group_section.verbosity == Verbosity.LOW, "Verbosity should be Verbosity.LOW" def test_invalid_verbosities(): @@ -74,9 +75,9 @@ def test_static_methods(): GroupAnalysis.overlaid_histograms(df, groupby=["B"], column="B") -def test_code_export_verbosity_0(): +def test_code_export_verbosity_low(): df = get_test_df() - group_section = GroupAnalysis(df=df, groupby="B", verbosity=0) + group_section = GroupAnalysis(df=df, groupby="B", verbosity=Verbosity.LOW) # Export code exported_cells = [] @@ -90,9 +91,9 @@ def test_code_export_verbosity_0(): assert exported_code[0] == 
expected_code[0], "Exported code mismatch" -def test_code_export_verbosity_1(): +def test_code_export_verbosity_medium(): df = get_test_df() - group_section = GroupAnalysis(df=df, groupby="A", verbosity=1) + group_section = GroupAnalysis(df=df, groupby="A", verbosity=Verbosity.MEDIUM) # Export code exported_cells = [] @@ -116,9 +117,9 @@ def test_code_export_verbosity_1(): assert expected_line == exported_line, "Exported code mismatch" -def test_code_export_verbosity_2(): +def test_code_export_verbosity_high(): df = get_test_df() - group_section = GroupAnalysis(df=df, groupby="A", verbosity=2) + group_section = GroupAnalysis(df=df, groupby="A", verbosity=Verbosity.HIGH) # Export code exported_cells = [] diff --git a/tests/test_multivariate_analysis.py b/tests/test_multivariate_analysis.py index dbd9996..a10bf38 100644 --- a/tests/test_multivariate_analysis.py +++ b/tests/test_multivariate_analysis.py @@ -12,6 +12,7 @@ from edvart.report_sections import multivariate_analysis from edvart.report_sections.code_string_formatting import code_dedent, get_code from edvart.report_sections.multivariate_analysis import UMAP_AVAILABLE, MultivariateAnalysis +from edvart.report_sections.section_base import Verbosity def get_test_df() -> pd.DataFrame: @@ -31,9 +32,9 @@ def get_test_df() -> pd.DataFrame: def test_default_config_verbosity(): multivariate_section = MultivariateAnalysis(get_test_df()) - assert multivariate_section.verbosity == 0, "Verbosity should be 0" + assert multivariate_section.verbosity == Verbosity.LOW, "Verbosity should be Verbosity.LOW" for s in multivariate_section.subsections: - assert s.verbosity == 0, "Verbosity should be 0" + assert s.verbosity == Verbosity.LOW, "Verbosity should be Verbosity.LOW" def test_high_verobisities(): @@ -46,34 +47,42 @@ def test_high_verobisities(): def test_global_verbosity_overriding(): multivariate_section = MultivariateAnalysis( get_test_df(), - verbosity=0, - verbosity_pca=2, - verbosity_umap=1, - 
verbosity_parallel_categories=1, - verbosity_parallel_coordinates=2, + verbosity=Verbosity.LOW, + verbosity_pca=Verbosity.HIGH, + verbosity_umap=Verbosity.MEDIUM, + verbosity_parallel_categories=Verbosity.MEDIUM, + verbosity_parallel_coordinates=Verbosity.HIGH, ) - assert multivariate_section.verbosity == 0 + assert multivariate_section.verbosity == Verbosity.LOW for subsec in multivariate_section.subsections: if isinstance(subsec, multivariate_analysis.PCA): - assert subsec.verbosity == 2, "Verbosity of PCA should be 2" + assert subsec.verbosity == Verbosity.HIGH, "Verbosity of PCA should be Verbosity.HIGH" if UMAP_AVAILABLE: if isinstance(subsec, multivariate_analysis.UMAP): - assert subsec.verbosity == 1, "Verbosity of UMAP should be 1" + assert ( + subsec.verbosity == Verbosity.MEDIUM + ), "Verbosity of UMAP should be Verbosity.MEDIUM" if isinstance(subsec, multivariate_analysis.ParallelCategories): - assert subsec.verbosity == 1, "Verbosity of Par cats should be 1" + assert ( + subsec.verbosity == Verbosity.MEDIUM + ), "Verbosity of Par cats should be Verbosity.MEDIUM" if isinstance(subsec, multivariate_analysis.ParallelCoordinates): - assert subsec.verbosity == 2, "Verbosity of Par coords should be 2" + assert ( + subsec.verbosity == Verbosity.HIGH + ), "Verbosity of Par coords should be Verbosity.HIGH" def test_verbosity_propagation(): - multivariate_section = MultivariateAnalysis(get_test_df(), verbosity=2) + multivariate_section = MultivariateAnalysis(get_test_df(), verbosity=Verbosity.HIGH) assert ( - multivariate_section.verbosity == 2 - ), "Multivariate analysis global verbosity should be 2." + multivariate_section.verbosity == Verbosity.HIGH + ), "Multivariate analysis global verbosity should be Verbosity.HIGH." 
for subsec in multivariate_section.subsections: - assert subsec.verbosity == 2, f"Subsection {type(subsec)} verbosity should be 2" + assert ( + subsec.verbosity == Verbosity.HIGH + ), f"Subsection {type(subsec)} verbosity should be Verbosity.HIGH" def test_negative_verbosities(): @@ -116,8 +125,10 @@ def test_section_adding(): ), "Subsection should be UMAP" -def test_code_export_verbosity_0(): - multivariate_section = multivariate_analysis.MultivariateAnalysis(df=get_test_df(), verbosity=0) +def test_code_export_verbosity_low(): + multivariate_section = multivariate_analysis.MultivariateAnalysis( + df=get_test_df(), verbosity=Verbosity.LOW + ) # Export code exported_cells = [] multivariate_section.add_cells(exported_cells) @@ -130,13 +141,13 @@ def test_code_export_verbosity_0(): assert exported_code[0] == expected_code[0], "Exported code mismatch" -def test_code_export_verbosity_0_with_subsections(): +def test_code_export_verbosity_low_with_subsections(): subsec = multivariate_analysis.MultivariateAnalysis.MultivariateAnalysisSubsection subsections = [subsec.ParallelCategories, subsec.PCA, subsec.ParallelCoordinates, subsec.PCA] if UMAP_AVAILABLE: subsections.append(subsec.UMAP) multivariate_section = multivariate_analysis.MultivariateAnalysis( - df=get_test_df(), subsections=subsections, verbosity=0 + df=get_test_df(), subsections=subsections, verbosity=Verbosity.LOW ) # Export code @@ -167,14 +178,16 @@ def test_code_export_verbosity_0_with_subsections(): assert exported_code[0] == expected_code[0], "Exported code mismatch" -def test_code_export_verbosity_1_all_cols_valid(): +def test_code_export_verbosity_medium_all_cols_valid(): all_numeric_df = pd.DataFrame( data=[[1.1, 1, -2], [2.2, 2, -5.3], [3.3, 3, 4]], columns=["col1", "col2", "col3"] ) subsec = multivariate_analysis.MultivariateAnalysis.MultivariateAnalysisSubsection multivariate_section = multivariate_analysis.MultivariateAnalysis( - df=all_numeric_df, subsections=[subsec.PCA, 
subsec.ParallelCategories], verbosity=1 + df=all_numeric_df, + subsections=[subsec.PCA, subsec.ParallelCategories], + verbosity=Verbosity.MEDIUM, ) exported_cells = [] @@ -193,7 +206,9 @@ def test_code_export_verbosity_1_all_cols_valid(): def test_generated_code_verobsity_1(): - multivariate_section = multivariate_analysis.MultivariateAnalysis(df=get_test_df(), verbosity=1) + multivariate_section = multivariate_analysis.MultivariateAnalysis( + df=get_test_df(), verbosity=Verbosity.MEDIUM + ) exported_cells = [] multivariate_section.add_cells(exported_cells) @@ -229,7 +244,9 @@ def test_generated_code_verobsity_1(): def test_generated_code_verobsity_2(): - multivariate_section = multivariate_analysis.MultivariateAnalysis(df=get_test_df(), verbosity=2) + multivariate_section = multivariate_analysis.MultivariateAnalysis( + df=get_test_df(), verbosity=Verbosity.HIGH + ) multivariate_cells = [] multivariate_section.add_cells(multivariate_cells) @@ -286,13 +303,13 @@ def test_generated_code_verobsity_2(): assert expected_line == exported_line, "Exported code mismatch" -def test_verbosity_1_non_categorical_col(): +def test_verbosity_medium_non_categorical_col(): random_array = np.random.randint(low=1, high=40, size=(100, 3)) random_df = pd.DataFrame(data=random_array, columns=["integral", "floating", "cat"]) random_df = random_df.astype({"integral": int, "floating": float, "cat": "category"}) subsec = multivariate_analysis.MultivariateAnalysis.MultivariateAnalysisSubsection multivariate_section = multivariate_analysis.MultivariateAnalysis( - df=random_df, subsections=[subsec.ParallelCategories], verbosity=1 + df=random_df, subsections=[subsec.ParallelCategories], verbosity=Verbosity.MEDIUM ) multivariate_cells = [] @@ -306,7 +323,7 @@ def test_verbosity_1_non_categorical_col(): assert expected_line == exported_line, "Exported code mismatch" -def test_verbosity_0_different_subsection_verbosities(): +def test_verbosity_low_different_subsection_verbosities(): subsections = [ 
MultivariateAnalysis.MultivariateAnalysisSubsection.PCA, MultivariateAnalysis.MultivariateAnalysisSubsection.PCA, @@ -317,10 +334,10 @@ def test_verbosity_0_different_subsection_verbosities(): subsections.insert(2, MultivariateAnalysis.MultivariateAnalysisSubsection.UMAP) multivariate_section = MultivariateAnalysis( df=get_test_df(), - verbosity=0, + verbosity=Verbosity.LOW, subsections=subsections, - verbosity_parallel_categories=1, - verbosity_parallel_coordinates=2, + verbosity_parallel_categories=Verbosity.MEDIUM, + verbosity_parallel_coordinates=Verbosity.HIGH, ) multivariate_cells = [] @@ -350,8 +367,8 @@ def test_verbosity_0_different_subsection_verbosities(): assert expected_line == exported_line, "Exported code mismatch" -def test_imports_verbosity_0(): - multivariate_section = MultivariateAnalysis(df=get_test_df(), verbosity=0) +def test_imports_verbosity_low(): + multivariate_section = MultivariateAnalysis(df=get_test_df(), verbosity=Verbosity.LOW) exported_imports = multivariate_section.required_imports() expected_imports = [ @@ -365,8 +382,8 @@ def test_imports_verbosity_0(): assert expected_import == exported_import, "Exported import mismatch" -def test_imports_verbosity_1(): - multivariate_section = MultivariateAnalysis(df=get_test_df(), verbosity=1) +def test_imports_verbosity_medium(): + multivariate_section = MultivariateAnalysis(df=get_test_df(), verbosity=Verbosity.MEDIUM) exported_imports = multivariate_section.required_imports() expected_imports = list( @@ -379,8 +396,8 @@ def test_imports_verbosity_1(): assert expected_import == exported_import, "Exported import mismatch" -def test_imports_verbosity_2(): - multivariate_section = MultivariateAnalysis(df=get_test_df(), verbosity=2) +def test_imports_verbosity_high(): + multivariate_section = MultivariateAnalysis(df=get_test_df(), verbosity=Verbosity.HIGH) exported_imports = multivariate_section.required_imports() expected_imports = list( @@ -393,7 +410,7 @@ def test_imports_verbosity_2(): 
assert expected_import == exported_import, "Exported import mismatch" -def test_imports_verbosity_0_different_subsection_verbosities(): +def test_imports_verbosity_low_different_subsection_verbosities(): subsections = [ MultivariateAnalysis.MultivariateAnalysisSubsection.PCA, MultivariateAnalysis.MultivariateAnalysisSubsection.PCA, @@ -404,10 +421,10 @@ def test_imports_verbosity_0_different_subsection_verbosities(): subsections.insert(3, MultivariateAnalysis.MultivariateAnalysisSubsection.UMAP) multivariate_section = MultivariateAnalysis( df=get_test_df(), - verbosity=0, + verbosity=Verbosity.LOW, subsections=subsections, - verbosity_parallel_categories=1, - verbosity_parallel_coordinates=2, + verbosity_parallel_categories=Verbosity.MEDIUM, + verbosity_parallel_coordinates=Verbosity.HIGH, ) exported_imports = multivariate_section.required_imports() @@ -417,7 +434,7 @@ def test_imports_verbosity_0_different_subsection_verbosities(): "multivariate_analysis = MultivariateAnalysis.multivariate_analysis" } for s in multivariate_section.subsections: - if s.verbosity > 0: + if s.verbosity > Verbosity.LOW: expected_imports.update(s.required_imports()) assert isinstance(exported_imports, list) diff --git a/tests/test_overview_section.py b/tests/test_overview_section.py index 674771c..ec195e3 100644 --- a/tests/test_overview_section.py +++ b/tests/test_overview_section.py @@ -16,6 +16,7 @@ from edvart.report_sections import dataset_overview from edvart.report_sections.code_string_formatting import get_code from edvart.report_sections.dataset_overview import Overview +from edvart.report_sections.section_base import Verbosity def get_test_df() -> pd.DataFrame: @@ -26,52 +27,52 @@ def get_test_df() -> pd.DataFrame: def test_default_verbosity(): overview_section = Overview() - assert overview_section.verbosity == 0, "Verbosity should be 0" + assert overview_section.verbosity == Verbosity.LOW, "Verbosity should be Verbosity.LOW" for s in overview_section.subsections: - assert 
s.verbosity == 0, "Verbosity should be 0" + assert s.verbosity == Verbosity.LOW, "Verbosity should be Verbosity.LOW" def test_global_section_verbosity(): - overview_section = Overview(verbosity=1) - assert overview_section.verbosity == 1, "Verbosity should be 1" + overview_section = Overview(verbosity=Verbosity.MEDIUM) + assert overview_section.verbosity == Verbosity.MEDIUM, "Verbosity should be Verbosity.MEDIUM" for s in overview_section.subsections: - assert s.verbosity == 1, "Verbosity should be 1" + assert s.verbosity == Verbosity.MEDIUM, "Verbosity should be Verbosity.MEDIUM" def test_subsection_verbosity_overriding(): - overview_section = Overview(verbosity=0, verbosity_quick_info=1) - assert overview_section.verbosity == 0, "Verbosity should be 0" + overview_section = Overview(verbosity=Verbosity.LOW, verbosity_quick_info=Verbosity.MEDIUM) + assert overview_section.verbosity == Verbosity.LOW, "Verbosity should be Verbosity.LOW" for s in overview_section.subsections: if isinstance(s, dataset_overview.QuickInfo): - assert s.verbosity == 1, "Verbosity should be 1" + assert s.verbosity == Verbosity.MEDIUM, "Verbosity should be Verbosity.MEDIUM" else: - assert s.verbosity == 0, "Verbosity should be 0" + assert s.verbosity == Verbosity.LOW, "Verbosity should be Verbosity.LOW" overview_section = Overview( - verbosity=0, - verbosity_quick_info=1, - verbosity_constant_occurence=0, - verbosity_data_preview=1, - verbosity_data_types=2, - verbosity_rows_with_missing_value=1, - verbosity_duplicate_rows=1, + verbosity=Verbosity.LOW, + verbosity_quick_info=Verbosity.MEDIUM, + verbosity_constant_occurence=Verbosity.LOW, + verbosity_data_preview=Verbosity.MEDIUM, + verbosity_data_types=Verbosity.HIGH, + verbosity_rows_with_missing_value=Verbosity.MEDIUM, + verbosity_duplicate_rows=Verbosity.MEDIUM, ) - assert overview_section.verbosity == 0, "Verbosity should be 0" + assert overview_section.verbosity == Verbosity.LOW, "Verbosity should be Verbosity.LOW" for s in 
overview_section.subsections: if isinstance(s, dataset_overview.QuickInfo): - assert s.verbosity == 1, "Verbosity should be 1" + assert s.verbosity == Verbosity.MEDIUM, "Verbosity should be Verbosity.MEDIUM" elif isinstance(s, dataset_overview.ConstantOccurence): - assert s.verbosity == 0, "Verbosity should be 0" + assert s.verbosity == Verbosity.LOW, "Verbosity should be Verbosity.LOW" elif isinstance(s, dataset_overview.DataPreview): - assert s.verbosity == 1, "Verbosity should be 1" + assert s.verbosity == Verbosity.MEDIUM, "Verbosity should be Verbosity.MEDIUM" elif isinstance(s, dataset_overview.MissingValues): - assert s.verbosity == 0, "Verbosity should be 0" + assert s.verbosity == Verbosity.LOW, "Verbosity should be Verbosity.LOW" elif isinstance(s, dataset_overview.DataTypes): - assert s.verbosity == 2, "Verbosity should be 2" + assert s.verbosity == Verbosity.HIGH, "Verbosity should be 2" elif isinstance(s, dataset_overview.RowsWithMissingValue): - assert s.verbosity == 1, "Verbosity should be 1" + assert s.verbosity == Verbosity.MEDIUM, "Verbosity should be Verbosity.MEDIUM" elif isinstance(s, dataset_overview.DuplicateRows): - assert s.verbosity == 1, "Verbosity should be 1" + assert s.verbosity == Verbosity.MEDIUM, "Verbosity should be Verbosity.MEDIUM" else: pytest.fail("Invalid overview subsection type") @@ -121,8 +122,8 @@ def test_section_adding(): ), "Subsection should be DuplicateRows" -def test_code_export_verbosity_0(): - overview_section = Overview(verbosity=0) +def test_code_export_verbosity_low(): + overview_section = Overview(verbosity=Verbosity.LOW) # Export code exported_cells = [] overview_section.add_cells(exported_cells) @@ -134,13 +135,13 @@ def test_code_export_verbosity_0(): assert exported_code[0] == expected_code[0], "Exported code mismatch" -def test_code_export_verbosity_0_with_subsections(): +def test_code_export_verbosity_low_with_subsections(): overview_section = Overview( subsections=[ Overview.OverviewSubsection.QuickInfo, 
Overview.OverviewSubsection.MissingValues, ], - verbosity=0, + verbosity=Verbosity.LOW, ) # Export code exported_cells = [] @@ -156,7 +157,7 @@ def test_code_export_verbosity_0_with_subsections(): assert exported_code[0] == expected_code[0], "Exported code mismatch" -def test_code_export_verbosity_1(): +def test_code_export_verbosity_medium(): # Construct overview section overview_section = Overview( subsections=[ @@ -168,7 +169,7 @@ def test_code_export_verbosity_1(): Overview.OverviewSubsection.ConstantOccurence, Overview.OverviewSubsection.DuplicateRows, ], - verbosity=1, + verbosity=Verbosity.MEDIUM, ) # Export code exported_cells = [] @@ -190,7 +191,7 @@ def test_code_export_verbosity_1(): assert exported_code[i] == expected_code[i], "Exported code mismatch" -def test_code_export_verbosity_2(): +def test_code_export_verbosity_high(): # Construct overview section overview_section = Overview( subsections=[ @@ -202,7 +203,7 @@ def test_code_export_verbosity_2(): Overview.OverviewSubsection.ConstantOccurence, Overview.OverviewSubsection.DuplicateRows, ], - verbosity=2, + verbosity=Verbosity.HIGH, ) # Export code exported_cells = [] @@ -266,11 +267,11 @@ def test_code_export_verbosity_2(): assert exported_code[i] == expected_code[i], "Exported code mismatch" -def test_verbosity_0_different_subsection_verbosities(): +def test_verbosity_low_different_subsection_verbosities(): overview_section = Overview( - verbosity=0, - verbosity_quick_info=1, - verbosity_duplicate_rows=2, + verbosity=Verbosity.LOW, + verbosity_quick_info=Verbosity.MEDIUM, + verbosity_duplicate_rows=Verbosity.HIGH, ) overview_cells = [] @@ -299,8 +300,8 @@ def test_verbosity_0_different_subsection_verbosities(): assert expected_line == exported_line, "Exported code mismatch" -def test_imports_verbosity_0(): - overview_section = Overview(verbosity=0) +def test_imports_verbosity_low(): + overview_section = Overview(verbosity=Verbosity.LOW) exported_imports = overview_section.required_imports() 
expected_imports = [ @@ -314,8 +315,8 @@ def test_imports_verbosity_0(): assert expected_import == exported_import, "Exported import mismatch" -def test_imports_verbosity_1(): - multivariate_section = Overview(verbosity=1) +def test_imports_verbosity_medium(): + multivariate_section = Overview(verbosity=Verbosity.MEDIUM) exported_imports = multivariate_section.required_imports() expected_imports = list( @@ -328,8 +329,8 @@ def test_imports_verbosity_1(): assert expected_import == exported_import, "Exported import mismatch" -def test_imports_verbosity_2(): - multivariate_section = Overview(verbosity=2) +def test_imports_verbosity_high(): + multivariate_section = Overview(verbosity=Verbosity.HIGH) exported_imports = multivariate_section.required_imports() expected_imports = list( @@ -342,11 +343,11 @@ def test_imports_verbosity_2(): assert expected_import == exported_import, "Exported import mismatch" -def test_imports_verbosity_0_different_subsection_verbosities(): +def test_imports_verbosity_low_different_subsection_verbosities(): overview_section = Overview( - verbosity=0, - verbosity_quick_info=1, - verbosity_duplicate_rows=2, + verbosity=Verbosity.LOW, + verbosity_quick_info=Verbosity.MEDIUM, + verbosity_duplicate_rows=Verbosity.HIGH, ) exported_imports = overview_section.required_imports() @@ -356,7 +357,7 @@ def test_imports_verbosity_0_different_subsection_verbosities(): "overview_analysis = Overview.overview_analysis" } for s in overview_section.subsections: - if s.verbosity > 0: + if s.verbosity > Verbosity.LOW: expected_imports.update(s.required_imports()) assert isinstance(exported_imports, list) diff --git a/tests/test_report.py b/tests/test_report.py index bcf4da7..b40e48f 100644 --- a/tests/test_report.py +++ b/tests/test_report.py @@ -5,6 +5,7 @@ import pandas as pd from edvart.report import DefaultReport, Report +from edvart.report_sections.section_base import Verbosity def _get_test_df() -> pd.DataFrame: @@ -17,14 +18,14 @@ def test_report(): report 
= Report(dataframe=_get_test_df()) assert len(report.sections) == 0, "Report should be empty" - report.add_overview(verbosity=1) + report.add_overview(verbosity=Verbosity.MEDIUM) assert len(report.sections) == 1, "Report should have one section" - report.add_bivariate_analysis(verbosity=2, use_columns=["Col1", "Col2", "Col3"]) + report.add_bivariate_analysis(verbosity=Verbosity.HIGH, use_columns=["Col1", "Col2", "Col3"]) assert len(report.sections) == 2, "Report should have two sections" assert report.sections[0].name == "Overview", "Wrong section name" - assert report.sections[0].verbosity == 1, "Wrong section verbosity" + assert report.sections[0].verbosity == Verbosity.MEDIUM, "Wrong section verbosity" assert report.sections[0].columns is None, "Default column selection should be None" assert report.sections[1].columns == ["Col1", "Col2", "Col3"], "Wrong columns" @@ -33,19 +34,19 @@ def test_report(): def test_default_report(): report = DefaultReport( dataframe=_get_test_df(), - verbosity_overview=1, - verbosity_univariate_analysis=2, + verbosity_overview=Verbosity.MEDIUM, + verbosity_univariate_analysis=Verbosity.HIGH, columns_bivariate_analysis=["Col1", "Col2", "Col3"], ) assert len(report.sections) > 0, "Default report should not be empty" - assert report.sections[1].verbosity == 1, "Wrong section verbosity" + assert report.sections[1].verbosity == Verbosity.MEDIUM, "Wrong section verbosity" assert report.sections[1].columns is None, "Default column selection should be None" - assert report.sections[2].verbosity == 2, "Wrong section verbosity" + assert report.sections[2].verbosity == Verbosity.HIGH, "Wrong section verbosity" assert report.sections[2].columns is None, "Default column selection should be None" - assert report.sections[3].verbosity == 0, "Wrong section verbosity" + assert report.sections[3].verbosity == Verbosity.LOW, "Wrong section verbosity" assert report.sections[3].columns == ["Col1", "Col2", "Col3"], "Wrong columns" diff --git 
a/tests/test_timeseries_analysis.py b/tests/test_timeseries_analysis.py index baba655..5825895 100644 --- a/tests/test_timeseries_analysis.py +++ b/tests/test_timeseries_analysis.py @@ -7,14 +7,15 @@ import edvart from edvart.report_sections import timeseries_analysis from edvart.report_sections.code_string_formatting import get_code +from edvart.report_sections.section_base import Verbosity from edvart.report_sections.timeseries_analysis import BoxplotsOverTime, TimeseriesAnalysis def test_default_config_verbosity(): timeseries_section = TimeseriesAnalysis() - assert timeseries_section.verbosity == 0, "Verbosity should be 0" + assert timeseries_section.verbosity == Verbosity.LOW, "Verbosity should be Verbosity.LOW" for s in timeseries_section.subsections: - assert s.verbosity == 0, "Verbosity should be 0" + assert s.verbosity == Verbosity.LOW, "Verbosity should be Verbosity.LOW" def test_high_verobisities(): @@ -30,33 +31,47 @@ def test_high_verobisities(): def test_global_verbosity_overriding(): timeseries_section = TimeseriesAnalysis( - verbosity=0, - verbosity_autocorrelation=2, - verbosity_stationarity_tests=1, - verbosity_rolling_statistics=2, - verbosity_time_analysis_plot=1, + verbosity=Verbosity.LOW, + verbosity_autocorrelation=Verbosity.HIGH, + verbosity_stationarity_tests=Verbosity.MEDIUM, + verbosity_rolling_statistics=Verbosity.HIGH, + verbosity_time_analysis_plot=Verbosity.MEDIUM, ) - assert timeseries_section.verbosity == 0 + assert timeseries_section.verbosity == Verbosity.LOW for subsec in timeseries_section.subsections: if isinstance(subsec, timeseries_analysis.Autocorrelation): - assert subsec.verbosity == 2, "Verbosity of autocorrelation should be 2" + assert ( + subsec.verbosity == Verbosity.HIGH + ), "Verbosity of autocorrelation should be Verbosity.HIGH" elif isinstance(subsec, timeseries_analysis.StationarityTests): - assert subsec.verbosity == 1, "Verbosity of stationarity tests should be 1" + assert ( + subsec.verbosity == Verbosity.MEDIUM 
+ ), "Verbosity of stationarity tests should be Verbosity.MEDIUM" elif isinstance(subsec, timeseries_analysis.RollingStatistics): - assert subsec.verbosity == 2, "Verbosity of rolling stats should be 2" + assert ( + subsec.verbosity == Verbosity.HIGH + ), "Verbosity of rolling stats should be Verbosity.HIGH" elif isinstance(subsec, timeseries_analysis.TimeAnalysisPlot): - assert subsec.verbosity == 1, "Verbosity of timeanalysis plot should be 1" + assert ( + subsec.verbosity == Verbosity.MEDIUM + ), "Verbosity of timeanalysis plot should be 1" else: - assert subsec.verbosity == 0, "Verbosity of other sections should be 0" + assert ( + subsec.verbosity == Verbosity.LOW + ), "Verbosity of other sections should be Verbosity.LOW" def test_verbosity_propagation(): - timeseries_section = TimeseriesAnalysis(verbosity=2) - assert timeseries_section.verbosity == 2, "Timeseries analysis global verbosity should be 2." + timeseries_section = TimeseriesAnalysis(verbosity=Verbosity.HIGH) + assert ( + timeseries_section.verbosity == Verbosity.HIGH + ), "Timeseries analysis global verbosity should be Verbosity.HIGH." for subsec in timeseries_section.subsections: - assert subsec.verbosity == 2, f"{type(subsec)} verbosity should be 2." + assert ( + subsec.verbosity == Verbosity.HIGH + ), f"{type(subsec)} verbosity should be Verbosity.HIGH." 
def test_negative_verbosities(): @@ -151,8 +166,8 @@ def test_ft_no_sampling_rate_error(): ) -def test_code_export_verbosity_0(): - ts_section = TimeseriesAnalysis(verbosity=0) +def test_code_export_verbosity_low(): + ts_section = TimeseriesAnalysis(verbosity=Verbosity.LOW) # Export code exported_cells = [] ts_section.add_cells(exported_cells) @@ -165,13 +180,13 @@ def test_code_export_verbosity_0(): assert exported_code[0] == expected_code[0], "Exported code mismatch" -def test_code_export_verbosity_0_with_subsections(): +def test_code_export_verbosity_low_with_subsections(): ts_section = TimeseriesAnalysis( subsections=[ TimeseriesAnalysis.TimeseriesAnalysisSubsection.RollingStatistics, TimeseriesAnalysis.TimeseriesAnalysisSubsection.StationarityTests, ], - verbosity=0, + verbosity=Verbosity.LOW, ) # Export code exported_cells = [] @@ -189,13 +204,13 @@ def test_code_export_verbosity_0_with_subsections(): assert exported_code[0] == expected_code[0], "Exported code mismatch" -def test_code_export_verbosity_0_with_fft_stft(): +def test_code_export_verbosity_low_with_fft_stft(): ts_section = TimeseriesAnalysis( subsections=[ TimeseriesAnalysis.TimeseriesAnalysisSubsection.FourierTransform, TimeseriesAnalysis.TimeseriesAnalysisSubsection.ShortTimeFT, ], - verbosity=0, + verbosity=Verbosity.LOW, sampling_rate=1, stft_window_size=1, ) @@ -216,8 +231,8 @@ def test_code_export_verbosity_0_with_fft_stft(): assert exported_code[0] == expected_code[0], "Exported code mismatch" -def test_generated_code_verobsity_1(): - ts_section = TimeseriesAnalysis(verbosity=1) +def test_generated_code_verobsity_medium(): + ts_section = TimeseriesAnalysis(verbosity=Verbosity.MEDIUM) exported_cells = [] ts_section.add_cells(exported_cells) @@ -238,8 +253,8 @@ def test_generated_code_verobsity_1(): assert expected_line == exported_line, "Exported code mismatch" -def test_generated_code_verobsity_2(): - ts_section = TimeseriesAnalysis(verbosity=2, sampling_rate=1, stft_window_size=1) +def 
test_generated_code_verobsity_high(): + ts_section = TimeseriesAnalysis(verbosity=Verbosity.HIGH, sampling_rate=1, stft_window_size=1) pairplot_cells = [] ts_section.add_cells(pairplot_cells) @@ -316,9 +331,9 @@ def test_generated_code_verobsity_2(): assert expected_line == exported_line, "Exported code mismatch" -def test_verbosity_0_different_subsection_verbosities(): +def test_verbosity_low_different_subsection_verbosities(): ts_section = TimeseriesAnalysis( - verbosity=0, + verbosity=Verbosity.LOW, subsections=[ TimeseriesAnalysis.TimeseriesAnalysisSubsection.TimeAnalysisPlot, TimeseriesAnalysis.TimeseriesAnalysisSubsection.FourierTransform, @@ -329,8 +344,8 @@ def test_verbosity_0_different_subsection_verbosities(): ], sampling_rate=1, stft_window_size=2, - verbosity_rolling_statistics=1, - verbosity_short_time_ft=2, + verbosity_rolling_statistics=Verbosity.MEDIUM, + verbosity_short_time_ft=Verbosity.HIGH, ) ts_cells = [] @@ -398,8 +413,8 @@ def test_boxplots_over_time_lambda(): assert expected_line == exported_line, "Exported code mismatch" -def test_imports_verbosity_0(): - ts_section = TimeseriesAnalysis(verbosity=0) +def test_imports_verbosity_low(): + ts_section = TimeseriesAnalysis(verbosity=Verbosity.LOW) exported_imports = ts_section.required_imports() expected_imports = [ @@ -413,8 +428,8 @@ def test_imports_verbosity_0(): assert expected_import == exported_import, "Exported import mismatch" -def test_imports_verbosity_1(): - ts_section = TimeseriesAnalysis(verbosity=1) +def test_imports_verbosity_medium(): + ts_section = TimeseriesAnalysis(verbosity=Verbosity.MEDIUM) exported_imports = ts_section.required_imports() expected_imports = list(set().union(*[s.required_imports() for s in ts_section.subsections])) @@ -425,8 +440,8 @@ def test_imports_verbosity_1(): assert expected_import == exported_import, "Exported import mismatch" -def test_imports_verbosity_2(): - ts_section = TimeseriesAnalysis(verbosity=2) +def test_imports_verbosity_high(): + 
ts_section = TimeseriesAnalysis(verbosity=Verbosity.HIGH) exported_imports = ts_section.required_imports() expected_imports = list(set().union(*[s.required_imports() for s in ts_section.subsections])) @@ -437,9 +452,9 @@ def test_imports_verbosity_2(): assert expected_import == exported_import, "Exported import mismatch" -def test_imports_verbosity_0_different_subsection_verbosities(): +def test_imports_verbosity_low_different_subsection_verbosities(): ts_section = TimeseriesAnalysis( - verbosity=0, + verbosity=Verbosity.LOW, subsections=[ TimeseriesAnalysis.TimeseriesAnalysisSubsection.TimeAnalysisPlot, TimeseriesAnalysis.TimeseriesAnalysisSubsection.FourierTransform, @@ -450,8 +465,8 @@ def test_imports_verbosity_0_different_subsection_verbosities(): ], sampling_rate=1, stft_window_size=2, - verbosity_rolling_statistics=1, - verbosity_short_time_ft=2, + verbosity_rolling_statistics=Verbosity.MEDIUM, + verbosity_short_time_ft=Verbosity.HIGH, ) exported_imports = ts_section.required_imports() @@ -461,7 +476,7 @@ def test_imports_verbosity_0_different_subsection_verbosities(): "timeseries_analysis = TimeseriesAnalysis.timeseries_analysis" } for s in ts_section.subsections: - if s.verbosity > 0: + if s.verbosity > Verbosity.LOW: expected_imports.update(s.required_imports()) assert isinstance(exported_imports, list) diff --git a/tests/test_univariate_analysis_section.py b/tests/test_univariate_analysis_section.py index 2a6638c..6e2dbce 100644 --- a/tests/test_univariate_analysis_section.py +++ b/tests/test_univariate_analysis_section.py @@ -7,6 +7,7 @@ from edvart.pandas_formatting import add_html_heading, dict_to_html, format_number, subcells_html from edvart.report_sections import univariate_analysis from edvart.report_sections.code_string_formatting import code_dedent, get_code +from edvart.report_sections.section_base import Verbosity def test_invalid_verbosity(): @@ -21,10 +22,10 @@ def test_invalid_verbosity(): univariate_analysis.UnivariateAnalysis(df=test_df, 
verbosity="1") -def test_code_export_verbosity_0(): +def test_code_export_verbosity_low(): test_df = pd.DataFrame(data=[[1.9, "a"], [2.1, "b"], [3.3, "c"]], columns=["A", "B"]) # Construct univariate analysis section - univariate_section = univariate_analysis.UnivariateAnalysis(df=test_df, verbosity=0) + univariate_section = univariate_analysis.UnivariateAnalysis(df=test_df, verbosity=Verbosity.LOW) # Export code exported_cells = [] univariate_section.add_cells(exported_cells) @@ -36,10 +37,12 @@ def test_code_export_verbosity_0(): assert exported_code[0] == expected_code[0], "Exported code mismatch" -def test_code_export_verbosity_1(): +def test_code_export_verbosity_medium(): test_df = pd.DataFrame(data=[[1.9, "a"], [2.1, "b"], [3.3, "c"]], columns=["A", "B"]) # Construct univariate analysis section - univariate_section = univariate_analysis.UnivariateAnalysis(df=test_df, verbosity=1) + univariate_section = univariate_analysis.UnivariateAnalysis( + df=test_df, verbosity=Verbosity.MEDIUM + ) # Export code exported_cells = [] univariate_section.add_cells(exported_cells) @@ -55,10 +58,12 @@ def test_code_export_verbosity_1(): assert exported_code[i] == expected_code[i], "Exported code mismatch" -def test_code_export_verbosity_2(): +def test_code_export_verbosity_high(): test_df = pd.DataFrame(data=[[1.9, "a"], [2.1, "b"], [3.3, "c"]], columns=["A", "B"]) # Construct univariate analysis section - univariate_section = univariate_analysis.UnivariateAnalysis(df=test_df, verbosity=2) + univariate_section = univariate_analysis.UnivariateAnalysis( + df=test_df, verbosity=Verbosity.HIGH + ) # Export code exported_cells = [] univariate_section.add_cells(exported_cells) From 76a4098ea5ce20ce9acc26839ef0f3fa233cb4ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Luk=C3=A1ny?= Date: Wed, 9 Aug 2023 15:22:42 +0200 Subject: [PATCH 2/3] fix(review): verbosity docstring --- edvart/report_sections/section_base.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff 
--git a/edvart/report_sections/section_base.py b/edvart/report_sections/section_base.py index eebe80d..c5f9aa2 100644 --- a/edvart/report_sections/section_base.py +++ b/edvart/report_sections/section_base.py @@ -9,13 +9,13 @@ class Verbosity(IntEnum): """ Verbosity of the exported code. - 0 - A single function call generates the entire bivariate analysis section. - 1 + LOW + A single function call generates the entire section. + MEDIUM Function calls to parameterizable functions are generated for each column separately in separate cells. - 2 - Similar to 1, but in addition, function definitions are generated, column + HIGH + Similar to MEDIUM, but in addition, function definitions are generated, column data type inference and default statistics become customizable. """ From c53af867bb84dc6994dbec35a0fb2d57201940a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Luk=C3=A1ny?= Date: Wed, 9 Aug 2023 15:23:16 +0200 Subject: [PATCH 3/3] fix(review): remove redundant type hints --- edvart/report_sections/section_base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/edvart/report_sections/section_base.py b/edvart/report_sections/section_base.py index c5f9aa2..923d9f2 100644 --- a/edvart/report_sections/section_base.py +++ b/edvart/report_sections/section_base.py @@ -19,9 +19,9 @@ class Verbosity(IntEnum): data type inference and default statistics become customizable. """ - LOW: int = 0 - MEDIUM: int = 1 - HIGH: int = 2 + LOW = 0 + MEDIUM = 1 + HIGH = 2 class Section(ABC):