Merge pull request #142 from pepkit/dev

v0.8.0 release
pepkit · Jan 25, 2024 · 6510c35 · 6510c35
2 parents 37c7311 + d3d4c13
commit 6510c35
Show file tree

Hide file tree

Showing 18 changed files with 427 additions and 26 deletions.
diff --git a/.gitignore b/.gitignore
@@ -142,3 +142,4 @@ dmypy.json
 /tests/data/test_file_links/link_results/
 /tests/data/default_pipeline_name_objs_summary.yaml
 /tests/data/default_pipeline_name_stats_summary.tsv
+/tests/data/portable_reports/
diff --git a/codecov.yml b/codecov.yml
@@ -14,3 +14,4 @@ ignore:
   - "*/cli.py"
   - "*/__main__.py"
   - "setup.py"
+  - "tests/"
diff --git a/docs/api_docs.md b/docs/api_docs.md
@@ -669,7 +669,7 @@ Pipestat standardizes reporting of pipeline results and pipeline status manageme
 
 
 ```python
-def __init__(self, project_name: Optional[str]=None, record_identifier: Optional[str]=None, schema_path: Optional[str]=None, results_file_path: Optional[str]=None, database_only: Optional[bool]=True, config_file: Optional[str]=None, config_dict: Optional[dict]=None, flag_file_dir: Optional[str]=None, show_db_logs: bool=False, pipeline_type: Optional[str]=None, pipeline_name: Optional[str]='default_pipeline_name', result_formatter: staticmethod=<function default_formatter at 0x7f47f614fbe0>, multi_pipelines: bool=False, output_dir: Optional[str]=None)
+def __init__(self, project_name: Optional[str]=None, record_identifier: Optional[str]=None, schema_path: Optional[str]=None, results_file_path: Optional[str]=None, database_only: Optional[bool]=True, config_file: Optional[str]=None, config_dict: Optional[dict]=None, flag_file_dir: Optional[str]=None, show_db_logs: bool=False, pipeline_type: Optional[str]=None, pipeline_name: Optional[str]='default_pipeline_name', result_formatter: staticmethod=<function default_formatter at 0x7f876c474040>, multi_pipelines: bool=False, output_dir: Optional[str]=None)
 ```
 
 Initialize the PipestatManager object
@@ -1024,4 +1024,4 @@ def table(self, *args, **kwargs)
 
 
 
-*Version Information: `pipestat` v0.6.0a11, generated by `lucidoc` v0.4.4*
+*Version Information: `pipestat` v0.8.0, generated by `lucidoc` v0.4.4*
diff --git a/docs/changelog.md b/docs/changelog.md
@@ -2,6 +2,12 @@
 
 This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format.
 
+## [0.8.0] - 2024-01-25
+### Added
+- added `portable` flag to pipestat summarize to create a shareable version of the html report.
+- added setting `index: True` within output schema to index specific results for DB backend.
+### Fixed
+- pipestat summarize: objects drop-down now only shows sample-level objects
 
 ## [0.7.0] - 2024-01-17
 ### Added

diff --git a/docs/pipestat_specification.md b/docs/pipestat_specification.md
@@ -196,6 +196,24 @@ log_file:
 
 The highlighted results can be later retrieved by pipestat clients via `PipestatManager.highlighted_results` property, which simply returns a list of result identifiers.
 
+## Add indexing to a Result (database backend only)
+If you would like to index a specific result, you can add `index: True` to the result:
+
+```yaml
+title: An example Pipestat output schema
+description: A pipeline that uses pipestat to report sample and project level results.
+type: object
+properties:
+  pipeline_name: "default_pipeline_name"
+  samples:
+    type: object
+    properties:
+      number_of_things:
+        type: integer
+        description: "Number of things"
+        index: True
+```
+
 # Status schema
 
 Apart from results reporting pipestat provides a robust pipeline status management system, which can be used to report pipeline status from within the pipeline and monitor pipeline's status in other software. Status schema file defines the possible pipeline status identifiers and provides other metadata, like `description` or `color` for display purposes.

diff --git a/docs/usage.md b/docs/usage.md
@@ -6,7 +6,7 @@ Here you can see the command-line usage instructions for the main command and fo
 ## `pipestat --help`
 ```console
 Configure by setting PIPESTAT_CONFIG env var
-version: 0.6.0
+version: 0.8.0
 usage: pipestat [-h] [--version] [--silent] [--verbosity V] [--logdev]
                 {report,inspect,remove,retrieve,status,init,summarize,link,serve}
                 ...

diff --git a/pipestat/_version.py b/pipestat/_version.py
@@ -1 +1 @@
-__version__ = "0.7.0"
+__version__ = "0.8.0"
diff --git a/pipestat/argparser.py b/pipestat/argparser.py
@@ -334,6 +334,12 @@ def add_subparser(
             help="project or sample level pipeline type. ",
         )
 
+        sps[cmd].add_argument(
+            "--portable",
+            action="store_true",
+            help="Makes html report portable.",
+        )
+
     # LINK
     for cmd in [LINK_CMD]:
         sps[cmd].add_argument(

diff --git a/pipestat/backends/db_backend/db_helpers.py b/pipestat/backends/db_backend/db_helpers.py
@@ -2,10 +2,9 @@
 from typing import Any, Dict, List, Optional, Union
 from urllib.parse import quote_plus
 
-try:
-    from sqlmodel import and_, or_, Integer, Float, String, Boolean
-except ImportError as e:
-    pass
+
+from sqlmodel import and_, or_, Integer, Float, String, Boolean
+
 
 from pipestat.exceptions import MissingConfigDataError
 

diff --git a/pipestat/backends/db_backend/db_parsed_schema.py b/pipestat/backends/db_backend/db_parsed_schema.py
@@ -151,6 +151,7 @@ def _make_field_definitions(self, data: Dict[str, Any], require_type: bool):
         # TODO: parse "required" ?
         defs = {}
         for name, subdata in data.items():
+            result_indexed = False
             try:
                 typename = subdata[SCHEMA_TYPE_KEY]
             except KeyError:
@@ -162,18 +163,26 @@ def _make_field_definitions(self, data: Dict[str, Any], require_type: bool):
             else:
                 data_type = self._get_data_type(typename)
             if data_type == CLASSES_BY_TYPE["object"] or data_type == CLASSES_BY_TYPE["array"]:
+                if "index" in subdata and subdata["index"] is True:
+                    _LOGGER.warning(f"Cannot index JSONB Column, ignoring index: True for {name} ")
                 defs[name] = (
                     data_type,
-                    Field(sa_column=Column(JSONB), default=null()),
+                    Field(
+                        sa_column=Column(JSONB),
+                        default=null(),
+                    ),
                 )
             else:
+                if "index" in subdata:
+                    if isinstance(subdata["index"], bool):
+                        result_indexed = subdata["index"]
                 defs[name] = (
-                    # Optional[subdata[SCHEMA_TYPE_KEY]],
-                    # subdata[SCHEMA_TYPE_KEY],
-                    # Optional[str],
-                    # CLASSES_BY_TYPE[subdata[SCHEMA_TYPE_KEY]],
                     data_type,
-                    Field(default=subdata.get("default"), nullable=True),
+                    Field(
+                        default=subdata.get("default"),
+                        nullable=True,
+                        index=result_indexed,
+                    ),
                 )
         return defs
 

diff --git a/pipestat/cli.py b/pipestat/cli.py
@@ -82,7 +82,8 @@ def main(test_args=None):
             pipeline_type=args.pipeline_type,
         )
         results_path = args.config or args.results_file
-        html_report_path = psm.summarize()
+        portable = args.portable or False
+        html_report_path = psm.summarize(portable=portable)
         _LOGGER.info(f"\nGenerating HTML Report from {results_path} at: {html_report_path}\n")
 
         sys.exit(0)

diff --git a/pipestat/helpers.py b/pipestat/helpers.py
@@ -4,15 +4,18 @@
 import glob
 import os
 import errno
-import yaml
+
 import jsonschema
 from json import dumps
 from pathlib import Path
+from shutil import make_archive
 from typing import Any, Dict, Optional, Tuple, Union, List
 
-from oyaml import safe_load
+from oyaml import safe_load, dump
 from ubiquerg import expandpath
 
+from zipfile import ZipFile, ZIP_DEFLATED
+
 from .const import (
     PIPESTAT_GENERIC_CONFIG,
     SCHEMA_PROP_KEY,
@@ -164,7 +167,7 @@ def init_generic_config():
     # Write file
     if not os.path.exists(dest_file):
         with open(dest_file, "w") as file:
-            yaml.dump(generic_config_dict, file)
+            dump(generic_config_dict, file)
         print(f"Generic configuration file successfully created at: {dest_file}")
     else:
         print(f"Generic configuration file already exists `{dest_file}`. Skipping creation..")
@@ -229,3 +232,26 @@ def get_all_result_files(results_file_path: str) -> List:
     files = glob.glob(results_file_path + "**/*.yaml")
 
     return files
+
+
+def zip_report(report_dir_name: str) -> None:
+    """
+    Zip the report directory into `<report_dir_name>_report_portable.zip`.
+
+    Best-effort: a RuntimeError from `make_archive` is logged as a warning, not raised.
+
+    :param report_dir_name: directory name of report directory
+    :return: None
+    """
+
+    zip_file_name = f"{report_dir_name}_report_portable"
+
+    try:
+        make_archive(zip_file_name, "zip", report_dir_name)
+    except RuntimeError as e:
+        _LOGGER.warning(f"Report zip file not created! \n {e}")
+
+    if os.path.exists(zip_file_name + ".zip"):
+        _LOGGER.info(f"Report zip file successfully created: {zip_file_name}")
+    else:
+        _LOGGER.warning("Report zip file not created.")
diff --git a/pipestat/pipestat.py b/pipestat/pipestat.py
@@ -22,7 +22,7 @@
 )
 from pipestat.backends.file_backend.filebackend import FileBackend
 from .reports import HTMLReportBuilder, _create_stats_objs_summaries
-from .helpers import validate_type, mk_abs_via_cfg, read_yaml_data, default_formatter
+from .helpers import validate_type, mk_abs_via_cfg, read_yaml_data, default_formatter, zip_report
 from .const import (
     PKG_NAME,
     DEFAULT_PIPELINE_NAME,
@@ -766,23 +766,29 @@ def summarize(
         self,
         looper_samples: Optional[list] = None,
         amendment: Optional[str] = None,
+        portable: Optional[bool] = False,
     ) -> None:
         """
         Builds a browsable html report for reported results.
         :param Iterable[str] looper_samples: list of looper Samples from PEP
         :param Iterable[str] amendment: name indicating amendment to use, optional
+        :param bool portable: moves figures and report files to directory for easy sharing
         :return str: report_path
 
         """
 
         self.check_multi_results()
 
-        html_report_builder = HTMLReportBuilder(prj=self)
+        html_report_builder = HTMLReportBuilder(prj=self, portable=portable)
         report_path = html_report_builder(
             pipeline_name=self.cfg[PIPELINE_NAME],
             amendment=amendment,
             looper_samples=looper_samples,
         )
+
+        if portable is True:
+            zip_report(report_dir_name=os.path.dirname(report_path))
+
         return report_path
 
     def check_multi_results(self):