Skip to content

Commit

Permalink
Merge pull request #142 from pepkit/dev
Browse files Browse the repository at this point in the history
v0.8.0 release
  • Loading branch information
donaldcampbelljr authored Jan 25, 2024
2 parents 37c7311 + d3d4c13 commit 6510c35
Show file tree
Hide file tree
Showing 18 changed files with 427 additions and 26 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -142,3 +142,4 @@ dmypy.json
/tests/data/test_file_links/link_results/
/tests/data/default_pipeline_name_objs_summary.yaml
/tests/data/default_pipeline_name_stats_summary.tsv
/tests/data/portable_reports/
1 change: 1 addition & 0 deletions codecov.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ ignore:
- "*/cli.py"
- "*/__main__.py"
- "setup.py"
- "tests/"
4 changes: 2 additions & 2 deletions docs/api_docs.md
Original file line number Diff line number Diff line change
Expand Up @@ -669,7 +669,7 @@ Pipestat standardizes reporting of pipeline results and pipeline status manageme


```python
def __init__(self, project_name: Optional[str]=None, record_identifier: Optional[str]=None, schema_path: Optional[str]=None, results_file_path: Optional[str]=None, database_only: Optional[bool]=True, config_file: Optional[str]=None, config_dict: Optional[dict]=None, flag_file_dir: Optional[str]=None, show_db_logs: bool=False, pipeline_type: Optional[str]=None, pipeline_name: Optional[str]='default_pipeline_name', result_formatter: staticmethod=<function default_formatter at 0x7f47f614fbe0>, multi_pipelines: bool=False, output_dir: Optional[str]=None)
def __init__(self, project_name: Optional[str]=None, record_identifier: Optional[str]=None, schema_path: Optional[str]=None, results_file_path: Optional[str]=None, database_only: Optional[bool]=True, config_file: Optional[str]=None, config_dict: Optional[dict]=None, flag_file_dir: Optional[str]=None, show_db_logs: bool=False, pipeline_type: Optional[str]=None, pipeline_name: Optional[str]='default_pipeline_name', result_formatter: staticmethod=<function default_formatter at 0x7f876c474040>, multi_pipelines: bool=False, output_dir: Optional[str]=None)
```

Initialize the PipestatManager object
Expand Down Expand Up @@ -1024,4 +1024,4 @@ def table(self, *args, **kwargs)



*Version Information: `pipestat` v0.6.0a11, generated by `lucidoc` v0.4.4*
*Version Information: `pipestat` v0.8.0, generated by `lucidoc` v0.4.4*
6 changes: 6 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format.

## [0.8.0] - 2024-01-25
### Added
- added `portable` flag to pipestat summarize to create a shareable version of the html report.
- added setting `index: True` within output schema to index specific results for DB backend.
### Fixed
- pipestat summarize: objects drop-down now only shows sample-level objects

## [0.7.0] - 2024-01-17
### Added
Expand Down
18 changes: 18 additions & 0 deletions docs/pipestat_specification.md
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,24 @@ log_file:

The highlighted results can be later retrieved by pipestat clients via `PipestatManager.highlighted_results` property, which simply returns a list of result identifiers.

## Add indexing to a Result (database backend only)
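If you would like to index a specific result, add `index: True` to that result in the output schema. Results of type `object` or `array` are stored in JSONB columns and cannot be indexed; for them `index: True` is ignored and a warning is logged. For example: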

```yaml
title: An example Pipestat output schema
description: A pipeline that uses pipestat to report sample and project level results.
type: object
properties:
pipeline_name: "default_pipeline_name"
samples:
type: object
properties:
number_of_things:
type: integer
description: "Number of things"
index: True
```

# Status schema

Apart from results reporting, pipestat provides a robust pipeline status management system, which can be used to report pipeline status from within the pipeline and to monitor the pipeline's status in other software. The status schema file defines the possible pipeline status identifiers and provides other metadata, like `description` or `color`, for display purposes.
Expand Down
2 changes: 1 addition & 1 deletion docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Here you can see the command-line usage instructions for the main command and fo
## `pipestat --help`
```console
Configure by setting PIPESTAT_CONFIG env var
version: 0.6.0
version: 0.8.0
usage: pipestat [-h] [--version] [--silent] [--verbosity V] [--logdev]
{report,inspect,remove,retrieve,status,init,summarize,link,serve}
...
Expand Down
2 changes: 1 addition & 1 deletion pipestat/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.7.0"
__version__ = "0.8.0"
6 changes: 6 additions & 0 deletions pipestat/argparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,12 @@ def add_subparser(
help="project or sample level pipeline type. ",
)

sps[cmd].add_argument(
"--portable",
action="store_true",
help="Makes html report portable.",
)

# LINK
for cmd in [LINK_CMD]:
sps[cmd].add_argument(
Expand Down
7 changes: 3 additions & 4 deletions pipestat/backends/db_backend/db_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,9 @@
from typing import Any, Dict, List, Optional, Union
from urllib.parse import quote_plus

try:
from sqlmodel import and_, or_, Integer, Float, String, Boolean
except ImportError as e:
pass

from sqlmodel import and_, or_, Integer, Float, String, Boolean


from pipestat.exceptions import MissingConfigDataError

Expand Down
21 changes: 15 additions & 6 deletions pipestat/backends/db_backend/db_parsed_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ def _make_field_definitions(self, data: Dict[str, Any], require_type: bool):
# TODO: parse "required" ?
defs = {}
for name, subdata in data.items():
result_indexed = False
try:
typename = subdata[SCHEMA_TYPE_KEY]
except KeyError:
Expand All @@ -162,18 +163,26 @@ def _make_field_definitions(self, data: Dict[str, Any], require_type: bool):
else:
data_type = self._get_data_type(typename)
if data_type == CLASSES_BY_TYPE["object"] or data_type == CLASSES_BY_TYPE["array"]:
if "index" in subdata and subdata["index"] is True:
_LOGGER.warning(f"Cannot index JSONB Column, ignoring index: True for {name} ")
defs[name] = (
data_type,
Field(sa_column=Column(JSONB), default=null()),
Field(
sa_column=Column(JSONB),
default=null(),
),
)
else:
if "index" in subdata:
if isinstance(subdata["index"], bool):
result_indexed = subdata["index"]
defs[name] = (
# Optional[subdata[SCHEMA_TYPE_KEY]],
# subdata[SCHEMA_TYPE_KEY],
# Optional[str],
# CLASSES_BY_TYPE[subdata[SCHEMA_TYPE_KEY]],
data_type,
Field(default=subdata.get("default"), nullable=True),
Field(
default=subdata.get("default"),
nullable=True,
index=result_indexed,
),
)
return defs

Expand Down
3 changes: 2 additions & 1 deletion pipestat/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@ def main(test_args=None):
pipeline_type=args.pipeline_type,
)
results_path = args.config or args.results_file
html_report_path = psm.summarize()
portable = args.portable or False
html_report_path = psm.summarize(portable=portable)
_LOGGER.info(f"\nGenerating HTML Report from {results_path} at: {html_report_path}\n")

sys.exit(0)
Expand Down
32 changes: 29 additions & 3 deletions pipestat/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,18 @@
import glob
import os
import errno
import yaml

import jsonschema
from json import dumps
from pathlib import Path
from shutil import make_archive
from typing import Any, Dict, Optional, Tuple, Union, List

from oyaml import safe_load
from oyaml import safe_load, dump
from ubiquerg import expandpath

from zipfile import ZipFile, ZIP_DEFLATED

from .const import (
PIPESTAT_GENERIC_CONFIG,
SCHEMA_PROP_KEY,
Expand Down Expand Up @@ -164,7 +167,7 @@ def init_generic_config():
# Write file
if not os.path.exists(dest_file):
with open(dest_file, "w") as file:
yaml.dump(generic_config_dict, file)
dump(generic_config_dict, file)
print(f"Generic configuration file successfully created at: {dest_file}")
else:
print(f"Generic configuration file already exists `{dest_file}`. Skipping creation..")
Expand Down Expand Up @@ -229,3 +232,26 @@ def get_all_result_files(results_file_path: str) -> List:
files = glob.glob(results_file_path + "**/*.yaml")

return files


def zip_report(report_dir_name: str):
    """
    Zip the contents of the report directory into a single archive.

    The archive is written next to the report directory as
    "<report_dir_name>_report_portable.zip". Failure to create the archive is
    logged as a warning rather than raised, so report generation is not
    interrupted when the archive cannot be produced.

    :param str report_dir_name: path of the report directory to archive
    :return: None
    """

    # make_archive appends the ".zip" extension to the base name itself.
    zip_file_name = f"{report_dir_name}_report_portable"
    zip_file_path = f"{zip_file_name}.zip"

    try:
        make_archive(zip_file_name, "zip", report_dir_name)
    except RuntimeError as e:
        _LOGGER.warning(f"Report zip file not created! \n {e}")
        # Stop here: an archive left over from an earlier run must not be
        # reported as freshly created by the existence check below.
        return

    if os.path.exists(zip_file_path):
        _LOGGER.info(f"Report zip file successfully created: {zip_file_path}")
    else:
        _LOGGER.warning("Report zip file not created.")
10 changes: 8 additions & 2 deletions pipestat/pipestat.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
)
from pipestat.backends.file_backend.filebackend import FileBackend
from .reports import HTMLReportBuilder, _create_stats_objs_summaries
from .helpers import validate_type, mk_abs_via_cfg, read_yaml_data, default_formatter
from .helpers import validate_type, mk_abs_via_cfg, read_yaml_data, default_formatter, zip_report
from .const import (
PKG_NAME,
DEFAULT_PIPELINE_NAME,
Expand Down Expand Up @@ -766,23 +766,29 @@ def summarize(
self,
looper_samples: Optional[list] = None,
amendment: Optional[str] = None,
portable: Optional[bool] = False,
) -> None:
"""
Builds a browsable html report for reported results.
:param Iterable[str] looper_samples: list of looper Samples from PEP
:param Iterable[str] amendment: name indicating amendment to use, optional
:param bool portable: moves figures and report files to directory for easy sharing
:return str: report_path
"""

self.check_multi_results()

html_report_builder = HTMLReportBuilder(prj=self)
html_report_builder = HTMLReportBuilder(prj=self, portable=portable)
report_path = html_report_builder(
pipeline_name=self.cfg[PIPELINE_NAME],
amendment=amendment,
looper_samples=looper_samples,
)

if portable is True:
zip_report(report_dir_name=os.path.dirname(report_path))

return report_path

def check_multi_results(self):
Expand Down
Loading

0 comments on commit 6510c35

Please sign in to comment.