Skip to content

Commit

Permalink
Parse legacy reports
Browse files Browse the repository at this point in the history
  • Loading branch information
vladsavelyev committed Feb 19, 2024
1 parent 1b99b9f commit 63dc17d
Showing 1 changed file with 180 additions and 83 deletions.
263 changes: 180 additions & 83 deletions megaqc/api/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,33 +199,25 @@ def handle_report_data(user, report_data):
]:
continue
# Save the plot config as a JSON string
config = json.dumps(report_data["report_plot_data"][plot]["config"])
for dst_idx, dataset in enumerate(
report_data["report_plot_data"][plot]["datasets"]
):
try:
if isinstance(
report_data["report_plot_data"][plot]["config"]["data_labels"][
dst_idx
],
dict,
):
dataset_name = report_data["report_plot_data"][plot]["config"][
"data_labels"
][dst_idx]["ylab"]
config = copy.deepcopy(report_data["report_plot_data"][plot]["config"])
dls = None
dataset_name = None
if "data_labels" in config and dst_idx < len(config["data_labels"]):
dls = config["data_labels"][dst_idx]
if isinstance(dls, dict):
if "ylab" in dls:
dataset_name = dls["ylab"]
for k, v in dls.items():
config[k] = v
else:
dataset_name = report_data["report_plot_data"][plot]["config"][
"data_labels"
][dst_idx]
except KeyError:
try:
dataset_name = report_data["report_plot_data"][plot]["config"][
"ylab"
]
except KeyError:
dataset_name = report_data["report_plot_data"][plot]["config"][
"title"
]
dataset_name = dls
if not dataset_name and "ylab" in config:
dataset_name = config["ylab"]
elif not dataset_name and "title" in config:
dataset_name = config["title"]

plot_config = (
db.session.query(PlotConfig)
Expand All @@ -242,27 +234,77 @@ def handle_report_data(user, report_data):
config_type=report_data["report_plot_data"][plot]["plot_type"],
config_name=plot,
config_dataset=dataset_name,
data=config,
data=json.dumps(config),
)
plot_config.save()
new_plotcfg_cnt += 1
config_id = plot_config.config_id

# Save bar graph data
if report_data["report_plot_data"][plot]["plot_type"] == "bar_graph":
for cat_data in dataset["cats"]:
data_key = str(cat_data["name"])
if "cats" in dataset:
# Plotly-backed MultiQC 1.20+
for cat_data in dataset["cats"]:
data_key = str(cat_data["name"])
existing_category = (
db.session.query(PlotCategory)
.filter(PlotCategory.category_name == data_key)
.first()
)
data = json.dumps(
{
x: y
for x, y in list(cat_data.items())
if x not in ["data", "data_pct"]
}
)
if not existing_category:
existing_category = PlotCategory(
report_id=report_id,
config_id=config_id,
category_name=data_key,
data=data,
)
existing_category.save()
else:
existing_category.data = data
existing_category.save()
for sname, actual_data in zip(
dataset["samples"], cat_data["data"]
):
existing_sample = (
db.session.query(Sample)
.filter(Sample.sample_name == sname)
.first()
)
if existing_sample:
sample_id = existing_sample.sample_id
else:
new_sample = Sample(
sample_name=sname, report_id=report_id
)
new_sample.save()
sample_id = new_sample.sample_id
new_dataset_row = PlotData(
report_id=report_id,
config_id=config_id,
sample_id=sample_id,
plot_category_id=existing_category.plot_category_id,
data=json.dumps(actual_data),
)
new_dataset_row.save()
new_plotdata_cnt += 1
continue
# Legacy MultiQC < 1.20:
for sub_dict in dataset:
data_key = str(sub_dict["name"])
existing_category = (
db.session.query(PlotCategory)
.filter(PlotCategory.category_name == data_key)
.first()
)
data = json.dumps(
{
x: y
for x, y in list(cat_data.items())
if x not in ["data", "data_pct"]
}
{x: y for x, y in list(sub_dict.items()) if x != "data"}
)
if not existing_category:
existing_category = PlotCategory(
Expand All @@ -275,52 +317,107 @@ def handle_report_data(user, report_data):
else:
existing_category.data = data
existing_category.save()
for sname, actual_data in zip(dataset["samples"], cat_data["data"]):
existing_sample = (
db.session.query(Sample)
.filter(Sample.sample_name == sname)
category_id = existing_category.plot_category_id
for sa_idx, actual_data in enumerate(sub_dict["data"]):
try:
existing_sample = (
db.session.query(Sample)
.filter(
Sample.sample_name
== report_data["report_plot_data"][plot]["samples"][
dst_idx
][sa_idx]
)
.first()
)
if existing_sample:
sample_id = existing_sample.sample_id
else:
new_sample = Sample(
sample_name=sub_dict["name"], report_id=report_id
)
new_sample.save()
sample_id = new_sample.sample_id
new_dataset_row = PlotData(
report_id=report_id,
config_id=config_id,
sample_id=sample_id,
plot_category_id=existing_category.plot_category_id,
data=json.dumps(actual_data),
)
new_dataset_row.save()
new_plotdata_cnt += 1
except Exception as e:
current_app.logger.error(
"Error saving plot data: {}".format(str(e))
)
raise

# Save line plot data
elif report_data["report_plot_data"][plot]["plot_type"] == "xy_line":
if dls and "ylab" in dls:
data_key = dls["ylab"]
elif "ylab" in config:
data_key = config["ylab"]
else:
data_key = config["title"]

if "lines" in dataset:
# Plotly-backed MultiQC 1.20+
for line_data in dataset["lines"]:
existing_category = (
db.session.query(PlotCategory)
.filter(PlotCategory.category_name == data_key)
.first()
)
if existing_sample:
sample_id = existing_sample.sample_id
data = json.dumps(
{x: y for x, y in list(line_data.items()) if x != "data"}
)
if not existing_category:
existing_category = PlotCategory(
report_id=report_id,
config_id=config_id,
category_name=data_key,
data=data,
)
existing_category.save()
else:
new_sample = Sample(sample_name=sname, report_id=report_id)
new_sample.save()
sample_id = new_sample.sample_id
existing_category.data = data
existing_category.save()
category_id = existing_category.plot_category_id

sample = (
db.session.query(Sample)
.filter(Sample.sample_name == line_data["name"])
.first()
)
if not sample:
sample = Sample(
sample_name=line_data["name"], report_id=report_id
)
sample.save()
sample_id = sample.sample_id

new_dataset_row = PlotData(
report_id=report_id,
config_id=config_id,
sample_id=sample_id,
plot_category_id=existing_category.plot_category_id,
data=json.dumps(actual_data),
plot_category_id=category_id,
data=json.dumps(line_data["data"]),
)
new_dataset_row.save()
new_plotdata_cnt += 1

# Save line plot data
elif report_data["report_plot_data"][plot]["plot_type"] == "xy_line":
for line_data in dataset["lines"]:
try:
data_key = report_data["report_plot_data"][plot]["config"][
"data_labels"
][dst_idx]["ylab"]
except (KeyError, TypeError):
try:
data_key = report_data["report_plot_data"][plot]["config"][
"ylab"
]
except KeyError:
data_key = report_data["report_plot_data"][plot]["config"][
"title"
]
new_dataset_row.save()
new_plotdata_cnt += 1
continue

# Legacy MultiQC < 1.20
for sub_dict in dataset:
existing_category = (
db.session.query(PlotCategory)
.filter(PlotCategory.category_name == data_key)
.first()
)
data = json.dumps(
{x: y for x, y in list(line_data.items()) if x != "data"}
{x: y for x, y in list(sub_dict.items()) if x != "data"}
)
if not existing_category:
existing_category = PlotCategory(
Expand All @@ -335,27 +432,28 @@ def handle_report_data(user, report_data):
existing_category.save()
category_id = existing_category.plot_category_id

sample = (
db.session.query(Sample)
.filter(Sample.sample_name == line_data["name"])
.first()
)
if not sample:
sample = Sample(
sample_name=line_data["name"], report_id=report_id
for sa_idx, actual_data in enumerate(sub_dict["data"]):
sample = (
db.session.query(Sample)
.filter(Sample.sample_name == sub_dict["name"])
.first()
)
sample.save()
sample_id = sample.sample_id

new_dataset_row = PlotData(
report_id=report_id,
config_id=config_id,
sample_id=sample_id,
plot_category_id=category_id,
data=json.dumps(line_data["data"]),
)
new_dataset_row.save()
new_plotdata_cnt += 1
if not sample:
sample = Sample(
sample_name=sub_dict["name"], report_id=report_id
)
sample.save()
sample_id = sample.sample_id

new_dataset_row = PlotData(
report_id=report_id,
config_id=config_id,
sample_id=sample_id,
plot_category_id=category_id,
data=json.dumps(actual_data),
)
new_dataset_row.save()
new_plotdata_cnt += 1
current_app.logger.info(
"Finished writing plot data ({} cfg, {} data points) for report {}".format(
new_plotcfg_cnt, new_plotdata_cnt, new_report.report_id
Expand Down Expand Up @@ -509,7 +607,6 @@ def generate_report_plot(plot_type, sample_names):
for d_idx, d in enumerate(data):
xs.append(" " + str(config["categories"][d_idx]))
ys.append(d)

else:
for d in data:
xs.append(d[0])
Expand Down

0 comments on commit 63dc17d

Please sign in to comment.