Skip to content

Commit

Permalink
pythonise interval join and fix LEFT join issue
Browse files Browse the repository at this point in the history
  • Loading branch information
nils-drechsel committed Dec 7, 2023
1 parent 3b2c480 commit 726e1d9
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 107 deletions.
12 changes: 6 additions & 6 deletions Preparation/Join/Interval Join/manifest.json
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
{
"@visokiotype": "CustomBlockSchema.CustomBlockManifest",
"name": "Interval Join",
"scriptFilename": "script.R",
"language": "R",
"scriptFilename": "script.py",
"language": "PYTHON",
"executableVersion": null,
"minVersions": [
null
Expand All @@ -21,10 +21,10 @@
"join"
],
"introductoryText": "The first input contains values and the second input intervals. The rows of the first input are joined with rows of the second if the value is contained in the interval.\n\nThe interval start and end values can be chosen to be either inclusive or exclusive. If the number is e.g. 5, and an interval is 5..10, then if inclusive is chosen, the rows are joined. If exclusive is chosen, they are not joined.\n\nIn addition, it is possible to specify an optional equality criterion. Records are then interval-joined only if they also contain the same value in the specified fields.",
"dependencies": "",
"dependencies": "pandasql",
"options": [
{
"name": "joinType",
"name": "join_type",
"title": "Type of join",
"description": null,
"groupTitle": "Interval Join",
Expand Down Expand Up @@ -82,7 +82,7 @@
"fieldTypes": []
},
{
"name": "startOperator",
"name": "start_comparator",
"title": "Interval start value inclusiveness",
"description": "If value is e.g. 5 and the interval is 5..10: If inclusive is chosen, then value is considered contained in the interval. If exclusive is chosen, then the value is not contained.",
"groupTitle": "Interval Join",
Expand Down Expand Up @@ -128,7 +128,7 @@
"fieldTypes": []
},
{
"name": "endOperator",
"name": "end_comparator",
"title": "Interval end value inclusiveness",
"description": "If value is e.g. 10 and the interval is 5..10: If inclusive is chosen, then value is considered contained in the interval. If exclusive is chosen, then the value is not contained.",
"groupTitle": "Interval Join",
Expand Down
101 changes: 0 additions & 101 deletions Preparation/Join/Interval Join/script.R

This file was deleted.

65 changes: 65 additions & 0 deletions Preparation/Join/Interval Join/script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
from omniscope.api import OmniscopeApi
omniscope_api = OmniscopeApi()
from pandasql import sqldf
import pandas as pd

# Fetch the records of both block inputs: input 0 carries the values,
# input 1 carries the intervals they are matched against.
left = omniscope_api.read_input_records(input_number=0)
right = omniscope_api.read_input_records(input_number=1)

# Read every block option in one pass. The option names on the right line up
# positionally with the variables on the left, and the options are requested
# in exactly this order.
(
    value_field,
    start_field,
    start_comparator,
    end_field,
    end_comparator,
    join_type,
    left_eq_field,
    right_eq_field,
) = (
    omniscope_api.get_option(option_name)
    for option_name in (
        "value",
        "start",
        "start_comparator",
        "end",
        "end_comparator",
        "join_type",
        "left",
        "right",
    )
)

# Report each input that was not supplied.
for _records, _complaint in (
    (left, "missing first input"),
    (right, "missing right input"),
):
    if _records is None:
        omniscope_api.abort(_complaint)

def _quote_identifier(name):
    """Return *name* as a double-quoted SQL identifier.

    Embedded double quotes are doubled (standard SQL escaping) so that a
    field name containing a quote character cannot terminate the identifier
    early and corrupt the query.
    """
    return '"' + str(name).replace('"', '""') + '"'


# The value must lie between the interval start and end; the comparator
# options decide whether each bound is inclusive or exclusive.
_value_column = _quote_identifier(value_field)
_conditions = [
    f"A.{_value_column} {start_comparator} B.{_quote_identifier(start_field)}",
    f"A.{_value_column} {end_comparator} B.{_quote_identifier(end_field)}",
]

# Optional extra equality criterion, applied only when both sides are set.
if left_eq_field is not None and right_eq_field is not None:
    _conditions.append(
        f"A.{_quote_identifier(left_eq_field)} = "
        f"B.{_quote_identifier(right_eq_field)}"
    )

_on_clause = " AND\n".join(_conditions)

q = f"""
SELECT A.*, B.*
FROM
left A
{join_type} JOIN
right B
ON
{_on_clause};
"""

# Hand pandasql only the two tables the query refers to, instead of every
# module-level name, so no unrelated global can be picked up as a table.
df = sqldf(q, {"left": left, "right": right})




def _dedupe_column_names(names):
    """Return *names* as a list in which repeated labels are made unique.

    The first occurrence of a label is kept unchanged; each later occurrence
    gets a ``.1``, ``.2``, ... suffix in order of appearance. Working on the
    plain sequence of labels avoids ``Index.get_loc``, whose return type
    (int, slice or boolean mask) depends on how the duplicates are laid out.
    """
    occurrences = {}
    unique_names = []
    for name in names:
        seen_before = occurrences.get(name, 0)
        occurrences[name] = seen_before + 1
        unique_names.append(name if seen_before == 0 else f"{name}.{seen_before}")
    return unique_names


# "SELECT A.*, B.*" yields repeated column names whenever both inputs share a
# field name, so disambiguate them before the table is written out.
# The None guard keeps this step from failing if the query produced no result.
if df is not None:
    df.columns = _dedupe_column_names(df.columns)

# The joined table is what this block emits.
output_data = df

# Send the records to the block's first output (skipped when there is no
# result), then close the API connection.
_has_output = output_data is not None
if _has_output:
    omniscope_api.write_output_records(output_data, output_number=0)
omniscope_api.close()

0 comments on commit 726e1d9

Please sign in to comment.