Skip to content

Commit

Permalink
New Json Expand Fields block
Browse files Browse the repository at this point in the history
  • Loading branch information
toniopoggi authored Jun 5, 2024

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
1 parent fec34ed commit 0296f1e
Showing 3 changed files with 118 additions and 0 deletions.
12 changes: 12 additions & 0 deletions Preparation/JSON/Expand_Fields/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# JSON Expand Fields

## Expands JSON strings in a specified field into separate columns, optionally including the original input data.

## Language
Python

## Dependencies
n/a

## Source
[script.py](https://github.com/visokio/omniscope-custom-blocks/blob/master/Preparation/JSON/Expand_Fields/script.py)
68 changes: 68 additions & 0 deletions Preparation/JSON/Expand_Fields/manifest.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
{
"@visokiotype": "CustomBlockSchema.CustomBlockManifest",
"name": "JSON Expand fields",
"scriptFilename": "script.py",
"language": "PYTHON",
"executableVersion": null,
"minVersions": [
null
],
"optionsVersion": 1,
"apiVersion": "VERSION_0",
"isResourceIntensiveScript": false,
"showPartitioning": false,
"icon": "",
"description": "Expands JSON strings in a specified field into separate columns, optionally including the original input data",
"category": "Preparation",
"subcategory": "JSON",
"tags": [
"JSON",
"expand",
"fields",
"semi",
"structured"
],
"introductoryText": "### Expands JSON strings in a specified field into separate columns, optionally including the original input data",
"dependencies": "",
"options": [
{
"name": "jsonField",
"title": "JSON Strings",
"description": "The field containing JSON strings to be expanded as additional fields",
"groupTitle": null,
"width": "ONE",
"@visokiotype": "CustomBlockSchema.FieldCustomBlockPublicOption",
"mandatory": true,
"inputIndex": 0,
"defaultValue": null,
"fieldTypes": []
},
{
"name": "includeInput",
"title": "Include input dataset",
"description": null,
"groupTitle": null,
"width": "ONE",
"@visokiotype": "CustomBlockSchema.BooleanCustomBlockPublicOption",
"defaultValue": true
}
],
"blockOutputs": [
{
"@visokiotype": "CustomBlockSchema.BlockOutputPublicOption",
"id": "Output Data",
"label": "Expanded dataset",
"displayName": "Expanded dataset",
"tooltip": null
}
],
"docker": {
"@visokiotype": "CustomBlockSchema.DockerCustomBlockPublicOption",
"customBaseImage": null,
"useCustomBaseImage": false,
"customSystemLibraries": "",
"installVisokioRepLibraries": false
},
"designLock": false,
"apiMode": "BATCH"
}
38 changes: 38 additions & 0 deletions Preparation/JSON/Expand_Fields/script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from omniscope.api import OmniscopeApi
import pandas as pd
import json
omniscope_api = OmniscopeApi()

# Read the records associated with the first block input.
input_data = omniscope_api.read_input_records(input_number=0)

# Block options: the name of the field holding the JSON strings, and whether
# the input row's values are added alongside the expanded columns.
jsonField = omniscope_api.get_option("jsonField")
includeInput = omniscope_api.get_option("includeInput")

output_data = None
dataframes_list = []  # One expanded dataframe per input row holding JSON text

# Loop-invariant, so computed once rather than on every row.
input_columns = list(input_data.columns.values)

for index, row in input_data.iterrows():
    jsonString = row[jsonField]

    # Skip rows whose value is not a string, or is empty / whitespace-only.
    if not isinstance(jsonString, str) or not jsonString.strip():
        continue

    try:
        dictJson = json.loads(jsonString)
    except json.JSONDecodeError as err:
        # Re-raise with the row label so the offending record can be found.
        raise ValueError(
            f"Invalid JSON in field '{jsonField}' at input row {index}: {err}"
        ) from err

    # Turn the parsed JSON into columns, each prefixed with "<jsonField>_".
    dataframe = pd.json_normalize(dictJson)
    dataframe = dataframe.add_prefix(jsonField + '_')

    if includeInput:
        # Add the input row's values as extra columns of the expanded frame.
        dataframe[input_columns] = row.values.tolist()

    dataframes_list.append(dataframe)

# Concatenate all dataframes in the list. pd.concat raises ValueError when
# given an empty list, so output_data stays None if no row was expanded.
if dataframes_list:
    output_data = pd.concat(dataframes_list, ignore_index=True)

# Write the output records to the first output.
if output_data is not None:
    omniscope_api.write_output_records(output_data, output_number=0)
omniscope_api.close()

0 comments on commit 0296f1e

Please sign in to comment.