Skip to content

Commit

Permalink
Pipeline: Namespace management update. (#29)
Browse files Browse the repository at this point in the history
* Version update for v4.0

* Pipeline update for namespace management, version files will need to be corrected with core and computation commit.

update_commits.py is updated but not tested.

* Pipeline update for namespace management, version files will need to be corrected with core, computation, controlledTerms last PRs.

update_commits.py is updated but not tested.

* Code simplification, variables renaming, version.json/dev file modifications.

* Code simplification, lowercase of "acronym"-alike modules, versions.json update.

* Typo in the pipeline name.

* Signatures cleanup.

* Addition of the attribute "_module".

* Missing line break.

---------

Co-authored-by: raphaelgazzotti <[email protected]>
  • Loading branch information
Raphael-Gazzotti and raphaelgazzotti authored Nov 7, 2024
1 parent 8c61e7e commit 65c8706
Show file tree
Hide file tree
Showing 10 changed files with 399 additions and 161 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# MIT licensed
name: openMINDS_build_pipline
name: openMINDS_build_pipeline

on:
push:
Expand Down Expand Up @@ -55,4 +55,3 @@ jobs:
else
echo "Nothing to commit"
fi
7 changes: 2 additions & 5 deletions build.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import argparse

import sys

from openMINDS_pipeline.models import DirectoryStructure, Trigger
Expand All @@ -22,18 +21,17 @@
clone_central(True)

# Step 1 - find the versions to be (re-)built
relevant_versions = evaluate_versions_to_be_built(args["config"], trigger)
relevant_versions, namespaces = evaluate_versions_to_be_built(args["config"], trigger)

for version, modules in relevant_versions.items():

DirectoryStructure.clear_directory(directory_structure.expanded_directory)
DirectoryStructure.clear_directory(directory_structure.source_directory)

# Step 2 - Clone all required resources for the aggregation
clone_sources(modules, version)

# Step 3 - Find all involved schemas
all_schemas = find_schemas(directory_structure, modules)
all_schemas = find_schemas(directory_structure, modules, namespaces[version])

# Step 4 - Resolve all "_extends" directives and save to target directory
resolve_extends(all_schemas, directory_structure)
Expand All @@ -56,7 +54,6 @@
# Step 10 - Copy results to the target directory
copy_to_target_directory(directory_structure, version)


if not trigger:
# We've built everything - this is the only chance to do a proper cleanup at the end because we know all versions have been processed.
Types(directory_structure).clean_types()
Expand Down
3 changes: 1 addition & 2 deletions openMINDS_pipeline/constants.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
OPENMINDS_VOCAB="https://openminds.ebrains.eu/vocab/"
SCHEMA_FILE_ENDING = ".schema.tpl.json"
INSTANCE_FILE_ENDING = ".jsonld"
INSTANCE_FILE_ENDING = ".jsonld"
5 changes: 3 additions & 2 deletions openMINDS_pipeline/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,12 @@ class Trigger:


class SchemaStructure:
def __init__(self, type, schema_group, version, file):
def __init__(self, type, schema_group, version, file, namespaces):
    """Value object describing one schema template file handled by the pipeline.

    :param type: the schema's openMINDS type name (``_type`` without namespace)
    :param schema_group: the module/repository the schema belongs to
    :param version: the openMINDS version this schema is built for
    :param file: relative path of the ``.schema.tpl.json`` template file
    :param namespaces: namespace templates (e.g. ``types``/``props``) used to
        qualify the schema's types and properties — see usage in resolver/utils
    """
    # Direct captures of the constructor arguments.
    self.type = type
    self.version = version
    self.schema_group = schema_group
    self.namespaces = namespaces
    self.file = file
    # Populated later by the pipeline (category resolution / file discovery).
    self.categories = None
    self.absolute_path = None

Expand Down Expand Up @@ -75,7 +76,7 @@ def find_resource_directories(self, file_ending) -> List[str]:
resource_directories = set()
for source in glob.glob(os.path.join(self.source_directory, f'**/*{file_ending}'), recursive=True):
resource_dir = os.path.dirname(source)[len(self.source_directory) + 1:]
if ("target" not in resource_dir and "expanded" not in resource_dir):
if "target" not in resource_dir and "expanded" not in resource_dir:
path_split = resource_dir.split("/")
if len(path_split) == 1:
resource_directories.add(path_split[0])
Expand Down
40 changes: 36 additions & 4 deletions openMINDS_pipeline/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
TEMPLATE_PROPERTY_CATEGORIES = "_categories"
TEMPLATE_PROPERTY_LINKED_CATEGORIES = "_linkedCategories"
TEMPLATE_PROPERTY_EMBEDDED_CATEGORIES = "_embeddedCategories"
TEMPLATE_PROPERTY_MODULE = "_module"


def resolve_extends(schemas: List[SchemaStructure], directory_structure: DirectoryStructure):
Expand Down Expand Up @@ -41,7 +42,7 @@ def resolve_categories(version:str, directory_structure: DirectoryStructure, sch
schemas_by_category = _schemas_by_category(schemas)
for schema in schemas:
print(f"resolving categories for {schema.type}")
_do_resolve_categories(schema, schemas_by_category)
_do_resolve_categories(version, schema, schemas_by_category)
categories[version] = schemas_by_category
_save_categories(directory_structure, categories)

Expand Down Expand Up @@ -71,12 +72,23 @@ def _schemas_by_category(schemas: List[SchemaStructure]) -> Dict[str, List[str]]
for c in s.categories:
if c not in result:
result[c] = []
result[c].append(s.type)
# lowercase "acronym"-alike modules
schema_group_normalized = s.schema_group.lower() if s.schema_group.isupper() else s.schema_group
result[c].append(schema_group_normalized + ':' + s.type)
result[c].sort()
return result


def _do_resolve_extends(source_schema, schema, schema_group, directory_structure: DirectoryStructure):
# Autocomplete with the correct namespace, just rebuild it for older versions (replace part)
if TEMPLATE_PROPERTY_TYPE in schema:
schema_group_normalized = source_schema.schema_group.lower() if source_schema.schema_group.isupper() else source_schema.schema_group
schema[TEMPLATE_PROPERTY_TYPE] = source_schema.namespaces['types'].replace('{MODULE}',
schema_group_normalized) + \
schema[TEMPLATE_PROPERTY_TYPE].split(":")[-1].split("/")[-1]
# Add schema module
schema[TEMPLATE_PROPERTY_MODULE] = source_schema.schema_group

if TEMPLATE_PROPERTY_EXTENDS in schema:
if schema[TEMPLATE_PROPERTY_EXTENDS].startswith("/"):
extends_split = schema[TEMPLATE_PROPERTY_EXTENDS].split("/")
Expand Down Expand Up @@ -129,7 +141,20 @@ def _apply_extension(source, extension):
source["properties"][k] = extension["properties"][k]


def _do_resolve_categories(schema: SchemaStructure, schemas_by_category):
def _do_resolve_categories(version:str, schema: SchemaStructure, schemas_by_category):

def _namespace_completion_categories(schema_payload, schema, p, template_property):
def _build_namespace_type(_type):
# if _type is an URI rebuild it
# else _type consists of prefix:name_type
module = _type.split("/")[-2] if '/' in _type else _type.split(":")[0]
name_type = _type.split("/")[-1] if '/' in _type else _type.split(":")[-1]
return schema.namespaces['types'].replace('{MODULE}', module) + name_type

schema_payload["properties"][p][template_property] = [
_build_namespace_type(_type) for _type in schema_payload["properties"][p][template_property]]
return schema_payload

with open(schema.absolute_path, "r") as schema_file:
schema_payload = json.load(schema_file)
if "properties" in schema_payload:
Expand All @@ -150,6 +175,13 @@ def _do_resolve_categories(schema: SchemaStructure, schemas_by_category):
embedded_types.extend(schemas_by_category[embedded_category])
schema_payload["properties"][p][TEMPLATE_PROPERTY_EMBEDDED_TYPES] = sorted(embedded_types)
del schema_payload["properties"][p][TEMPLATE_PROPERTY_EMBEDDED_CATEGORIES]

# Write namespace for '_linkedTypes' and '_embeddedTypes'
if TEMPLATE_PROPERTY_LINKED_TYPES in schema_payload["properties"][p]:
_namespace_completion_categories(schema_payload, schema, p, TEMPLATE_PROPERTY_LINKED_TYPES)

if TEMPLATE_PROPERTY_EMBEDDED_TYPES in schema_payload["properties"][p]:
_namespace_completion_categories(schema_payload, schema, p, TEMPLATE_PROPERTY_EMBEDDED_TYPES)

with open(schema.absolute_path, "w") as target_file:
target_file.write(json.dumps(schema_payload, indent=2))

42 changes: 25 additions & 17 deletions openMINDS_pipeline/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from packaging.utils import canonicalize_version
from packaging.version import Version

from openMINDS_pipeline.constants import SCHEMA_FILE_ENDING, OPENMINDS_VOCAB
from openMINDS_pipeline.constants import SCHEMA_FILE_ENDING
from openMINDS_pipeline.models import Trigger, OpenMINDSModule, DirectoryStructure, SchemaStructure
from openMINDS_pipeline.resolver import TEMPLATE_PROPERTY_TYPE

Expand Down Expand Up @@ -47,7 +47,7 @@ def get_basic_type(property_definition:dict) -> Optional[str]:
return basic_type


def evaluate_versions_to_be_built(version_config: str, trigger:Optional[Trigger]) -> Dict[str, Dict[str, OpenMINDSModule]]:
def evaluate_versions_to_be_built(version_config: str, trigger:Optional[Trigger]) -> (Dict[str, Dict[str, OpenMINDSModule]], Dict[str, str]):
"""
:return: the dictionary describing all versions supposed to be built either because of a change or because of a build of everything.
"""
Expand All @@ -65,22 +65,29 @@ def evaluate_versions_to_be_built(version_config: str, trigger:Optional[Trigger]
if os.path.exists("pipeline"):
shutil.rmtree("pipeline")
relevant_versions = {}
for version, modules in versions.items():
namespaces = {}

for version, bundle in versions.items():
triggering_module = None
is_dynamic = False
new_modules = {}
for module, module_spec in modules.items():
m = OpenMINDSModule(**module_spec)
if not m.commit:
is_dynamic = True
_evaluate_branch_and_commit_for_dynamic_instances(m)
if trigger and m.repository and m.repository.endswith(f"{trigger.repository}.git"):
triggering_module = m
new_modules[module] = m

for entry, entry_spec in bundle.items():
if entry == "namespaces":
namespaces[version] = bundle.get("namespaces", {})
if entry == "modules":
for module_name, module_spec in bundle[entry].items():
m = OpenMINDSModule(**module_spec)
if not m.commit:
is_dynamic = True
_evaluate_branch_and_commit_for_dynamic_instances(m)
if trigger and m.repository and m.repository.endswith(f"{trigger.repository}.git"):
triggering_module = m
new_modules[module_name] = m
# The version is only relevant if the process was not launched by a submodule change (so everything is built) or if the triggering module is specified with the given branch
if not trigger or (is_dynamic and triggering_module and triggering_module.branch and triggering_module.branch == trigger.branch):
relevant_versions[version] = new_modules
return relevant_versions
return relevant_versions, namespaces


def _evaluate_branch_and_commit_for_dynamic_instances(module_spec:OpenMINDSModule):
Expand All @@ -102,7 +109,7 @@ def _evaluate_branch_and_commit_for_dynamic_instances(module_spec:OpenMINDSModul
module_spec.commit = branch_to_commit[module_spec.branch]


def find_schemas(directory_structure: DirectoryStructure, modules: Dict[str, OpenMINDSModule]) -> List[SchemaStructure]:
def find_schemas(directory_structure: DirectoryStructure, modules: Dict[str, OpenMINDSModule], namespaces: Dict[str, str]) -> List[SchemaStructure]:
schema_information = []
for schema_group in directory_structure.find_resource_directories(file_ending=SCHEMA_FILE_ENDING):
schema_group = schema_group.split("/")[0]
Expand All @@ -116,7 +123,9 @@ def find_schemas(directory_structure: DirectoryStructure, modules: Dict[str, Ope
with open(schema_path, "r") as schema_file:
schema = json.load(schema_file)
if TEMPLATE_PROPERTY_TYPE in schema:
schema_information.append(SchemaStructure(schema[TEMPLATE_PROPERTY_TYPE], schema_group, version, relative_schema_path))
# remove namespace, will be rebuilt in resolve_extends and resolve_categories
schema[TEMPLATE_PROPERTY_TYPE] = schema[TEMPLATE_PROPERTY_TYPE].split(":")[-1].split("/")[-1]
schema_information.append(SchemaStructure(schema[TEMPLATE_PROPERTY_TYPE], schema_group, version, relative_schema_path, namespaces))
else:
print(f"Skipping schema {relative_schema_path} because it doesn't contain a valid type")
except JSONDecodeError:
Expand All @@ -133,12 +142,11 @@ def qualify_property_names(schemas:List[SchemaStructure]):
if "properties" in schema_payload:
new_properties = {}
for p, v in schema_payload["properties"].items():
new_properties[f"{OPENMINDS_VOCAB}{p}"] = v
new_properties[f"{schema.namespaces['props']}{p}"] = v
schema_payload["properties"] = new_properties
if "required" in schema_payload:
schema_payload["required"] = [f"{OPENMINDS_VOCAB}{p}" for p in schema_payload["required"]]
schema_payload["required"] = [f"{schema.namespaces['props']}{p}" for p in schema_payload["required"]]
schema_payload["required"].sort()

with open(schema.absolute_path, "w") as target_file:
target_file.write(json.dumps(schema_payload, indent=2, sort_keys=True))

Expand Down
Loading

0 comments on commit 65c8706

Please sign in to comment.