Implement Cookiecutter templates for relevant MLCubes (#396)

* Add Data Preparator cookiecutter template * Rename cookiecutter folder * Temporarily remove possibly offending files * Remove cookiecutter conditionals * Include back missing pieces of template * Remove cookiecutter typo * Use project_name attribute * Change cookiecutter fields order * Create empty directories on hook * Fix empty folders paths * Create evaluator mlcube cookiecutter template * Fix JSON Syntax Error * Update template default values * Remove reference to undefined template variable * Implement model mlcube cookiecutter template * Update cookiecutter variable default values * Create medperf CLI command for creating MLCubes * Provide additional options for mlcube create * Start working on tests * Add tests for cube create * Ignore invalid syntax on cookiecutter conditionals * Ignore more flake8 errors * Remove unused import * Empty commit for cloudbuild * Fix inconsistency with labels paths * Update mlcube.yaml so it can be commented on docs * Don't render noqa comments on template * Remove flake8 specific ignores * Exclude templates from lint checks * Remove specific flake8 ignores * Fix labels_path being passed in the wrong situation * Add requirements to cookiecutters * Set separate labels as true by default * Ignore templates contents for linter * Ignore templates on linting * Apply exclude to all flake8 calls * Make template name required * Move templates to package. Use local templates
mlcommons · Mar 15, 2023 · 96377ee · 96377ee
1 parent cdebf89
commit 96377ee
Show file tree

Hide file tree

Showing 26 changed files with 557 additions and 2 deletions.
diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml
@@ -26,14 +26,14 @@ jobs:
     - name: Lint with flake8
       run: |
         # stop the build if there are Python syntax errors or undefined names
-        flake8 . --count --max-line-length 127 --select=E9,F63,F7 --show-source --statistics
+        flake8 . --count --max-line-length 127 --select=E9,F63,F7 --show-source --statistics --exclude=cli/medperf/templates/
         # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
         # ignore warnings about undefined names due to using future annotations
         # W503 is no longer recommended. https://www.flake8rules.com/rules/W503.html
         # Exclude examples folder as it doesn't contain code related to medperf tools
         # Exclude migrations folder as it contains autogenerated code
         # Ignore E231, as it is raising warnings with auto-generated code.
-        flake8 . --count --max-complexity=10 --max-line-length=127 --ignore F821,W503,E231 --statistics --exclude=examples/,"*/migrations/*"
+        flake8 . --count --max-complexity=10 --max-line-length=127 --ignore F821,W503,E231 --statistics --exclude=examples/,"*/migrations/*",cli/medperf/templates/
     - name: Test with pytest
       run: |
         pytest

diff --git a/cli/medperf/commands/mlcube/create.py b/cli/medperf/commands/mlcube/create.py
@@ -0,0 +1,40 @@
+from os.path import abspath
+from pathlib import Path
+from cookiecutter.main import cookiecutter
+
+from medperf import config
+from medperf.exceptions import InvalidArgumentError
+
+
+class CreateCube:
+    @classmethod
+    def run(cls, template_name: str, output_path: str = ".", config_file: str = None):
+        """Creates a new MLCube based on one of the provided templates
+
+        Args:
+            template_name (str): The name of the template to use
+            output_path (str, Optional): The desired path for the MLCube. Defaults to current path.
+            config_file (str, Optional): Path to a JSON configuration file. If not passed, user is prompted.
+        """
+        template_dirs = config.templates
+        if template_name not in template_dirs:
+            templates = list(template_dirs.keys())
+            raise InvalidArgumentError(
+                f"Invalid template name. Available templates: [{' | '.join(templates)}]"
+            )
+
+        no_input = False
+        if config_file is not None:
+            no_input = True
+
+        # Get package parent path
+        path = abspath(Path(__file__).parent.parent.parent)
+
+        template_dir = template_dirs[template_name]
+        cookiecutter(
+            path,
+            directory=template_dir,
+            output_dir=output_path,
+            config_file=config_file,
+            no_input=no_input,
+        )
diff --git a/cli/medperf/commands/mlcube/mlcube.py b/cli/medperf/commands/mlcube/mlcube.py
@@ -7,6 +7,7 @@
 from medperf.entities.cube import Cube
 from medperf.commands.list import EntityList
 from medperf.commands.view import EntityView
+from medperf.commands.mlcube.create import CreateCube
 from medperf.commands.mlcube.submit import SubmitCube
 from medperf.commands.mlcube.associate import AssociateCube
 
@@ -28,6 +29,27 @@ def list(
     )
 
 
+# `create` subcommand: forwards the parsed CLI arguments to CreateCube.run.
+# The template help text is built from the keys of config.templates when this
+# module is imported.
+@app.command("create")
+@clean_except
+def create(
+    template: str = typer.Argument(
+        ...,
+        help=f"MLCube template name. Available templates: [{' | '.join(config.templates.keys())}]",
+    ),
+    output_path: str = typer.Option(
+        ".", "--output", "-o", help="Save the generated MLCube to the specified path"
+    ),
+    config_file: str = typer.Option(
+        None,
+        "--config-file",
+        "-c",
+        help="JSON Configuration file. If not present then user is prompted for configuration",
+    ),
+):
+    """Creates an MLCube based on one of the specified templates"""
+    CreateCube.run(template, output_path, config_file)
+
+
 @app.command("submit")
 @clean_except
 def submit(

diff --git a/cli/medperf/config.py b/cli/medperf/config.py
@@ -70,3 +70,9 @@
     "platform",
     "cleanup",
 ]
+
+# Template names accepted when creating an MLCube, mapped to the location of
+# each cookiecutter template relative to the medperf package directory
+templates = {
+    "data_preparator": "templates/data_preparator_mlcube",
+    "model": "templates/model_mlcube",
+    "evaluator": "templates/evaluator_mlcube",
+}
diff --git a/cli/medperf/templates/data_preparator_mlcube/cookiecutter.json b/cli/medperf/templates/data_preparator_mlcube/cookiecutter.json
@@ -0,0 +1,9 @@
+{
+    "project_name": "Data Preparator MLCube",
+    "project_slug": "{{ cookiecutter.project_name.lower().replace(' ', '_').replace('-', '_') }}",
+    "description": "Data Preparator MLCube Template. Provided by MLCommons",
+    "author_name": "John Smith",
+    "accelerator_count": "0",
+    "docker_image_name": "docker/image:latest",
+    "use_separate_output_labels": "y"
+}
diff --git a/cli/medperf/templates/data_preparator_mlcube/hooks/post_gen_project.py b/cli/medperf/templates/data_preparator_mlcube/hooks/post_gen_project.py
@@ -0,0 +1,19 @@
+#!/usr/bin/env python
+import os
+
+PROJECT_DIRECTORY = os.path.realpath(os.path.curdir)
+
+
+if __name__ == "__main__":
+    input_data_path = "mlcube/workspace/input_data"
+    input_labels_path = "mlcube/workspace/input_labels"
+    data_path = "mlcube/workspace/data"
+
+    paths = [input_data_path, input_labels_path, data_path]
+
+    if "{{ cookiecutter.use_separate_output_labels }}" == "y":
+        labels_path = "mlcube/workspace/labels"
+        paths.append(labels_path)
+
+    for path in paths:
+        os.makedirs(path, exist_ok=True)
diff --git a/...medperf/templates/data_preparator_mlcube/{{cookiecutter.project_slug}}/mlcube/mlcube.yaml b/...medperf/templates/data_preparator_mlcube/{{cookiecutter.project_slug}}/mlcube/mlcube.yaml
@@ -0,0 +1,57 @@
+name: {{ cookiecutter.project_name }}
+description: {{ cookiecutter.description }}
+authors:
+ - {name: {{ cookiecutter.author_name }}}
+
+platform:
+  accelerator_count: {{ cookiecutter.accelerator_count }}
+
+docker:
+  # Image name
+  image: {{ cookiecutter.docker_image_name }}
+  # Docker build context relative to $MLCUBE_ROOT. Default is `build`.
+  build_context: "../project"
+  # Docker file name within docker build context, default is `Dockerfile`.
+  build_file: "Dockerfile"
+
+tasks:
+  prepare:
+    parameters:
+      inputs: {
+        data_path: input_data,
+        labels_path: input_labels,
+        parameters_file: parameters.yaml
+      }
+      outputs: {
+        output_path: data/,
+        {% if cookiecutter.use_separate_output_labels == 'y' -%}
+        output_labels_path: labels/,
+        {% else %}
+        # output_labels_path: labels/,
+        {% endif %}
+      }
+  sanity_check:
+    parameters:
+      inputs: {
+        data_path: data/,
+        {% if cookiecutter.use_separate_output_labels == 'y' -%}
+        labels_path: labels/,
+        {% else %}
+        # labels_path: labels/,
+        {% endif %}
+        parameters_file: parameters.yaml
+      }
+  statistics:
+    parameters:
+      inputs: {
+        data_path: data/,
+        {% if cookiecutter.use_separate_output_labels == 'y' -%}
+        labels_path: labels/,
+        {% else %}
+        # labels_path: labels/,
+        {% endif %}
+        parameters_file: parameters.yaml
+      }
+      outputs: {
+        output_path: {type: file, default: statistics.yaml}
+      }
diff --git a/...tes/data_preparator_mlcube/{{cookiecutter.project_slug}}/mlcube/workspace/parameters.yaml b/...tes/data_preparator_mlcube/{{cookiecutter.project_slug}}/mlcube/workspace/parameters.yaml
diff --git a/...medperf/templates/data_preparator_mlcube/{{cookiecutter.project_slug}}/project/Dockerfile b/...medperf/templates/data_preparator_mlcube/{{cookiecutter.project_slug}}/project/Dockerfile
@@ -0,0 +1,31 @@
+FROM ubuntu:18.04
+# MAINTAINER is deprecated; the maintainer is recorded as a label instead
+LABEL maintainer="MLPerf MLBox Working Group"
+
+RUN apt-get update && \
+	apt-get install -y --no-install-recommends \
+	software-properties-common \
+	python3-dev \
+	curl && \
+	rm -rf /var/lib/apt/lists/*
+
+RUN add-apt-repository ppa:deadsnakes/ppa -y && apt-get update
+
+RUN apt-get install python3 -y
+
+# Uses the get-pip.py variant published for Python 3.6
+RUN curl -fSsL -O https://bootstrap.pypa.io/pip/3.6/get-pip.py && \
+	python3 get-pip.py && \
+	rm get-pip.py
+
+# Requirements are copied on their own so the dependency layers below can be
+# reused from cache when only the project sources change
+COPY ./requirements.txt project/requirements.txt
+
+RUN pip3 install --upgrade pip
+
+RUN pip3 install --no-cache-dir -r project/requirements.txt
+
+ENV LANG=C.UTF-8
+
+COPY . /project
+
+WORKDIR /project
+
+ENTRYPOINT ["python3", "mlcube.py"]
diff --git a/cli/medperf/templates/data_preparator_mlcube/{{cookiecutter.project_slug}}/project/mlcube.py b/cli/medperf/templates/data_preparator_mlcube/{{cookiecutter.project_slug}}/project/mlcube.py
@@ -0,0 +1,48 @@
+"""MLCube handler file"""
+import typer
+
+
+app = typer.Typer()
+
+
+# Entry point for the `prepare` task. The options match the inputs and outputs
+# declared for this task in mlcube/mlcube.yaml.
+@app.command("prepare")
+def prepare(
+    data_path: str = typer.Option(..., "--data_path"),
+    labels_path: str = typer.Option(..., "--labels_path"),
+    parameters_file: str = typer.Option(..., "--parameters_file"),
+    output_path: str = typer.Option(..., "--output_path"),
+    {% if cookiecutter.use_separate_output_labels == 'y' -%}
+    output_labels_path: str = typer.Option(..., "--output_labels_path"),
+    {% endif %}
+):
+    # Modify the prepare command as needed
+    raise NotImplementedError("The prepare method is not yet implemented")
+
+
+# Entry point for the `sanity_check` task. The options match the inputs
+# declared for this task in mlcube/mlcube.yaml.
+@app.command("sanity_check")
+def sanity_check(
+    data_path: str = typer.Option(..., "--data_path"),
+    {% if cookiecutter.use_separate_output_labels == 'y' -%}
+    labels_path: str = typer.Option(..., "--labels_path"),
+    {% endif %}
+    parameters_file: str = typer.Option(..., "--parameters_file"),
+):
+    # Modify the sanity_check command as needed
+    raise NotImplementedError("The sanity check method is not yet implemented")
+
+
+@app.command("statistics")
+def sanity_check(
+    data_path: str = typer.Option(..., "--data_path"),
+    {% if cookiecutter.use_separate_output_labels == 'y' -%}
+    labels_path: str = typer.Option(..., "--labels_path"),
+    {% endif %}
+    parameters_file: str = typer.Option(..., "--parameters_file"),
+    out_path: str = typer.Option(..., "--output_path"),
+):
+    # Modify the statistics command as needed
+    raise NotImplementedError("The statistics method is not yet implemented")
+
+
+if __name__ == "__main__":
+    app()
diff --git a/...f/templates/data_preparator_mlcube/{{cookiecutter.project_slug}}/project/requirements.txt b/...f/templates/data_preparator_mlcube/{{cookiecutter.project_slug}}/project/requirements.txt
@@ -0,0 +1,2 @@
+typer
+# Include all your requirements here
diff --git a/cli/medperf/templates/evaluator_mlcube/cookiecutter.json b/cli/medperf/templates/evaluator_mlcube/cookiecutter.json
@@ -0,0 +1,8 @@
+{
+    "project_name": "Evaluator MLCube",
+    "project_slug": "{{ cookiecutter.project_name.lower().replace(' ', '_').replace('-', '_') }}",
+    "description": "Evaluator MLCube Template. Provided by MLCommons",
+    "author_name": "John Smith",
+    "accelerator_count": "0",
+    "docker_image_name": "docker/image:latest"
+}
diff --git a/cli/medperf/templates/evaluator_mlcube/hooks/post_gen_project.py b/cli/medperf/templates/evaluator_mlcube/hooks/post_gen_project.py
@@ -0,0 +1,13 @@
+#!/usr/bin/env python
+import os
+
+PROJECT_DIRECTORY = os.path.realpath(os.path.curdir)
+
+
+if __name__ == "__main__":
+    preds_path = "mlcube/workspace/predictions"
+    labels_path = "mlcube/workspace/labels"
+
+    paths = [preds_path, labels_path]
+    for path in paths:
+        os.makedirs(path, exist_ok=True)
diff --git a/cli/medperf/templates/evaluator_mlcube/{{cookiecutter.project_slug}}/mlcube/mlcube.yaml b/cli/medperf/templates/evaluator_mlcube/{{cookiecutter.project_slug}}/mlcube/mlcube.yaml
@@ -0,0 +1,22 @@
+name: {{ cookiecutter.project_name }}
+description: {{ cookiecutter.description }}
+authors:
+ - {name: {{ cookiecutter.author_name }}}
+
+platform:
+  accelerator_count: {{ cookiecutter.accelerator_count }}
+
+docker:
+  # Image name
+  image: {{ cookiecutter.docker_image_name }}
+  # Docker build context relative to $MLCUBE_ROOT. Default is `build`.
+  build_context: "../project"
+  # Docker file name within docker build context, default is `Dockerfile`.
+  build_file: "Dockerfile"
+
+tasks:
+  evaluate:
+  # Computes evaluation metrics on the given predictions and ground truths
+    parameters: 
+      inputs: {predictions: predictions, labels: labels, parameters_file: parameters.yaml}
+      outputs: {output_path: {type: "file", default: "results.yaml"}}
diff --git a/...templates/evaluator_mlcube/{{cookiecutter.project_slug}}/mlcube/workspace/parameters.yaml b/...templates/evaluator_mlcube/{{cookiecutter.project_slug}}/mlcube/workspace/parameters.yaml
diff --git a/cli/medperf/templates/evaluator_mlcube/{{cookiecutter.project_slug}}/project/Dockerfile b/cli/medperf/templates/evaluator_mlcube/{{cookiecutter.project_slug}}/project/Dockerfile
@@ -0,0 +1,31 @@
+FROM ubuntu:18.04
+# MAINTAINER is deprecated; the maintainer is recorded as a label instead
+LABEL maintainer="MLPerf MLBox Working Group"
+
+RUN apt-get update && \
+	apt-get install -y --no-install-recommends \
+	software-properties-common \
+	python3-dev \
+	curl && \
+	rm -rf /var/lib/apt/lists/*
+
+RUN add-apt-repository ppa:deadsnakes/ppa -y && apt-get update
+
+RUN apt-get install python3 -y
+
+# Uses the get-pip.py variant published for Python 3.6
+RUN curl -fSsL -O https://bootstrap.pypa.io/pip/3.6/get-pip.py && \
+	python3 get-pip.py && \
+	rm get-pip.py
+
+# Requirements are copied on their own so the dependency layers below can be
+# reused from cache when only the project sources change
+COPY ./requirements.txt project/requirements.txt
+
+RUN pip3 install --upgrade pip
+
+RUN pip3 install --no-cache-dir -r project/requirements.txt
+
+ENV LANG=C.UTF-8
+
+COPY . /project
+
+WORKDIR /project
+
+ENTRYPOINT ["python3", "mlcube.py"]
diff --git a/cli/medperf/templates/evaluator_mlcube/{{cookiecutter.project_slug}}/project/mlcube.py b/cli/medperf/templates/evaluator_mlcube/{{cookiecutter.project_slug}}/project/mlcube.py
@@ -0,0 +1,26 @@
+"""MLCube handler file"""
+import typer
+
+
+app = typer.Typer()
+
+
+@app.command("evaluate")
+def prepare(
+    labels: str = typer.Option(..., "--labels"),
+    predictions: str = typer.Option(..., "--predictions"),
+    parameters_file: str = typer.Option(..., "--parameters_file"),
+    output_path: str = typer.Option(..., "--output_path"),
+):
+    # Modify the prepare command as needed
+    raise NotImplementedError("The evaluate method is not yet implemented")
+
+
+@app.command("hotfix")
+def hotfix():
+    # NOOP command for typer to behave correctly. DO NOT REMOVE OR MODIFY
+    # NOTE(review): presumably required because typer drops the subcommand
+    # name when an app registers only one command, which would break calling
+    # `evaluate` by name - confirm against the typer documentation
+    pass
+
+
+if __name__ == "__main__":
+    app()
diff --git a/...medperf/templates/evaluator_mlcube/{{cookiecutter.project_slug}}/project/requirements.txt b/...medperf/templates/evaluator_mlcube/{{cookiecutter.project_slug}}/project/requirements.txt
@@ -0,0 +1,2 @@
+typer
+# Include all your requirements here