Merge pull request #15 from ocsf/mark-observables

Mark observables
ocsf · Jul 30, 2024 · 898764a · 898764a
2 parents d233307 + 34c83ee
commit 898764a
Show file tree

Hide file tree

Showing 10 changed files with 393 additions and 49 deletions.
diff --git a/README.md b/README.md
@@ -2,6 +2,18 @@
 
 Tools for building Python scripts and applications leveraging the OCSF.
 
+## Quick Start
+
+If you just want to use this library as a CLI tool, install it with `pip` or
+`poetry` and try the following commands:
+
+```sh
+python -m ocsf.compile path/to/ocsf-schema
+python -m ocsf.compare my-schema-export.json path/to/ocsf-schema
+python -m ocsf.schema 1.2.0
+python -m ocsf.validate.compatibility path/to/ocsf-schema 1.2.0
+```
+
 ## About
 
 This project began with two goals:
@@ -25,11 +37,13 @@ This library is divided into several discrete packages.
 
 The `ocsf.util` package provides the `get_schema` function. This function
 leverages the functionality in the `ocsf.schema` and `ocsf.api` packages (below)
-to easily build an OCSF schema from a file on disk or from the API.
+to easily build an OCSF schema from a file on disk, from a working copy of an
+OCSF repository, or from the API.
 
 ```python
 schema = get_schema("1.1.0")
 schema = get_schema("./1.3.0-dev.json")
+schema = get_schema("path/to/ocsf-schema")
 ```
 
 ### ocsf.schema: The Schema Package
@@ -40,6 +54,24 @@ OCSF schema as represented from the OCSF server's API endpoints. See the
 
 It also includes utilities to parse the schema from a JSON string or file.
 
+### ocsf.repository: The Repository Package
+
+The `ocsf.repository` package contains a typed Python representation of a
+working copy of an OCSF schema repository. Said another way, it represents the
+OCSF metaschema and repository contents in Python.
+
+It also includes the `read_repo` function to read a repository from disk.
+
+### ocsf.compile: An OCSF Compiler
+
+The `ocsf.compile` package "compiles" the OCSF schema from a repository just as
+the OCSF server does (with very few exceptions). It is meant to provide:
+
+ 1. An easy to use CLI tool to compile a repository into a single JSON schema
+    file.
+ 2. A reference implementation for others looking to better understand OCSF
+    compilation or to create their own compiler.
+
 ### ocsf.api: The API Package
 
 The `ocsf.api` package exports an `OcsfApiClient`, which is a lightweight HTTP
@@ -98,10 +130,15 @@ for name, obj in diff.objects.items():
 
 ### ocsf.validate.framework: The Validation Framework Package 
 
-The `ocsf.valide.framework` package provides a lightweight framework for
+The `ocsf.validate.framework` package provides a lightweight framework for
 validators. It was inspired by the needs of `ocsf-validator`, which may be
 ported to this framework in the future.
 
+### ocsf.validate.compatibility: The Backwards Compatibility Validator
+
+The `ocsf.validate.compatibility` package provides a backwards compatibility
+validator for OCSF schemata. It compares two OCSF schemata and reports any
+breaking changes between the old and new versions.
 
 ## Getting Started
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "ocsf-lib"
-version = "0.4.0"
+version = "0.5.0"
 description = "Tools for working with the OCSF schema"
 authors = ["Jeremy Fisher <[email protected]>"]
 readme = "README.md"
@@ -23,6 +23,9 @@ pytest-env = "^1.1.3"
 
 [tool.poetry.scripts]
 compare = "ocsf.compare.__main__:main"
+validate-compatibility = "ocsf.validate.compatibility.__main__:main"
+compile = "ocsf.compile.__main__:main"
+schema = "ocsf.schema.__main__:main"
 
 [build-system]
 requires = ["poetry-core"]
@@ -31,7 +34,7 @@ build-backend = "poetry.core.masonry.api"
 [tool.pyright]
 typeCheckingMode = "strict"
 strict = ["src/ocsf"]
-reportPrivateUsage = false
+reportPrivateUsage = false # Unit testing private/protected things is A-OK in my book
 
 [tool.pytest.ini_options]
 markers = [

diff --git a/src/ocsf/compare/__main__.py b/src/ocsf/compare/__main__.py
@@ -2,7 +2,9 @@
 
 Example:
 
-    $ python -m ocsf_diff old_schema.json new_schema.json
+    $ python -m ocsf.compare old_schema.json new_schema.json
+    $ python -m ocsf.compare 1.2.0 new_schema.json
+    $ python -m ocsf.compare new_schema.json path/to/ocsf-schema
 
 """
 
@@ -21,8 +23,8 @@
 def main():
     parser = ArgumentParser(description="Compare two OCSF schemata")
 
-    parser.add_argument("old_schema", help="Path to the old schema file or the old schema version.")
-    parser.add_argument("new_schema", help="Path to the new schema file or the new schema version.")
+    parser.add_argument("old_schema", help="Path to the old schema file, old schema repository, or the old schema version.")
+    parser.add_argument("new_schema", help="Path to the new schema file, new schema repository, or the new schema version.")
     parser.add_argument(
         "--expand-changes",
         dest="collapse_changes",

diff --git a/src/ocsf/compile/__main__.py b/src/ocsf/compile/__main__.py
@@ -1,56 +1,123 @@
-# TODO this file is a stub for testing and should be converted into an example
+"""Compile a repository into a schema and dump it as JSON to STDOUT.
 
-from pprint import pprint
-from ocsf.repository import read_repo
+Valid command line arguments are:
+```
+positional arguments:
+  path                  Path to the OCSF repository
 
-from .compiler import Compilation
+options:
+  -h, --help            show this help message and exit
+  --profile [PROFILE ...]
+                        The name of a profile to be enabled (defaults to all)
+  --ignore-profile [IGNORE_PROFILE ...]
+                        The name of a profile to be disabled
+  --extension [EXTENSION ...]
+                        The short path name (e.g. 'windows') of an extension to be enabled (defaults to all)
+  --ignore-extension [IGNORE_EXTENSION ...]
+                        The short path name of an extension to be disabled
+  --prefix-extensions   Prefix object and event names and any attributes that reference them as their type with the extension name
+  --no-prefix-extensions
+                        Do not prefix object and event names and any attributes that reference them as their type with the extension name
+  --set-object-types    Set type to 'object' and object_type to the object name for type references to objects
+  --no-set-object-types
+                        Do not set type to 'object' and object_type to the object name for type references to objects
+  --set-observable      Set the observable field on attributes to the corresponding Observable Type ID where applicable
+  --no-set-observable   Do not set the observable field on attributes to the corresponding Observable Type ID where applicable
+```
+
+Examples:
+
+Build the schema:
+
+    $ python -m ocsf.compile /path/to/repo
+
+Build the schema with the data_security profile disabled:
 
+    $ python -m ocsf.compile /path/to/repo --ignore-profile=data_security
 
-PATH = "/Users/jfisher/Source/ocsf/ocsf-schema"
+Build the schema with only the windows extension enabled:
 
-repo = read_repo(PATH, preserve_raw_data=True)
-compiler = Compilation(repo)
+    $ python -m ocsf.compile /path/to/repo --extension=windows
 
-# TARGET = "events/iam/authentication.json"
-# TARGET = "events/base_event.json"
-TARGET = "objects/process.json"
-# TARGET = "extensions/windows/events/prefetch_query.json"
-# TARGET = "includes/classification.json"
+"""
+
+from argparse import ArgumentParser
+
+from ocsf.repository import read_repo
+from ocsf.schema import to_json
+
+from .compiler import Compilation
+from .options import CompilationOptions
 
-analysis = compiler.analyze()
-order = compiler.order()
-compile = compiler.compile()
-schema = compiler.build()
+def main():
+    parser = ArgumentParser(description="Compile an OCSF repository into a schema and dump it as JSON to STDOUT")
+    parser.add_argument("path", help="Path to the OCSF repository")
+    parser.add_argument("--profile", nargs="*", help="The name of a profile to be enabled (defaults to all)")
+    parser.add_argument("--ignore-profile", nargs="*", help="The name of a profile to be disabled")
+    parser.add_argument(
+        "--extension",
+        nargs="*",
+        help="The short path name (e.g. 'windows') of an extension to be enabled (defaults to all)",
+    )
+    parser.add_argument("--ignore-extension", nargs="*", help="The short path name of an extension to be disabled")
+    parser.add_argument(
+        "--prefix-extensions",
+        default=True,
+        action="store_true",
+        help="Prefix object and event names and any attributes that reference them as their type with the extension name",
+    )
+    parser.add_argument(
+        "--no-prefix-extensions",
+        dest="prefix_extensions",
+        action="store_false",
+        help="Do not prefix object and event names and any attributes that reference them as their type with the extension name",
+    )
+    parser.add_argument(
+        "--set-object-types",
+        default=True,
+        action="store_true",
+        help="Set type to 'object' and object_type to the object name for type references to objects",
+    )
+    parser.add_argument(
+        "--no-set-object-types",
+        dest="set_object_types",
+        action="store_false",
+        help="Do not set type to 'object' and object_type to the object name for type references to objects",
+    )
+    parser.add_argument(
+        "--set-observable",
+        default=True,
+        action="store_true",
+        help="Set the observable field on attributes to the corresponding Observable Type ID where applicable",
+    )
+    parser.add_argument(
+        "--no-set-observable",
+        dest="set_observable",
+        action="store_false",
+        help="Do not set the observable field on attributes to the corresponding Observable Type ID where applicable",
+    )
 
-print(f"TARGET: {TARGET}")
-print("ORDER")
-prereqs: set[str] = set()
+    args = parser.parse_args()
 
+    options = CompilationOptions()
 
-def find_op(target: str):
-    for o in order:
-        if o.target == target and o.target not in prereqs:
-            if o.prerequisite is not None and o.prerequisite not in prereqs:
-                prereqs.add(o.prerequisite)
-                find_op(o.prerequisite)
-            # pprint(o)
-    return None
+    if args.profile:
+        options.profiles = args.profile
+    if args.ignore_profile:
+        options.ignore_profiles = args.ignore_profile
+    if args.extension:
+        options.extensions = args.extension
+    if args.ignore_extension:
+        options.ignore_extensions = args.ignore_extension
 
+    options.prefix_extensions = args.prefix_extensions
+    options.set_object_types = args.set_object_types
+    options.set_observable = args.set_observable
 
-find_op(TARGET)
+    repo = read_repo(args.path, preserve_raw_data=False)
+    compiler = Compilation(repo, options)
 
-for o in order:
-    if o.target in prereqs or o.target == TARGET:
-        pprint(o)
-        if o.target in compile:
-            for op, change in compile[o.target]:
-                if op == o:
-                    pprint(change)
+    print(to_json(compiler.build()))
 
-# print()
-# print("COMPILE")
-#
-# for prereq in prereqs:
-#    if prereq in compile:
-#        pprint(compile[prereq])
-# pprint(compile[TARGET])
+if __name__ == "__main__":
+    main()
diff --git a/src/ocsf/compile/compiler.py b/src/ocsf/compile/compiler.py
@@ -31,6 +31,7 @@
 from .planners.object_type import ObjectTypePlanner
 from .planners.uid_names import UidSiblingPlanner
 from .planners.datetime import DateTimePlanner
+from .planners.observable import MarkObservablesPlanner
 from .merge import MergeResult
 
 FileOperations = dict[RepoPath, list[Operation]]
@@ -69,6 +70,7 @@ def __init__(self, repo: Repository, options: CompilationOptions = CompilationOp
                 ObjectTypePlanner(self._proto, options),
                 UidSiblingPlanner(self._proto, options),
                 DateTimePlanner(self._proto, options),
+                MarkObservablesPlanner(self._proto, options),
                 ExtensionCopyPlanner(self._proto, options),
             ],
         ]

diff --git a/src/ocsf/compile/options.py b/src/ocsf/compile/options.py
@@ -26,3 +26,8 @@ class CompilationOptions:
     references to objects, as per the original OCSF server. If False, the type 
     field will refer directly to the object type.
     """
+
+    set_observable: bool = False
+    """If True, set the observable field on attributes to the corresponding
+    Observable Type ID where applicable.
+    """