From 39f205404b3038b7f3e1390065bdef7d60ec1b13 Mon Sep 17 00:00:00 2001 From: Aidan Feldman Date: Tue, 4 Jun 2019 14:18:36 -0400 Subject: [PATCH 01/11] search GitHub for OpenControl Components --- environment.yml | 1 + opencontrol.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+) create mode 100644 opencontrol.py diff --git a/environment.yml b/environment.yml index a0cf104..7f963fe 100644 --- a/environment.yml +++ b/environment.yml @@ -7,6 +7,7 @@ dependencies: - pip: - autoflake - black + - PyGithub - python-docx - plotly - pytest diff --git a/opencontrol.py b/opencontrol.py new file mode 100644 index 0000000..8c09885 --- /dev/null +++ b/opencontrol.py @@ -0,0 +1,15 @@ +from github import Github +import os + +token = os.getenv("GITHUB_TOKEN") +g = Github(token) + +# The GitHub code search API requires one or more org/user to be specified. +# https://developer.github.com/changes/2013-10-18-new-code-search-requirements/#new-validation-rule +# The following is a curated list. To find more, go to: +# https://github.com/search?utf8=%E2%9C%93&q=schema_version+satisfies+control_key+language%3Ayaml&type=Code +users = ["docker", "m3brown", "opencontrol"] +users_q = " ".join("user:{}".format(user) for user in users) +results = g.search_code("schema_version satisfies control_key language:yaml " + users_q) +for result in results: + print(result) From 164851248185695170f44f25f424ee5b4a102d1d Mon Sep 17 00:00:00 2001 From: Aidan Feldman Date: Tue, 4 Jun 2019 14:18:49 -0400 Subject: [PATCH 02/11] expand the list of OpenControl repository sources --- opencontrol.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/opencontrol.py b/opencontrol.py index 8c09885..22089b3 100644 --- a/opencontrol.py +++ b/opencontrol.py @@ -8,7 +8,25 @@ # https://developer.github.com/changes/2013-10-18-new-code-search-requirements/#new-validation-rule # The following is a curated list. To find more, go to: # https://github.com/search?utf8=%E2%9C%93&q=schema_version+satisfies+control_key+language%3Ayaml&type=Code -users = ["docker", "m3brown", "opencontrol"] +users = [ + "18F", + "ComplianceAsCode", + "corbaltcode", + "docker", + "GovReady", + "GSA", + "jenglish", + "jmmcnj", + "m3brown", + "madhugilla", + "nsagoo-pivotal", + "opencontrol", + "redhatrises", + "SecurityCentral", + "shawndwells", + "superbrilliant", + "weirdscience", +] users_q = " ".join("user:{}".format(user) for user in users) results = g.search_code("schema_version satisfies control_key language:yaml " + users_q) for result in results: From 27a9b22037a0fae6cc6b11b3eb84bcddd1f1b291 Mon Sep 17 00:00:00 2001 From: Aidan Feldman Date: Tue, 4 Jun 2019 15:48:23 -0400 Subject: [PATCH 03/11] cache the GitHub API requests --- .gitignore | 1 + environment.yml | 1 + opencontrol.py | 3 +++ 3 files changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index fb3becf..4c3bc0f 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ output.json matrix.csv out/ .ipynb_checkpoints/ +*.sqlite diff --git a/environment.yml b/environment.yml index 7f963fe..3650df3 100644 --- a/environment.yml +++ b/environment.yml @@ -9,6 +9,7 @@ dependencies: - black - PyGithub - python-docx + - requests-cache - plotly - pytest - rope diff --git a/opencontrol.py b/opencontrol.py index 22089b3..1a658f4 100644 --- a/opencontrol.py +++ b/opencontrol.py @@ -1,5 +1,8 @@ from github import Github import os +import requests_cache + +requests_cache.install_cache("requests_cache") token = os.getenv("GITHUB_TOKEN") g = Github(token) From 8d5fd2ca35260fd7a65f845d0849530dc35ea401 Mon Sep 17 00:00:00 2001 From: Aidan Feldman Date: Tue, 4 Jun 2019 15:48:51 -0400 Subject: [PATCH 04/11] print the list of OpenControl repositories --- opencontrol.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/opencontrol.py b/opencontrol.py index 1a658f4..c70e9e8 100644 --- a/opencontrol.py +++ b/opencontrol.py @@ -1,6 +1,7 @@ -from github import Github +from github import Github, RateLimitExceededException import os import requests_cache +import sys requests_cache.install_cache("requests_cache") @@ -32,5 +33,15 @@ ] users_q = " ".join("user:{}".format(user) for user in users) results = g.search_code("schema_version satisfies control_key language:yaml " + users_q) -for result in results: - print(result) + +# get as many as we can before hitting the rate limit +repos = set() +try: + for result in results: + repos.add(result.repository.full_name) +except RateLimitExceededException as err: + print(err, file=sys.stderr) + +sorted_repos = list(repos) +sorted_repos.sort() +print(sorted_repos) From 257fe3904d88b8c7c7fa3647b51e3011190009b2 Mon Sep 17 00:00:00 2001 From: Aidan Feldman Date: Tue, 4 Jun 2019 16:29:01 -0400 Subject: [PATCH 05/11] parse opencontrol systems --- environment.yml | 1 + opencontrol.py | 48 ++++++++++++++++++++++-------------------------- 2 files changed, 23 insertions(+), 26 deletions(-) diff --git a/environment.yml b/environment.yml index 3650df3..2d9d797 100644 --- a/environment.yml +++ b/environment.yml @@ -7,6 +7,7 @@ dependencies: - pip: - autoflake - black + - compliancelib - PyGithub - python-docx - requests-cache diff --git a/opencontrol.py b/opencontrol.py index c70e9e8..8ae1e8d 100644 --- a/opencontrol.py +++ b/opencontrol.py @@ -1,3 +1,4 @@ +import compliancelib from github import Github, RateLimitExceededException import os import requests_cache @@ -8,32 +9,7 @@ token = os.getenv("GITHUB_TOKEN") g = Github(token) -# The GitHub code search API requires one or more org/user to be specified. -# https://developer.github.com/changes/2013-10-18-new-code-search-requirements/#new-validation-rule -# The following is a curated list. To find more, go to: -# https://github.com/search?utf8=%E2%9C%93&q=schema_version+satisfies+control_key+language%3Ayaml&type=Code -users = [ - "18F", - "ComplianceAsCode", - "corbaltcode", - "docker", - "GovReady", - "GSA", - "jenglish", - "jmmcnj", - "m3brown", - "madhugilla", - "nsagoo-pivotal", - "opencontrol", - "redhatrises", - "SecurityCentral", - "shawndwells", - "superbrilliant", - "weirdscience", -] -users_q = " ".join("user:{}".format(user) for user in users) -results = g.search_code("schema_version satisfies control_key language:yaml " + users_q) - +results = g.search_code("path:/ filename:opencontrol.yaml components") # get as many as we can before hitting the rate limit repos = set() try: @@ -45,3 +21,23 @@ sorted_repos = list(repos) sorted_repos.sort() print(sorted_repos) + +systems = [] +for result in results: + print(result.path) + print(result.repository.html_url) + + sp = compliancelib.SystemCompliance() + try: + sp.load_system_from_opencontrol_repo(result.repository.html_url) + except Exception as err: + print( + "Failed to import {}.".format(result.repository.full_name), + err, + file=sys.stderr, + ) + continue + + systems.append(sp) + +print(len(systems)) From 95ad62db3af52ce0cb249d0494975b8ee8a8584a Mon Sep 17 00:00:00 2001 From: Aidan Feldman Date: Wed, 5 Jun 2019 00:12:39 -0400 Subject: [PATCH 06/11] opencontrol: refactor to smaller functions --- opencontrol.py | 56 ++++++++++++++++++++++++++------------------------ 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/opencontrol.py b/opencontrol.py index 8ae1e8d..4d6ab67 100644 --- a/opencontrol.py +++ b/opencontrol.py @@ -6,38 +6,40 @@ requests_cache.install_cache("requests_cache") -token = os.getenv("GITHUB_TOKEN") -g = Github(token) -results = g.search_code("path:/ filename:opencontrol.yaml components") -# get as many as we can before hitting the rate limit -repos = set() -try: - for result in results: - repos.add(result.repository.full_name) -except RateLimitExceededException as err: - print(err, file=sys.stderr) - -sorted_repos = list(repos) -sorted_repos.sort() -print(sorted_repos) +def opencontrol_files(github_client): + # only pull in top-level opencontrol.yaml files for now, for simplicity + return github_client.search_code("path:/ filename:opencontrol.yaml components") -systems = [] -for result in results: - print(result.path) - print(result.repository.html_url) +def opencontrol_system(file_result): sp = compliancelib.SystemCompliance() + repo = file_result.repository try: - sp.load_system_from_opencontrol_repo(result.repository.html_url) + sp.load_system_from_opencontrol_repo(repo.html_url) except Exception as err: - print( - "Failed to import {}.".format(result.repository.full_name), - err, - file=sys.stderr, - ) - continue + print("Failed to import {}.".format(repo.full_name), file=sys.stderr) + return None + + return sp + + +def opencontrol_systems(github_client): + results = opencontrol_files(github_client) + systems = [] + for result in results: + print(result.path) + print(result.repository.html_url) + + system = opencontrol_system(result) + if system: + systems.append(system) + + return systems - systems.append(sp) -print(len(systems)) +if __name__ == "__main__": + token = os.getenv("GITHUB_TOKEN") + g = Github(token) + systems = opencontrol_systems(g) + print(len(systems)) From d3ec6b92e7e9b39da65ac6ded3e4b603b2a2c8ad Mon Sep 17 00:00:00 2001 From: Aidan Feldman Date: Wed, 5 Jun 2019 02:05:43 -0400 Subject: [PATCH 07/11] make opencontrol_systems() a generator --- opencontrol.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/opencontrol.py b/opencontrol.py index 4d6ab67..2aef817 100644 --- a/opencontrol.py +++ b/opencontrol.py @@ -25,21 +25,19 @@ def opencontrol_system(file_result): def opencontrol_systems(github_client): + """A generator that yields instances of compliancelib.SystemCompliance().""" results = opencontrol_files(github_client) - systems = [] for result in results: print(result.path) print(result.repository.html_url) system = opencontrol_system(result) if system: - systems.append(system) - - return systems + yield system if __name__ == "__main__": token = os.getenv("GITHUB_TOKEN") g = Github(token) systems = opencontrol_systems(g) - print(len(systems)) + print(len(list(systems))) From 27c934c1d5cfb08eeec2c1ecd1e845c7e6f0a1d1 Mon Sep 17 00:00:00 2001 From: Aidan Feldman Date: Thu, 6 Jun 2019 01:32:52 -0400 Subject: [PATCH 08/11] opencontrol: parse into Controls --- opencontrol.py | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/opencontrol.py b/opencontrol.py index 2aef817..d31f889 100644 --- a/opencontrol.py +++ b/opencontrol.py @@ -1,4 +1,6 @@ import compliancelib +from fismatic.control import Control +from fismatic.control_set import ControlSet from github import Github, RateLimitExceededException import os import requests_cache @@ -25,7 +27,7 @@ def opencontrol_system(file_result): def opencontrol_systems(github_client): - """A generator that yields instances of compliancelib.SystemCompliance().""" + """A generator that yields instances of compliancelib.SystemCompliance.""" results = opencontrol_files(github_client) for result in results: print(result.path) @@ -36,8 +38,34 @@ def opencontrol_systems(github_client): yield system +def controls_for(system): + """A generator that yields instances of compliancelib.nist800_53.NIST800_53.""" + control_ids = compliancelib.NIST800_53.get_control_ids() + for control_id in control_ids: + try: + control = system.control(control_id) + except Exception: + # control not present, or some other issue + continue + yield control + + +def cl_to_fm_controls(cl_controls): + """Converts compliancelib controls into FISMAtic ones.""" + for cl_control in cl_controls: + fm_control = Control(name=cl_control.id) + # TODO handle multiple parts + fm_control.implementation = {"": cl_control.implementation_narrative} + yield fm_control + + if __name__ == "__main__": token = os.getenv("GITHUB_TOKEN") g = Github(token) + systems = opencontrol_systems(g) - print(len(list(systems))) + for system in systems: + cl_controls = controls_for(system) + fm_controls = cl_to_fm_controls(cl_controls) + control_set = ControlSet(fm_controls) + print(control_set.top_entities()) From 9aeb361cadd9b989c7024849b7af34382df7c33d Mon Sep 17 00:00:00 2001 From: Aidan Feldman Date: Thu, 6 Jun 2019 01:38:10 -0400 Subject: [PATCH 09/11] allow a single system URL to be passed from the command line --- opencontrol.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/opencontrol.py b/opencontrol.py index d31f889..54f7444 100644 --- a/opencontrol.py +++ b/opencontrol.py @@ -14,18 +14,22 @@ def opencontrol_files(github_client): return github_client.search_code("path:/ filename:opencontrol.yaml components") -def opencontrol_system(file_result): +def system_for(url): sp = compliancelib.SystemCompliance() - repo = file_result.repository try: - sp.load_system_from_opencontrol_repo(repo.html_url) + sp.load_system_from_opencontrol_repo(url) except Exception as err: - print("Failed to import {}.".format(repo.full_name), file=sys.stderr) + print("Failed to import {}.".format(url), file=sys.stderr) return None return sp +def opencontrol_system(file_result): + url = file_result.repository.html_url + return system_for(url) + + def opencontrol_systems(github_client): """A generator that yields instances of compliancelib.SystemCompliance.""" results = opencontrol_files(github_client) @@ -63,7 +67,11 @@ def cl_to_fm_controls(cl_controls): token = os.getenv("GITHUB_TOKEN") g = Github(token) - systems = opencontrol_systems(g) + if len(sys.argv) > 1: + systems = [system_for(url) for url in sys.argv[1:]] + else: + systems = opencontrol_systems(g) + for system in systems: cl_controls = controls_for(system) fm_controls = cl_to_fm_controls(cl_controls) From d40abdda4f749ca61fe29e6a29891675cc401f73 Mon Sep 17 00:00:00 2001 From: Aidan Feldman Date: Thu, 6 Jun 2019 03:21:15 -0400 Subject: [PATCH 10/11] refactor opencontrol repository URL handling --- opencontrol.py | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/opencontrol.py b/opencontrol.py index 54f7444..3eec24f 100644 --- a/opencontrol.py +++ b/opencontrol.py @@ -18,28 +18,20 @@ def system_for(url): sp = compliancelib.SystemCompliance() try: sp.load_system_from_opencontrol_repo(url) - except Exception as err: - print("Failed to import {}.".format(url), file=sys.stderr) + except Exception: return None return sp -def opencontrol_system(file_result): - url = file_result.repository.html_url - return system_for(url) - - -def opencontrol_systems(github_client): - """A generator that yields instances of compliancelib.SystemCompliance.""" +def opencontrol_urls(github_client): + """A generator that yields strings of repository URLs.""" results = opencontrol_files(github_client) for result in results: print(result.path) print(result.repository.html_url) - system = opencontrol_system(result) - if system: - yield system + yield result.repository.html_url def controls_for(system): @@ -63,17 +55,26 @@ def cl_to_fm_controls(cl_controls): yield fm_control +def process_url(url): + system = system_for(url) + if not system: + print("Failed to import {}.".format(url), file=sys.stderr) + return + + cl_controls = controls_for(system) + fm_controls = cl_to_fm_controls(cl_controls) + control_set = ControlSet(fm_controls) + print(control_set.top_entities()) + + if __name__ == "__main__": token = os.getenv("GITHUB_TOKEN") g = Github(token) if len(sys.argv) > 1: - systems = [system_for(url) for url in sys.argv[1:]] + repo_urls = sys.argv[1:] else: - systems = opencontrol_systems(g) + repo_urls = opencontrol_urls(g) - for system in systems: - cl_controls = controls_for(system) - fm_controls = cl_to_fm_controls(cl_controls) - control_set = ControlSet(fm_controls) - print(control_set.top_entities()) + for url in repo_urls: + process_url(url) From b6edf37c594f74fa54ac7d7b4ced756bf4264aab Mon Sep 17 00:00:00 2001 From: Aidan Feldman Date: Thu, 6 Jun 2019 03:32:47 -0400 Subject: [PATCH 11/11] remove unused import --- opencontrol.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opencontrol.py b/opencontrol.py index 3eec24f..9a9c239 100644 --- a/opencontrol.py +++ b/opencontrol.py @@ -1,7 +1,7 @@ import compliancelib from fismatic.control import Control from fismatic.control_set import ControlSet -from github import Github, RateLimitExceededException +from github import Github import os import requests_cache import sys