diff --git a/.github/lineage.yml b/.github/lineage.yml index 8dfc20b..b10c80c 100644 --- a/.github/lineage.yml +++ b/.github/lineage.yml @@ -3,4 +3,4 @@ version: "1" lineage: skeleton: - remote-url: https://github.com/cisagov/skeleton-generic.git + remote-url: https://github.com/cisagov/skeleton-docker.git diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ce70bf5..ece30de 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -26,10 +26,9 @@ on: env: BUILDX_CACHE_DIR: ~/.cache/buildx - IMAGE_NAME: cisagov/example + IMAGE_NAME: cisagov/vdp-scanner PIP_CACHE_DIR: ~/.cache/pip - PLATFORMS: "linux/amd64,linux/arm/v6,linux/arm/v7,\ - linux/arm64,linux/ppc64le,linux/s390x" + PLATFORMS: "linux/amd64,linux/arm/v7,linux/arm64" PRE_COMMIT_CACHE_DIR: ~/.cache/pre-commit jobs: diff --git a/.isort.cfg b/.isort.cfg index 46d45f3..20544c1 100644 --- a/.isort.cfg +++ b/.isort.cfg @@ -6,5 +6,7 @@ import_heading_stdlib=Standard Python Libraries import_heading_thirdparty=Third-Party Libraries import_heading_firstparty=cisagov Libraries +known_first_party=hash_http_content + # Run isort under the black profile to align with our other Python linting profile=black diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4222005..fb777dc 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -15,7 +15,7 @@ all of which should be in this repository. If you want to report a bug or request a new feature, the most direct method is to [create an -issue](https://github.com/cisagov/skeleton-docker/issues) in this +issue](https://github.com/cisagov/vdp-scanner-docker/issues) in this repository. We recommend that you first search through existing issues (both open and closed) to check if your particular issue has already been reported. If it has then you might want to add a comment @@ -25,7 +25,7 @@ one. 
## Pull requests ## If you choose to [submit a pull -request](https://github.com/cisagov/skeleton-docker/pulls), you will +request](https://github.com/cisagov/vdp-scanner-docker/pulls), you will notice that our continuous integration (CI) system runs a fairly extensive set of linters and syntax checkers. Your pull request may fail these checks, and that's OK. If you want you can stop there and @@ -111,9 +111,9 @@ can create and configure the Python virtual environment with these commands: ```console -cd skeleton-docker -pyenv virtualenv skeleton-docker -pyenv local skeleton-docker +cd vdp-scanner-docker +pyenv virtualenv vdp-scanner-docker +pyenv local vdp-scanner-docker pip install --requirement requirements-dev.txt ``` diff --git a/Dockerfile b/Dockerfile index 8819053..fc4c3d2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,38 +1,77 @@ -ARG VERSION=unspecified +ARG PY_VERSION=3.9 -FROM python:3.9-alpine - -ARG VERSION +FROM python:${PY_VERSION} AS compile-stage # For a list of pre-defined annotation keys and value types see: # https://github.com/opencontainers/image-spec/blob/master/annotations.md # Note: Additional labels are added by the build workflow. 
-LABEL org.opencontainers.image.authors="mark.feldhousen@cisa.dhs.gov" +LABEL org.opencontainers.image.authors="nicholas.mcdonnell@cisa.dhs.gov" LABEL org.opencontainers.image.vendor="Cyber and Infrastructure Security Agency" -ARG CISA_UID=421 -ENV CISA_HOME="/home/cisa" -ENV ECHO_MESSAGE="Hello World from Dockerfile" +RUN apt-get update \ + && apt-get install -y --allow-downgrades --no-install-recommends \ + libxml2-dev=2.9.4+dfsg1-7+deb10u1 \ + libxslt1-dev=1.1.32-2.2~deb10u1 + +ENV PY_VENV=/.venv + +# Manually set up the virtual environment +RUN python -m venv --system-site-packages ${PY_VENV} +ENV PATH="${PY_VENV}/bin:$PATH" + +# Install core Python dependencies +RUN python -m pip install --no-cache-dir \ + pip==21.0.1 \ + pipenv==2020.11.15 \ + setuptools==53.0.0 \ + wheel==0.36.2 + +# Install vdp_scanner.py requirements +COPY src/Pipfile Pipfile +COPY src/Pipfile.lock Pipfile.lock +# PIPENV_VENV_IN_PROJECT=1 directs pipenv to use the current directory for venvs +RUN PIPENV_VENV_IN_PROJECT=1 pipenv sync + +# We only need pipenv to set up the environment, so we remove it from the venv +# as a last step. 
+RUN python -m pip uninstall --yes pipenv + +FROM python:${PY_VERSION}-slim AS build-stage + +ARG SERVERLESS_CHROME_VERSION="v1.0.0-57" +ARG SERVERLESS_CHROME_LOCAL="/usr/local/bin/serverless-chrome" + +RUN apt-get update \ + && apt-get install -y --allow-downgrades --no-install-recommends \ + ca-certificates=20200601~deb10u2 \ + chromium-common=88.0.4324.182-1~deb10u1 \ + curl=7.64.0-4+deb10u2 \ + libnss3=2:3.42.1-1+deb10u3 \ + libxml2-dev=2.9.4+dfsg1-7+deb10u1 \ + libxslt1-dev=1.1.32-2.2~deb10u1 \ + openssl=1.1.1d-0+deb10u6 \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* -RUN addgroup --system --gid ${CISA_UID} cisa \ - && adduser --system --uid ${CISA_UID} --ingroup cisa cisa +# Download the specified serverless chrome release and install it for use +SHELL ["/bin/bash", "-o", "pipefail", "-c"] +# Follow redirects and output as the specified file name +RUN curl -L \ + https://github.com/adieuadieu/serverless-chrome/releases/download/${SERVERLESS_CHROME_VERSION}/stable-headless-chromium-amazonlinux-2.zip \ + | gunzip --stdout - > ${SERVERLESS_CHROME_LOCAL} +RUN chmod 755 ${SERVERLESS_CHROME_LOCAL} -RUN apk --update --no-cache add \ -ca-certificates \ -openssl \ -py-pip +ENV PY_VENV=/.venv +COPY --from=compile-stage ${PY_VENV} ${PY_VENV} +ENV PATH="${PY_VENV}/bin:$PATH" -WORKDIR ${CISA_HOME} +ENV TASK_HOME="/task" -RUN wget -O sourcecode.tgz https://github.com/cisagov/skeleton-python-library/archive/v${VERSION}.tar.gz && \ - tar xzf sourcecode.tgz --strip-components=1 && \ - pip install --requirement requirements.txt && \ - ln -snf /run/secrets/quote.txt src/example/data/secret.txt && \ - rm sourcecode.tgz +WORKDIR ${TASK_HOME} +RUN mkdir host_mount -USER cisa +COPY src/version.txt version.txt +COPY src/vdp_scanner.py vdp_scanner.py -EXPOSE 8080/TCP -VOLUME ["/var/log"] -ENTRYPOINT ["example"] -CMD ["--log-level", "DEBUG"] +ENTRYPOINT ["python", "vdp_scanner.py"] +CMD ["github"] diff --git a/README.md b/README.md index 99246a9..5dfa827 100644 --- 
a/README.md +++ b/README.md @@ -1,72 +1,56 @@ -# skeleton-docker 💀🐳 # +# vdp-scanner-docker 🔍📄 # -[![GitHub Build Status](https://github.com/cisagov/skeleton-docker/workflows/build/badge.svg)](https://github.com/cisagov/skeleton-docker/actions) -[![Total alerts](https://img.shields.io/lgtm/alerts/g/cisagov/skeleton-docker.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/cisagov/skeleton-docker/alerts/) -[![Language grade: Python](https://img.shields.io/lgtm/grade/python/g/cisagov/skeleton-docker.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/cisagov/skeleton-docker/context:python) +[![GitHub Build Status](https://github.com/cisagov/vdp-scanner-docker/workflows/build/badge.svg)](https://github.com/cisagov/vdp-scanner-docker/actions) +[![Total alerts](https://img.shields.io/lgtm/alerts/g/cisagov/vdp-scanner-docker.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/cisagov/vdp-scanner-docker/alerts/) +[![Language grade: Python](https://img.shields.io/lgtm/grade/python/g/cisagov/vdp-scanner-docker.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/cisagov/vdp-scanner-docker/context:python) ## Docker Image ## -[![Docker Pulls](https://img.shields.io/docker/pulls/cisagov/example)](https://hub.docker.com/r/cisagov/example) -[![Docker Image Size (latest by date)](https://img.shields.io/docker/image-size/cisagov/example)](https://hub.docker.com/r/cisagov/example) -[![Platforms](https://img.shields.io/badge/platforms-amd64%20%7C%20arm%2Fv6%20%7C%20arm%2Fv7%20%7C%20arm64%20%7C%20ppc64le%20%7C%20s390x-blue)](https://hub.docker.com/r/cisagov/skeleton-docker/tags) +[![Docker Pulls](https://img.shields.io/docker/pulls/cisagov/vdp-scanner)](https://hub.docker.com/r/cisagov/vdp-scanner) +[![Docker Image Size (latest by date)](https://img.shields.io/docker/image-size/cisagov/vdp-scanner)](https://hub.docker.com/r/cisagov/vdp-scanner) 
+[![Platforms](https://img.shields.io/badge/platforms-amd64%20%7C%20arm%2Fv7%20%7C%20arm64-blue)](https://hub.docker.com/r/cisagov/vdp-scanner/tags) -This is a docker skeleton project that can be used to quickly get a -new [cisagov](https://github.com/cisagov) GitHub docker project -started. This skeleton project contains [licensing -information](LICENSE), as well as [pre-commit hooks](https://pre-commit.com) -and [GitHub Actions](https://github.com/features/actions) configurations -appropriate for docker containers and the major languages that we use. +This is a Docker project to scan either the +[GSA current Federal .gov domain list](https://github.com/GSA/data/blob/master/dotgov-domains/current-federal.csv) +or a given CSV in the same format with the +[cisagov/hash-http-content](https://github.com/cisagov/hash-http-content) +Python library. Then it will output CSVs with agency and domain level results. ## Usage ## ### Install ### -Pull `cisagov/example` from the Docker repository: +Pull `cisagov/vdp-scanner` from the Docker repository: - docker pull cisagov/example +```console +docker pull cisagov/vdp-scanner +``` -Or build `cisagov/example` from source: +Or build `cisagov/vdp-scanner` from source: - git clone https://github.com/cisagov/skeleton-docker.git - cd skeleton-docker - docker-compose build --build-arg VERSION=0.0.1 +```console +git clone https://github.com/cisagov/vdp-scanner-docker.git +cd vdp-scanner-docker +docker-compose build +``` ### Run ### - docker-compose run --rm example +This Docker image needs a bind mount to get the output from the script to the +host. 
-## Ports ## +Using `docker run` -This container exposes the following ports: +```console +docker run --mount type=bind,source=$(pwd),target=/task/host_mount --rm cisagov/vdp-scanner +``` -| Port | Protocol | Service | -|-------|----------|----------| -| 8080 | TCP | http | +or if you have cloned the repository, you can use the included +`docker-compose.yml` -## Environment Variables ## - -| Variable | Default Value | Purpose | -|---------------|-------------------------------|--------------| -| ECHO_MESSAGE | `Hello World from Dockerfile` | Text to echo | - -## Secrets ## - -| Filename | Purpose | -|---------------|----------------------| -| quote.txt | Secret text to echo | - -## Volumes ## - -| Mount point | Purpose | -|-------------|----------------| -| /var/log | logging output | - -## New Repositories from a Skeleton ## - -Please see our [Project Setup guide](https://github.com/cisagov/development-guide/tree/develop/project_setup) -for step-by-step instructions on how to start a new repository from -a skeleton. This will save you time and effort when configuring a -new repository! +```console +docker-compose up +``` ## Contributing ## diff --git a/docker-compose.yml b/docker-compose.yml index 6774387..a873463 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,35 +3,25 @@ version: "3.7" # This docker-compose file is used to build and test the container -secrets: - quote_txt: - file: ./src/secrets/quote.txt - services: - example: + vdp-scanner: # Run the container normally build: - # VERSION must be specified on the command line: - # e.g., --build-arg VERSION=0.0.1 + # SERVERLESS_CHROME_VERSION and SERVERLESS_CHROME_LOCAL can be specified + # on the command line to modify what is installed and where: + # --build-arg SERVERLESS_CHROME_VERSION=v1.0.0-56 + # --build-arg SERVERLESS_CHROME_LOCAL=/opt/serverless-chrome context: . 
dockerfile: Dockerfile - image: cisagov/example + image: cisagov/vdp-scanner init: true restart: "no" - environment: - - ECHO_MESSAGE=Hello World from docker-compose! - ports: - - target: "8080" - published: "8080" - protocol: tcp - mode: host - secrets: - - source: quote_txt - target: quote.txt + volumes: + - .:/task/host_mount - example-version: + vdp-scanner-version: # Run the container to collect version information - image: cisagov/example + image: cisagov/vdp-scanner init: true restart: "no" command: --version diff --git a/src/Pipfile b/src/Pipfile new file mode 100644 index 0000000..c95097f --- /dev/null +++ b/src/Pipfile @@ -0,0 +1,18 @@ +[[source]] +url = "https://pypi.org/simple" +verify_ssl = true +name = "pypi" + +[packages] +docopt = "*" +hash-http-content = {file = "https://github.com/cisagov/hash-http-content/archive/v0.0.1.tar.gz"} +requests = "*" +urllib3 = "*" +pip = "*" +setuptools = "*" +wheel = "*" + +[dev-packages] + +[requires] +python_version = "3" diff --git a/src/Pipfile.lock b/src/Pipfile.lock new file mode 100644 index 0000000..f081d2e --- /dev/null +++ b/src/Pipfile.lock @@ -0,0 +1,213 @@ +{ + "_meta": { + "hash": { + "sha256": "1db2ac6669815a5e98c2e5e8fad55886f578b2e3b3240f1b56c5e04d95fcfecf" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3" + }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "appdirs": { + "hashes": [ + "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41", + "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128" + ], + "version": "==1.4.4" + }, + "beautifulsoup4": { + "hashes": [ + "sha256:4c98143716ef1cb40bf7f39a8e3eec8f8b009509e74904ba3a7b315431577e35", + "sha256:84729e322ad1d5b4d25f805bfa05b902dd96450f43842c4e99067d5e1369eb25", + "sha256:fff47e031e34ec82bf17e00da8f592fe7de69aeea38be00523c04623c04fb666" + ], + "version": "==4.9.3" + }, + "certifi": { + "hashes": [ + 
"sha256:1a4995114262bffbc2413b159f2a1a480c969de6e6eb13ee966d470af86af59c", + "sha256:719a74fb9e33b9bd44cc7f3a8d94bc35e4049deebe19ba7d8e108280cfd59830" + ], + "version": "==2020.12.5" + }, + "chardet": { + "hashes": [ + "sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa", + "sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==4.0.0" + }, + "contextlib2": { + "hashes": [ + "sha256:01f490098c18b19d2bd5bb5dc445b2054d2fa97f09a4280ba2c5f3c394c8162e", + "sha256:3355078a159fbb44ee60ea80abd0d87b80b78c248643b49aa6d94673b413609b" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==0.6.0.post1" + }, + "docopt": { + "hashes": [ + "sha256:49b3a825280bd66b3aa83585ef59c4a8c82f2c8a522dbe754a8bc8d08c85c491" + ], + "index": "pypi", + "version": "==0.6.2" + }, + "hash-http-content": { + "file": "https://github.com/cisagov/hash-http-content/archive/v0.0.1.tar.gz", + "hashes": [ + "sha256:388abc35517970eba40985df3283af85695aa77810e2a29fe94900484ff8a5b6" + ], + "version": "==0.0.1" + }, + "idna": { + "hashes": [ + "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6", + "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==2.10" + }, + "lxml": { + "hashes": [ + "sha256:079f3ae844f38982d156efce585bc540c16a926d4436712cf4baee0cce487a3d", + "sha256:0fbcf5565ac01dff87cbfc0ff323515c823081c5777a9fc7703ff58388c258c3", + "sha256:122fba10466c7bd4178b07dba427aa516286b846b2cbd6f6169141917283aae2", + "sha256:1b7584d421d254ab86d4f0b13ec662a9014397678a7c4265a02a6d7c2b18a75f", + "sha256:26e761ab5b07adf5f555ee82fb4bfc35bf93750499c6c7614bd64d12aaa67927", + "sha256:289e9ca1a9287f08daaf796d96e06cb2bc2958891d7911ac7cae1c5f9e1e0ee3", + 
"sha256:2a9d50e69aac3ebee695424f7dbd7b8c6d6eb7de2a2eb6b0f6c7db6aa41e02b7", + "sha256:33bb934a044cf32157c12bfcfbb6649807da20aa92c062ef51903415c704704f", + "sha256:3439c71103ef0e904ea0a1901611863e51f50b5cd5e8654a151740fde5e1cade", + "sha256:39b78571b3b30645ac77b95f7c69d1bffc4cf8c3b157c435a34da72e78c82468", + "sha256:4289728b5e2000a4ad4ab8da6e1db2e093c63c08bdc0414799ee776a3f78da4b", + "sha256:4bff24dfeea62f2e56f5bab929b4428ae6caba2d1eea0c2d6eb618e30a71e6d4", + "sha256:542d454665a3e277f76954418124d67516c5f88e51a900365ed54a9806122b83", + "sha256:5a0a14e264069c03e46f926be0d8919f4105c1623d620e7ec0e612a2e9bf1c04", + "sha256:66e575c62792c3f9ca47cb8b6fab9e35bab91360c783d1606f758761810c9791", + "sha256:74f7d8d439b18fa4c385f3f5dfd11144bb87c1da034a466c5b5577d23a1d9b51", + "sha256:7610b8c31688f0b1be0ef882889817939490a36d0ee880ea562a4e1399c447a1", + "sha256:76fa7b1362d19f8fbd3e75fe2fb7c79359b0af8747e6f7141c338f0bee2f871a", + "sha256:7728e05c35412ba36d3e9795ae8995e3c86958179c9770e65558ec3fdfd3724f", + "sha256:8157dadbb09a34a6bd95a50690595e1fa0af1a99445e2744110e3dca7831c4ee", + "sha256:820628b7b3135403540202e60551e741f9b6d3304371712521be939470b454ec", + "sha256:884ab9b29feaca361f7f88d811b1eea9bfca36cf3da27768d28ad45c3ee6f969", + "sha256:89b8b22a5ff72d89d48d0e62abb14340d9e99fd637d046c27b8b257a01ffbe28", + "sha256:92e821e43ad382332eade6812e298dc9701c75fe289f2a2d39c7960b43d1e92a", + "sha256:b007cbb845b28db4fb8b6a5cdcbf65bacb16a8bd328b53cbc0698688a68e1caa", + "sha256:bc4313cbeb0e7a416a488d72f9680fffffc645f8a838bd2193809881c67dd106", + "sha256:bccbfc27563652de7dc9bdc595cb25e90b59c5f8e23e806ed0fd623755b6565d", + "sha256:c4f05c5a7c49d2fb70223d0d5bcfbe474cf928310ac9fa6a7c6dddc831d0b1d4", + "sha256:ce256aaa50f6cc9a649c51be3cd4ff142d67295bfc4f490c9134d0f9f6d58ef0", + "sha256:d2e35d7bf1c1ac8c538f88d26b396e73dd81440d59c1ef8522e1ea77b345ede4", + "sha256:df7c53783a46febb0e70f6b05df2ba104610f2fb0d27023409734a3ecbb78fb2", + "sha256:efac139c3f0bf4f0939f9375af4b02c5ad83a622de52d6dfa8e438e8e01d0eb0", 
+ "sha256:efd7a09678fd8b53117f6bae4fa3825e0a22b03ef0a932e070c0bdbb3a35e654", + "sha256:f2380a6376dfa090227b663f9678150ef27543483055cc327555fb592c5967e2", + "sha256:f8380c03e45cf09f8557bdaa41e1fa7c81f3ae22828e1db470ab2a6c96d8bc23", + "sha256:f90ba11136bfdd25cae3951af8da2e95121c9b9b93727b1b896e3fa105b2f586" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==4.6.3" + }, + "pyee": { + "hashes": [ + "sha256:383973b63ad7ed5e3c0311f8b179c52981f9e7b3eaea0e9a830d13ec34dde65f", + "sha256:92dacc5bd2bdb8f95aa8dd2585d47ca1c4840e2adb95ccf90034d64f725bfd31" + ], + "version": "==8.1.0" + }, + "pyppeteer": { + "hashes": [ + "sha256:c2974be1afa13b17f7ecd120d265d8b8cd324d536a231c3953ca872b68aba4af", + "sha256:d4cb4a5ef94b00c1073aed888b39646ce26cff3339cff7a3f1f1cc307bf50408" + ], + "markers": "python_version < '4' and python_full_version >= '3.6.1'", + "version": "==0.2.5" + }, + "requests": { + "hashes": [ + "sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804", + "sha256:c210084e36a42ae6b9219e00e48287def368a26d03a048ddad7bfee44f75871e" + ], + "index": "pypi", + "version": "==2.25.1" + }, + "schema": { + "hashes": [ + "sha256:cf97e4cd27e203ab6bb35968532de1ed8991bce542a646f0ff1d643629a4945d", + "sha256:fbb6a52eb2d9facf292f233adcc6008cffd94343c63ccac9a1cb1f3e6de1db17" + ], + "version": "==0.7.4" + }, + "soupsieve": { + "hashes": [ + "sha256:052774848f448cf19c7e959adf5566904d525f33a3f8b6ba6f6f8f26ec7de0cc", + "sha256:c2c1c2d44f158cdbddab7824a9af8c4f83c76b1e23e049479aa432feb6c4c23b" + ], + "markers": "python_version >= '3.0'", + "version": "==2.2.1" + }, + "tqdm": { + "hashes": [ + "sha256:9fdf349068d047d4cfbe24862c425883af1db29bcddf4b0eeb2524f6fbdb23c7", + "sha256:d666ae29164da3e517fcf125e41d4fe96e5bb375cd87ff9763f6b38b5592fe33" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==4.59.0" + }, + "urllib3": { + "hashes": [ + 
"sha256:2f4da4594db7e1e110a944bb1b551fdf4e6c136ad42e4234131391e21eb5b0df", + "sha256:e7b021f7241115872f92f43c6508082facffbd1c048e3c6e2bb9c2a157e28937" + ], + "index": "pypi", + "version": "==1.26.4" + }, + "websockets": { + "hashes": [ + "sha256:0e4fb4de42701340bd2353bb2eee45314651caa6ccee80dbd5f5d5978888fed5", + "sha256:1d3f1bf059d04a4e0eb4985a887d49195e15ebabc42364f4eb564b1d065793f5", + "sha256:20891f0dddade307ffddf593c733a3fdb6b83e6f9eef85908113e628fa5a8308", + "sha256:295359a2cc78736737dd88c343cd0747546b2174b5e1adc223824bcaf3e164cb", + "sha256:2db62a9142e88535038a6bcfea70ef9447696ea77891aebb730a333a51ed559a", + "sha256:3762791ab8b38948f0c4d281c8b2ddfa99b7e510e46bd8dfa942a5fff621068c", + "sha256:3db87421956f1b0779a7564915875ba774295cc86e81bc671631379371af1170", + "sha256:3ef56fcc7b1ff90de46ccd5a687bbd13a3180132268c4254fc0fa44ecf4fc422", + "sha256:4f9f7d28ce1d8f1295717c2c25b732c2bc0645db3215cf757551c392177d7cb8", + "sha256:5c01fd846263a75bc8a2b9542606927cfad57e7282965d96b93c387622487485", + "sha256:5c65d2da8c6bce0fca2528f69f44b2f977e06954c8512a952222cea50dad430f", + "sha256:751a556205d8245ff94aeef23546a1113b1dd4f6e4d102ded66c39b99c2ce6c8", + "sha256:7ff46d441db78241f4c6c27b3868c9ae71473fe03341340d2dfdbe8d79310acc", + "sha256:965889d9f0e2a75edd81a07592d0ced54daa5b0785f57dc429c378edbcffe779", + "sha256:9b248ba3dd8a03b1a10b19efe7d4f7fa41d158fdaa95e2cf65af5a7b95a4f989", + "sha256:9bef37ee224e104a413f0780e29adb3e514a5b698aabe0d969a6ba426b8435d1", + "sha256:c1ec8db4fac31850286b7cd3b9c0e1b944204668b8eb721674916d4e28744092", + "sha256:c8a116feafdb1f84607cb3b14aa1418424ae71fee131642fc568d21423b51824", + "sha256:ce85b06a10fc65e6143518b96d3dca27b081a740bae261c2fb20375801a9d56d", + "sha256:d705f8aeecdf3262379644e4b55107a3b55860eb812b673b28d0fbc347a60c55", + "sha256:e898a0863421650f0bebac8ba40840fc02258ef4714cb7e1fd76b6a6354bda36", + "sha256:f8a7bff6e8664afc4e6c28b983845c5bc14965030e3fb98789734d416af77c4b" + ], + "markers": "python_full_version >= '3.6.1'", + "version": 
"==8.1" + }, + "wheel": { + "hashes": [ + "sha256:78b5b185f0e5763c26ca1e324373aadd49182ca90e825f7853f4b2509215dc0e", + "sha256:e11eefd162658ea59a60a0f6c7d493a7190ea4b9a85e335b33489d9f17e0245e" + ], + "index": "pypi", + "version": "==0.36.2" + } + }, + "develop": {} +} diff --git a/src/requirements.txt b/src/requirements.txt new file mode 100644 index 0000000..50de5f4 --- /dev/null +++ b/src/requirements.txt @@ -0,0 +1,4 @@ +docopt +https://github.com/cisagov/hash-http-content/archive/v0.0.1.tar.gz +requests +urllib3 diff --git a/src/secrets/quote.txt b/src/secrets/quote.txt deleted file mode 100644 index 93ee1a8..0000000 --- a/src/secrets/quote.txt +++ /dev/null @@ -1 +0,0 @@ -There are no secrets better kept than the secrets everybody guesses. diff --git a/src/vdp_scanner.py b/src/vdp_scanner.py new file mode 100644 index 0000000..85df97d --- /dev/null +++ b/src/vdp_scanner.py @@ -0,0 +1,307 @@ +"""Check current federal DotGov domains for a Vulnerability Disclosure Policy (VDP). + +Usage: + vdp_scanner.py [options] local FILE + vdp_scanner.py [options] github + +Arguments: + FILE The local CSV file to use. + +Options: + -h, --help Show this help message. + -v, --version Show script version. + -d, --debug Enable debugging output. + -a, --agency-csv=AGENCY_CSV Filename to use for agency results. + -t, --domain-csv=DOMAIN_CSV Filename to use for domain (TLD) results. 
+ -p, --path-to-chrome=PATH Path to the serverless-chrome binary being used + [default: /usr/local/bin/serverless-chrome] +""" + +# Standard Python Libraries +from collections import defaultdict +import csv +from datetime import datetime +import logging +from os.path import join as path_join +from typing import Any, Dict, List, NamedTuple, Optional, Tuple +from urllib.parse import urlparse, urlunparse + +# Third-Party Libraries +import docopt +import requests +import urllib3 + +# cisagov Libraries +from hash_http_content import UrlHasher, UrlResult + + +class DomainResult(NamedTuple): + """Structured format for a domain check result.""" + + domain: str + agency: str + organization: str + security_contact: str + visited_url: str + is_redirect: bool + vdp_present: bool + + +class VdpScanner: + """Class to handle scanning and outputting the results of any scans.""" + + # Value that represents a missing security contact in a GSA formatted domain + # list CSV. + MISSING_SECURITY_CONTACT = "(blank)" + + # Header for the agency level results CSV. + agency_csv_header = [ + "Agency", + "Total Domains", + "Domains with Security Contact Listed", + "Domains with Organization Listed", + "Domains with Matching Organization and Agency", + "Domains with Published VDP", + ] + + # Header for the domain level results CSV. 
+ domain_csv_header = [ + "Domain", + "Agency", + "Organization", + "Security Contact Email", + "Visited URL", + "Was it Redirected", + "VDP is Published", + ] + + def __init__(self, hasher: UrlHasher): + """Initialize variables and perform setup.""" + self._hasher = hasher + file_date = datetime.utcnow().strftime("%Y-%m-%d") + self.agency_csv = f"agency_results_{file_date}.csv" + self.domain_csv = f"domain_results_{file_date}.csv" + self.output_directory = "host_mount" + + self.agency_results: defaultdict = defaultdict( + lambda: {k: 0 for k in self.agency_csv_header[1:]} + ) + + self.domain_results: List[Dict[str, Any]] = [] + + @staticmethod + def _log_vdp_failure(domain: str, err: Exception) -> None: + """Log failure information during check_for_vdp() execution.""" + logging.warning("Unable to retrieve hash for '%s'", domain) + logging.debug("Caught %s", type(err).__name__) + logging.debug(err) + + def check_for_vdp(self, domain: str) -> Tuple[str, bool, bool]: + """Check for a VDP at the given domain and return the relevant information.""" + url = urlparse(f"https://{domain}/vulnerability-disclosure-policy") + result: Optional[UrlResult] = None + + # Try with HTTPS first + try: + result = self._hasher.hash_url(urlunparse(url)) + # If there is a TLS issue, try running it without verifying + except requests.exceptions.SSLError: + logging.warning( + "Falling back to HTTPS without TLS verification for '%s'", domain + ) + try: + # Fallback to unverified TLS + result = self._hasher.hash_url(urlunparse(url), verify=False) + # If this also fails, fallback to HTTP + except (requests.exceptions.ConnectionError, requests.exceptions.Timeout): + logging.warning("Falling back to HTTP for '%s'", domain) + # Try connecting to the HTTP endpoint instead + try: + result = self._hasher.hash_url( + urlunparse(url._replace(scheme="http")) + ) + # If we're unable to successfully retrieve the URL for some reason + except Exception as err: + self._log_vdp_failure(domain, err) + # 
Fallback to HTTP in case there is no HTTPS for the given domain + except (requests.exceptions.ConnectionError, requests.exceptions.Timeout): + logging.warning("Falling back to HTTP for '%s'", domain) + # Try connecting to the HTTP endpoint instead + try: + result = self._hasher.hash_url(urlunparse(url._replace(scheme="http"))) + # If we're unable to successfully retrieve the URL for some reason + except Exception as err: + self._log_vdp_failure(domain, err) + except Exception as err: + self._log_vdp_failure(domain, err) + + if not result: + return ("", False, False) + + if result.status == 200: + return (result.visited_url, result.is_redirect, True) + + return (result.visited_url, result.is_redirect, False) + + def process_domain(self, domain_info: Dict[str, Any]) -> None: + """Process a domain entry from the DotGov CSV.""" + # These are direct copies from current-federal.csv + vdp_result = self.check_for_vdp(domain_info["Domain Name"]) + + self.add_domain_result( + DomainResult( + domain_info["Domain Name"], + domain_info["Agency"], + domain_info["Organization"], + domain_info["Security Contact Email"], + *vdp_result, + ) + ) + + def add_domain_result(self, result: DomainResult) -> None: + """Process the provided results for a domain.""" + result_dict = { + "Domain": result.domain, + "Agency": result.agency, + "Organization": result.organization, + "Security Contact Email": result.security_contact, + "Visited URL": result.visited_url, + "Was it Redirected": result.is_redirect, + "VDP is Published": result.vdp_present, + } + self.domain_results.append(result_dict) + + self.agency_results[result.agency]["Total Domains"] += 1 + + if ( + result.security_contact + and result.security_contact != self.MISSING_SECURITY_CONTACT + ): + self.agency_results[result.agency][ + "Domains with Security Contact Listed" + ] += 1 + + if result.organization: + self.agency_results[result.agency]["Domains with Organization Listed"] += 1 + + if result.agency == result.organization: + 
self.agency_results[result.agency][ + "Domains with Matching Organization and Agency" + ] += 1 + + if result.vdp_present: + self.agency_results[result.agency]["Domains with Published VDP"] += 1 + + def output_agency_csv(self) -> None: + """Output the agency results to a CSV.""" + file = path_join(self.output_directory, self.agency_csv) + with open(file, "w") as csv_out: + agency_output = csv.DictWriter( + csv_out, fieldnames=VdpScanner.agency_csv_header + ) + agency_output.writeheader() + for agency, info in self.agency_results.items(): + output_dict = {"Agency": agency, **info} + agency_output.writerow(output_dict) + + def output_domain_csv(self) -> None: + """Output the domain results to a CSV.""" + file = path_join(self.output_directory, self.domain_csv) + with open(file, "w") as csv_out: + domain_output = csv.DictWriter( + csv_out, fieldnames=VdpScanner.domain_csv_header + ) + domain_output.writeheader() + for result in self.domain_results: + domain_output.writerow(result) + + def output_all_csvs(self) -> None: + """Output all CSVs.""" + self.output_agency_csv() + self.output_domain_csv() + + +def get_version(version_file) -> str: + """Extract a version number from the given file path.""" + with open(version_file) as vfile: + for line in vfile.read().splitlines(): + if line.startswith("__version__"): + delim = '"' if '"' in line else "'" + return line.split(delim)[1] + + raise RuntimeError("Unable to find version string.") + + +def get_local_csv(file: str) -> List[Dict[str, str]]: + """Load domains from a local CSV file.""" + with open(file) as csv_file: + csv_lines = [line.rstrip() for line in csv_file.readlines()] + + return list(csv.DictReader(csv_lines)) + + +def get_remote_csv() -> List[Dict[str, str]]: + """Load domains from the CSV at the given URL.""" + resp = requests.get( + "https://raw.githubusercontent.com/GSA/data/master/dotgov-domains/current-federal.csv" + ) + if resp.status_code != 200: + return [] + csv_lines = [str(line, resp.encoding) for 
line in resp.iter_lines()] + + return list(csv.DictReader(csv_lines)) + + +def main(): + """Scan hosts with the hash-http-content package and output results.""" + __version__: str = get_version("version.txt") + args: Dict[str, Any] = docopt.docopt(__doc__, version=__version__) + + log_level = logging.DEBUG if args["--debug"] else logging.INFO + logging.basicConfig( + format="%(asctime)-15s %(levelname)s %(message)s", level=log_level + ) + + # If we make a call to UrlHasher.hash_url() with verify=False, it will output + # a warning. Since this is a fallback mechanism, we can squelch these warnings. + urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) + + browser_opts = { + "args": [ + "--no-sandbox", + "--disable-gpu", + "--disable-dev-shm-usage", + "--no-zygote", + ], + "executablePath": args["--path-to-chrome"], + } + http_hasher = UrlHasher("sha256", browser_options=browser_opts) + + scanner: VdpScanner = VdpScanner(http_hasher) + if args["--agency-csv"]: + scanner.agency_csv = args["--agency-csv"] + if args["--domain-csv"]: + scanner.domain_csv = args["--domain-csv"] + + domains_to_scan: List[Dict[str, str]] + + if args["local"]: + domains_to_scan = get_local_csv(path_join("host_mount", args["FILE"])) + + if args["github"]: + domains_to_scan = get_remote_csv() + + total_domains = len(domains_to_scan) + for i, domain_info in enumerate( + sorted(domains_to_scan, key=lambda d: d["Domain Name"]), start=1 + ): + logging.info( + "Processing '%s' (%d/%d)...", domain_info["Domain Name"], i, total_domains + ) + scanner.process_domain(domain_info) + + scanner.output_all_csvs() + + +if __name__ == "__main__": + main() diff --git a/tests/conftest.py b/tests/conftest.py index 28d6c42..4c4452a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,7 +5,7 @@ # Third-Party Libraries import pytest -MAIN_SERVICE_NAME = "example" +MAIN_SERVICE_NAME = "vdp-scanner" VERSION_SERVICE_NAME = f"{MAIN_SERVICE_NAME}-version" diff --git a/tests/container_test.py 
b/tests/container_test.py index 6153028..07d062a 100644 --- a/tests/container_test.py +++ b/tests/container_test.py @@ -1,19 +1,12 @@ #!/usr/bin/env pytest -vs -"""Tests for example container.""" +"""Tests for vdp-scanner container.""" # Standard Python Libraries import os -import time # Third-Party Libraries import pytest -ENV_VAR = "ECHO_MESSAGE" -ENV_VAR_VAL = "Hello World from docker-compose!" -READY_MESSAGE = "This is a debug message" -SECRET_QUOTE = ( - "There are no secrets better kept than the secrets everybody guesses." # nosec -) RELEASE_TAG = os.getenv("RELEASE_TAG") VERSION_FILE = "src/version.txt" @@ -26,35 +19,6 @@ def test_container_count(dockerc): ), "Wrong number of containers were started." -def test_wait_for_ready(main_container): - """Wait for container to be ready.""" - TIMEOUT = 10 - for i in range(TIMEOUT): - if READY_MESSAGE in main_container.logs().decode("utf-8"): - break - time.sleep(1) - else: - raise Exception( - f"Container does not seem ready. " - f'Expected "{READY_MESSAGE}" in the log within {TIMEOUT} seconds.' - ) - - -def test_wait_for_exits(main_container, version_container): - """Wait for containers to exit.""" - assert main_container.wait() == 0, "Container service (main) did not exit cleanly" - assert ( - version_container.wait() == 0 - ), "Container service (version) did not exit cleanly" - - -def test_output(main_container): - """Verify the container had the correct output.""" - main_container.wait() # make sure container exited if running test isolated - log_output = main_container.logs().decode("utf-8") - assert SECRET_QUOTE in log_output, "Secret not found in log output." - - @pytest.mark.skipif( RELEASE_TAG in [None, ""], reason="this is not a release (RELEASE_TAG not set)" )