From 6b678b3ad22be28324e3e411d6436fef88cd5dbd Mon Sep 17 00:00:00 2001 From: Paige Gulley Date: Wed, 26 Jun 2024 13:09:16 -0400 Subject: [PATCH] Add Airtable Update Script (#303) * Airtable upgrade script, related requirement update * added update call to deploy.sh * Removed the app interface, other tweaks * Added info field to airtable record, so we still get a record if things are configured weird * friendlier options for env * Remove bespoke script, use central management repository --------- Co-authored-by: Paige Gulley Co-authored-by: Paige Gulley --- .pre-commit-config.yaml | 8 +-- docker/deploy.sh | 13 +++++ pyproject.toml | 2 + requirements-dev.txt | 118 +++++++++++++++++++++------------------- requirements.txt | 70 ++++++++++++------------ 5 files changed, 117 insertions(+), 94 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index dc566769..84053c99 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,7 +2,7 @@ # See https://pre-commit.com/hooks.html for more hooks repos: - repo: http://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + rev: v4.6.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer @@ -11,18 +11,18 @@ repos: - id: check-json - id: check-toml - repo: http://github.com/ambv/black - rev: 24.2.0 + rev: 24.4.2 hooks: - id: black language_version: python3.10 - repo: http://github.com/pre-commit/mirrors-mypy - rev: v1.9.0 + rev: v1.10.0 hooks: - id: mypy entry: bin/pre-commit-wrapper.py mypy additional_dependencies: ["pip==22.0.*"] - repo: http://github.com/pycqa/flake8 - rev: 7.0.0 + rev: 7.1.0 hooks: - id: flake8 - repo: http://github.com/pycqa/isort diff --git a/docker/deploy.sh b/docker/deploy.sh index bef75ef4..1478df41 100755 --- a/docker/deploy.sh +++ b/docker/deploy.sh @@ -40,6 +40,11 @@ if [ "x$(which jinja2)" = x ]; then fi fi +if ! python3 -m mc-manage.airtable-deployment-update --help >/dev/null; then + echo FATAL: deployment requires an up-to-date venv with pyairtable requirements 1>&2 + exit 3 +fi + # capture command line DEPLOYMENT_OPTIONS="$*" @@ -841,3 +846,11 @@ echo "$DATE_TIME $HOSTNAME $STACK_NAME $NOTE" >> deploy.log # XXX chown to LOGIN_USER? # optionally prune old images? + +#report deployment to airtable +export AIRTABLE_API_KEY +export MEAG_BASE_ID +if [ "x$AIRTABLE_API_KEY" != x ]; then + ##Is DEPLOYMENT_HOST always right here? as far as I can tell all the stacks get thrown onto ramos regardless. + python3 -m mc-manage.airtable-deployment-update --codebase "story-indexer" --name $STACK_NAME --env $DEPLOYMENT_TYPE --version $IMAGE_TAG --hardware $HOSTNAME +fi diff --git a/pyproject.toml b/pyproject.toml index 2754e197..e45acb85 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,8 +26,10 @@ dev = [ "boto3-stubs[s3] ~= 1.34.13", "jinja2-cli ~= 0.8.2", "lxml-stubs ~= 0.5.1", + "mc-manage @ git+https://github.com/mediacloud/mc-manage@v1.1.4", "mypy ~= 1.5.1", "pre-commit ~= 3.4.0", + "pyairtable ~= 2.3.3", "pytest ~= 7.4.2", "types-beautifulsoup4 ~= 4.12.0.20240106", "types-pika ~= 1.2.0b1", diff --git a/requirements-dev.txt b/requirements-dev.txt index 9e1b0399..5f79f258 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,9 +1,11 @@ # -# This file is autogenerated by pip-compile with Python 3.10 +# This file is autogenerated by pip-compile with Python 3.12 # by the following command: # # pip-compile --allow-unsafe --extra=dev --output-file=requirements-dev.txt --strip-extras pyproject.toml # +annotated-types==0.7.0 + # via pydantic attrs==23.2.0 # via # automat @@ -11,6 +13,8 @@ attrs==23.2.0 # twisted automat==22.10.0 # via twisted +babel==2.15.0 + # via courlan beautifulsoup4==4.12.3 # via # feedfinder2 @@ -22,15 +26,15 @@ boilerpy3==1.0.7 # via mediacloud-metadata boto3==1.28.85 # via story-indexer (pyproject.toml) -boto3-stubs==1.34.62 +boto3-stubs==1.34.128 # via story-indexer (pyproject.toml) botocore==1.31.85 # via # boto3 # s3transfer -botocore-stubs==1.34.62 +botocore-stubs==1.34.128 # via boto3-stubs -certifi==2024.2.2 +certifi==2024.6.2 # via # elastic-transport # requests @@ -51,9 +55,9 @@ click==8.1.7 # via nltk constantly==23.10.4 # via twisted -courlan==1.0.0 +courlan==1.2.0 # via trafilatura -cryptography==42.0.5 +cryptography==42.0.8 # via # pyopenssl # scrapy @@ -69,23 +73,23 @@ dateparser==1.2.0 # via # htmldate # mediacloud-metadata +defusedxml==0.7.1 + # via scrapy distlib==0.3.8 # via virtualenv docker==6.1.3 # via story-indexer (pyproject.toml) -elastic-transport==8.12.0 +elastic-transport==8.13.1 # via elasticsearch elasticsearch==8.12.1 # via story-indexer (pyproject.toml) -exceptiongroup==1.2.0 - # via pytest faust-cchardet==2.1.19 # via mediacloud-metadata feedfinder2==0.0.4 # via newspaper3k feedparser==6.0.11 # via newspaper3k -filelock==3.13.1 +filelock==3.15.1 # via # tldextract # virtualenv @@ -99,26 +103,28 @@ htmldate==1.7.0 # trafilatura hyperlink==21.0.0 # via twisted -identify==2.5.35 +identify==2.5.36 # via pre-commit -idna==3.6 +idna==3.7 # via # hyperlink # requests # tldextract incremental==22.10.0 # via twisted +inflection==0.5.1 + # via pyairtable iniconfig==2.0.0 # via pytest -itemadapter==0.8.0 +itemadapter==0.9.0 # via # itemloaders # scrapy -itemloaders==1.1.0 +itemloaders==1.3.1 # via scrapy jieba3k==0.35.1 # via newspaper3k -jinja2==3.1.3 +jinja2==3.1.4 # via jinja2-cli jinja2-cli==0.8.2 # via story-indexer (pyproject.toml) @@ -128,12 +134,10 @@ jmespath==1.0.1 # botocore # itemloaders # parsel -joblib==1.3.2 +joblib==1.4.2 # via nltk -justext==3.0.0 +justext==3.0.1 # via trafilatura -langcodes==3.3.0 - # via courlan langdetect==1.0.9 # via goose3 lxml==4.9.4 @@ -154,7 +158,7 @@ mediacloud-metadata==0.12.0 # via story-indexer (pyproject.toml) mypy==1.5.1 # via story-indexer (pyproject.toml) -mypy-boto3-s3==1.34.62 +mypy-boto3-s3==1.34.120 # via boto3-stubs mypy-extensions==1.0.0 # via mypy @@ -162,48 +166,52 @@ newspaper3k==0.2.8 # via mediacloud-metadata nltk==3.8.1 # via newspaper3k -nodeenv==1.8.0 +nodeenv==1.9.1 # via pre-commit -numpy==1.26.4 +numpy==2.0.0 # via py3langid orderedmultidict==1.0.1 # via furl -packaging==24.0 +packaging==24.1 # via # docker # parsel # pytest # scrapy -parsel==1.9.0 +parsel==1.9.1 # via # itemloaders # scrapy pika==1.3.2 # via story-indexer (pyproject.toml) -pillow==10.2.0 +pillow==10.3.0 # via # goose3 # newspaper3k -platformdirs==4.2.0 +platformdirs==4.2.2 # via virtualenv -pluggy==1.4.0 +pluggy==1.5.0 # via pytest pre-commit==3.4.0 # via story-indexer (pyproject.toml) -protego==0.3.0 +protego==0.3.1 # via scrapy py3langid==0.2.2 # via mediacloud-metadata -pyahocorasick==2.0.0 +pyahocorasick==2.1.0 # via goose3 -pyasn1==0.5.1 +pyasn1==0.6.0 # via # pyasn1-modules # service-identity -pyasn1-modules==0.3.0 +pyasn1-modules==0.4.0 # via service-identity -pycparser==2.21 +pycparser==2.22 # via cffi +pydantic==2.7.4 + # via pyairtable +pydantic-core==2.18.4 + # via pydantic pydispatcher==2.0.7 # via scrapy pyopenssl==24.1.0 @@ -223,31 +231,32 @@ pyyaml==6.0.1 # via # newspaper3k # pre-commit -queuelib==1.6.2 +queuelib==1.7.0 # via scrapy rabbitmq-admin==0.2 # via story-indexer (pyproject.toml) readability-lxml==0.8.1 # via mediacloud-metadata -regex==2023.12.25 +regex==2024.5.15 # via # dateparser # nltk -requests==2.31.0 +requests==2.32.3 # via # docker # feedfinder2 # goose3 # mediacloud-metadata # newspaper3k + # pyairtable # rabbitmq-admin # requests-file # tldextract -requests-file==2.0.0 +requests-file==2.1.0 # via tldextract s3transfer==0.7.0 # via boto3 -scrapy==2.11.1 +scrapy==2.11.2 # via story-indexer (pyproject.toml) sentry-sdk==1.34.0 # via story-indexer (pyproject.toml) @@ -279,39 +288,36 @@ tinysegmenter==0.3 # via newspaper3k tld==0.13 # via courlan -tldextract==5.1.1 +tldextract==5.1.2 # via # mediacloud-metadata # newspaper3k # scrapy # surt -tomli==2.0.1 - # via - # mypy - # pytest -tqdm==4.66.2 +tqdm==4.66.4 # via nltk trafilatura==1.6.4 # via mediacloud-metadata twisted==24.3.0 # via scrapy -types-awscrt==0.20.5 +types-awscrt==0.20.12 # via botocore-stubs -types-beautifulsoup4==4.12.0.20240229 +types-beautifulsoup4==4.12.0.20240511 # via story-indexer (pyproject.toml) types-html5lib==1.1.11.20240228 # via types-beautifulsoup4 types-pika==1.2.0b1 # via story-indexer (pyproject.toml) -types-requests==2.31.0.20240311 +types-requests==2.31.0.20240406 # via story-indexer (pyproject.toml) -types-s3transfer==0.10.0 +types-s3transfer==0.10.1 # via boto3-stubs -typing-extensions==4.10.0 +typing-extensions==4.12.2 # via - # boto3-stubs # mypy - # mypy-boto3-s3 + # pyairtable + # pydantic + # pydantic-core # twisted tzlocal==5.2 # via dateparser @@ -324,30 +330,30 @@ urllib3==2.0.7 # docker # elastic-transport # htmldate + # pyairtable # requests # sentry-sdk # trafilatura # types-requests -virtualenv==20.25.1 +virtualenv==20.26.2 # via pre-commit -w3lib==2.1.2 +w3lib==2.2.1 # via # itemloaders # parsel # scrapy warcio==1.7.4 # via story-indexer (pyproject.toml) -websocket-client==1.7.0 +websocket-client==1.8.0 # via docker -zope-interface==6.2 +zope-interface==6.4.post2 # via # scrapy # twisted # The following packages are considered to be unsafe in a requirements file: -setuptools==69.2.0 +setuptools==70.0.0 # via - # nodeenv # scrapy # supervisor # zope-interface diff --git a/requirements.txt b/requirements.txt index 63fdf301..1754bd57 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.10 +# This file is autogenerated by pip-compile with Python 3.12 # by the following command: # # pip-compile --allow-unsafe --strip-extras pyproject.toml @@ -11,6 +11,8 @@ attrs==23.2.0 # twisted automat==22.10.0 # via twisted +babel==2.15.0 + # via courlan beautifulsoup4==4.12.3 # via # feedfinder2 @@ -26,7 +28,7 @@ botocore==1.31.85 # via # boto3 # s3transfer -certifi==2024.2.2 +certifi==2024.6.2 # via # elastic-transport # requests @@ -45,9 +47,9 @@ click==8.1.7 # via nltk constantly==23.10.4 # via twisted -courlan==1.0.0 +courlan==1.2.0 # via trafilatura -cryptography==42.0.5 +cryptography==42.0.8 # via # pyopenssl # scrapy @@ -63,9 +65,11 @@ dateparser==1.2.0 # via # htmldate # mediacloud-metadata +defusedxml==0.7.1 + # via scrapy docker==6.1.3 # via story-indexer (pyproject.toml) -elastic-transport==8.12.0 +elastic-transport==8.13.1 # via elasticsearch elasticsearch==8.12.1 # via story-indexer (pyproject.toml) @@ -75,7 +79,7 @@ feedfinder2==0.0.4 # via newspaper3k feedparser==6.0.11 # via newspaper3k -filelock==3.13.1 +filelock==3.15.1 # via tldextract furl==2.1.3 # via mediacloud-metadata @@ -87,18 +91,18 @@ htmldate==1.7.0 # trafilatura hyperlink==21.0.0 # via twisted -idna==3.6 +idna==3.7 # via # hyperlink # requests # tldextract incremental==22.10.0 # via twisted -itemadapter==0.8.0 +itemadapter==0.9.0 # via # itemloaders # scrapy -itemloaders==1.1.0 +itemloaders==1.3.1 # via scrapy jieba3k==0.35.1 # via newspaper3k @@ -108,12 +112,10 @@ jmespath==1.0.1 # botocore # itemloaders # parsel -joblib==1.3.2 +joblib==1.4.2 # via nltk -justext==3.0.0 +justext==3.0.1 # via trafilatura -langcodes==3.3.0 - # via courlan langdetect==1.0.9 # via goose3 lxml==4.9.4 @@ -132,38 +134,38 @@ newspaper3k==0.2.8 # via mediacloud-metadata nltk==3.8.1 # via newspaper3k -numpy==1.26.4 +numpy==2.0.0 # via py3langid orderedmultidict==1.0.1 # via furl -packaging==24.0 +packaging==24.1 # via # docker # parsel # scrapy -parsel==1.9.0 +parsel==1.9.1 # via # itemloaders # scrapy pika==1.3.2 # via story-indexer (pyproject.toml) -pillow==10.2.0 +pillow==10.3.0 # via # goose3 # newspaper3k -protego==0.3.0 +protego==0.3.1 # via scrapy py3langid==0.2.2 # via mediacloud-metadata -pyahocorasick==2.0.0 +pyahocorasick==2.1.0 # via goose3 -pyasn1==0.5.1 +pyasn1==0.6.0 # via # pyasn1-modules # service-identity -pyasn1-modules==0.3.0 +pyasn1-modules==0.4.0 # via service-identity -pycparser==2.21 +pycparser==2.22 # via cffi pydispatcher==2.0.7 # via scrapy @@ -180,17 +182,17 @@ pytz==2024.1 # via dateparser pyyaml==6.0.1 # via newspaper3k -queuelib==1.6.2 +queuelib==1.7.0 # via scrapy rabbitmq-admin==0.2 # via story-indexer (pyproject.toml) readability-lxml==0.8.1 # via mediacloud-metadata -regex==2023.12.25 +regex==2024.5.15 # via # dateparser # nltk -requests==2.31.0 +requests==2.32.3 # via # docker # feedfinder2 @@ -200,11 +202,11 @@ requests==2.31.0 # rabbitmq-admin # requests-file # tldextract -requests-file==2.0.0 +requests-file==2.1.0 # via tldextract s3transfer==0.7.0 # via boto3 -scrapy==2.11.1 +scrapy==2.11.2 # via story-indexer (pyproject.toml) sentry-sdk==1.34.0 # via story-indexer (pyproject.toml) @@ -236,19 +238,19 @@ tinysegmenter==0.3 # via newspaper3k tld==0.13 # via courlan -tldextract==5.1.1 +tldextract==5.1.2 # via # mediacloud-metadata # newspaper3k # scrapy # surt -tqdm==4.66.2 +tqdm==4.66.4 # via nltk trafilatura==1.6.4 # via mediacloud-metadata twisted==24.3.0 # via scrapy -typing-extensions==4.10.0 +typing-extensions==4.12.2 # via twisted tzlocal==5.2 # via dateparser @@ -264,22 +266,22 @@ urllib3==2.0.7 # requests # sentry-sdk # trafilatura -w3lib==2.1.2 +w3lib==2.2.1 # via # itemloaders # parsel # scrapy warcio==1.7.4 # via story-indexer (pyproject.toml) -websocket-client==1.7.0 +websocket-client==1.8.0 # via docker -zope-interface==6.2 +zope-interface==6.4.post2 # via # scrapy # twisted # The following packages are considered to be unsafe in a requirements file: -setuptools==69.2.0 +setuptools==70.0.0 # via # scrapy # supervisor