Skip to content

Commit

Permalink
First steps to lambda deploy
Browse files Browse the repository at this point in the history
So far, this commit adds a SAM template that creates a Lambda function
to build a list of scrapers to run and send it to an SQS queue, and
a second Lambda function to run the scrapers.
  • Loading branch information
GeoWill committed Jun 16, 2021
1 parent 974bcb2 commit 3482e26
Show file tree
Hide file tree
Showing 10 changed files with 669 additions and 71 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ data
.DS_Store
.pytest_cache
*.sqlite
.aws-sam


# Byte-compiled / optimized / DLL files
Expand Down
2 changes: 2 additions & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@ python-dateutil = "*"
retry = "*"
black = "*"
rich = "*"
boto3 = "*"

[dev-packages]
pytest-mypy-plugins = "*"
aws-sam-cli = "*"

[requires]
python_version = "3.8"
Expand Down
529 changes: 461 additions & 68 deletions Pipfile.lock

Large diffs are not rendered by default.

Empty file added lgsf/aws_lambda/__init__.py
Empty file.
20 changes: 20 additions & 0 deletions lgsf/aws_lambda/fixtures/sqs-message.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"Records": [
{
"messageId": "19dd0b57-b21e-4ac1-bd88-01bbb068cb78",
"receiptHandle": "MessageReceiptHandle",
"body": "{\"scraper_type\": \"councillors\",\"council\": \"WLV\"}",
"attributes": {
"ApproximateReceiveCount": "1",
"SentTimestamp": "1523232000000",
"SenderId": "123456789012",
"ApproximateFirstReceiveTimestamp": "1523232000001"
},
"messageAttributes": {},
"md5OfBody": "7b270e59b47ff90a553787216d55d91d",
"eventSource": "aws:sqs",
"eventSourceARN": "arn:aws:sqs:us-east-1:123456789012:MyQueue",
"awsRegion": "eu-west-2"
}
]
}
34 changes: 34 additions & 0 deletions lgsf/aws_lambda/handlers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import json
import sys
import boto3
from lgsf.councillors.commands import Command
from lgsf.path_utils import load_scraper


def scraper_worker_handler(event, context):
    """Run the councillors scraper for every SQS message in ``event``.

    Each record's ``body`` must be a JSON object with a ``council`` key,
    which is the shape ``queue_builder_handler`` sends.

    Args:
        event: Lambda event from an SQS trigger; ``event["Records"]`` holds
            the batch of messages.
        context: Lambda context object (unused).

    Raises:
        KeyError: If the event has no ``Records`` key, a record has no
            ``body``, or a message body has no ``council`` key.
        json.JSONDecodeError: If a message body is not valid JSON.
    """
    print(event)
    # The SQS trigger can deliver more than one message per invocation, and
    # a successful return removes the whole batch from the queue, so every
    # record has to be handled here -- not just the first one.
    for record in event["Records"]:
        message = json.loads(record["body"])
        council = message["council"]
        # execute=False builds the command without running it; it is only
        # used here to obtain command_name for the scraper lookup.
        command = Command(
            argv=["", "--council", council], stdout=sys.stdout, execute=False
        )
        scraper_cls = load_scraper(council, command.command_name)
        options = {"council": council}
        scraper = scraper_cls(options, None)
        scraper.run()


def queue_builder_handler(event, context):
    """Send one councillors-scraper task per council to the "ScraperQueue" SQS queue.

    Args:
        event: Lambda event (unused).
        context: Lambda context object (unused).
    """
    # Build the command without executing it, then set its options by hand;
    # presumably councils_to_run() reads them -- verify against Command.
    command = Command(
        argv=["", "--all-councils"], stdout=sys.stdout, execute=False
    )
    command.options = {"all_councils": True}
    targets = command.councils_to_run()

    scraper_queue = boto3.resource("sqs").get_queue_by_name(QueueName="ScraperQueue")

    for council in targets:
        # TODO Define this somewhere else so scraper_worker_handler can share it.
        payload = dict(scraper_type="councillors", council=council)
        scraper_queue.send_message(MessageBody=json.dumps(payload))
        print(payload)
7 changes: 4 additions & 3 deletions lgsf/commands/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,15 @@
class CommandBase(metaclass=abc.ABCMeta):
command_name = None

def __init__(self, argv, stdout):
def __init__(self, argv, stdout, execute=True):
self.argv = argv
self.create_parser()
self.stdout = stdout
self.console = Console(file=self.stdout)

# After all local vars are set up
self.execute()
if execute:
# After all local vars are set up
self.execute()

def create_parser(self):
self.parser = argparse.ArgumentParser()
Expand Down
74 changes: 74 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#
# These requirements were autogenerated by pipenv
# To regenerate from the project's Pipfile, run:
#
# pipenv lock --requirements
#

-i https://pypi.python.org/simple
appdirs==1.4.4
astroid==2.5.6; python_version ~= '3.6'
attrs==21.2.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
backcall==0.2.0
beautifulsoup4==4.9.3
black==21.6b0
certifi==2021.5.30
cffi==1.14.5
chardet==4.0.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
click==8.0.1; python_version >= '3.6'
colorama==0.4.4; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
commonmark==0.9.1
cryptography==3.4.7; python_version >= '3.6'
decorator==5.0.9; python_version >= '3.5'
html5lib==1.1
idna==2.10; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
iniconfig==1.1.1
ipdb==0.13.9
ipython-genutils==0.2.0
ipython==7.24.1; python_version >= '3.7'
isort==5.8.0; python_version >= '3.6' and python_version < '4.0'
itsdangerous==2.0.1; python_version >= '3.6'
jedi==0.18.0; python_version >= '3.6'
lazy-object-proxy==1.6.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'
lxml==4.6.3
matplotlib-inline==0.1.2; python_version >= '3.5'
mccabe==0.6.1
mypy-extensions==0.4.3
packaging==20.9; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
parso==0.8.2; python_version >= '3.6'
pathspec==0.8.1
pexpect==4.8.0; sys_platform != 'win32'
pickleshare==0.7.5
pluggy==0.13.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
prompt-toolkit==3.0.18; python_full_version >= '3.6.1'
ptyprocess==0.7.0
py==1.10.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
pycodestyle==2.7.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
pycparser==2.20; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
pyflakes==2.3.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
pygments==2.9.0; python_version >= '3.5'
pylint==2.8.3; python_version ~= '3.6'
pyopenssl==20.0.1
pyparsing==2.4.7; python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'
pytest-blockage==0.2.2
pytest-codestyle==2.0.1
pytest-flakes==4.0.3
pytest-pylint==0.18.0
pytest==6.2.4
python-dateutil==2.8.1
python-slugify==5.0.2
regex==2021.4.4
requests-cache==0.6.4
requests==2.25.1
retry==0.9.2
rich==10.3.0
six==1.16.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
soupsieve==2.2.1; python_version >= '3.0'
text-unidecode==1.3
toml==0.10.2; python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'
traitlets==5.0.5; python_version >= '3.7'
url-normalize==1.4.3; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'
urllib3==1.26.5; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4.0'
wcwidth==0.2.5
webencodings==0.5.1
wrapt==1.12.1
63 changes: 63 additions & 0 deletions sam-template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
AWSTemplateFormatVersion: '2010-09-09'
Transform: AWS::Serverless-2016-10-31
Description: >
lgsf-sam
Sample SAM Template for lgsf-sam
# More info about Globals: https://github.com/awslabs/serverless-application-model/blob/master/docs/globals.rst
Globals:
Function:
Timeout: 3


Resources:
QueueBuilderFunction:
Type: AWS::Serverless::Function # More info about Function Resource: https://github.com/awslabs/serverless-application-model/blob/master/versions/2016-10-31.md#awsserverlessfunction
Properties:
CodeUri: .
Handler: lgsf.aws_lambda.handlers.queue_builder_handler
Runtime: python3.8
Events:
QueueBuilder:
Type: Schedule # More info about Schedule Event Source: https://github.com/aws/serverless-application-model/blob/master/versions/2016-10-31.md#schedule
Properties:
Description: Send scraper tasks to SQS
Enabled: true
Name: queue-builder
Schedule: rate(1 day) # A cron(...) expression would also work here
Role: !Sub "arn:aws:iam::${AWS::AccountId}:role/LGSFLambdaExecutionRole"

SQSScraperQueue:
Type: AWS::SQS::Queue
Properties:
QueueName: "ScraperQueue"

ScraperWorkerFunction:
Type: AWS::Serverless::Function # More info about Function Resource: https://github.com/awslabs/serverless-application-model/blob/master/versions/2016-10-31.md#awsserverlessfunction
Properties:
CodeUri: .
Handler: lgsf.aws_lambda.handlers.scraper_worker_handler
Runtime: python3.8
Events:
SQSEvent:
Type: SQS
Properties:
Queue: !GetAtt SQSScraperQueue.Arn
BatchSize: 10
Enabled: true
Role: !Sub "arn:aws:iam::${AWS::AccountId}:role/LGSFLambdaExecutionRole"



Outputs:
  # Find out more about other implicit resources you can reference within SAM
  # https://github.com/awslabs/serverless-application-model/blob/master/docs/internals/generated_resources.rst#api
  QueueBuilderFunction:
    Description: "Queue Builder Lambda Function ARN"
    Value: !GetAtt QueueBuilderFunction.Arn
  QueueBuilderFunctionIamRole:
    # The function is given an explicit Role, so there is no implicit role
    # resource to !GetAtt; output the ARN of the role actually assigned
    # instead of repeating the function ARN.
    Description: "IAM Role assigned to Queue Builder function"
    Value: !Sub "arn:aws:iam::${AWS::AccountId}:role/LGSFLambdaExecutionRole"

## SQS Outputs
10 changes: 10 additions & 0 deletions samconfig.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
version = 0.1
[default]
[default.deploy]
[default.deploy.parameters]
stack_name = "lgsf-dev"
s3_bucket = "aws-sam-cli-managed-default-samclisourcebucket-mw12yk2c9frr"
s3_prefix = "lgsf-dev"
region = "eu-west-2"
profile = "dc-lgsf-dev"
capabilities = "CAPABILITY_IAM"

0 comments on commit 3482e26

Please sign in to comment.