Skip to content

Commit

Permalink
test new pipelines
Browse files Browse the repository at this point in the history
  • Loading branch information
sbusso committed Mar 23, 2024
1 parent aa48a26 commit bb04ff2
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 63 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -162,3 +162,4 @@ cython_debug/
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
spidy.py
output.json
3 changes: 1 addition & 2 deletions scrapework/config.py
Original file line number Diff line number Diff line change
@@ -11,7 +11,7 @@ class EnvConfig(BaseModel):
@classmethod
def create_config(cls):
fields = {}
- for field_name, field_value in cls.__fields__.items():
+ for field_name, field_value in cls.model_fields.items():
if field_name in os.environ:
fields[field_name] = os.environ[field_name]
else:
@@ -29,5 +29,4 @@ def create_config(cls):

class PipelineConfig(BaseModel):
base_url: str

filename: str
4 changes: 2 additions & 2 deletions scrapework/pipelines.py
Original file line number Diff line number Diff line change
@@ -3,10 +3,10 @@
from typing import Any, Dict, Iterable, Union

import boto3
- from pydantic import Field
+ from pydantic import BaseModel, Field


- class Pipeline(ABC):
+ class Pipeline(ABC, BaseModel):
@abstractmethod
def process_items(
self,
Expand Down
70 changes: 11 additions & 59 deletions tests/test_pipelines.py
Original file line number Diff line number Diff line change
@@ -1,83 +1,35 @@
import json
- from unittest.mock import MagicMock, patch
+ from unittest.mock import patch

from scrapework.config import PipelineConfig
from scrapework.spider import Spider


# Create a concrete subclass of Spider for testing purposes
class ConcreteSpider(Spider):
name = "concrete_spider"

def parse(self):
pass


def test_process_items_with_file_backend():
items = [{"name": "item1"}, {"name": "item2"}]
config = PipelineConfig(
backend=BackendType.FILE,
base_url="https://example.com",
s3_bucket="test-bucket",
filename="test.json",
)
pipeline = ItemPipeline()

with patch("builtins.open", MagicMock()) as mock_open:
pipeline.process_items(items, config)

mock_open.assert_called_once_with("test.json", "w")
from scrapework.pipelines import JsonFilePipeline, S3Pipeline


def test_process_items_with_s3_backend():
items = [{"name": "item1"}, {"name": "item2"}]
config = PipelineConfig(
backend=BackendType.S3,
base_url="https://example.com",
s3_bucket="my-bucket",
filename="example.json",
)
pipeline = ItemPipeline()
pipeline = S3Pipeline(s3_bucket="my-bucket")

with patch("boto3.client") as mock_s3_client:
pipeline.process_items(items, config)
pipeline.process_items(items, config.filename)

mock_s3_client.assert_called_once_with("s3")
mock_s3_client.return_value.put_object.assert_called_once_with(
Body=json.dumps(items), Bucket="my-bucket", Key="example.json"
)


def test_export_to_json():
def test_process_items_with_json_file_backend():
items = [{"name": "item1"}, {"name": "item2"}]
config = PipelineConfig(
backend=BackendType.FILE,
base_url="https://example.com",
s3_bucket="my-bucket",
filename="example.json",
)
pipeline = ItemPipeline()

with patch("builtins.open", MagicMock()) as mock_open:
pipeline.export_to_json(items, config)

mock_open.assert_called_once_with("example.json", "w")

filename = "output.json"
pipeline = JsonFilePipeline()

def test_export_to_s3():
items = [{"name": "item1"}, {"name": "item2"}]
config = PipelineConfig(
backend=BackendType.S3,
base_url="https://example.com",
s3_bucket="my-bucket",
filename="example.json",
)
pipeline = ItemPipeline()
pipeline.process_items(items, filename)

with patch("boto3.client") as mock_s3_client:
pipeline.export_to_s3(items, config)
with open(filename, "r") as f:
data = json.load(f)

mock_s3_client.assert_called_once_with("s3")
mock_s3_client.return_value.put_object.assert_called_once_with(
Body=json.dumps(items), Bucket="my-bucket", Key="example.json"
)
assert data == items

0 comments on commit bb04ff2

Please sign in to comment.