Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Add tests #18

Merged
merged 18 commits into from
Aug 27, 2023
Merged
86 changes: 86 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# CI workflow: starts the JobSpy FastAPI app on the runner and smoke-tests it over HTTP.
name: JobSpy API Tests

# Triggered by every push and every pull request.
on: [push, pull_request]

jobs:
# Single job; all steps below run in order on one Ubuntu runner.
test_api:
runs-on: ubuntu-latest

steps:
# Fetch the repository contents; later steps read requirements.txt and launch main:app from it.
- name: Checkout repository
uses: actions/checkout@v2

# Provision the Python interpreter used by the pip and uvicorn steps that follow.
- name: Set up Python 3.10
uses: actions/setup-python@v2
with:
# Kept as a quoted string: an unquoted 3.10 is a YAML float and would be read as 3.1.
python-version: '3.10'

# Install the packages listed in the repository's requirements.txt.
- name: Install dependencies
run: pip install -r requirements.txt

# jq is used by the later steps to read fields out of response.json.
- name: Install jq
  # -y answers apt's confirmation prompt automatically; the runner has no interactive terminal.
  run: sudo apt-get install -y jq

# Launch the API on port 8000. The trailing & backgrounds uvicorn so this step returns
# immediately; the following steps query the server over HTTP.
- name: Start JobSpy FastAPI app
run: uvicorn main:app --host 0.0.0.0 --port 8000 &

# Block until the server accepts connections, or fail the job if it never does.
- name: Wait for server to be up
  run: |
    # Try once per second, up to 10 attempts. curl exits 0 as soon as it gets any
    # HTTP response (whatever the status code), which is enough to know the
    # server is listening; the response body is not needed here.
    for i in {1..10}; do
      if curl -s -o /dev/null http://0.0.0.0:8000/api/v1/jobs; then
        echo "Server is up after $i attempt(s)"
        exit 0
      fi
      sleep 1
    done

    # Without this explicit failure the loop would end on a successful `sleep`
    # and the step would pass even though the server never started.
    echo "Error: server did not come up after 10 attempts"
    exit 1

# Fail the job unless GET /health answers with HTTP 200.
- name: Check health
  run: |
    # Follow redirects, discard the body, and capture only the final status code.
    health_status=$(curl --location --silent --output /dev/null --write-out "%{http_code}" http://0.0.0.0:8000/health)

    # Any status other than 200 aborts the step with a diagnostic message.
    [ "$health_status" = "200" ] || {
      echo "Error: Health check failed with status code $health_status"
      exit 1
    }

# POST a sample search to /api/v1/jobs, require HTTP 200, and leave the body in
# response.json for the following steps.
- name: Check HTTP status to POST /api/v1/jobs/
  run: |
    # -o writes the body straight to response.json and -w prints only the status
    # code, so the two never have to be concatenated and sliced apart again.
    status_code=$(curl -L -s -X 'POST' -H 'Content-Type: application/json' -d '{
      "site_type": ["indeed", "linkedin"],
      "search_term": "software engineer",
      "location": "austin, tx",
      "distance": 10,
      "job_type": "fulltime",
      "results_wanted": 5
    }' -o response.json -w "%{http_code}" http://0.0.0.0:8000/api/v1/jobs)

    echo "Received status code: $status_code"

    # Print the body before the pass/fail decision so that a failing run still
    # shows whatever error payload the server returned.
    cat response.json

    if [ "$status_code" != "200" ]; then
      echo "Error: Expected status code 200, but got $status_code"
      exit 1
    fi

# Fail the job if either scraper reported an error in response.json.
- name: Check error field in response
  run: |
    # jq prints the literal text null when the field is null or missing.
    indeed_error=$(jq '.indeed.error' response.json)
    linkedin_error=$(jq '.linkedin.error' response.json)

    # Both sites clean: nothing to report.
    if [[ "$indeed_error" == "null" && "$linkedin_error" == "null" ]]; then
      exit 0
    fi

    echo "Error found in response:"
    echo "Indeed Error: $indeed_error"
    echo "LinkedIn Error: $linkedin_error"
    exit 1

# Fail the job unless each site returned exactly the 5 results requested in the POST step.
- name: Verify returned_results in response
  run: |
    indeed_results=$(jq '.indeed.returned_results' response.json)
    linkedin_results=$(jq '.linkedin.returned_results' response.json)

    # Stop successfully here unless at least one count differs from 5.
    [[ $indeed_results -ne 5 || $linkedin_results -ne 5 ]] || exit 0

    echo "Mismatch in results_wanted and returned_results:"
    echo "Indeed: Expected 5, Got $indeed_results"
    echo "LinkedIn: Expected 5, Got $linkedin_results"
    exit 1
8 changes: 5 additions & 3 deletions api/core/scrapers/ziprecruiter/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,9 @@ def process_job(job: Tag) -> Optional[JobPost]:
title = job.find("h2", {"class": "title"}).text
company = job.find("a", {"class": "company_name"}).text.strip()

description, job_url = ZipRecruiterScraper.get_description(job_url, session)
description, updated_job_url = ZipRecruiterScraper.get_description(job_url, session)
if updated_job_url is not None:
job_url = updated_job_url
if description is None:
description = job.find("p", {"class": "job_snippet"}).text.strip()

Expand Down Expand Up @@ -185,7 +187,7 @@ def scrape(self, scraper_input: ScraperInput) -> JobResponse:
@staticmethod
def get_description(
job_page_url: str, session: tls_client.Session
) -> Tuple[Optional[str], str]:
) -> Tuple[Optional[str], Optional[str]]:
"""
Retrieves job description by going to the job page url
:param job_page_url:
Expand All @@ -196,7 +198,7 @@ def get_description(
job_page_url, headers=ZipRecruiterScraper.headers(), allow_redirects=True
)
if response.status_code not in range(200, 400):
return None
return None, None

html_string = response.content
soup_job = BeautifulSoup(html_string, "html.parser")
Expand Down
3 changes: 1 addition & 2 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
)
app.include_router(api_router)


@app.get("/", tags=["health"])
@app.get("/health", tags=["health"])
async def health_check():
    """
    Liveness endpoint, registered on both "/" and "/health"
    :return: dict with a single "message" key holding a fixed readiness string
    """
    status = {"message": "JobSpy ready to scrape"}
    return status
8 changes: 4 additions & 4 deletions postman/JobSpy.postman_collection.json
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@
"method": "GET",
"header": [],
"url": {
"raw": "http://127.0.0.1:8000/",
"raw": "http://127.0.0.1:8000/health",
"protocol": "http",
"host": [
"127",
Expand All @@ -232,7 +232,7 @@
],
"port": "8000",
"path": [
""
"health"
]
}
},
Expand All @@ -243,7 +243,7 @@
"method": "GET",
"header": [],
"url": {
"raw": "http://127.0.0.1:8000/",
"raw": "http://127.0.0.1:8000/health",
"protocol": "http",
"host": [
"127",
Expand All @@ -253,7 +253,7 @@
],
"port": "8000",
"path": [
""
"health"
]
}
},
Expand Down
Loading