diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..6b2723c --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,86 @@ +name: JobSpy API Tests + +on: [push, pull_request] + +jobs: + test_api: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v2 + + - name: Set up Python 3.10 + uses: actions/setup-python@v2 + with: + python-version: '3.10' + + - name: Install dependencies + run: pip install -r requirements.txt + + - name: Install jq + run: sudo apt-get install jq + + - name: Start JobSpy FastAPI app + run: uvicorn main:app --host 0.0.0.0 --port 8000 & + + - name: Wait for server to be up + run: | + for i in {1..10}; do + curl -s http://0.0.0.0:8000/api/v1/jobs && break || sleep 1 + done + + - name: Check health + run: | + health_status=$(curl -L -s -o /dev/null -w "%{http_code}" http://0.0.0.0:8000/health) + + if [ "$health_status" != "200" ]; then + echo "Error: Health check failed with status code $health_status" + exit 1 + fi + + - name: Check HTTP status to POST /api/v1/jobs/ + run: | + response=$(curl -L -s -X 'POST' -H 'Content-Type: application/json' -d '{ + "site_type": ["indeed", "linkedin"], + "search_term": "software engineer", + "location": "austin, tx", + "distance": 10, + "job_type": "fulltime", + "results_wanted": 5 + }' http://0.0.0.0:8000/api/v1/jobs -w "%{http_code}") + + status_code="${response: -3}" + echo "Received status code: $status_code" + + if [ "$status_code" != "200" ]; then + echo "Error: Expected status code 200, but got $status_code" + exit 1 + fi + + echo "${response::-3}" > response.json + cat response.json + + - name: Check error field in response + run: | + indeed_error=$(jq '.indeed.error' response.json) + linkedin_error=$(jq '.linkedin.error' response.json) + + if [[ "$indeed_error" != "null" || "$linkedin_error" != "null" ]]; then + echo "Error found in response:" + echo "Indeed Error: $indeed_error" + echo "LinkedIn Error: $linkedin_error" + exit 1 + fi + + - name: Verify returned_results in response + run: | + indeed_results=$(jq '.indeed.returned_results' response.json) + linkedin_results=$(jq '.linkedin.returned_results' response.json) + + if [[ $indeed_results -ne 5 || $linkedin_results -ne 5 ]]; then + echo "Mismatch in results_wanted and returned_results:" + echo "Indeed: Expected 5, Got $indeed_results" + echo "LinkedIn: Expected 5, Got $linkedin_results" + exit 1 + fi \ No newline at end of file diff --git a/api/core/scrapers/ziprecruiter/__init__.py b/api/core/scrapers/ziprecruiter/__init__.py index f285097..492a815 100644 --- a/api/core/scrapers/ziprecruiter/__init__.py +++ b/api/core/scrapers/ziprecruiter/__init__.py @@ -96,7 +96,9 @@ def process_job(job: Tag) -> Optional[JobPost]: title = job.find("h2", {"class": "title"}).text company = job.find("a", {"class": "company_name"}).text.strip() - description, job_url = ZipRecruiterScraper.get_description(job_url, session) + description, updated_job_url = ZipRecruiterScraper.get_description(job_url, session) + if updated_job_url is not None: + job_url = updated_job_url if description is None: description = job.find("p", {"class": "job_snippet"}).text.strip() @@ -185,7 +187,7 @@ def scrape(self, scraper_input: ScraperInput) -> JobResponse: @staticmethod def get_description( job_page_url: str, session: tls_client.Session - ) -> Tuple[Optional[str], str]: + ) -> Tuple[Optional[str], Optional[str]]: """ Retrieves job description by going to the job page url :param job_page_url: @@ -196,7 +198,7 @@ def get_description( job_page_url, headers=ZipRecruiterScraper.headers(), allow_redirects=True ) if response.status_code not in range(200, 400): - return None + return None, None html_string = response.content soup_job = BeautifulSoup(html_string, "html.parser") diff --git a/main.py b/main.py index ab4b3f9..0a130a1 100644 --- a/main.py +++ b/main.py @@ -10,7 +10,6 @@ ) app.include_router(api_router) - -@app.get("/", tags=["health"]) +@app.get("/health", tags=["health"]) async def health_check(): return {"message": "JobSpy ready to scrape"} diff --git a/postman/JobSpy.postman_collection.json b/postman/JobSpy.postman_collection.json index dab6025..bc2d792 100644 --- a/postman/JobSpy.postman_collection.json +++ b/postman/JobSpy.postman_collection.json @@ -222,7 +222,7 @@ "method": "GET", "header": [], "url": { - "raw": "http://127.0.0.1:8000/", + "raw": "http://127.0.0.1:8000/health", "protocol": "http", "host": [ "127", @@ -232,7 +232,7 @@ ], "port": "8000", "path": [ - "" + "health" ] } }, @@ -243,7 +243,7 @@ "method": "GET", "header": [], "url": { - "raw": "http://127.0.0.1:8000/", + "raw": "http://127.0.0.1:8000/health", "protocol": "http", "host": [ "127", @@ -253,7 +253,7 @@ ], "port": "8000", "path": [ - "" + "health" ] } },