Skip to content

Commit

Permalink
Update RocketReach API query parameters and pagination logic
Browse files — browse the repository at this point in the history
Revised the data query parameters to use 'current_employer_domain' instead of 'company_domain' and updated the pagination handling to align with the new API response structure. Enhanced exception logging to specify RocketReach as the source of the error.
  • Loading branch information
L1ghtn1ng committed Oct 26, 2024
1 parent 4495dc8 commit 0b0d651
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions theHarvester/discovery/rocketreach.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ async def do_search(self) -> None:

next_page = 1 # track pagination
for count in range(1, self.limit):
data = f'{{"query":{{"company_domain": ["{self.word}"]}}, "start": {next_page}, "page_size": 100}}'
data = f'{{"query":{{"current_employer_domain": ["{self.word}"]}}, "page": {next_page}, "page_size": 100}}'
result = await AsyncFetcher.post_fetch(self.baseurl, headers=headers, data=data, json=True)
if 'detail' in result.keys() and 'error' in result.keys() and 'Subscribe to a plan to access' in result['detail']:
# No more results can be fetched
Expand All @@ -46,14 +46,14 @@ async def do_search(self) -> None:
if 'linkedin_url' in dict(profile).keys():
self.links.add(profile['linkedin_url'])
if 'pagination' in dict(result).keys():
next_page = int(result['pagination']['next'])
if next_page > int(result['pagination']['total']):
next_page = result['pagination']['page'] + 1
if next_page > result['pagination']['total_pages']:
break

await asyncio.sleep(get_delay() + 5)

except Exception as e:
print(f'An exception has occurred: {e}')
print(f'An exception has occurred rocketreach: {e}')

async def get_links(self):
    """Return the links accumulated on this instance.

    The visible search code adds each profile's ``linkedin_url`` value to
    ``self.links``; this coroutine hands that same collection back to the
    caller without copying or filtering it.
    """
    return self.links
Expand Down

0 comments on commit 0b0d651

Please sign in to comment.