Skip to content

Commit

Permalink
Fixes search issues due to bugs in search query encodings
Browse files Browse the repository at this point in the history
- For Indeed and Monster, the query string was not properly encoded when a quoted phrase with spaces between words was provided. The fix was to encode all spaces with the proper character ('+' for Indeed, '-' for Monster). This issue and fix also applied to city names.
- For GlassDoorStatic, the query string was encoded for a URL and returned improper results. Since this class searches using a JSON payload, the solution was to combine the keywords with a space instead.
- The old query construction code was moved from GlassDoorBase to GlassDoorDynamic to prevent the dynamic scraper class from breaking.

Fixes issue PaulMcInnis#80.
  • Loading branch information
bunsenmurder committed Jun 28, 2020
1 parent 0a246cb commit 4f1cb48
Show file tree
Hide file tree
Showing 5 changed files with 10 additions and 5 deletions.
1 change: 0 additions & 1 deletion jobfunnel/glassdoor_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ def __init__(self, args):
'Cache-Control': 'no-cache',
'Connection': 'keep-alive',
}
self.query = '-'.join(self.search_terms['keywords'])

def convert_radius(self, radius):
"""function that quantizes the user input radius to a valid radius
Expand Down
2 changes: 2 additions & 0 deletions jobfunnel/glassdoor_dynamic.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ def __init__(self, args):
super().__init__(args)
self.provider = 'glassdoordynamic'

# Keeping old query function so this class does not break.
self.query = '-'.join(self.search_terms['keywords'])
# initialize the webdriver
self.driver = get_webdriver()

Expand Down
2 changes: 2 additions & 0 deletions jobfunnel/glassdoor_static.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ def __init__(self, args):
'Cache-Control': 'no-cache',
'Connection': 'keep-alive',
}
# Concatenates keywords with ' ' (the query is sent in a JSON payload, not a URL)
self.query = ' '.join(self.search_terms['keywords'])

def get_search_url(self, method='get'):
"""gets the glassdoor search url"""
Expand Down
5 changes: 3 additions & 2 deletions jobfunnel/indeed.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ def __init__(self, args):
'Cache-Control': 'no-cache',
'Connection': 'keep-alive'
}
self.query = '+'.join(self.search_terms['keywords'])
# Concatenates keywords with '+' and encodes spaces as '+'
self.query = '+'.join(self.search_terms['keywords']).replace(' ', '+')

def convert_radius(self, radius):
"""function that quantizes the user input radius to a valid radius
Expand Down Expand Up @@ -59,7 +60,7 @@ def get_search_url(self, method='get'):
'q={1}&l={2}%2C+{3}&radius={4}&limit={5}&filter={6}'.format(
self.search_terms['region']['domain'],
self.query,
self.search_terms['region']['city'],
self.search_terms['region']['city'].replace(' ', '+'),
self.search_terms['region']['province'],
self.convert_radius(
self.search_terms['region']['radius']),
Expand Down
5 changes: 3 additions & 2 deletions jobfunnel/monster.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ def __init__(self, args):
'Cache-Control': 'no-cache',
'Connection': 'keep-alive'
}
self.query = '-'.join(self.search_terms['keywords'])
# Concatenates keywords with '-' and encodes spaces as '-'
self.query = '-'.join(self.search_terms['keywords']).replace(' ', '-')

def convert_radius(self, radius):
"""function that quantizes the user input radius to a valid radius
Expand Down Expand Up @@ -83,7 +84,7 @@ def get_search_url(self, method='get'):
'q={1}&where={2}__2C-{3}&intcid={4}&rad={5}&where={2}__2c-{3}'.format(
self.search_terms['region']['domain'],
self.query,
self.search_terms['region']['city'],
self.search_terms['region']['city'].replace(' ', "-"),
self.search_terms['region']['province'],
'skr_navigation_nhpso_searchMain',
self.convert_radius(self.search_terms['region']['radius'])))
Expand Down

0 comments on commit 4f1cb48

Please sign in to comment.