Scrapper utility: recurring non-blocking timeout for get_page() using exponential backoff

- Exponential backoff, via the 'backoff' library, is used to retry on transient network errors (see the sketch after the references)
- The 'max_time' parameter bounds the total time spent retrying, so a failing request eventually gives up

References
1. https://github.com/litl/backoff
2. https://en.wikipedia.org/wiki/Exponential_backoff
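
For illustration (not part of this commit), here is a minimal sketch of how the 'backoff' library's on_exception decorator behaves; flaky_fetch, log_retry, and the URL are hypothetical names used only for this example:

import backoff
import requests

def log_retry(details):
    # 'details' is supplied by the backoff library before each retry wait
    print("retry #{tries}: waiting {wait:0.1f}s, {elapsed:0.1f}s elapsed".format(**details))

@backoff.on_exception(
    backoff.expo,              # waits roughly 1s, 2s, 4s, ... (randomized by default jitter)
    requests.ConnectionError,  # retry only on this exception
    max_time=30,               # stop retrying once ~30 seconds have elapsed in total
    on_backoff=log_retry,
)
def flaky_fetch(url):
    return requests.get(url, timeout=5)

Once max_time is exhausted, the decorator re-raises the last exception instead of retrying again.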

Fixes rajatkb#29
Shardul Aeer authored and committed Mar 26, 2020
1 parent f70be02 commit 08d7724
Showing 2 changed files with 9 additions and 13 deletions.
1 change: 1 addition & 0 deletions Scrapper-Service/requirements.txt
@@ -12,3 +12,4 @@ traceback2==1.4.0
 unittest2==1.1.0
 urllib3==1.25.8
 dill==0.3.1.1
+backoff==1.10.0
21 changes: 8 additions & 13 deletions Scrapper-Service/utility/adapative_request.py
@@ -1,20 +1,15 @@
 import requests
-import math
+import backoff
 
 class AdaptiveRequest:
     def __init__(self):
         self.max_wait_time = 10
-        self.num_fail = 0
-        self.num_success = 0
 
+    @backoff.on_exception(
+        backoff.expo,                                    # exponential backoff
+        (requests.HTTPError, requests.ConnectionError),  # retry if these errors are encountered
+        max_time=300                                     # give up after 300 seconds
+    )
     def get(self, link):
-        try:
-            res = requests.get(link, timeout=self.max_wait_time)
-            self.num_success += 1
-            return res
-        except (requests.HTTPError, requests.ConnectionError) as err:
-            self.num_fail = self.num_fail + 1
-            if self.num_fail != self.num_success:
-                self.max_wait_time = math.pow(10 + 1/(self.num_success - self.num_fail), self.num_fail)
-            else:
-                self.max_wait_time += 1
-            raise err
+        return requests.get(link, timeout=self.max_wait_time)
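
For context, a caller might use the updated class like this (the URL is illustrative); get() now retries transparently with exponential backoff and gives up after roughly 300 seconds, instead of adjusting its per-request timeout by hand:

adaptive = AdaptiveRequest()
response = adaptive.get("https://example.com/papers")
print(response.status_code)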
