forked from rajatkb/Conference-Notify
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Scrapper utility: recurring non-blocking timeout for get_page() using …
…exponential backoff - An exponential backoff algorithm is used to handle network issues, via the 'backoff' library. - The 'max_time' parameter is used to add a timeout. References: 1. https://github.com/litl/backoff 2. https://en.wikipedia.org/wiki/Exponential_backoff Fixes rajatkb#29
- Loading branch information
Shardul Aeer
authored and
Shardul Aeer
committed
Mar 26, 2020
1 parent
f70be02
commit 08d7724
Showing
2 changed files
with
9 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,3 +12,4 @@ traceback2==1.4.0 | |
unittest2==1.1.0 | ||
urllib3==1.25.8 | ||
dill==0.3.1.1 | ||
backoff==1.10.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,20 +1,15 @@ | ||
import requests | ||
import math | ||
import backoff | ||
|
||
class AdaptiveRequest:
    """HTTP GET helper that retries failed requests with exponential backoff.

    Retrying is delegated entirely to the ``backoff`` decorator on
    :meth:`get`. The per-request timeout stays fixed at ``max_wait_time``;
    it is deliberately *not* escalated after failures, because a growing
    timeout stacked on top of the backoff delays lets a single attempt
    outlast the overall ``max_time`` retry budget.

    Attributes:
        max_wait_time: Timeout, in seconds, passed to ``requests.get``.
        num_fail: Number of attempts that raised a retryable exception.
        num_success: Number of attempts that returned a response.
    """

    def __init__(self):
        self.max_wait_time = 10
        self.num_fail = 0
        self.num_success = 0

    @backoff.on_exception(
        backoff.expo,  # exponentially increasing wait between attempts
        (requests.HTTPError, requests.ConnectionError),  # retryable errors
        max_time=300,  # stop retrying after 300 seconds in total
    )
    def get(self, link):
        """Fetch ``link`` with a GET request, retrying on network errors.

        Args:
            link: URL to request.

        Returns:
            The response object returned by ``requests.get``.

        Raises:
            requests.HTTPError, requests.ConnectionError: Re-raised from each
                failed attempt so the ``backoff`` decorator can schedule a
                retry; once the 300 second budget is spent the decorator
                lets the last exception reach the caller.
        """
        # NOTE(review): requests.get() is not expected to raise HTTPError for
        # 4xx/5xx responses unless raise_for_status() is called -- confirm
        # whether error statuses should also trigger a retry.
        try:
            res = requests.get(link, timeout=self.max_wait_time)
        except (requests.HTTPError, requests.ConnectionError):
            # Only record the failure; the decorator owns the retry policy.
            self.num_fail += 1
            raise
        self.num_success += 1
        return res