From 959da39ffc14d9ec7d669a1e1e1bf1515f3ca749 Mon Sep 17 00:00:00 2001 From: Anmol Sahoo Date: Sun, 11 Mar 2018 16:07:12 +0530 Subject: [PATCH 1/2] Fixed the section parsing due to section name not being parsed properly --- edx_dl/parsing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/edx_dl/parsing.py b/edx_dl/parsing.py index 717b1f5f..46fc40cf 100644 --- a/edx_dl/parsing.py +++ b/edx_dl/parsing.py @@ -376,7 +376,7 @@ def _make_url(section_soup): # FIXME: Extract from here and test def _get_section_name(section_soup): # FIXME: Extract from here and test try: - return section_soup.div.h3.string + return section_soup.find_all("h3", class_="section-title")[0].string except AttributeError: return None From f5ff8490330ace0de2b0413a3a7dd4f6a78fc859 Mon Sep 17 00:00:00 2001 From: Anmol Sahoo Date: Sun, 11 Mar 2018 17:57:57 +0530 Subject: [PATCH 2/2] Created a custom opener for files and added the headers to it --- edx_dl/edx_dl.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/edx_dl/edx_dl.py b/edx_dl/edx_dl.py index 2d25bc88..c9abe4db 100644 --- a/edx_dl/edx_dl.py +++ b/edx_dl/edx_dl.py @@ -715,7 +715,12 @@ def download_url(url, filename, headers, args): # order) is due to different behaviors in different Python versions # (e.g., 2.7 vs. 3.4). try: - urlretrieve(url, filename) + headers_list = [(k,headers[k]) for k in headers] + opener = build_opener() + opener.addheaders = headers_list + response = opener.open(url) + with open(filename, 'wb') as f: + f.write(response.read()) except Exception as e: logging.warn('Got SSL/Connection error: %s', e) if not args.ignore_errors: