v0.03
The new v0.03 is here, and it can now download pdf courseware!
Download from Lanzou Cloud: https://www.lanzous.com/ia6vxdi
P.S.: Extract the 7z archive from Lanzou Cloud, and you can run __main__.py as long as you have Python installed.
Endericedragon authored Mar 12, 2020
1 parent 936d3a9 commit c4edb26
Showing 4 changed files with 91 additions and 13 deletions.
2 changes: 1 addition & 1 deletion createFolder.py
@@ -24,7 +24,7 @@ def creater(courseName, courseDict):
             url = info['url']
             path = courseName+'\\'+week+'\\'+section
             #network.ariaDown(url, fileName, path)
-            if (fileName[-4:]=='m3u8'):
+            if (fileName[-4:]=='m3u8' or info['format']=='pdf'):
                 network.urllibDown(url, fileName, path)
             else:
                 p.apply_async(network.ariaDown, args=(url, fileName, path))
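For context, the change above routes m3u8 playlists and the new pdf slides through the single-threaded network.urllibDown, while every other file is queued to aria2 through a process pool. A minimal sketch of that dispatch, with stub downloaders and an assumed courseDict shape (week -> section -> fileName -> info), since only part of creater() appears in this hunk:

from multiprocessing import Pool

def urllibDown(url, fileName, path):      # stub for network.urllibDown
    print('direct download:', fileName)   # m3u8 playlists and pdf slides

def ariaDown(url, fileName, path):        # stub for network.ariaDown
    print('queued to aria2:', fileName)   # regular files, fetched in parallel

def creater(courseName, courseDict):
    p = Pool(4)                           # pool size is an assumption
    for week, sections in courseDict.items():
        for section, files in sections.items():
            for fileName, info in files.items():
                url = info['url']
                path = courseName+'\\'+week+'\\'+section
                if (fileName[-4:]=='m3u8' or info['format']=='pdf'):
                    urllibDown(url, fileName, path)
                else:
                    p.apply_async(ariaDown, args=(url, fileName, path))
    p.close()
    p.join()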
47 changes: 41 additions & 6 deletions network.py
@@ -117,11 +117,46 @@ def urllibDown(url, fileName, path):
     if (os.path.exists(path+'\\'+fileName)):
         print('File exists.{0} won\'t be downloaded.'.format(fileName))
         return 0
+    for i in range(1,6):
+        try:
+            print('Downloading '+fileName+'...')
+            r = requests.get(url, headers = hd, timeout = 2)
+            break
+        except:
+            print('Retrying... attempt %d' % (i))
+    with open(path+'\\'+fileName, 'wb') as f:
+        f.write(r.content)
+    print(fileName, 'downloaded.')
+
+def getPdfUrl(name, conId, Id):
+    pdfUrl = 'http://www.icourse163.org/dwr/call/plaincall/CourseBean.getLessonUnitLearnVo.dwr'
+    pdfData = {
+        'callCount':'1',
+        'scriptSessionId':'${scriptSessionId}190',
+        'httpSessionId':'0397251039424db2b1659352f45d1540',
+        'c0-scriptName':'CourseBean',
+        'c0-methodName':'getLessonUnitLearnVo',
+        'c0-id':'0',
+        'c0-param0':'number:'+conId, # (contentId)
+        'c0-param1':'number:3',
+        'c0-param2':'number:0',
+        'c0-param3':'number:'+Id, # (id)
+        'batchId':'1584017274511'
+    }
     try:
-        print('Downloading '+fileName+'...')
-        res = urllib.request.urlopen(url)
-        with open(path+'\\'+fileName, 'wb') as f:
-            f.write(res.read())
+        r = requests.post(pdfUrl, headers = hd, data = pdfData)
+        r.encoding = r.apparent_encoding
+        r.raise_for_status()
     except:
-        print('Urllib error!')
-        return -1
+        return ''
+    gotIt = r.text
+    pdfGet = re.search('textOrigUrl:\".+?\"', gotIt)
+    if pdfGet:
+        pdfGet = pdfGet.group()[13:-1]
+    else:
+        pdfGet = ''
+    return pdfGet
+def test():
+    getPdfUrl('wow', '1003316105', '1214227359')
+if __name__ == '__main__':
+    test()
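The pdf link comes from the DWR endpoint the site's own player uses: POST the lesson unit's contentId and id to CourseBean.getLessonUnitLearnVo, then pull textOrigUrl out of the JavaScript response. One caveat in urllibDown above: if all five attempts fail, r is never bound and the write after the loop raises a NameError; a for-else or an explicit check would close that gap. Below is a standalone sketch of the DWR call, with a capture group replacing the [13:-1] slice and a typed except instead of a bare one; the headers dict is a placeholder, and the hard-coded session and batch IDs are copied from the diff above and may need refreshing:

import re
import requests

hd = {'user-agent': 'Mozilla/5.0'}   # placeholder; network.py defines its own headers

def getPdfUrl(conId, Id):
    pdfUrl = 'http://www.icourse163.org/dwr/call/plaincall/CourseBean.getLessonUnitLearnVo.dwr'
    pdfData = {
        'callCount':'1',
        'scriptSessionId':'${scriptSessionId}190',
        'httpSessionId':'0397251039424db2b1659352f45d1540',
        'c0-scriptName':'CourseBean',
        'c0-methodName':'getLessonUnitLearnVo',
        'c0-id':'0',
        'c0-param0':'number:'+conId,   # contentId of the lesson unit
        'c0-param1':'number:3',
        'c0-param2':'number:0',
        'c0-param3':'number:'+Id,      # id of the lesson
        'batchId':'1584017274511'
    }
    try:
        r = requests.post(pdfUrl, headers=hd, data=pdfData, timeout=5)
        r.raise_for_status()
    except requests.RequestException:
        return ''
    # The DWR response is JavaScript; the pdf link sits in textOrigUrl:"..."
    m = re.search(r'textOrigUrl:"(.+?)"', r.text)
    return m.group(1) if m else ''

print(getPdfUrl('1003316105', '1214227359'))   # same IDs as test() above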
44 changes: 43 additions & 1 deletion parseList.py
@@ -1,8 +1,42 @@
 import re
 from network import getVideoUrl
+from network import getPdfUrl
 
+def char2int(n):
+    indexDict = {
+        '一':1,
+        '二':2,
+        '三':3,
+        '四':4,
+        '五':5,
+        '六':6,
+        '七':7,
+        '八':8,
+        '九':9,
+        '十':10
+    }
+    if len(n)==1:
+        return str(indexDict[n])
+    elif len(n)==2 and n[0]=='十':
+        return str(10+indexDict[n[1]])
+    elif len(n)==2 and n[1]=='十':
+        return str(indexDict[n[0]]*10)
+    elif len(n)==2:
+        return str(indexDict[n[0]]*10+indexDict[n[1]])
+    else:
+        return str(indexDict[n[0]]*10+indexDict[n[2]])
+
 def namer(name, num='k'):
-    forbid = re.compile('[\\/:\*\?\"<>\|]')
+    weeksForbid1 = re.compile('^第[一二三四五六七八九十][讲周]')
+    weeksForbid2 = re.compile('^第[一二三四五六七八九十][一二三四五六七八九十][讲周]')
+    weeksForbid3 = re.compile('^第[一二三四五六七八九十][一二三四五六七八九十][一二三四五六七八九十][讲周]')
+    if weeksForbid1.match(name):
+        name = name[0]+char2int(name[1])+name[2:]
+    elif weeksForbid2.match(name):
+        name = name[0]+char2int(name[1:3])+name[3:]
+    elif weeksForbid3.match(name):
+        name = name[0]+char2int(name[1:4])+name[4:]
+    forbid = re.compile('[\\/:\*\?\"<>\|\s]')
     legal_name = ''
     for each in name:
         if forbid.match(each):
@@ -64,6 +98,7 @@ def parser(courseList:str, courseUrl)->dict:
     Top-level directory:    no chapterId, no contentId, contentType=1
     Second-level directory: has chapterId, no contentId, contentType=1
     Video file:             has chapterId, has contentId, contentType=1
+    pdf file:               has chapterId, has contentId, contentType=3
     '''
     courseList = courseList.split('\n')
     for line in courseList:
@@ -95,5 +130,12 @@ def parser(courseList:str, courseUrl)->dict:
                 'format':yes[1]
                 }
             cc+=1
+        elif (getchId(line) and getcoId(line) and getcoType(line)=='3'):
+            # ~ print('pdf courseware')
+            ok = getPdfUrl(getName(line), getcoId(line), getId(line))
+            temp2[namer(getName(line))]={
+                'url':ok,
+                'format':'pdf'
+                }
     return courseDict
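char2int covers the Chinese numerals 1 through 99, and namer uses it so that folder names like 第十二讲 become 第12讲, which file managers can sort numerically. A quick check of the conversion, assuming parseList is importable; the expected values follow directly from the code above, while namer's exact output also depends on the loop body below this hunk:

from parseList import char2int, namer

assert char2int('三') == '3'       # single digit
assert char2int('十') == '10'      # bare ten
assert char2int('十二') == '12'    # 10 + 2
assert char2int('二十') == '20'    # 2 * 10
assert char2int('二十一') == '21'  # 2 * 10 + 1

print(namer('第十二讲 信号与系统'))  # ordinal becomes digits; \s is now forbidden too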

11 changes: 6 additions & 5 deletions process.py
@@ -1,6 +1,7 @@
 import re
 import os
 import pickle
+import network
 from multiprocessing import Pool
 
 def namer(num):
@@ -22,7 +23,7 @@ def m3u8Process(courseName, courseDict):
         for vidName, detail in vidInfo.items(): # vidName has no file extension
             if detail['format']!='m3u8':
                 continue
-            elif os.path.exists(path+'\\'+vidName+'.mp4'):
+            elif os.path.exists(path+'\\'+vidName+'.ts'):
                 continue
             for vid in os.listdir(path): # vid includes the file extension
                 if re.search(vidName, vid) and 'mp4' not in vid:
Expand All @@ -37,17 +38,17 @@ def m3u8Process(courseName, courseDict):
for each in tsInfo:
each = each.strip()
if each[0]!='#':
# ~ os.system('{0} -n -i \"{1}\" \"{2}\\{3}.mp4\"'.format(fg, url+each, path, namer(cc)))
p.apply_async(os.system, args = ('{0} -n -i \"{1}\" \"{2}\\{3}.mp4\"'.format(fg, url+each, path, namer(cc)),))
# ~ p.apply_async(os.system, args = ('{0} -n -i \"{1}\" \"{2}\\{3}.ts\"'.format(fg, url+each, path, namer(cc)),))
p.apply_async(network.urllibDown, args = (url+each, namer(cc)+'.ts', path))
cc+=1
p.close()
p.join()
print(vidName, '的分片文件全部下载完成')
files = 'concat:'
for i in range(cc):
files+=(path+'\\'+namer(i)+'.mp4|')
files+=(path+'\\'+namer(i)+'.ts|')
files = files[:-1]
os.system('{0} -n -i \"{1}\" \"{2}\\{3}.mp4\"'.format(fg, files, path, vidName))
os.remove(path+'\\'+vid)
for ck in range(cc):
os.system('del \"{0}\\{1}.mp4\"'.format(path, namer(ck)))
os.system('del \"{0}\\{1}.ts\"'.format(path, namer(ck)))
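The reworked flow saves each media segment as a numbered .ts file via network.urllibDown, then invokes ffmpeg once, using the concat protocol to stitch the transport-stream segments into a single mp4. A minimal sketch of that final step, mirroring the command in the diff; the zero-padded namer and the three-segment folder are assumptions for illustration:

import os

fg = 'ffmpeg'                     # process.py resolves its own ffmpeg path
path = 'course\\week1\\section1'  # hypothetical folder holding 000000.ts, ...
cc = 3                            # pretend three segments were downloaded

def namer(num):                   # assumed zero-padded naming scheme
    return str(num).zfill(6)

# concat: does raw byte concatenation, which is valid for MPEG-TS streams
files = 'concat:' + '|'.join(path+'\\'+namer(i)+'.ts' for i in range(cc))
os.system('{0} -n -i "{1}" "{2}\\{3}.mp4"'.format(fg, files, path, 'lecture01'))
for ck in range(cc):              # clean up the segment files afterwards
    os.system('del "{0}\\{1}.ts"'.format(path, namer(ck)))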
