v0.03
The new v0.03 is here, and it can now download pdf courseware!
Download from Lanzou Cloud: https://www.lanzous.com/ia6vxdi
P.S.: Extract the 7z archive from Lanzou Cloud, and you can run __main__.py as long as you have Python installed.
Endericedragon authored Mar 12, 2020
1 parent 936d3a9 commit c4edb26
Showing 4 changed files with 91 additions and 13 deletions.
2 changes: 1 addition & 1 deletion createFolder.py
@@ -24,7 +24,7 @@ def creater(courseName, courseDict):
             url = info['url']
             path = courseName+'\\'+week+'\\'+section
             #network.ariaDown(url, fileName, path)
-            if (fileName[-4:]=='m3u8'):
+            if (fileName[-4:]=='m3u8' or info['format']=='pdf'):
                 network.urllibDown(url, fileName, path)
             else:
                 p.apply_async(network.ariaDown, args=(url, fileName, path))
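For context, the change above routes m3u8 playlists and the new pdf slides through the single-threaded network.urllibDown, while every other file is queued to aria2 through a process pool. A minimal sketch of that dispatch, with stub downloaders and an assumed courseDict shape (week -> section -> fileName -> info), since only part of creater() appears in this hunk:

from multiprocessing import Pool

def urllibDown(url, fileName, path):      # stub for network.urllibDown
    print('direct download:', fileName)   # m3u8 playlists and pdf slides

def ariaDown(url, fileName, path):        # stub for network.ariaDown
    print('queued to aria2:', fileName)   # regular files, fetched in parallel

def creater(courseName, courseDict):
    p = Pool(4)                           # pool size is an assumption
    for week, sections in courseDict.items():
        for section, files in sections.items():
            for fileName, info in files.items():
                url = info['url']
                path = courseName+'\\'+week+'\\'+section
                if (fileName[-4:]=='m3u8' or info['format']=='pdf'):
                    urllibDown(url, fileName, path)
                else:
                    p.apply_async(ariaDown, args=(url, fileName, path))
    p.close()
    p.join()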
47 changes: 41 additions & 6 deletions network.py
@@ -117,11 +117,46 @@ def urllibDown(url, fileName, path):
     if (os.path.exists(path+'\\'+fileName)):
         print('File exists.{0} won\'t be downloaded.'.format(fileName))
         return 0
+    for i in range(1,6):
+        try:
+            print('Downloading '+fileName+'...')
+            r = requests.get(url, headers = hd, timeout = 2)
+            break
+        except:
+            print('Retrying... attempt %d' % (i))
+    with open(path+'\\'+fileName, 'wb') as f:
+        f.write(r.content)
+    print(fileName, 'downloaded.')
+
+def getPdfUrl(name, conId, Id):
+    pdfUrl = 'http://www.icourse163.org/dwr/call/plaincall/CourseBean.getLessonUnitLearnVo.dwr'
+    pdfData = {
+        'callCount':'1',
+        'scriptSessionId':'${scriptSessionId}190',
+        'httpSessionId':'0397251039424db2b1659352f45d1540',
+        'c0-scriptName':'CourseBean',
+        'c0-methodName':'getLessonUnitLearnVo',
+        'c0-id':'0',
+        'c0-param0':'number:'+conId, # (contentId)
+        'c0-param1':'number:3',
+        'c0-param2':'number:0',
+        'c0-param3':'number:'+Id, # (id)
+        'batchId':'1584017274511'
+    }
     try:
-        print('Downloading '+fileName+'...')
-        res = urllib.request.urlopen(url)
-        with open(path+'\\'+fileName, 'wb') as f:
-            f.write(res.read())
+        r = requests.post(pdfUrl, headers = hd, data = pdfData)
+        r.encoding = r.apparent_encoding
+        r.raise_for_status()
     except:
-        print('Urllib error!')
-        return -1
+        return ''
+    gotIt = r.text
+    pdfGet = re.search('textOrigUrl:\".+?\"', gotIt)
+    if pdfGet:
+        pdfGet = pdfGet.group()[13:-1]
+    else:
+        pdfGet = ''
+    return pdfGet
+def test():
+    getPdfUrl('wow', '1003316105', '1214227359')
+if __name__ == '__main__':
+    test()
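The pdf link comes from the DWR endpoint the site's own player uses: POST the lesson unit's contentId and id to CourseBean.getLessonUnitLearnVo, then pull textOrigUrl out of the JavaScript response. One caveat in urllibDown above: if all five attempts fail, r is never bound and the write after the loop raises a NameError; a for-else or an explicit check would close that gap. Below is a standalone sketch of the DWR call, with a capture group replacing the [13:-1] slice and a typed except instead of a bare one; the headers dict is a placeholder, and the hard-coded session and batch IDs are copied from the diff above and may need refreshing:

import re
import requests

hd = {'user-agent': 'Mozilla/5.0'}   # placeholder; network.py defines its own headers

def getPdfUrl(conId, Id):
    pdfUrl = 'http://www.icourse163.org/dwr/call/plaincall/CourseBean.getLessonUnitLearnVo.dwr'
    pdfData = {
        'callCount':'1',
        'scriptSessionId':'${scriptSessionId}190',
        'httpSessionId':'0397251039424db2b1659352f45d1540',
        'c0-scriptName':'CourseBean',
        'c0-methodName':'getLessonUnitLearnVo',
        'c0-id':'0',
        'c0-param0':'number:'+conId,   # contentId of the lesson unit
        'c0-param1':'number:3',
        'c0-param2':'number:0',
        'c0-param3':'number:'+Id,      # id of the lesson
        'batchId':'1584017274511'
    }
    try:
        r = requests.post(pdfUrl, headers=hd, data=pdfData, timeout=5)
        r.raise_for_status()
    except requests.RequestException:
        return ''
    # The DWR response is JavaScript; the pdf link sits in textOrigUrl:"..."
    m = re.search(r'textOrigUrl:"(.+?)"', r.text)
    return m.group(1) if m else ''

print(getPdfUrl('1003316105', '1214227359'))   # same IDs as test() above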
44 changes: 43 additions & 1 deletion parseList.py
@@ -1,8 +1,42 @@
 import re
 from network import getVideoUrl
+from network import getPdfUrl
 
+def char2int(n):
+    indexDict = {
+        '一':1,
+        '二':2,
+        '三':3,
+        '四':4,
+        '五':5,
+        '六':6,
+        '七':7,
+        '八':8,
+        '九':9,
+        '十':10
+    }
+    if len(n)==1:
+        return str(indexDict[n])
+    elif len(n)==2 and n[0]=='十':
+        return str(10+indexDict[n[1]])
+    elif len(n)==2 and n[1]=='十':
+        return str(indexDict[n[0]]*10)
+    elif len(n)==2:
+        return str(indexDict[n[0]]*10+indexDict[n[1]])
+    else:
+        return str(indexDict[n[0]]*10+indexDict[n[2]])
+
 def namer(name, num='k'):
-    forbid = re.compile('[\\/:\*\?\"<>\|]')
+    weeksForbid1 = re.compile('^第[一二三四五六七八九十][讲周]')
+    weeksForbid2 = re.compile('^第[一二三四五六七八九十][一二三四五六七八九十][讲周]')
+    weeksForbid3 = re.compile('^第[一二三四五六七八九十][一二三四五六七八九十][一二三四五六七八九十][讲周]')
+    if weeksForbid1.match(name):
+        name = name[0]+char2int(name[1])+name[2:]
+    elif weeksForbid2.match(name):
+        name = name[0]+char2int(name[1:3])+name[3:]
+    elif weeksForbid3.match(name):
+        name = name[0]+char2int(name[1:4])+name[4:]
+    forbid = re.compile('[\\/:\*\?\"<>\|\s]')
     legal_name = ''
     for each in name:
         if forbid.match(each):
@@ -64,6 +98,7 @@ def parser(courseList:str, courseUrl)->dict:
     Top-level directory:    no chapterId, no contentId, contentType=1
     Second-level directory: has chapterId, no contentId, contentType=1
     Video file:             has chapterId, has contentId, contentType=1
+    pdf file:               has chapterId, has contentId, contentType=3
     '''
     courseList = courseList.split('\n')
     for line in courseList:
@@ -95,5 +130,12 @@ def parser(courseList:str, courseUrl)->dict:
                 'format':yes[1]
                 }
             cc+=1
+        elif (getchId(line) and getcoId(line) and getcoType(line)=='3'):
+            # ~ print('pdf courseware')
+            ok = getPdfUrl(getName(line), getcoId(line), getId(line))
+            temp2[namer(getName(line))]={
+                'url':ok,
+                'format':'pdf'
+                }
     return courseDict
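char2int covers the Chinese numerals 1 through 99, and namer uses it so that folder names like 第十二讲 become 第12讲, which file managers can sort numerically. A quick check of the conversion, assuming parseList is importable; the expected values follow directly from the code above, while namer's exact output also depends on the loop body below this hunk:

from parseList import char2int, namer

assert char2int('三') == '3'       # single digit
assert char2int('十') == '10'      # bare ten
assert char2int('十二') == '12'    # 10 + 2
assert char2int('二十') == '20'    # 2 * 10
assert char2int('二十一') == '21'  # 2 * 10 + 1

print(namer('第十二讲 信号与系统'))  # ordinal becomes digits; \s is now forbidden too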

11 changes: 6 additions & 5 deletions process.py
@@ -1,6 +1,7 @@
 import re
 import os
 import pickle
+import network
 from multiprocessing import Pool
 
 def namer(num):
@@ -22,7 +23,7 @@ def m3u8Process(courseName, courseDict):
         for vidName, detail in vidInfo.items(): # vidName has no file extension
             if detail['format']!='m3u8':
                 continue
-            elif os.path.exists(path+'\\'+vidName+'.mp4'):
+            elif os.path.exists(path+'\\'+vidName+'.ts'):
                 continue
             for vid in os.listdir(path): # vid includes the file extension
                 if re.search(vidName, vid) and 'mp4' not in vid:
Expand All @@ -37,17 +38,17 @@ def m3u8Process(courseName, courseDict):
for each in tsInfo:
each = each.strip()
if each[0]!='#':
# ~ os.system('{0} -n -i \"{1}\" \"{2}\\{3}.mp4\"'.format(fg, url+each, path, namer(cc)))
p.apply_async(os.system, args = ('{0} -n -i \"{1}\" \"{2}\\{3}.mp4\"'.format(fg, url+each, path, namer(cc)),))
# ~ p.apply_async(os.system, args = ('{0} -n -i \"{1}\" \"{2}\\{3}.ts\"'.format(fg, url+each, path, namer(cc)),))
p.apply_async(network.urllibDown, args = (url+each, namer(cc)+'.ts', path))
cc+=1
p.close()
p.join()
print(vidName, '的分片文件全部下载完成')
files = 'concat:'
for i in range(cc):
files+=(path+'\\'+namer(i)+'.mp4|')
files+=(path+'\\'+namer(i)+'.ts|')
files = files[:-1]
os.system('{0} -n -i \"{1}\" \"{2}\\{3}.mp4\"'.format(fg, files, path, vidName))
os.remove(path+'\\'+vid)
for ck in range(cc):
os.system('del \"{0}\\{1}.mp4\"'.format(path, namer(ck)))
os.system('del \"{0}\\{1}.ts\"'.format(path, namer(ck)))
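The reworked flow saves each media segment as a numbered .ts file via network.urllibDown, then invokes ffmpeg once, using the concat protocol to stitch the transport-stream segments into a single mp4. A minimal sketch of that final step, mirroring the command in the diff; the zero-padded namer and the three-segment folder are assumptions for illustration:

import os

fg = 'ffmpeg'                     # process.py resolves its own ffmpeg path
path = 'course\\week1\\section1'  # hypothetical folder holding 000000.ts, ...
cc = 3                            # pretend three segments were downloaded

def namer(num):                   # assumed zero-padded naming scheme
    return str(num).zfill(6)

# concat: does raw byte concatenation, which is valid for MPEG-TS streams
files = 'concat:' + '|'.join(path+'\\'+namer(i)+'.ts' for i in range(cc))
os.system('{0} -n -i "{1}" "{2}\\{3}.mp4"'.format(fg, files, path, 'lecture01'))
for ck in range(cc):              # clean up the segment files afterwards
    os.system('del "{0}\\{1}.ts"'.format(path, namer(ck)))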
