-
Notifications
You must be signed in to change notification settings - Fork 26
/
weibo_cctv9.py
120 lines (107 loc) · 4.43 KB
/
weibo_cctv9.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
from .__weibo_search import search as weibo_search
from .__weibo_search import headers
from datetime import date, datetime, timedelta
from epg.model import Channel, Program
import re
import requests
import json
# Hashtag passed to the Weibo search in update() to locate the posts that
# carry the daily CCTV-9 program list.
keyword = "#每日央视纪录片精选#"
def update_programs(programs: list[Program], programs_new: list[Program]) -> int:
    """
    Update programs with new programs.

    Each new program is matched against the existing programs by start time:
    the first existing program that starts within five minutes of it (earlier
    or later) takes over the new program's title.

    Args:
        programs (list[Program]): The programs to update; titles are modified
            in place.
        programs_new (list[Program]): The new programs.

    Returns:
        int: The number of programs updated.
    """
    tolerance = timedelta(minutes=5)
    num_updated_programs = 0
    for program_new in programs_new:
        for program in programs:
            # Compare the absolute gap: a signed difference would also accept
            # every program that starts any amount of time after the new one.
            if abs(program_new.start_time - program.start_time) < tolerance:
                program.title = program_new.title
                num_updated_programs += 1
                break  # at most one existing program is updated per new program
    return num_updated_programs
def _fetch_weibo_programs(day: date) -> list[Program]:
    """
    Collect the program list published on Weibo for ``day``.

    Posts that cannot be fetched or parsed are skipped, so a single malformed
    post does not abort the whole update.

    Args:
        day (date): Only posts created on this date are considered.

    Returns:
        list[Program]: One program per "HH:MM title" line found in the posts.
    """
    programs_weibo = []
    for weibo in weibo_search(keyword, 1):
        created_at = datetime.strptime(weibo["created_at"], "%a %b %d %H:%M:%S %z %Y")
        if created_at.date() != day:
            continue

        # Fetch the post body: follow the last link found in the search-result text.
        links = re.findall(r'href="(.*?)"', weibo["text"])
        if not links:
            continue
        text_url = "https://m.weibo.cn" + links[-1]
        try:
            r = requests.get(text_url, headers=headers, timeout=5)
        except requests.RequestException:
            # Best effort: a network failure only skips this post.
            continue

        # The page embeds its data as a JSON array assigned to `$render_data`.
        render_data = re.findall(
            r".*var \$render_data = (.*\}\])\[0\]", r.text, re.S
        )
        if not render_data:
            continue
        try:
            text = json.loads(render_data[0])[0]["status"]["text"]
        except (ValueError, LookupError, TypeError):
            # Invalid JSON or an unexpected structure: skip this post.
            continue

        # Extract the program list lines ("HH:MM  title<br />").
        program_list = re.findall(r"(\d\d:\d\d)\s+(.*?)<br />", text)

        # Build the program objects; times are interpreted as UTC+08:00.
        for clock, title in program_list:
            start_time = datetime.strptime(
                f"{created_at.date()} {clock} +08:00", "%Y-%m-%d %H:%M %z"
            )
            # NOTE(review): confirm which whitespace character is replaced
            # here; both literals render identically in this view.
            title = title.replace(" ", " ")
            programs_weibo.append(Program(title, start_time, None, "cctv9@weibo"))
    return programs_weibo


def update(channel: Channel, date: date) -> int:
    """
    Update programs of a channel.

    Titles are taken from the daily program list posted on Weibo and matched
    to the channel's programs by start time. When a program is renamed, its
    previous title is stored in ``sub_title`` if that field was empty.

    Args:
        channel (Channel): The channel to update.
        date (date): The date of programs to update.

    Returns:
        int: The number of programs updated.
    """
    programs_weibo = _fetch_weibo_programs(date)

    # Maps a lookup key (a program's sub_title, or its title when sub_title
    # is empty) to the title the program should end up with.
    title_dict = {}
    for program in channel.programs:
        if program.sub_title != "":
            title_dict[program.sub_title] = program.title

    # Pair Weibo entries with channel programs starting within 15 minutes.
    window = timedelta(minutes=15)
    for program_new in programs_weibo:
        for program in channel.programs:
            if (
                abs(program.start_time - program_new.start_time) < window
                and program_new.title != program.title
            ):
                title_dict[program.title] = program_new.title

    # Expand episode ranges such as "《极速猎杀》第1-2集" or "《寻找雪豹》第1—2集":
    # the program itself gets the first episode and the following programs
    # get the next ones, but only where their sub_title is still empty.
    for program in channel.programs:
        if program.sub_title == "":
            continue
        # `[-—]` accepts a hyphen or an em dash between the episode numbers.
        episode_range = re.search(r"(.*)第(\d+)[-—](\d+)集", program.title)
        if episode_range is None:
            continue
        prefix, first_episode, last_episode = episode_range.groups()
        title_dict[program.sub_title] = prefix + f"第{first_episode}集"
        i = int(first_episode) + 1
        re_end = int(last_episode)
        new_program = program
        while i <= re_end:
            new_program = channel.next_program(new_program.start_time)
            if new_program is None:
                break
            if new_program.sub_title == "":  # only update empty sub_title
                title_dict[new_program.title] = prefix + f"第{i}集"
            i += 1

    # Apply the collected titles to the channel's programs.
    num_updated_programs = 0
    for program in channel.programs:
        sub_title = program.sub_title if program.sub_title != "" else program.title
        if title_dict.get(sub_title):
            if program.sub_title == "":
                # Keep the previous title so later runs can still look it up.
                program.sub_title = sub_title
            program.title = title_dict[sub_title]
            program.scraper_id = "cctv9@weibo"
            num_updated_programs += 1

    if num_updated_programs > 0:
        channel.metadata["last_scraper"] += "+weibo_cctv9"
    return num_updated_programs