-
Notifications
You must be signed in to change notification settings - Fork 1
/
GetUid.py
257 lines (209 loc) · 10.1 KB
/
GetUid.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
import os
import shutil
import sys
import time
import subprocess
from urllib.parse import urlencode
import requests
from bs4 import BeautifulSoup
# 定义获取关键词的函数
def fetch_keywords():
keywords_url = 'https://raw.kkgithub.com/ayyayyayy2002/BiliBiliVideoAutoReport/main/云端文件/keyword.txt' # 替换为实际的GitHub URL
keywords_filename = '附加文件/keyword.txt'
try:
response = requests.get(keywords_url, timeout=(5, 10))
if response.status_code == 200:
with open(keywords_filename, 'wb') as f_out:
f_out.write(response.content)
print(f"成功下载关键词文件 {keywords_url} 并保存为 {keywords_filename}")
else:
print(f"无法访问 {keywords_url},状态码:{response.status_code}")
return load_local_keywords(keywords_filename) # 返回本地关键词
except requests.exceptions.RequestException as e:
print(f"下载关键词文件时发生请求异常:{e}")
return load_local_keywords(keywords_filename) # 返回本地关键词
return load_local_keywords(keywords_filename)
# 定义从本地文件加载关键词的函数
def load_local_keywords(filename):
keywords = []
if os.path.exists(filename):
with open(filename, 'r', encoding='utf-8') as f:
for line in f:
stripped_line = line.strip()
if stripped_line and not stripped_line.startswith('#'): # 排除空行和以“#”开头的行
keywords.append(stripped_line)
else:
print(f"本地关键词文件 {filename} 不存在。")
return keywords
# 使用fetch_keywords函数替代原有的keywords定义
keywords = fetch_keywords()
# 文件路径和文件名
output_file = os.path.join(os.getcwd(), '附加文件/uid.txt')
whitelist_file = os.path.join(os.getcwd(), '附加文件/whitelist.txt')
# 如果文件存在则删除
if os.path.exists(output_file):
os.remove(output_file)
else:
print(f"文件 {output_file} 不存在,无需删除。")
# 定义搜索函数
def search_and_extract_uid(keyword):
# 构建搜索链接
base_url = 'https://search.bilibili.com/video?'
search_params1 = {
'keyword': keyword,
'from_source': 'webtop_search',
}
search_params2 = {
'keyword': keyword,
'from_source': 'webtop_search',
'order': 'pubdate'
}
search_url1 = base_url + urlencode(search_params1)
search_url2 = base_url + urlencode(search_params2)
print(search_url1)
print(search_url2)
try:
# 添加头部信息
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
'Chrome/91.0.4472.124 Safari/537.36',
}
# 发起HTTP GET请求获取搜索结果页面内容
response = requests.get(search_url1, headers=headers, timeout=(5, 10))
response.raise_for_status() # 检查请求是否成功
# 使用BeautifulSoup加载HTML内容
soup = BeautifulSoup(response.text, 'html.parser')
# 存储解析出的UID列表
uid_list = []
# 使用CSS选择器定位搜索结果的链接,并提取UID
count = 0 # 计数器,用于限制获取的UID数量
for link in soup.select('.bili-video-card .bili-video-card__info--owner'):
if count >= 5:
break
href = link['href']
uid = href.split('/')[-1] # 获取链接中最后的数字部分作为UID
uid_list.append(uid)
count += 1
# 将UID列表传递给处理函数(这里假设是process_uid_list函数)
process_uid_list(keyword, uid_list)
response = requests.get(search_url2, headers=headers, timeout=(5, 10))
response.raise_for_status() # 检查请求是否成功
# 使用BeautifulSoup加载HTML内容
soup = BeautifulSoup(response.text, 'html.parser')
# 存储解析出的UID列表
uid_list = []
# 使用CSS选择器定位搜索结果的链接,并提取UID
count = 0 # 计数器,用于限制获取的UID数量
for link in soup.select('.bili-video-card .bili-video-card__info--owner'):
if count >= 3:
break
href = link['href']
uid = href.split('/')[-1] # 获取链接中最后的数字部分作为UID
uid_list.append(uid)
count += 1
# 将UID列表传递给处理函数(这里假设是process_uid_list函数)
process_uid_list(keyword, uid_list)
except requests.exceptions.RequestException as e:
print(f"关键词 \"{keyword}\" 搜索页面请求失败:", e)
# 定义处理UID列表的函数(追加写入同一文件)
def process_uid_list(keyword, uid_list):
print(f"关键词 \"{keyword}\" 的UID列表:", uid_list)
# 将UID列表追加写入文件
with open(output_file, 'a', encoding='utf-8') as f:
f.write(f"关键词 \"{keyword}\" 的UID列表:\n")
for uid in uid_list:
f.write(uid + '\n')
f.write('\n') # 添加空行分隔每个关键词的UID列表
print(f"关键词 \"{keyword}\" 的UID列表已追加至文件: uid.txt")
# 主函数,循环运行搜索和处理
def main():
while True:
unique_uids = set() # 使用集合存储唯一的 UID
# 遍历关键词列表,进行搜索和处理
for keyword in keywords:
search_and_extract_uid(keyword)
print('读取当前文件中所有的 UID,并添加到集合中去重')
with open(output_file, 'r', encoding='utf-8') as f:
lines = f.readlines()
for line in lines:
uid = line.strip()
if uid.isdigit(): # 假设 UID 是数字格式
unique_uids.add(uid)
try:
shutil.copy('附加文件/uid.txt', '附加文件/uid_backup.txt')
print(f"成功保存备份")
except IOError as e:
print(f"复制保存备份时发生错误:{e}")
whitelist_url = 'https://raw.kkgithub.com/ayyayyayy2002/BiliBiliVideoAutoReport/main/云端文件/whitelist.txt'
whitelist_filename = '附加文件/whitelist.txt'
try:
response = requests.get(whitelist_url, timeout=(5, 10))
if response.status_code == 200:
with open(whitelist_filename, 'wb') as f_out:
f_out.write(response.content)
print(f"成功下载文件 {whitelist_url} 并保存为 {whitelist_filename}")
else:
print(f"无法访问 {whitelist_url},状态码:{response.status_code}")
except requests.exceptions.RequestException as e:
print(f"下载文件时发生请求异常:{e}")
except IOError as e:
print(f"文件操作发生错误:{e}")
except Exception as e:
print(f"发生未知错误:{e}")
# 处理 whitelist_file
with open(whitelist_file, 'r', encoding='utf-8') as f:
lines = f.readlines()
for line in lines:
uid = line.strip()
if uid.isdigit(): # 假设 UID 是数字格式
unique_uids.add(uid)
blacklist_url = 'https://raw.kkgithub.com/ayyayyayy2002/BiliBiliVideoAutoReport/main/云端文件/blacklist.txt'
blacklist_filename = '附加文件/blacklist.txt'
try:
response = requests.get(blacklist_url, timeout=(5, 10))
if response.status_code == 200:
with open(blacklist_filename, 'wb') as f_out:
f_out.write(response.content)
print(f"成功下载文件 {blacklist_url} 并保存为 {blacklist_filename}")
else:
print(f"无法访问 {blacklist_url},状态码:{response.status_code}")
except requests.exceptions.RequestException as e:
print(f"下载文件时发生请求异常:{e}")
except IOError as e:
print(f"文件操作发生错误:{e}")
except Exception as e:
print(f"发生未知错误:{e}")
print('继续执行其他操作,不受文件下载错误的影响')
exclude_uids = set()
with open('附加文件/blacklist.txt', 'r', encoding='utf-8') as exclude_file:
exclude_lines = exclude_file.readlines()
for line in exclude_lines:
exclude_uid = line.strip()
if exclude_uid.isdigit(): # 假设 UID 是数字格式
exclude_uids.add(exclude_uid)
print('从 unique_uids 中移除在 exclude_uids 中存在的 UID')
unique_uids -= exclude_uids
print('将唯一的 UID 列表按格式写入文件')
with open(output_file, 'w', encoding='utf-8') as f:
for uid in unique_uids:
f.write(uid + '\n')
print('所有关键词搜索和处理完成后,通过虚拟机在指定路径运行 report.py')
while True: # 无限循环以重启 report.py
# 启动 report.py
base_dir = os.path.dirname(os.path.abspath(__file__))
# 使用相对路径设置 Python 可执行文件和脚本路径
python_executable = os.path.join(base_dir, 'venv', 'Scripts', 'python.exe')
report_script = os.path.join(base_dir, 'Report.py')
# 启动子进程
report_process = subprocess.Popen([python_executable, report_script], shell=True)
while report_process.poll() is None: # 等待 report.py 结束
time.sleep(1) # 等待1秒钟
# 检查 report.py 的退出状态
if report_process.returncode == 0:
print("report.py 正常退出,正在重新启动 getuid.py...")
subprocess.run([sys.executable, __file__]) # 重新启动 getuid.py
break # 跳出循环,防止再次进入无限循环
else:
print(f"report.py 出现错误,返回码: {report_process.returncode},正在重新运行 report.py...")
if __name__ == "__main__":
main()