forked from ShaoQiBNU/YouTube_get_video
-
Notifications
You must be signed in to change notification settings - Fork 0
/
youtube_download.py
58 lines (43 loc) · 1.72 KB
/
youtube_download.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#! /usr/bin/env python3
# author: Qi Shao
########### load packages ############
import os
import time
from urllib.parse import parse_qs, quote_plus, urlparse

from bs4 import BeautifulSoup
from selenium import webdriver
########### Launch the Chrome browser ############
# chromedriver download page: http://npm.taobao.org/mirrors/chromedriver/
# The chromedriver location can be overridden with the CHROMEDRIVER_PATH
# environment variable; when it is unset, the path this script has always
# used is kept as the default, so existing setups behave the same.
chromedriver_path = os.environ.get(
    "CHROMEDRIVER_PATH",
    "/home/sensetime/Desktop/code/anet_dataset/chromedriver",
)
# NOTE(review): `executable_path` is believed to be deprecated in Selenium 4
# in favour of a Service object -- confirm against the installed version.
driver = webdriver.Chrome(executable_path=chromedriver_path)
driver.get("https://www.youtube.com/")
########### Maximize the window ############
driver.maximize_window()
time.sleep(1)
driver.refresh()
########### Fetch the cookies ############
# NOTE(review): `cookie` is not read anywhere in the visible script.
cookie = driver.get_cookies()
########### Run the search queries ############
def _extract_video_id(href):
    """Return the video id from a ``/watch?v=...`` link, or None.

    Only the value of the ``v`` query parameter is returned, so trailing
    parameters are not included, e.g. ``/watch?v=abc123&pp=xyz`` gives
    ``abc123``. Links that are missing or do not contain ``/watch?v=``
    give None.
    """
    if href is None or "/watch?v=" not in href:
        return None
    values = parse_qs(urlparse(href).query).get("v")
    return values[0] if values else None


def execute_times(times):
    """Print the video ids found on the page currently loaded in ``driver``.

    The page is parsed once as it is, then once more after each scroll to
    the bottom, i.e. ``times + 1`` parses and ``times`` scrolls. Every id is
    printed only once per call, even though each parse sees the whole page
    again.

    Args:
        times: how many times to scroll down to load further results.
    """
    already_printed = set()
    for attempt in range(times + 1):
        ########### Parse the html ############
        soup = BeautifulSoup(driver.page_source, 'lxml')
        ########### Collect the video ids ############
        for anchor in soup.find_all('a', id="thumbnail"):
            video_id = _extract_video_id(anchor.get("href"))
            if video_id is not None and video_id not in already_printed:
                already_printed.add(video_id)
                print(video_id)
        if attempt == times:
            # Nothing is parsed after the final pass, so scrolling and
            # waiting again would only waste time.
            break
        ########### Simulate scrolling down ############
        js = "var q=document.documentElement.scrollTop=100000000000"
        driver.execute_script(js)
        time.sleep(3)  # wait for the page to load more results


try:
    for query in ['cat', 'dog']:
        # Per the original author's note, the `sp` filter restricts the
        # results to videos shorter than 4 minutes. The query is URL-encoded
        # so that spaces or special characters do not break the URL.
        url = ('https://www.youtube.com/results?search_query='
               + quote_plus(query) + '&sp=EgQQARgB')
        driver.get(url)
        print(query)
        ########### Scroll down 3 times ############
        execute_times(3)
        time.sleep(1)
finally:
    ########### Quit Chrome ############
    # Always close the browser, even if a query fails part-way through.
    driver.quit()