-
Notifications
You must be signed in to change notification settings - Fork 0
/
tao.py
27 lines (24 loc) · 1.33 KB
/
tao.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
#coding=utf-8
import re
import requests
import sys
sys.setdefaultencoding("utf-8")
def TaoWeb(str):
url = 'https://page.1688.com/html/homeIndex.html'
print requests.get(url)
payload = {'q': str,'s': '1','ie':'utf8'} #字典传递url参数
file = open('taobao_test.txt','w',encoding='utf-8')
for k in range(0,100): #100次,就是100个页的商品数据
payload ['s'] = 44*k+1 #此处改变的url参数为s,s为1时第一页,s为45是第二页,89时第三页以此类推
resp = requests.get(url, params = payload)
print(resp.url) #打印访问的网址
resp.encoding = 'utf-8' #设置编码
title = re.findall(r'"raw_title":"([^"]+)"',resp.text,re.I) #正则保存所有raw_title的内容,这个是书名,下面是价格,地址
price = re.findall(r'"view_price":"([^"]+)"',resp.text,re.I)
loc = re.findall(r'"item_loc":"([^"]+)"',resp.text,re.I)
x = len(title) #每一页商品的数量
for i in range(0,x) : #把列表的数据保存到文件中
file.write(str(k*44+i+1)+'书名:'+title[i]+'\n'+'价格:'+price[i]+'\n'+'地址:'+loc[i]+'\n\n')
file.close()
if __name__=='__main__':
TaoWeb(小礼物)#爬取商品为小礼物的信息