-
Notifications
You must be signed in to change notification settings - Fork 0
/
baidu.py
executable file
·130 lines (119 loc) · 4.27 KB
/
baidu.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import execjs
import requests
import re
# JavaScript source that is evaluated through execjs to compute the ``sign``
# field of a translation request. It defines two functions:
#
#   a(r, o)        - walks ``o`` three characters at a time; each step shifts
#                    ``r`` (right when the 2nd character is "+", left
#                    otherwise) by the amount encoded in the 3rd character and
#                    then either adds the shifted value to ``r`` (masked to 32
#                    bits, when the 1st character is "+") or XORs it into ``r``.
#   token(r, _gtk) - the entry point. Text longer than 30 characters is reduced
#                    to its first 10, middle 10 and last 10 characters; the text
#                    is expanded into byte values, folded together with the two
#                    dot-separated numbers of ``_gtk`` via ``a``, reduced modulo
#                    1e6 and returned as the string "<S>.<S ^ h>", where ``h``
#                    is the first number of ``_gtk``.
#
# NOTE: the script stores the first ``_gtk`` it receives in the global ``C``
# and reuses it for later calls made inside the same JavaScript context.
# NOTE(review): this looks like a copy of the signing routine used by the
# Baidu Translate web client - confirm before changing any constant.
JS_CODE = """
function a(r, o) {
for (var t = 0; t < o.length - 2; t += 3) {
var a = o.charAt(t + 2);
a = a >= "a" ? a.charCodeAt(0) - 87 : Number(a),
a = "+" === o.charAt(t + 1) ? r >>> a: r << a,
r = "+" === o.charAt(t) ? r + a & 4294967295 : r ^ a
}
return r
}
var C = null;
var token = function(r, _gtk) {
var o = r.length;
o > 30 && (r = "" + r.substr(0, 10) + r.substr(Math.floor(o / 2) - 5, 10) + r.substring(r.length, r.length - 10));
var t = void 0,
t = null !== C ? C: (C = _gtk || "") || "";
for (var e = t.split("."), h = Number(e[0]) || 0, i = Number(e[1]) || 0, d = [], f = 0, g = 0; g < r.length; g++) {
var m = r.charCodeAt(g);
128 > m ? d[f++] = m: (2048 > m ? d[f++] = m >> 6 | 192 : (55296 === (64512 & m) && g + 1 < r.length && 56320 === (64512 & r.charCodeAt(g + 1)) ? (m = 65536 + ((1023 & m) << 10) + (1023 & r.charCodeAt(++g)), d[f++] = m >> 18 | 240, d[f++] = m >> 12 & 63 | 128) : d[f++] = m >> 12 | 224, d[f++] = m >> 6 & 63 | 128), d[f++] = 63 & m | 128)
}
for (var S = h,
u = "+-a^+6",
l = "+-3^+b+-f",
s = 0; s < d.length; s++) S += d[s],
S = a(S, u);
return S = a(S, l),
S ^= i,
0 > S && (S = (2147483647 & S) + 2147483648),
S %= 1e6,
S.toString() + "." + (S ^ h)
}
"""
class Baidu:
    """Small client for the Baidu Translate web endpoints (fanyi.baidu.com).

    Constructing an instance fetches the main page to obtain the ``token``
    and ``gtk`` values that the translation endpoint requires. Every request
    is sent through one ``requests.Session`` with the same browser-like
    ``User-Agent`` header.
    """

    # Patterns that pull the credentials out of the main page source.
    _TOKEN_RE = re.compile(r"token: '(.*?)',")
    _GTK_RE = re.compile(r"window\.gtk = '(.*?)';")

    def __init__(self, fl, tl, timeout=10):
        """Create a client and load the token/gtk pair.

        Args:
            fl: Source language code (e.g. ``'zh'``), or ``None`` to detect
                the language of every query automatically.
            tl: Target language code (e.g. ``'en'``). Ignored when ``fl`` is
                ``None``, because the direction is then chosen automatically.
            timeout: Seconds to wait for each HTTP request; ``None`` waits
                indefinitely.

        Raises:
            RuntimeError: If the main page does not contain a token or gtk.
            requests.RequestException: If the main page cannot be fetched.
        """
        self.sess = requests.Session()
        self.headers = {
            'User-Agent':
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'
        }
        self.token = None
        self.gtk = None
        self.fl = fl
        self.tl = tl
        self.timeout = timeout
        # Fetch token and gtk. The page has to be loaded twice so that the
        # token is up to date; otherwise the API answers with error 998.
        self.loadMainPage()
        self.loadMainPage()

    def loadMainPage(self):
        """Load https://fanyi.baidu.com and store the token and gtk it embeds.

        Both values are extracted before either attribute is assigned, so a
        failed refresh never leaves a new token paired with an old gtk.

        Raises:
            RuntimeError: If the page does not contain a token or gtk.
            requests.RequestException: If the request itself fails.
        """
        url = 'https://fanyi.baidu.com'
        page = self.sess.get(
            url, headers=self.headers, timeout=self.timeout).text
        token_match = self._TOKEN_RE.search(page)
        gtk_match = self._GTK_RE.search(page)
        if token_match is None or gtk_match is None:
            raise RuntimeError(
                'baidu sdk error: token/gtk not found in main page')
        self.token = token_match.group(1)
        self.gtk = gtk_match.group(1)

    def langdetect(self, query):
        """Ask https://fanyi.baidu.com/langdetect for the language of *query*.

        The endpoint answers with JSON such as
        ``{"error":0,"msg":"success","lan":"en"}``.

        Returns:
            The detected language code, or ``None`` when the response does
            not report success.

        Raises:
            requests.RequestException: If the request itself fails.
        """
        url = 'https://fanyi.baidu.com/langdetect'
        response = self.sess.post(
            url,
            data={'query': query},
            headers=self.headers,
            timeout=self.timeout,
        )
        payload = response.json()
        if payload.get('msg') == 'success':
            return payload['lan']
        return None

    def dictionary(self, query):
        """Fetch the raw translation result for *query*.

        Posts to https://fanyi.baidu.com/v2transapi. When the source language
        is ``None`` the query language is detected first: English is
        translated to Chinese, anything else is treated as Chinese and
        translated to English.

        NOTE(review): the original docstring also said "max query count = 2";
        its meaning is not evident from the code - confirm with the author.

        Returns:
            The decoded JSON response, or ``None`` when the HTTP status is
            not 200.

        Raises:
            RuntimeError: If the response carries an ``error`` field. Per the
                original author's note, error 998 means the main page has to
                be reloaded to obtain a fresh token.
            requests.RequestException: If the request itself fails.
        """
        url = 'https://fanyi.baidu.com/v2transapi'
        # Compiled on every call on purpose: the script caches the gtk it is
        # given in a JavaScript global, so a reused context could keep signing
        # with a stale gtk after loadMainPage() refreshed it.
        sign = execjs.compile(JS_CODE).call('token', query, self.gtk)
        fl, tl = self.fl, self.tl
        if fl is None:
            lang = self.langdetect(query)
            fl, tl = ('en', 'zh') if lang == 'en' else ('zh', 'en')
        data = {
            'from': fl,
            'to': tl,
            'query': query,
            'simple_means_flag': 3,
            'sign': sign,
            'token': self.token,
        }
        response = self.sess.post(
            url, data=data, headers=self.headers, timeout=self.timeout)
        if response.status_code != 200:
            return None
        payload = response.json()
        if 'error' in payload:
            raise RuntimeError(
                'baidu sdk error: {}'.format(payload['error']))
        return payload

    def translate(self, text):
        """Translate *text* and return the result as one string.

        Returns:
            The translated segments joined with newlines.

        Raises:
            RuntimeError: If the API reports an error, or if no usable
                translation came back (non-200 status or a response without
                ``trans_result``).
        """
        result = self.dictionary(text)
        try:
            segments = result['trans_result']['data']
        except (KeyError, TypeError) as exc:
            # ``result`` is None for a non-200 status, or a payload in a
            # shape this client does not understand.
            raise RuntimeError(
                'baidu sdk error: unexpected response: {!r}'.format(result)
            ) from exc
        return '\n'.join(segment['dst'] for segment in segments)
if __name__ == "__main__":
    # Manual smoke test: translate one sample sentence in each direction
    # (Chinese -> English, then English -> Chinese) and print the results.
    samples = (
        ('zh', 'en', '我能吞下玻璃而不伤身体'),
        ('en', 'zh', 'I can swallow the glass without hurting my body'),
    )
    for source_lang, target_lang, sentence in samples:
        print(Baidu(source_lang, target_lang).translate(sentence))