-
Notifications
You must be signed in to change notification settings - Fork 0
/
database.py
118 lines (102 loc) · 2.92 KB
/
database.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# -*- coding: utf-8 -*-
#
# Author: jimin.huang
#
# Created Time: 2015年03月06日 星期五 16时36分19秒
#
import torndb
import logging
import json
import environment
# Module-level shared MySQL connection to the company_service schema.
# Host is 'mysql'; credentials come from the environment helper module.
# NOTE(review): torndb.Connection is not thread-safe — assumes single-threaded
# use or external locking; confirm against callers.
COMPANY_SERVICE =\
torndb.Connection(
'mysql',
'company_service',
user=environment.get_user(),
password=environment.get_password(),
)
def release():
    """Close the shared company_service database connection."""
    connection = COMPANY_SERVICE
    connection.close()
class Crawler(object):
    '''
    Persistence object for crawler records.

    All methods operate on the ``contribute_crawler.crawler`` table through
    the module-level COMPANY_SERVICE connection.
    '''

    @staticmethod
    def select(crawler_id):
        '''
        Fetch the crawler row whose crawler_id matches.

        :param crawler_id: primary key of the crawler to load.
        :return: the row returned by torndb's ``get`` (a single row or None).
        '''
        # SECURITY FIX: pass crawler_id as a bound %s parameter so MySQLdb
        # escapes it, instead of interpolating it into the SQL string.
        sql = (
            'SELECT * '
            'FROM contribute_crawler.crawler '
            'WHERE crawler_id = %s'
        )
        return COMPANY_SERVICE.get(sql, crawler_id)

    @staticmethod
    def _update(index, value_dict, search_column='crawler_id'):
        '''
        Update crawler columns.

        :param index: value matched against ``search_column`` in WHERE.
        :param value_dict: mapping of column name -> new value.
        :param search_column: column used to locate the row.
        '''
        # Guard: an empty value_dict would render an invalid empty SET clause.
        if not value_dict:
            return
        # Fix column order so the placeholder list and the parameter list
        # are built from the same sequence.
        columns = list(value_dict)
        # BUG FIX: the original joined assignments with a bare space, which
        # yields invalid SQL ("SET a = 1 b = 2") for more than one column;
        # assignments must be comma-separated.
        # SECURITY FIX: values go through %s parameter binding (MySQLdb
        # escaping) rather than manual single-quote wrapping, which was
        # injection-prone. Column names cannot be bound as parameters, but
        # they come from internal callers, not user input.
        value_sql = ', '.join('{0} = %s'.format(col) for col in columns)
        sql = (
            'UPDATE contribute_crawler.crawler '
            'SET {value_sql} '
            'WHERE {search_column} = %s'
        ).format(
            value_sql=value_sql,
            search_column=search_column,
        )
        params = [value_dict[col] for col in columns]
        params.append(index)
        COMPANY_SERVICE.execute(sql, *params)

    @staticmethod
    def status(crawler_id, new_status, text=None, search_column='crawler_id'):
        '''
        Update the crawler's status column.

        :param crawler_id: value matched against ``search_column``.
        :param new_status: one of 'error', 'finished', 'pending', 'crawling'.
        :param text: optional extra detail, stored as "status:text".
        :param search_column: column used to locate the row.
        '''
        # Allowed status values; anything else is logged and ignored.
        _status = [
            'error',
            'finished',
            'pending',
            'crawling',
        ]
        if new_status not in _status:
            logging.error(
                'Error: ' + new_status + ' not defined in Crawler update')
            return
        # Append optional detail text to the status value.
        if text is not None:
            new_status = ''.join((new_status, ':', text))
        value_dict = {'crawler_status': new_status}
        Crawler._update(crawler_id, value_dict, search_column=search_column)

    @staticmethod
    def register(crawler_id, container):
        '''
        Record the container (job) id of a newly started crawler task.

        :param crawler_id: primary key of the crawler row to update.
        :param container: dict-like object with an 'Id' key (e.g. a Docker
            container description).
        '''
        container_id = container['Id']
        value_dict = {'crawler_jobid': container_id}
        Crawler._update(crawler_id, value_dict)
class Model(object):
    '''
    Persistence object for model records in the ``model`` table.
    '''

    @staticmethod
    def select(model_id):
        '''
        Fetch the model row whose model_id matches.

        :param model_id: primary key of the model to load.
        :return: the row returned by torndb's ``get`` (a single row or None).
        '''
        # SECURITY FIX: bind model_id as a %s parameter so MySQLdb escapes
        # it, instead of formatting it directly into the SQL string.
        sql = (
            'SELECT * '
            'FROM model '
            'WHERE model_id = %s'
        )
        return COMPANY_SERVICE.get(sql, model_id)