-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathrunner.py
79 lines (65 loc) · 2.89 KB
/
runner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import os
from argparse import ArgumentParser
from logging import getLogger
from pathlib import Path
from scrapy.crawler import CrawlerProcess
from spiders import ItemSpider, NPCSpider, ObjectSpider, QuestSpider, QuestXpSpider
from utils.paths import OUTPUT_DIR
class Runner:
    """Configures and runs a Scrapy crawl for one target/language/version triple.

    Output is written as JSON to ``OUTPUT_DIR/<target>/<lang>_data.json``.
    """

    def __init__(self, lang: str, target: str, version: str) -> None:
        """Store the crawl parameters and prepare the output directory.

        :param lang: language code to scrape (e.g. ``"en"``)
        :param target: what to scrape — one of ``"item"``, ``"npc"``,
            ``"object"``, ``"quest"``, ``"xp"``
        :param version: game version, e.g. ``"classic"``, ``"tbc"``, ``"wotlk"``
        """
        self.lang = lang
        self.target = target
        self.version = version
        self.logger = getLogger(__name__)
        self.target_dir = OUTPUT_DIR / target
        # parents/exist_ok: create the whole chain and tolerate an existing
        # directory (the original bare mkdir() raised if OUTPUT_DIR itself was
        # missing, and raced with concurrent runs).
        self.target_dir.mkdir(parents=True, exist_ok=True)
        # Keep the path relative to this file's directory so the feed URI is
        # stable regardless of the process's current working directory.
        self.target_dir = self.target_dir.relative_to(Path(__file__).resolve().parent)

    def run(self) -> None:
        """Remove any stale output file, then crawl the configured target.

        Logs an error and returns without crawling if ``self.target`` is not a
        known spider key (previously an unknown target silently started an
        empty crawl).
        """
        feed_uri = self.target_dir / f"{self.lang}_data.json"
        if feed_uri.exists():
            # Scrapy's JSON feed appends; remove the old file to avoid
            # concatenated/invalid JSON output.
            self.logger.info("Removing existing '%s' file", feed_uri)
            feed_uri.unlink()
        process = CrawlerProcess(settings={
            "LOG_LEVEL": "INFO",
            "FEED_EXPORT_ENCODING": "utf-8",
            "FEED_FORMAT": "json",
            "CONCURRENT_REQUESTS": 32,
            "FEED_URI": str(feed_uri),
            "COOKIES_ENABLED": False
        })
        # Dispatch table replaces the if/elif chain.
        spider_by_target = {
            "item": ItemSpider,
            "npc": NPCSpider,
            "object": ObjectSpider,
            "quest": QuestSpider,
            "xp": QuestXpSpider,
        }
        spider_cls = spider_by_target.get(self.target)
        if spider_cls is None:
            self.logger.error("Unknown target '%s'; nothing to crawl", self.target)
            return
        self.logger.info("Starting %s crawler", self.target)
        self.logger.info("Output goes to '%s'", feed_uri)
        process.crawl(spider_cls, lang=self.lang, version=self.version)
        process.start()
if __name__ == '__main__':
    # CLI entry point: parse scrape parameters and hand off to Runner.
    parser = ArgumentParser()
    # argparse `default=` replaces the manual post-parse None checks; `choices=`
    # rejects invalid values at parse time with a helpful message.
    parser.add_argument("-l", "--lang", type=str, default="en",
                        help="The language you want to scrape. Default: 'en'")
    parser.add_argument("-t", "--target", type=str, default="npc",
                        choices=["npc", "quest", "item", "object", "xp"],
                        help="The target you want to scrape. "
                             "Possible values are 'npc', 'quest', 'item', 'object' and 'xp'. Default: 'npc'")
    parser.add_argument("-v", "--version", type=str, default="wotlk",
                        choices=["classic", "tbc", "wotlk"],
                        help="The version of WoW Classic you want to scrape. "
                             "Possible values are 'classic', 'tbc' and 'wotlk'. Default: 'wotlk'")
    args = parser.parse_args()
    runner = Runner(args.lang, args.target, args.version)
    runner.run()