# Scrapy settings for the `test` project.
#
# For the full list of settings and their documentation, see:
# https://docs.scrapy.org/en/latest/topics/settings.html

# Bot name implemented in the Scrapy project (also called the project name).
BOT_NAME = 'test'

SPIDER_MODULES = ['test.spiders']
NEWSPIDER_MODULE = 'test.spiders'

# Identify the crawler to servers via the User-Agent header.
USER_AGENT = 'test'

# Do not honour robots.txt rules.
ROBOTSTXT_OBEY = False

# Maximum concurrent requests performed by Scrapy (default: 16).
CONCURRENT_REQUESTS = 32
# Delay (in seconds) between consecutive requests to the same website.
DOWNLOAD_DELAY = 2
# Per-domain / per-IP concurrency caps. NOTE: when CONCURRENT_REQUESTS_PER_IP
# is non-zero, Scrapy uses it instead of the per-domain setting.
CONCURRENT_REQUESTS_PER_DOMAIN = 16
CONCURRENT_REQUESTS_PER_IP = 16

COOKIES_ENABLED = True
TELNETCONSOLE_ENABLED = False

DEFAULT_REQUEST_HEADERS = {
    'Referer': 'https://news.daum.net/',
}

SPIDER_MIDDLEWARES = {
    'test.middlewares.TestSpiderMiddleware': 543,
}

DOWNLOADER_MIDDLEWARES = {
    'test.middlewares.TestDownloaderMiddleware': 543,
}

# Disable the telnet console extension.
EXTENSIONS = {
    'scrapy.extensions.telnet.TelnetConsole': None,
}

# Pipeline settings.
ITEM_PIPELINES = {
    'test.pipelines.TestPipeline': 300,
}

# Throttle the request rate automatically based on server load.
AUTOTHROTTLE_ENABLED = True

# HTTP cache settings.
# NOTE(review): the original file assigned HTTPCACHE_ENABLED twice (True,
# then False). Under Python module semantics the later assignment wins, so
# the cache is effectively disabled; confirm this is the intended value.
HTTPCACHE_ENABLED = False
HTTPCACHE_EXPIRATION_SECS = 30
HTTPCACHE_DIR = 'httpcache'
HTTPCACHE_IGNORE_HTTP_CODES = []
HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'

# CSV feed export. NOTE: FEED_FORMAT / FEED_URI are deprecated in
# Scrapy >= 2.1 in favour of the FEEDS dict; kept here unchanged for
# backward compatibility.
FEED_FORMAT = "csv"
FEED_URI = "res.csv"
FEED_EXPORT_ENCODING = 'utf-8'

# Retry failed requests on transient server errors and request timeouts.
RETRY_ENABLED = True
RETRY_TIMES = 2
RETRY_HTTP_CODES = [500, 502, 503, 504, 408]

# Let spider callbacks receive (and parse) 404 responses instead of
# having the HttpError middleware filter them out.
HTTPERROR_ALLOWED_CODES = [404]
HTTPERROR_ALLOW_ALL = False