From 3d138a144fe624c6f7b7705623b63cba8d569885 Mon Sep 17 00:00:00 2001
From: kouloumos
Date: Fri, 13 Dec 2024 10:34:56 +0200
Subject: [PATCH] fix(bitcointalk): add missing configuration file

---
Review notes (this area sits after the `---` marker, so it is not part of
the commit message):

- The patch creates scraper/scrapy_sources_configs/bitcointalk.yaml with a
  single top-level `selectors` mapping containing `index_page` and
  `resource_page` sections of CSS selectors.
- NOTE(review): this patch was received with all line breaks and
  indentation stripped. The line structure below is reconstructed; it
  yields exactly the 31 added lines that the hunk header declares.
- NOTE(review): the YAML nesting is an assumption inferred from key order:
  `next_page` is placed as a sibling of `items`, and the per-post fields
  (`title`, `author`, `date`, `content`, `url`) are placed under
  `resource_page.items` next to `item_selector`. Confirm against the schema
  the scraper expects before applying.
- Scalar values are reproduced unchanged and left unquoted, as in the
  original, so the parsed values are the same.

 .../scrapy_sources_configs/bitcointalk.yaml   | 31 +++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 scraper/scrapy_sources_configs/bitcointalk.yaml

diff --git a/scraper/scrapy_sources_configs/bitcointalk.yaml b/scraper/scrapy_sources_configs/bitcointalk.yaml
new file mode 100644
index 0000000..97b4f8e
--- /dev/null
+++ b/scraper/scrapy_sources_configs/bitcointalk.yaml
@@ -0,0 +1,31 @@
+selectors:
+  index_page:
+    items:
+      item_selector:
+        selector: td.windowbg > span > a
+        attribute: href
+        multiple: true
+        pattern: \?topic=\d+
+    next_page:
+      selector: td.middletext span.prevnext:last-of-type a.navPages
+      attribute: href
+  resource_page:
+    items:
+      item_selector:
+        selector: table.bordercolor > tr > td > table > tr > td[class^="windowbg"]
+        multiple: true
+      title:
+        selector: .subject > a
+      author:
+        selector: .poster_info > b > a
+      date:
+        selector: .td_headerandpost .smalltext
+        transform: parse_date
+      content:
+        selector: .post
+      url:
+        selector: .subject a
+        attribute: href
+    next_page:
+      selector: td.middletext span.prevnext:last-of-type a.navPages
+      attribute: href