diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..5cc03f6 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,32 @@ +## 0.2.0 (2025-01-08) + +### Feat + +- add GitHub Actions workflows for automated source scraping +- **github**: add `checkout_commit` option for testing specific repo states +- **scrapers**: add GitHub metadata scraper for issues and pull requests +- **elastic**: add index mappings and initialization commands +- **scrapers**: add Stack Exchange API scraper (#90) +- **scrapers**: add Bitcoin Core PR Review Club +- **github**: add repository metadata field analyzer +- **scrapy**: add LLM analyzer and config validation +- **scrapy**: implement configuration-based system +- **notebooks**: add summary efficiency analysis notebook +- **scrapers**: add flexible scrapy-based scraper +- **data**: add structured author information with aliases +- **processors**: implement flexible document processing pipeline +- **scraper**: introduce scraping package + +### Fix + +- **bitcointalk**: add missing configuration file +- resolve ModuleNotFoundError for `common` by adding the repository root directory to sys.path +- import error for 6922e937 +- UTC `indexed_at` timestamp + +### Refactor + +- **scrapers**: standardize markdown as canonical format +- **logging**: migrate from MetadataDocument to ScraperRunDocument +- **pr-review-club**: follow existing schema +- **elasticsearch**: support for local instance (#82) diff --git a/pyproject.toml b/pyproject.toml index a42580a..2db7253 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "scraper" -version = "0.1.0" +version = "0.2.0" description = "A flexible multi-source scraper application designed to gather information from GitHub repositories and web pages. Leverages both Git-based and Scrapy-based approaches to handle different source types effectively." 
authors = ["kouloumos ", "urvish patel "] readme = "README.md" @@ -36,7 +36,7 @@ build-backend = "poetry.core.masonry.api" [tool.commitizen] name = "cz_conventional_commits" -version = "0.1.0" +version = "0.2.0" tag_format = "$version" version_files = [ "pyproject.toml:version",