From b3902957a7942d889653ac8acba908c5e108ec00 Mon Sep 17 00:00:00 2001 From: CaroFG Date: Mon, 15 Jan 2024 18:52:47 +0100 Subject: [PATCH 1/2] Add config file support on docssearch strategy --- src/crawler.ts | 2 +- src/scrapers/docssearch.ts | 18 +++++++++++++++--- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/crawler.ts b/src/crawler.ts index f4dc2d6..e42da1b 100644 --- a/src/crawler.ts +++ b/src/crawler.ts @@ -48,7 +48,7 @@ export class Crawler { this.scraper = this.config.strategy == 'docssearch' - ? new DocsearchScraper(this.sender) + ? new DocsearchScraper(this.sender, this.config) : this.config.strategy == 'schema' ? new SchemaScraper(this.sender, this.config) : new DefaultScraper(this.sender, this.config) diff --git a/src/scrapers/docssearch.ts b/src/scrapers/docssearch.ts index eec617e..301eeda 100644 --- a/src/scrapers/docssearch.ts +++ b/src/scrapers/docssearch.ts @@ -1,5 +1,6 @@ import { v4 as uuidv4 } from 'uuid' import { Sender } from '../sender' +import { Config } from '../types' import { Page } from 'puppeteer' import { DocsSearchDocument, @@ -34,11 +35,14 @@ const TAG_LEVELS: Record = { export default class DocsearchScaper { sender: Sender + settings: Config['meilisearch_settings'] - constructor(sender: Sender) { + constructor(sender: Sender, config?: Config) { console.info('DocsearchScaper::constructor') this.sender = sender - void this.sender.updateSettings({ + + // Predefined settings + const defaultSettings = { distinctAttribute: 'url', rankingRules: [ 'words', @@ -65,7 +69,15 @@ export default class DocsearchScaper { 'hierarchy_lvl0', 'content', ], - }) + }; + + // Merge user-defined settings with predefined settings + this.settings = { + ...defaultSettings, + ...(config?.meilisearch_settings || {}), + }; + + void this.sender.updateSettings(this.settings); } _amount_of_hierarchies(pageMap: DocsSearchDocument) { From a8ffcf754020e2738f8e32967a9d9ac0b80669c4 Mon Sep 17 00:00:00 2001 From: CaroFG Date: Mon, 15 Jan 2024 19:12:58 +0100 Subject: [PATCH 2/2] Fix lint errors --- src/scrapers/docssearch.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/scrapers/docssearch.ts b/src/scrapers/docssearch.ts index 301eeda..b3c7fdb 100644 --- a/src/scrapers/docssearch.ts +++ b/src/scrapers/docssearch.ts @@ -69,15 +69,15 @@ export default class DocsearchScaper { 'hierarchy_lvl0', 'content', ], - }; + } // Merge user-defined settings with predefined settings this.settings = { ...defaultSettings, ...(config?.meilisearch_settings || {}), - }; + } - void this.sender.updateSettings(this.settings); + void this.sender.updateSettings(this.settings) } _amount_of_hierarchies(pageMap: DocsSearchDocument) {