diff --git a/src/crawler.ts b/src/crawler.ts index f4dc2d6..e42da1b 100644 --- a/src/crawler.ts +++ b/src/crawler.ts @@ -48,7 +48,7 @@ export class Crawler { this.scraper = this.config.strategy == 'docssearch' - ? new DocsearchScraper(this.sender) + ? new DocsearchScraper(this.sender, this.config) : this.config.strategy == 'schema' ? new SchemaScraper(this.sender, this.config) : new DefaultScraper(this.sender, this.config) diff --git a/src/scrapers/docssearch.ts b/src/scrapers/docssearch.ts index eec617e..b3c7fdb 100644 --- a/src/scrapers/docssearch.ts +++ b/src/scrapers/docssearch.ts @@ -1,5 +1,6 @@ import { v4 as uuidv4 } from 'uuid' import { Sender } from '../sender' +import { Config } from '../types' import { Page } from 'puppeteer' import { DocsSearchDocument, @@ -34,11 +35,14 @@ const TAG_LEVELS: Record = { export default class DocsearchScaper { sender: Sender + settings: Config['meilisearch_settings'] - constructor(sender: Sender) { + constructor(sender: Sender, config?: Config) { console.info('DocsearchScaper::constructor') this.sender = sender - void this.sender.updateSettings({ + + // Predefined settings + const defaultSettings = { distinctAttribute: 'url', rankingRules: [ 'words', @@ -65,7 +69,15 @@ export default class DocsearchScaper { 'hierarchy_lvl0', 'content', ], - }) + } + + // Merge user-defined settings with predefined settings + this.settings = { + ...defaultSettings, + ...(config?.meilisearch_settings || {}), + } + + void this.sender.updateSettings(this.settings) } _amount_of_hierarchies(pageMap: DocsSearchDocument) {