Skip to content

Commit

Permalink
add configuration option for additional request headers
Browse files Browse the repository at this point in the history
add a pre-navigation hook to add those headers to the request
  • Loading branch information
dardanbujupaj committed Mar 18, 2024
1 parent d12a41f commit 4400157
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 2 deletions.
20 changes: 18 additions & 2 deletions src/crawler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import {
PuppeteerCrawlingContext,
PuppeteerCrawlerOptions,
RequestQueue,
PuppeteerHook,
} from 'crawlee'

import { minimatch } from 'minimatch'
Expand Down Expand Up @@ -50,8 +51,8 @@ export class Crawler {
this.config.strategy == 'docssearch'
? new DocsearchScraper(this.sender, this.config)
: this.config.strategy == 'schema'
? new SchemaScraper(this.sender, this.config)
: new DefaultScraper(this.sender, this.config)
? new SchemaScraper(this.sender, this.config)
: new DefaultScraper(this.sender, this.config)
}

async run() {
Expand All @@ -62,12 +63,27 @@ export class Crawler {
//Create the router
const router = createPuppeteerRouter()


// type DefaultHandler = Parameters<typeof router.addDefaultHandler>[0];
router.addDefaultHandler(this.defaultHandler.bind(this))

// Pre-navigation hooks handed to the crawler. When the config supplies
// `additional_request_headers`, a single hook is installed that registers a
// request interceptor merging those headers into each intercepted request;
// otherwise the list is empty and no interception is set up.
const preNavigationHooks: PuppeteerHook[] = this.config.additional_request_headers ? [
  async (crawlingContext) => {
    // Await the registration so the hook does not resolve before the
    // interceptor is in place (registration is asynchronous in Crawlee).
    await crawlingContext.addInterceptRequestHandler(async (request) => {
      // Await `continue` so a failure surfaces through the handler's promise
      // instead of becoming an unhandled rejection.
      await request.continue({
        headers: {
          ...request.headers(),
          // Spread last: configured headers override the request's own
          // headers on key collision.
          ...this.config.additional_request_headers,
        },
      })
    })
  },
] : []

const puppeteerCrawlerOptions: PuppeteerCrawlerOptions = {
requestQueue,
requestHandler: router,
preNavigationHooks: preNavigationHooks,
launchContext: {
launchOptions: {
headless: this.config.headless || true,
Expand Down
1 change: 1 addition & 0 deletions src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ export type Config = {
meilisearch_api_key: string
start_urls: string[]
urls_to_exclude?: string[]
additional_request_headers?: Record<string, string>
queue?: string[]
primary_key?: string
batch_size?: number
Expand Down

0 comments on commit 4400157

Please sign in to comment.