-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcrawler_config.json
22 lines (22 loc) · 989 Bytes
/
crawler_config.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
{
"initial_urls": ["https://developers.google.com/gmail"],
"url_patterns": [
"https://developers\\.google\\.com/gmail/api/guides",
"https://developers\\.google\\.com/gmail/imap/imap-smtp",
"https://developers\\.google\\.com/gmail/imap/imap-smtp",
"https://developers\\.google\\.com/gmail/postmaster",
"https://developers\\.google\\.com/gmail/design",
"https://developers\\.google\\.com/gmail.*",
"https://developers\\.google\\.com/workspace/add-ons/gmail.*",
"https://developers\\.google\\.com/apps-script/reference/gmail.*"
],
"ignore_patterns": [
"https://developer\\.chrome\\.com/docs/extensions/reference/api/.*#.*",
"https://developer\\.chrome\\.com/docs/extensions/reference/api/.*\\?hl=(?!zh-cn).*",
"https://developer\\.chrome\\.com/docs/extensions/mv2/.*",
".*\\.(js|css|png|jpg|jpeg|gif|svg)$"
],
"max_urls": 100,
"max_workers": 10,
"crawl_delay": 1
}