-
Notifications
You must be signed in to change notification settings - Fork 0
/
jd2cr.schema.json
146 lines (146 loc) · 7.23 KB
/
jd2cr.schema.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/sergxerj/jdownloader2-crawler-rule-json-schema/main/jd2cr.schema.json",
"title": "Jdownloader2 Crawler Rule definition (single-rule file)",
"description": "A crawler rule.\n - Anything prefixed as \"HTML RegEx\" will do Java-flavored pattern-matching on the page's raw (unmodified by javascript) HTML.",
"type": "object",
"required": ["enabled", "name", "rule", "pattern"],
"additionalProperties": false,
"properties": {
"enabled": {
"title": "ENABLED flag",
"description": "- Type: Boolean\n- Applies to rule: ALL\n- Purpose: Enables or disables this rule",
"type": "boolean"
},
"cookies": {
"title": "Cookie list for the site",
"description": "- Type: Array of length-2 Arrays\n- Applies to rule: DIRECTHTTP, DEEPDECRYPT, SUBMITFORM or FOLLOWREDIRECT\n- Purpose: A list of length-2 arrays in the form [[\"cookieName\", \"cookieValue\"],[..., ...]...]. Here you can put in your personal cookies e.g. login cookies of websites which JD otherwise fails to parse.Also if \"updateCookies\" is enabled, JD will update these with all cookies it receives from the website(s) that match the \"pattern\" property.",
"examples": [[["cookieName", "cookieValue"]]],
"type": "array",
"items": {
"type": "array",
"items": [
{"type": "string"},
{"type": "string"}
]
}
},
"updateCookies": {
"title": "Updates site cookies",
"description": "- Type: Boolean\n- Applies to rule: DIRECTHTTP, DEEPDECRYPT, SUBMITFORM or FOLLOWREDIRECT\n- Purpose: If the target websites returns new cookies, save these inside this rule and update this rule.",
"type": "boolean"
},
"logging": {
"title": "Enable logging",
"description": "- Type: Boolean\n- Applies to rule: ALL\n- Purpose: Enable this for support purposes. Logs of your LinkCrawler Rules can be found in your JD install dir/logs/: LinkCrawlerRule.<RuleID>.log.0 and /LinkCrawlerDeep.*",
"type": "boolean"
},
"maxDecryptDepth": {
"title": "Limits crawling depth",
"description": "- Type: Integer\n- Applies to rule: ALL\n- Purpose: How many layers deep do should your rule crawl (e.g. rule returns URLs matching the same rule again recursively - how often is this chain allowed to happen?)",
"examples": [1],
"type": "integer"
},
"id": {
"title": "Rule ID",
"description": "- Type: Integer\n- Applies to rule: ALL\n- Purpose: Auto generated ID of the rule. Normally leave this blank and JD2 will autoinsert.",
"type": "integer"
},
"name": {
"title": "Name of the rule",
"description": "- Type: String\n- Applies to rule: ALL\n- Purpose: name of the rule.",
"type": "string"
},
"pattern": {
"title": "This rule will be used for all URLs matching this pattern",
"description": "- Type: RegEx\n- Applies to rule: ALL\n- Purpose: defines on which URLs will this rule apply by matching it to the pattern",
"examples": ["https?://www\\.example\\.com/subdir1/subdir2/subdir3"],
"type": "string",
"format": "regex"
},
"rule": {
"title": "Type of the rule",
"enum": ["DEEPDECRYPT", "REWRITE", "DIRECTHTTP", "FOLLOWREDIRECT", "SUBMITFORM"],
"type": "string"
},
"packageNamePattern": {
"title": "Define package name and group al crawled urls for the current job under it",
"description": "- Type: HTML RegEx\n- Applies to rule: DEEPDECRYPT\n- Purpose: All URLs crawled by this rule will be grouped into the same package that is the HTML RegEx's first capture",
"examples": ["<title>(.*?)</title>"],
"type": "string",
"format": "regex"
},
"passwordPattern": {
"title": "Pattern to find extraction passwords for downloaded archives",
"description": "- Type: HTML RegEx or null\n- Applies to rule: DEEPDECRYPT\n- Purpose: Matches against archive extraction password that may be found as text inside the page's (unmodified by javascript) HTML code. First returned capture must be the password.",
"examples": ["password:([^>]+)>"],
"type": "string",
"format": "regex"
},
"formPattern": {
"title": "Find (and submit) HTML Form",
"description": "- Type: HTML RegEx\n- Applies to rule: DEEPDECRYPT\n- Purpose:",
"examples": ["<form id=\"example\">(.*?)</form>"],
"type": "string",
"format": "regex"
},
"deepPattern": {
"title": "URLs to look for inside the page.",
"description": "- Type: HTML RegEx or null\n- Applies to rule: DEEPDECRYPT\n- Purpose: Which URLs should this rule look for inside the page's (unmodified by javascript) HTML code. null (or blank) = auto scan and return all supported URLs found in HTML code.\nKeep in mind that, if the url's found in the html are relative (e.g. starting with a slash / character instead of a protocol like http or a domain (website root) name) you WILL have to enclose the entire expression in quotes (outside of the parentheses that do capturing) like \\\"(...)\\\" AND enclose the part that would match the missing part of the url (i.e. from the protocol to the slash-or-other-character it begins with) in an 'OPTIONAL, NON CAPTURING' group (?:...)?. Resulting in the following pattern for pretty much every case \\\"((?:missing part of full url)?rest of url that is in the html)\\\"",
"examples": [
"(https?://absolute\\.url\\.example\\.com/subdir1/.*?\\.ext)",
"\"((?:https?://missing\\.domain\\.example\\.com\\)?/partial_url_existing_in_page)\""
],
"oneOf": [
{"type": "string"},
{"type": "null"}
],
"format": "regex"
},
"rewriteReplaceWith": {
"title": "Pattern for new URL",
"description": "- Type: String\n- Applies to rule: REWRITE\n- Purpose:Pattern for new URL, can use captures from \"pattern\" in the $1 form",
"examples": ["https://example.com/$1"],
"type": "string",
"format": "regex"
}
},
"allOf": [
{
"if": {
"$comment": "Check if rule is not DEEPDECRYPT and disallow deepPattern. packageNamePattern and passwordPattern being non-null",
"properties": { "rule": { "not": { "const": "DEEPDECRYPT" } }
}
},
"then": {
"properties": {
"deepPattern": { "type": "null" } ,
"packageNamePattern": { "type": "null" } ,
"passwordPattern": { "type": "null" }
}
}
},
{
"if": {
"$comment": "Check if rule is not REWRITE and disallow rewriteReplaceWith being non-null",
"properties": { "rule": { "not": { "const": "REWRITE" } }
}
},
"then": {
"properties": { "rewriteReplaceWith": { "type": "null" } }
}
},
{
"if": {
"$comment": "Check if rule is not DIRECTHTTP, DEEPDECRYPT, SUBMITFORM or FOLLOWREDIRECT and disallow updateCookies being non-null",
"properties": { "rule": { "not": { "oneOf" : [ { "const": "DIRECTHTTP" }, { "const": "DEEPDECRYPT" }, { "const": "SUBMITFORM" }, { "const": "FOLLOWREDIRECT" }] } } }
},
"then": {
"properties": {
"updateCookies": { "type": "null" },
"cookies": { "type": "null" }
}
}
}
]
}