From 3fd138392ae19ba6879832f596b82e0e8938d09f Mon Sep 17 00:00:00 2001 From: jackburridge Date: Fri, 26 Jun 2015 15:02:58 +0100 Subject: [PATCH 1/2] Recursive getStartUrls Allows for addresses like "http://example.com/search?mode=[1-10]&type=[1-10]" --- extension/scripts/Sitemap.js | 37 ++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/extension/scripts/Sitemap.js b/extension/scripts/Sitemap.js index 0cfaf47a..4c6ab3f8 100644 --- a/extension/scripts/Sitemap.js +++ b/extension/scripts/Sitemap.js @@ -66,18 +66,16 @@ Sitemap.prototype = { startUrls = [startUrls]; } - var urls = []; - startUrls.forEach(function(startUrl) { - - // zero padding helper - var lpad = function(str, length) { + var nextUrls = function(url){ + var urls = []; + var lpad = function(str, length) { while (str.length < length) str = "0" + str; return str; }; - + var re = /^(.*?)\[(\d+)\-(\d+)(:(\d+))?\](.*)$/; - var matches = startUrl.match(re); + var matches = url.match(re); if(matches) { var startStr = matches[2]; var endStr = matches[3]; @@ -88,21 +86,32 @@ Sitemap.prototype = { if(matches[5] !== undefined) { incremental = parseInt(matches[5]); } + var nextSet = nextUrls(matches[6]); for (var i = start; i <= end; i+=incremental) { - + + + var current; // with zero padding if(startStr.length === endStr.length) { - urls.push(matches[1]+lpad(i.toString(), startStr.length)+matches[6]); + + current = matches[1]+lpad(i.toString(), startStr.length); } else { - urls.push(matches[1]+i+matches[6]); + current = matches[1]+i; } + nextSet.forEach(function(next){ + urls.push(current+next); + }); } - return urls; - } - else { - urls.push(startUrl); + }else { + urls.push(url); } + return urls; + }; + var urls = []; + + startUrls.forEach(function(startUrl) { + urls.concat(nextUrls(startUrl)); }); return urls; From ff6610775f0bbc23845f3bc0e934efc272fabdfd Mon Sep 17 00:00:00 2001 From: jackburridge Date: Fri, 26 Jun 2015 19:33:24 +0100 Subject: [PATCH 2/2] Update Sitemap.js --- extension/scripts/Sitemap.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extension/scripts/Sitemap.js b/extension/scripts/Sitemap.js index 4c6ab3f8..9889a538 100644 --- a/extension/scripts/Sitemap.js +++ b/extension/scripts/Sitemap.js @@ -111,7 +111,7 @@ Sitemap.prototype = { var urls = []; startUrls.forEach(function(startUrl) { - urls.concat(nextUrls(startUrl)); + urls = urls.concat(nextUrls(startUrl)); }); return urls;