-
Notifications
You must be signed in to change notification settings - Fork 0
/
parser.js
101 lines (87 loc) · 3.09 KB
/
parser.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
const request = require('request')
const cheerio = require('cheerio')
const moment = require('moment')
const readline = require('readline')
const filter = require('./filter')
// Shared readline interface bound to the process's stdin/stdout.
// askQuestion() uses it to prompt the user interactively.
// NOTE(review): nothing visible in this file closes the interface; an open
// readline interface on stdin may keep the process alive — confirm whether
// that is intended.
const rl = readline.createInterface({
input: process.stdin,
output: process.stdout
})
/**
 * Ask the user a yes/no question on the shared readline interface `rl`.
 *
 * The answer is trimmed and lower-cased before it is inspected, so "Y",
 * " yes " and similar are accepted. An empty answer (just pressing Enter)
 * counts as "yes".
 *
 * @param {string} question - Prompt text handed to `rl.question`.
 * @returns {Promise<boolean>} Resolves to `true` for an empty answer, "y" or
 *   "yes" (case-insensitive); resolves to `false` for anything else.
 */
function askQuestion (question) {
  return new Promise(resolve => {
    rl.question(question, (answer) => {
      // Normalise so that letter case and stray whitespace do not turn a "yes" into a "no".
      const normalized = String(answer || '').trim().toLowerCase()
      resolve(normalized === '' || normalized === 'y' || normalized === 'yes')
    })
  })
}
/**
 * Download a page with `request` and parse the response body with cheerio.
 *
 * The request is sent with a desktop Chrome User-Agent header.
 *
 * @param {string} url - URL of the page to download.
 * @returns {Promise<Function>} Resolves to the cheerio root function (`$`)
 *   for the downloaded document. Rejects with the error reported by
 *   `request`, or with any error thrown while parsing the body.
 */
function getPage (url) {
  return new Promise((resolve, reject) => {
    const options = {
      url,
      headers: {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36'
      }
    }

    request(options, (error, response, body) => {
      if (error) {
        return reject(error)
      }
      try {
        // The parser options are an argument of cheerio.load(), not of
        // resolve(): a promise's resolve() ignores everything after its
        // first argument, so passing them there silently drops them.
        return resolve(cheerio.load(body, { decodeEntities: false }))
      } catch (parseError) {
        // A throw inside this callback would otherwise escape the promise.
        return reject(parseError)
      }
    })
  })
}
/**
 * Collect the procurement item nodes of a listing page and, for as long as
 * the user agrees, of the pages that follow it.
 *
 * After each page that has an `a[data-cy="pagination__next"]` link the user
 * is asked "Next page?"; a positive answer makes the function fetch the
 * linked page recursively and append its items.
 *
 * @param {string} url - Absolute URL of the listing page to download.
 * @param {number} page - Number of the page being read; it is only
 *   incremented and passed on to the recursive call.
 * @returns {Promise<Array>} cheerio-wrapped
 *   `.section-procurement__item-information` elements, in page order.
 */
async function getInfoFromPage (url, page) {
  const $ = await getPage(url)

  const items = []
  $('.section-procurement__item-information').each((i, el) => {
    items.push($(el))
  })

  const nextLink = $('a[data-cy="pagination__next"]')
  const nextHref = nextLink.get(0) ? nextLink.attr('href') : undefined
  if (!nextHref) {
    // No next-page link (or a link without href): this is the last page.
    return items
  }
  if (!(await askQuestion("Next page?"))) {
    return items
  }

  // The link may be site-relative; resolve it against the current page URL
  // so the next request always gets an absolute URL.
  const nextUrl = new URL(nextHref, url).href
  const followingItems = await getInfoFromPage(nextUrl, page + 1)
  return items.concat(followingItems)
}
/**
 * Read the listing starting at `url` and build a plain record for every ad
 * whose title is accepted by `filter.filter`.
 *
 * @param {string} url - URL of the first listing page, passed to
 *   `getInfoFromPage` with page number 1.
 * @returns {Promise<Array<{title: string, number: (string|null), href: string}>>}
 *   One record per accepted ad:
 *   - `title`: text of the `a.section-procurement__item-title` link;
 *   - `number`: first match of an optional "РН" prefix followed by digits in
 *     the text of `.section-procurement__item-numbers span`, or `null` when
 *     that text contains no digits;
 *   - `href`: the title link's `href` attribute prefixed with the site origin.
 */
async function run (url) {
  const siteOrigin = 'https://www.tektorg.ru'
  // Optional "РН" prefix followed by a run of digits; built once, not per ad.
  const numberPattern = /(РН)?[0-9]+/

  const ads = await getInfoFromPage(url, 1)
  const result = []

  for (const inf of ads) {
    const titleLink = inf.find('a.section-procurement__item-title')
    const title = titleLink.text()

    // Run the title through the filter; a record is created only for a valid title.
    if (!filter.filter(title)) {
      continue
    }

    const numbersText = inf.find('.section-procurement__item-numbers span').text().trim()
    // match() returns null when there is no number; do not index into null.
    const numberMatch = numbersText.match(numberPattern)

    result.push({
      title,
      number: numberMatch ? numberMatch[0] : null,
      href: `${siteOrigin}${titleLink.attr('href')}`
    })
  }

  return result
}
/**
 * Public entry point of the module: parse the listing that starts at `url`.
 *
 * @param {string} url - URL of the first listing page, passed to `run`.
 * @returns {Promise<Array>} The records produced by `run(url)`. Any error
 *   raised while parsing propagates to the caller as a rejection.
 */
module.exports.parserResult = async function parserResult (url) {
  // No local try/catch: there is nothing to recover from here, so failures
  // are left to reject the returned promise unchanged.
  return run(url)
}