forked from liuxing/mm-spider
-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.js
87 lines (77 loc) · 1.99 KB
/
app.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
// // 'use strict'
const request = require('superagent')
const cheerio = require('cheerio')
const fs = require('fs-extra')
const path = require('path')
// Base URL of the tag index; getUrl appends the page number to it.
const url = 'http://www.mmjpg.com/tag/meitui/'
/**
 * Returns a uniformly distributed random integer in the closed range
 * [min, max].
 *
 * Math.floor over (range + 1) equal buckets is used instead of Math.round,
 * because rounding gives the two endpoints only half the probability of the
 * values in between.
 *
 * @param {number} min - One bound (inclusive); expected to be an integer.
 * @param {number} max - The other bound (inclusive); expected to be an integer.
 * @returns {number} An integer between the two bounds, both included.
 */
function random(min, max) {
  // Accept the bounds in either order.
  const low = Math.min(min, max)
  const high = Math.max(min, max)
  return low + Math.floor(Math.random() * (high - low + 1))
}
/**
 * Collects the album URLs listed on pages 1 to 10 of the tag index.
 *
 * Each page is requested from the module-level `url` with the page number
 * appended, and the href of the anchor inside every `.pic li` item is
 * gathered.
 *
 * @returns {Promise<string[]>} Album URLs in the order they were found.
 */
async function getUrl() {
  const linkArr = []
  for (let page = 1; page <= 10; page++) {
    const res = await request.get(url + page)
    const $ = cheerio.load(res.text)
    $('.pic li').each((_index, elem) => {
      const link = $(elem).find('a').attr('href')
      // An item without an anchor href yields undefined, which would later
      // be passed to getPic as a URL; skip it.
      if (link) {
        linkArr.push(link)
      }
    })
  }
  return linkArr
}
/**
 * Downloads the images of one album into its own folder under `mm/`.
 *
 * The album page supplies the title (used as the folder name) and the page
 * count. Each numbered sub-page is then fetched to find the image address,
 * with a random pause of 1-5 seconds between pages.
 *
 * @param {string} url - Album URL.
 * @returns {Promise<void>} Settles once every sub-page has been processed.
 */
async function getPic(url) {
  const res = await request.get(url)
  const $ = cheerio.load(res.text)
  // One folder per album title. The title comes from the remote page, so
  // path separators are replaced to keep it a single path segment.
  const dir = $('.article h2').text().trim().replace(/[\\/]/g, '_')
  console.log(`创建${dir}文件夹`)
  // ensureDir creates missing parent folders and does not fail when the
  // folder already exists, so the script can be re-run.
  await fs.ensureDir(path.join(__dirname, 'mm', dir))
  const pageCount = parseInt($('#page .ch.all').prev().text(), 10)
  for (let page = 1; page <= pageCount; page++) {
    const pageUrl = `${url}/${page}`
    const data = await request.get(pageUrl)
    const page$ = cheerio.load(data.text)
    // Real address of the image
    const imgUrl = page$('#content img').attr('src')
    if (imgUrl) {
      // Awaiting keeps downloads sequential when download() returns a
      // promise and is harmless when it returns nothing.
      await download(dir, imgUrl)
    } else {
      console.warn(`No image found on ${pageUrl}`)
    }
    await sleep(random(1000, 5000))
  }
}
/**
 * Downloads one image into the album folder `mm/<dir>/`.
 *
 * The request is sent with a Referer header for the site (presumably
 * required by the image host; not verifiable from this file).
 *
 * @param {string} dir - Album folder name below `mm/`.
 * @param {string} imgUrl - URL of the image.
 * @returns {Promise<boolean>} Resolves to true once the file has been
 *   written, or false if the download failed. Never rejects, so callers that
 *   ignore the return value cannot cause an unhandled rejection.
 */
function download(dir, imgUrl) {
  if (typeof imgUrl !== 'string' || imgUrl === '') {
    console.error(`Skipping download: invalid image URL ${imgUrl}`)
    return Promise.resolve(false)
  }
  console.log(`正在下载${imgUrl}`)
  // Drop any query string or fragment so it does not end up in the file name.
  const filename = imgUrl.split(/[?#]/)[0].split('/').pop()
  const target = path.join(__dirname, 'mm', dir, filename)
  return new Promise((resolve) => {
    const out = fs.createWriteStream(target)
    const fail = (err) => {
      console.error(`Failed to download ${imgUrl}: ${err.message}`)
      // Release the file handle if the request failed mid-transfer.
      out.destroy()
      resolve(false)
    }
    // Without a listener, a stream 'error' event is thrown as an uncaught
    // exception and takes the whole process down.
    out.on('error', fail)
    out.on('finish', () => resolve(true))
    const req = request.get(imgUrl).set({ Referer: 'http://www.mmjpg.com' })
    req.on('error', fail)
    req.pipe(out)
  })
}
/**
 * Pauses for the given duration.
 *
 * @param {number} time - Delay in milliseconds, passed to setTimeout.
 * @returns {Promise<undefined>} Resolves with no value once the timer fires.
 */
function sleep(time) {
  return new Promise((done) => {
    setTimeout(done, time)
  })
}
/**
 * Entry point: collects the album URLs, then fetches the albums one at a
 * time.
 *
 * A failure in a single album is logged and the crawl moves on to the next
 * one, so one bad page does not discard the rest of the run.
 *
 * @returns {Promise<void>} Settles after every album has been attempted.
 */
async function init() {
  const albumUrls = await getUrl()
  for (const albumUrl of albumUrls) {
    try {
      await getPic(albumUrl)
    } catch (err) {
      console.error(`Failed to fetch album ${albumUrl}:`, err)
    }
  }
}

// Report a fatal error instead of leaving the promise rejection unhandled.
init().catch((err) => {
  console.error(err)
  process.exitCode = 1
})