diff --git a/config/config.js b/config/config.js new file mode 100644 index 0000000..ddcf23d --- /dev/null +++ b/config/config.js @@ -0,0 +1,27 @@ +const mysqlConfig = { + host: '47.100.166.125', + user: 'root', + database: 'sun', + password: '111!aaaA', + charset: 'utf8', //应该设置编码(省略在某些情况下会有错误) + //以下选项均为默认值(如果不需要变动可省略) + connectTimeout: 10000, //建立连接的超时时间(毫秒) + waitForConnections: true, //为true时,连接排队等待可用连接。为false将立即抛出错误 + connectionLimit: 20, //单次可创建最大连接数 + queueLimit: 0 //连接池的最大请求数,从getConnection方法前依次排队。设置为0将没有限制 +} + +const fetchConfig = { + baseUrl: 'https://www.esrl.noaa.gov/gmd/grad/solcalc/table.php', + sleepTime: 2000, //相邻两次请求之间的间隔(毫秒),避免被爬取的服务器崩溃 +} + +const logConfig = { + level: 'info' +} + +module.exports = { + mysqlConfig: mysqlConfig, + fetchConfig: fetchConfig, + logConfig: logConfig +} diff --git a/readme.md b/readme.md index 4876588..e1cbfb2 100644 --- a/readme.md +++ b/readme.md @@ -1,11 +1,49 @@ ## 爬取日升日落数据 +## 环境 + +- node v8.4.0以上 + +## 数据源 + +- ### 城市数据源 +[./sql/sun_city.sql](./sql/sun_city.sql) + +- ### 爬取日升日落数据源 + +``` +https://www.esrl.noaa.gov/gmd/grad/solcalc/table.php +``` + +### sun 表 + +time 日期 +code 区县编码 +sunrise 日升时间 +solar_moon 正午时间 +sunset 日落时间 + +### city 表 + +区县编码经纬度表 + +## 配置 + +[./config/config.js](./config/config.js) + +配置mysql库连接地址 + ## 运行 +### 安装依赖 + ``` npm i ``` +### 运行项目 + ``` npm run start ``` + diff --git a/src/fetch.js b/src/fetch.js index db7b030..1611a3c 100644 --- a/src/fetch.js +++ b/src/fetch.js @@ -3,7 +3,8 @@ const cheerio = require('cheerio') const {pool} = require('./mysql') const logger = require('./log') -const baseUrl = "https://www.esrl.noaa.gov/gmd/grad/solcalc/table.php"; +const {fetchConfig} = require('../config/config') + const parseData = ($, sunrise, year) => { const trs = $(sunrise).find('tr') @@ -36,7 +37,7 @@ const saveData = async (code, year, sunData) => { } const fetchData = async (year, code, latitude, longitude) => { - const url = baseUrl + + const url = fetchConfig.baseUrl + '?lat=' + latitude + '&lon=' + longitude + 
'&year=' + year diff --git a/src/log.js b/src/log.js index 21a9111..4dae99c 100644 --- a/src/log.js +++ b/src/log.js @@ -1,5 +1,6 @@ const path = require('path'); const log4js = require('log4js'); +const {logConfig} = require('../config/config'); const config = { /** @@ -9,7 +10,7 @@ const config = { /** * 日志级别 */ - level: 'info', + level: logConfig.level, /** * 默认日志路径 */ diff --git a/src/mysql.js b/src/mysql.js index 36e53a0..373e2c8 100644 --- a/src/mysql.js +++ b/src/mysql.js @@ -1,18 +1,9 @@ const mysql = require('mysql2/promise'); const logger = require('./log') +const {mysqlConfig} = require('../config/config') + // create the connection to database -const pool = mysql.createPool({ - host: '47.100.166.125', - user: 'root', - database: 'sun', - password: '111!aaaA', - charset: 'utf8', //应该设置编码(省略在某些情况下会有错误) - //以下选项均为默认值(如果不需要变动可省略) - connectTimeout: 10000, //获取连接的毫秒 - waitForConnections: true, //为true时,连接排队等待可用连接。为false将立即抛出错误 - connectionLimit: 20, //单次可创建最大连接数 - queueLimit: 0 //连接池的最大请求数,从getConnection方法前依次排队。设置为0将没有限制 -}); +const pool = mysql.createPool(mysqlConfig); const queryOne = async (sql, prepare) => { const [rows] = await pool.query(sql, prepare); diff --git a/src/process.js b/src/process.js index a9fabdd..fe7713d 100644 --- a/src/process.js +++ b/src/process.js @@ -2,6 +2,7 @@ const logger = require('./log') const fetchData = require('./fetch') const os = require("os") const {query, queryOne} = require('./mysql') +const {fetchConfig} = require('../config/config') function sleep(milliSeconds) { const startTime = new Date().getTime(); @@ -33,7 +34,7 @@ const process = async () => { while (cities && cities.length > 0) { const city = cities.shift(); await fetchData(city.year, city.code, city.latitude, city.longitude) - sleep(3000) + sleep(fetchConfig.sleepTime) } }) }