forked from kalisio/krawler
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhealthcheck.js
149 lines (139 loc) · 5.63 KB
/
healthcheck.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
const request = require('request')
const program = require('commander')
const utils = require('util')
const path = require('path')
const fs = require('fs-extra')
const _ = require('lodash')
// Command-line options declared as [flags, description, default value] entries
// (entries without a third item have no default)
const cliOptions = [
  ['-a, --api', 'Setup as web app by exposing an API'],
  ['-ap, --api-prefix [prefix]', 'When exposed as an API change the prefix (defaults to /api)', '/api'],
  ['-po, --port [port]', 'Change the port to be used (defaults to 3030)', 3030],
  ['-sr, --success-rate [rate]', 'Change the success rate for fault-tolerant jobs to be considered as successful (defaults to 1)', 1],
  ['-md, --max-duration [duration]', 'Change the maximum run duration in seconds for fault-tolerant jobs to be considered as failed (defaults to unset)', -1],
  ['-nsj, --nb-skipped-jobs [nb]', 'Change the number of skipped runs for fault-tolerant jobs to be considered as failed (defaults to 3)', 3],
  ['-sw, --slack-webhook [url]', 'Slack webhook URL to post messages on failure', process.env.SLACK_WEBHOOK_URL],
  ['-mt, --message-template [template]', 'Message template used on failure', 'Job <%= jobId %>: <%= error.message %>'],
  ['-lt, --link-template [template]', 'Link template used on failure', ''],
  ['-d, --debug', 'Verbose output for debugging']
]
program.usage('[options]').allowUnknownOption()
// Register the options in declaration order, then parse the actual command line
for (const [flags, description, defaultValue] of cliOptions) {
  program.option(flags, description, defaultValue)
}
program.parse(process.argv)
// File, next to this script, where the last healthcheck output is kept between runs
const logFile = path.join(__dirname, 'healthcheck.log')
/**
 * Load the healthcheck output stored by the previous run.
 * @returns {Object} the JSON content of the log file, or an empty object
 * when the file does not exist yet or cannot be read
 */
function readFromLog () {
  let previousOutput = {}
  try {
    // A missing file simply means this is the first launch
    if (fs.pathExistsSync(logFile)) previousOutput = fs.readJsonSync(logFile)
  } catch (readError) {
    // Allowed to fail to make healthcheck robust
    console.error(readError)
  }
  return previousOutput
}
/**
 * Store the given healthcheck output as JSON in the log file.
 * Write failures are reported on the console but never propagated.
 * @param {*} data the healthcheck output to persist
 */
function writeToLog (data) {
  try {
    fs.writeJsonSync(logFile, data)
  } catch (writeError) {
    // Allowed to fail to make healthcheck robust
    console.error(writeError)
  }
}
/**
 * Print the templated message for a healthcheck result on the console.
 * Failures (e.g. a template error) are reported on the console but never propagated.
 * @param {Object} data context given to the message template
 * @param {Object} compilers compiled templates, only `message` is used here
 * @param {string} pretext label printed before the message
 * @param {string} [stream='error'] 'error' prints with console.error, anything else with console.log
 */
function publishToConsole (data, compilers, pretext, stream = 'error') {
  try {
    const method = (stream === 'error' ? 'error' : 'log')
    const message = compilers.message(data)
    console[method](pretext, message)
  } catch (publishError) {
    // Allowed to fail to make healthcheck robust
    console.error(publishError)
  }
}
/**
 * Post the templated message for a healthcheck result to the Slack webhook.
 * Does nothing when no webhook URL is configured. Any failure, including a
 * non-2xx answer from the webhook, is reported on the console but never propagated.
 * @param {Object} data context given to the message and link templates
 * @param {Object} compilers compiled templates, `message` and `link` are used here
 * @param {string} [posttext=''] text appended after the message
 * @param {string} [color='danger'] color of the Slack attachment
 * @returns {Promise<void>}
 */
async function publishToSlack (data, compilers, posttext = '', color = 'danger') {
  if (!program.slackWebhook) return
  try {
    const message = compilers.message(data)
    const link = compilers.link(data)
    // When a link is available the whole message becomes a Slack-formatted hyperlink
    const text = link ? `<${link}|${message}${posttext}>` : `${message}${posttext}`
    const response = await utils.promisify(request.post)({
      url: program.slackWebhook,
      body: JSON.stringify({
        attachments: [
          {
            color,
            mrkdwn_in: ['text'],
            text
          }
        ]
      })
    })
    // The request only rejects on transport errors, so a refused payload would otherwise go unnoticed
    const statusCode = response && response.statusCode
    if ((typeof statusCode === 'number') && ((statusCode < 200) || (statusCode >= 300))) {
      console.error(`Slack webhook responded with status ${statusCode}: ${response.body}`)
    }
  } catch (error) {
    // Allowed to fail to make healthcheck robust
    console.error(error)
  }
}
/**
 * Tell whether two errors should be considered as the same alert.
 * Errors are compared on their `code` when both have one, on their `message` otherwise.
 * @param {Object} previousError error read from the previous run
 * @param {Object} error error raised by the current run
 * @returns {boolean} true when the compared property is equal on both errors
 */
function isSameError (previousError, error) {
  const bothHaveCode = _.has(previousError, 'code') && _.has(error, 'code')
  const comparedKey = bothHaveCode ? 'code' : 'message'
  return _.isEqual(_.get(previousError, comparedKey), _.get(error, comparedKey))
}
/**
 * Publish an alert on the console and Slack, unless the previous run already
 * reported the same error (only new errors are notified).
 * @param {Object} previousError error stored by the previous run, if any
 * @param {Object} data template context, its `error` property is the current error
 * @param {Object} compilers compiled message/link templates
 * @returns {Promise<void>}
 */
async function notifyOnNewError (previousError, data, compilers) {
  if (previousError && isSameError(previousError, data.error)) return
  publishToConsole(data, compilers, '[ALERT]', 'error')
  await publishToSlack(data, compilers, '', 'danger')
}

/**
 * Query the local healthcheck endpoint, compare the result with the one stored
 * by the previous run, notify about new or resolved errors and terminate the
 * process with exit code 1 when unhealthy, 0 otherwise.
 * @returns {Promise<void>}
 */
async function healthcheck () {
  const endpoint = `http://localhost:${program.port}${program.api ? program.apiPrefix : ''}/healthcheck`
  const compilers = {
    message: _.template(program.messageTemplate),
    link: _.template(program.linkTemplate)
  }
  let previousError
  try {
    const previousHealthcheck = readFromLog()
    // Null-safe so that an unexpected log content does not turn into a failure by itself
    previousError = _.get(previousHealthcheck, 'error')
    const response = await utils.promisify(request.get)(endpoint)
    const data = JSON.parse(response.body)
    if (program.debug) {
      console.log('Current healthcheck output read from service', data)
      console.log('Previous healthcheck output read from log', previousHealthcheck)
    }
    if (response.statusCode === 200) {
      // Fault-tolerant jobs always return 200, we use more criteria to check for health status
      if (_.has(data, 'successRate') && (data.successRate < program.successRate)) {
        data.error = { code: 'HEALTHCHECK_SUCCESS_RATE', message: `Insufficient success rate (${data.successRate.toFixed(2)})` }
      }
      if (data.nbSkippedJobs >= program.nbSkippedJobs) {
        data.error = { code: 'HEALTHCHECK_SKIPPED_JOBS', message: `Too much skipped jobs (${data.nbSkippedJobs})` }
      }
      if ((program.maxDuration > 0) && (data.duration > program.maxDuration)) {
        data.error = { code: 'HEALTHCHECK_DURATION', message: `Too much slow execution (${data.duration}s)` }
      }
    } else if (!data.error) {
      // A failure status without error details must not be reported as healthy
      data.error = { code: 'HEALTHCHECK_STATUS_CODE', message: `Unexpected status code (${response.statusCode})` }
    }
    writeToLog(data)
    // Add env available for templates
    Object.assign(data, process.env)
    if (data.error) {
      await notifyOnNewError(previousError, data, compilers)
      process.exit(1)
    } else {
      // Only notify on closing errors
      if (previousError) {
        // The resolved notification is built from the error that has just been closed
        data.error = previousError
        publishToConsole(data, compilers, '[CLOSED ALERT]', 'log')
        await publishToSlack(data, compilers, ' [RESOLVED]', 'good')
      }
      process.exit(0)
    }
  } catch (error) {
    // Set jobId variable/error available in context so that templates will not fail
    const data = Object.assign({ jobId: '' }, { error: _.pick(error, ['code', 'message']) })
    writeToLog(data)
    // Add env available for templates
    Object.assign(data, process.env)
    await notifyOnNewError(previousError, data, compilers)
    process.exit(1)
  }
}
healthcheck().catch(error => {
  // Anything thrown outside of the guarded section (e.g. an invalid template) must still fail the check
  console.error(error)
  process.exit(1)
})