Skip to content

Commit

Permalink
Retry failed services (#706)
Browse files Browse the repository at this point in the history
* chore: retry failed services

* chore: only retry mandatory services

* chore: pr fixes

* chore: fix tests

* chore: add test for onGatewayReplaceSchemaHandler

* chore: fix lint issue

* chore: fix tests

* chore: improve tests, update docs

* chore: fix tests

* chore: no longer a breaking change

* chore: fix ts tests

* chore: fix tests

* chore: fix tests

* chore: fix types

* chore: wait time increased

* chore: retry count changed

* chore: failed service names added
  • Loading branch information
sameer-coder authored Feb 3, 2022
1 parent 6943453 commit a1b66e5
Show file tree
Hide file tree
Showing 9 changed files with 598 additions and 9 deletions.
2 changes: 2 additions & 0 deletions docs/api/options.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@
- `wsConnectionParams.failedConnectionCallback`: `Function` A function called after a `connection_error` message is received, the first argument contains the message payload.
- `wsConnectionParams.failedReconnectCallback`: `Function` A function called if reconnect is enabled and maxReconnectAttempts is reached.
- `wsConnectionParams.rewriteConnectionInitPayload`: `Function` A function that gets the original `connection_init` payload along with the context as a parameter and returns an object that replaces the original `connection_init` payload before forwarding it to the federated service
- `gateway.retryServicesCount`: `Number` Specifies the maximum number of retries when a service fails to start on gateway initialization. (Default: 10)
- `gateway.retryServicesInterval`: `Number` The amount of time(in milliseconds) between service retry attempts in case a service fails to start on gateway initialization. (Default: 3000)

- `persistedQueries`: A hash/query map to resolve the full query text using it's unique hash. Overrides `persistedQueryProvider`.
- `onlyPersisted`: Boolean. Flag to control whether to allow graphql queries other than persisted. When `true`, it'll make the server reject any queries that are not present in the `persistedQueries` option above. It will also disable any ide available (graphiql). Requires `persistedQueries` to be set, and overrides `persistedQueryProvider`.
Expand Down
2 changes: 1 addition & 1 deletion index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ interface ServiceConfig {
}

interface Gateway {
refresh: () => Promise<GraphQLSchema | null>;
refresh: (isRetry?: boolean) => Promise<GraphQLSchema | null>;
serviceMap: Record<string, ServiceConfig>;
}

Expand Down
43 changes: 43 additions & 0 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -175,9 +175,49 @@ const plugin = fp(async function (app, opts) {
}

let entityResolversFactory
let gatewayRetryIntervalTimer = null
const retryServicesCount = gateway && gateway.retryServicesCount ? gateway.retryServicesCount : 10

const retryServices = (interval) => {
let retryCount = 0
let isRetry = true

return setInterval(async () => {
try {
if (retryCount === retryServicesCount) {
clearInterval(gatewayRetryIntervalTimer)
isRetry = false
}
retryCount++

const context = assignApplicationLifecycleHooksToContext({}, fastifyGraphQl[kHooks])
const schema = await gateway.refresh(isRetry)
if (schema !== null) {
clearInterval(gatewayRetryIntervalTimer)
// Trigger onGatewayReplaceSchema hook
if (context.onGatewayReplaceSchema !== null) {
await onGatewayReplaceSchemaHandler(context, { instance: app, schema })
}
fastifyGraphQl.replaceSchema(schema)
}
} catch (error) {
app.log.error(error)
}
}, interval)
}

if (gateway) {
const retryInterval = gateway.retryServicesInterval || 3000
gateway = await buildGateway(gateway, app)

const serviceMap = Object.values(gateway.serviceMap)
const failedMandatoryServices = serviceMap.filter(service => !!service.error && service.mandatory)

if (failedMandatoryServices.length) {
gatewayRetryIntervalTimer = retryServices(retryInterval)
gatewayRetryIntervalTimer.unref()
}

schema = gateway.schema
entityResolversFactory = gateway.entityResolversFactory

Expand Down Expand Up @@ -210,6 +250,9 @@ const plugin = fp(async function (app, opts) {
if (gatewayInterval !== null) {
clearInterval(gatewayInterval)
}
if (gatewayRetryIntervalTimer !== null) {
clearInterval(gatewayRetryIntervalTimer)
}
setImmediate(next)
})

Expand Down
5 changes: 5 additions & 0 deletions lib/errors.js
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,11 @@ const errors = {
'MER_ERR_HOOK_UNSUPPORTED_HOOK',
'%s hook not supported!',
500
),
MER_ERR_SERVICE_RETRY_FAILED: createError(
'MER_ERR_SERVICE_RETRY_FAILED',
'Mandatory services retry failed - [%s]',
500
)
}

Expand Down
25 changes: 21 additions & 4 deletions lib/gateway.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ const {
createEntityReferenceResolverOperation,
kEntityResolvers
} = require('./gateway/make-resolver')
const { MER_ERR_GQL_GATEWAY_REFRESH, MER_ERR_GQL_GATEWAY_INIT } = require('./errors')
const { MER_ERR_GQL_GATEWAY_REFRESH, MER_ERR_GQL_GATEWAY_INIT, MER_ERR_SERVICE_RETRY_FAILED } = require('./errors')
const findValueTypes = require('./gateway/find-value-types')
const getQueryResult = require('./gateway/get-query-result')
const allSettled = require('promise.allsettled')
Expand Down Expand Up @@ -257,7 +257,7 @@ function defaultErrorHandler (error, service) {
async function buildGateway (gatewayOpts, app) {
const { services, errorHandler = defaultErrorHandler } = gatewayOpts

const serviceMap = await buildServiceMap(services, errorHandler)
const serviceMap = await buildServiceMap(services, errorHandler, app.log)

const serviceSDLs = Object.entries(serviceMap).reduce((acc, [name, value]) => {
const { schemaDefinition, error } = value
Expand Down Expand Up @@ -379,19 +379,36 @@ async function buildGateway (gatewayOpts, app) {
serviceMap,
entityResolversFactory: factory,
pollingInterval: gatewayOpts.pollingInterval,
async refresh () {
async refresh (isRetry) {
const failedMandatoryServices = []
if (this._serviceSDLs === undefined) {
this._serviceSDLs = serviceSDLs.join('')
}

const $refreshResult = await allSettled(
Object.values(serviceMap).map((service) =>
service.refresh().catch((err) => {
errorHandler(err, service)
// If non-mandatory service or if retry count has exceeded for mandatory service then throw
if (!service.mandatory || !isRetry) {
errorHandler(err, service)
}

// If service is mandatory and retry count has not exceeded then add to service to
// failedMandatoryServices so it can be returned for retrying
if (service.mandatory) {
failedMandatoryServices.push(service)
}
})
)
)

if (failedMandatoryServices.length > 0) {
const serviceNames = failedMandatoryServices.map(service => service.name)
const err = new MER_ERR_SERVICE_RETRY_FAILED(serviceNames.join(', '))
err.failedServices = serviceNames
throw err
}

const rejectedResults = $refreshResult
.filter(({ status }) => status === 'rejected')
.map(({ reason }) => reason)
Expand Down
10 changes: 7 additions & 3 deletions lib/gateway/service-map.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ const {
isTypeDefinitionNode,
parse,
buildSchema,
GraphQLSchema
GraphQLSchema,
GraphQLError
} = require('graphql')

const { buildRequest, sendRequest } = require('./request')
Expand Down Expand Up @@ -109,7 +110,7 @@ function safeBuildSchema (schemaDefinition) {
}
}

async function buildServiceMap (services, errorHandler) {
async function buildServiceMap (services, errorHandler, log) {
const serviceMap = {}

for (const service of services) {
Expand Down Expand Up @@ -201,7 +202,10 @@ async function buildServiceMap (services, errorHandler) {
serviceConfig = await serviceMap[service.name].init()
} catch (err) {
serviceConfigErr = err
errorHandler(err, service)
if (!service.mandatory || err instanceof GraphQLError) {
log.warn(`Initializing service "${service.name}" failed with message: "${err.message}"`)
errorHandler(err, service)
}
}

if (serviceConfig) {
Expand Down
4 changes: 4 additions & 0 deletions tap-snapshots/test/gateway/remote-services.js.test.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,7 @@
exports['test/gateway/remote-services.js TAP Does not error if at least one service schema is valid > must match snapshot 1'] = `
Initializing service "not-working" failed with message: "Unknown type "World"."
`

exports['test/gateway/remote-services.js TAP Does not error if at least one service schema is valid > must match snapshot 2'] = `
Initializing service "not-working" failed with message: "Unknown type "World"."
`
2 changes: 1 addition & 1 deletion test/gateway/remote-services.js
Original file line number Diff line number Diff line change
Expand Up @@ -120,5 +120,5 @@ test('Does not error if at least one service schema is valid', async (t) => {
} catch (err) {
t.error(err)
}
t.equal(warnCalled, 1, 'Warning is called')
t.equal(warnCalled, 2, 'Warning is called')
})
Loading

0 comments on commit a1b66e5

Please sign in to comment.