fix: Queue health monitor should only run on worker processes (#6228)
This commit is contained in:
40
server/queues/HealthMonitor.ts
Normal file
40
server/queues/HealthMonitor.ts
Normal file
@@ -0,0 +1,40 @@
|
||||
import { Queue } from "bull";
|
||||
import { Second } from "@shared/utils/time";
|
||||
import Logger from "@server/logging/Logger";
|
||||
|
||||
/* eslint-disable @typescript-eslint/no-misused-promises */
|
||||
export default class HealthMonitor {
  /**
   * Starts a health monitor for the given queue. Every 30 seconds the monitor
   * checks whether any job became active since the previous check. If none did
   * and more than 50 jobs are waiting, the queue is considered stalled and the
   * condition is reported through `Logger.fatal`, which is expected to exit the
   * process so that it can be restarted.
   *
   * @param queue The queue to monitor
   */
  public static start(queue: Queue) {
    // How often the queue is checked for progress.
    const checkInterval = 30 * Second;
    // Number of waiting jobs above which an idle queue is treated as stalled.
    const maxWaitingJobs = 50;

    let processedJobsSinceCheck = 0;

    // Bull fires "active" when a job has started. Without the "global:" prefix
    // the listener only observes jobs picked up by this process.
    queue.on("active", () => {
      processedJobsSinceCheck += 1;
    });

    setInterval(async () => {
      // Read and reset the counter in one place so every check starts a fresh
      // observation window.
      const processed = processedJobsSinceCheck;
      processedJobsSinceCheck = 0;

      if (processed > 0) {
        return;
      }

      const waiting = await queue.getWaitingCount();

      // A job may have become active while the waiting count was being
      // fetched. In that case the queue is making progress and must not be
      // reported as stalled. The counter is left untouched so the next check
      // sees that activity.
      if (processedJobsSinceCheck > 0 || waiting <= maxWaitingJobs) {
        return;
      }

      Logger.fatal(
        "Queue has stopped processing jobs",
        new Error(`Jobs are waiting in the ${queue.name} queue`),
        {
          queue: queue.name,
          waiting,
        }
      );
    }, checkInterval);
  }
}
|
||||
@@ -1,4 +1,4 @@
|
||||
import { createQueue } from "@server/utils/queue";
|
||||
import { createQueue } from "@server/queues/queue";
|
||||
|
||||
export const globalEventQueue = createQueue("globalEvents", {
|
||||
attempts: 5,
|
||||
|
||||
68
server/queues/queue.ts
Normal file
68
server/queues/queue.ts
Normal file
@@ -0,0 +1,68 @@
|
||||
/* eslint-disable @typescript-eslint/no-misused-promises */
|
||||
import Queue from "bull";
|
||||
import snakeCase from "lodash/snakeCase";
|
||||
import { Second } from "@shared/utils/time";
|
||||
import env from "@server/env";
|
||||
import Metrics from "@server/logging/Metrics";
|
||||
import Redis from "@server/storage/redis";
|
||||
import ShutdownHelper, { ShutdownOrder } from "@server/utils/ShutdownHelper";
|
||||
|
||||
/**
 * Creates a Bull queue that reuses the shared Redis connections, reports
 * metrics about its jobs, and is closed automatically on shutdown.
 *
 * @param name The name of the queue. It is also used, snake-cased, as part of
 * the metric names and as the name of the registered shutdown handler.
 * @param defaultJobOptions Job options applied to every job added to the
 * queue. They are merged over the defaults of `removeOnComplete: true` and
 * `removeOnFail: true`.
 * @returns The newly created queue.
 */
export function createQueue(
  name: string,
  defaultJobOptions?: Partial<Queue.JobOptions>
) {
  const prefix = `queue.${snakeCase(name)}`;

  // Notes on reusing Redis connections for Bull:
  // https://github.com/OptimalBits/bull/blob/b6d530f72a774be0fd4936ddb4ad9df3b183f4b6/PATTERNS.md#reusing-redis-connections
  const queue = new Queue(name, {
    createClient(type) {
      switch (type) {
        case "client":
          return Redis.defaultClient;

        case "subscriber":
          return Redis.defaultSubscriber;

        // The blocking client gets a dedicated connection per queue rather
        // than one of the shared ones.
        case "bclient":
          return new Redis(env.REDIS_URL, {
            maxRetriesPerRequest: null,
            connectionNameSuffix: "bull",
          });

        default:
          throw new Error(`Unexpected connection type: ${type}`);
      }
    },
    defaultJobOptions: {
      removeOnComplete: true,
      removeOnFail: true,
      ...defaultJobOptions,
    },
  });

  // Count job lifecycle events as metrics.
  queue.on("stalled", () => {
    Metrics.increment(`${prefix}.jobs.stalled`);
  });
  queue.on("completed", () => {
    Metrics.increment(`${prefix}.jobs.completed`);
  });
  queue.on("error", () => {
    Metrics.increment(`${prefix}.jobs.errored`);
  });
  queue.on("failed", () => {
    Metrics.increment(`${prefix}.jobs.failed`);
  });

  // Handle of the periodic gauge reporter, kept so that it can be stopped when
  // the queue is shut down. Stays undefined in the test environment.
  let metricsTimer: ReturnType<typeof setInterval> | undefined;

  if (env.ENVIRONMENT !== "test") {
    metricsTimer = setInterval(async () => {
      Metrics.gauge(`${prefix}.count`, await queue.count());
      Metrics.gauge(`${prefix}.delayed_count`, await queue.getDelayedCount());
    }, 5 * Second);
  }

  ShutdownHelper.add(name, ShutdownOrder.normal, async () => {
    // Stop reporting gauges first so the timer neither polls a queue that is
    // being closed nor keeps the event loop alive afterwards.
    if (metricsTimer) {
      clearInterval(metricsTimer);
    }

    await queue.close();
  });

  return queue;
}
|
||||
Reference in New Issue
Block a user