feat(core): Expose queue metrics for Prometheus (#10559)

This commit is contained in:
Iván Ovejero
2024-08-28 11:36:00 +02:00
committed by GitHub
parent acfd60ac85
commit 008c510b76
12 changed files with 304 additions and 52 deletions

View File

@@ -7,6 +7,7 @@ import type express from 'express';
import type { MessageEventBus } from '@/eventbus/message-event-bus/message-event-bus';
import { mockInstance } from '@test/mocking';
import { GlobalConfig } from '@n8n/config';
import type { EventService } from '@/events/event.service';
const mockMiddleware = (
_req: express.Request,
@@ -22,27 +23,62 @@ describe('PrometheusMetricsService', () => {
endpoints: {
metrics: {
prefix: 'n8n_',
includeDefaultMetrics: true,
includeApiEndpoints: true,
includeCacheMetrics: true,
includeMessageEventBusMetrics: true,
includeDefaultMetrics: false,
includeApiEndpoints: false,
includeCacheMetrics: false,
includeMessageEventBusMetrics: false,
includeCredentialTypeLabel: false,
includeNodeTypeLabel: false,
includeWorkflowIdLabel: false,
includeApiPathLabel: true,
includeApiMethodLabel: true,
includeApiStatusCodeLabel: true,
includeApiPathLabel: false,
includeApiMethodLabel: false,
includeApiStatusCodeLabel: false,
includeQueueMetrics: false,
},
},
});
const app = mock<express.Application>();
const eventBus = mock<MessageEventBus>();
const eventService = mock<EventService>();
const prometheusMetricsService = new PrometheusMetricsService(
mock(),
eventBus,
globalConfig,
eventService,
);
afterEach(() => {
jest.clearAllMocks();
prometheusMetricsService.disableAllMetrics();
});
describe('constructor', () => {
	it('should enable metrics based on global config', async () => {
		// Object spread is shallow: `{ ...globalConfig }` alone would still share the nested
		// `endpoints.metrics` object with the suite-wide `globalConfig`, so flipping the flag on
		// the copy would also flip it on the shared fixture. Copy every level that is overridden.
		const customGlobalConfig = {
			...globalConfig,
			endpoints: {
				...globalConfig.endpoints,
				metrics: {
					...globalConfig.endpoints.metrics,
					includeCacheMetrics: true,
				},
			},
		};

		const customPrometheusMetricsService = new PrometheusMetricsService(
			mock(),
			mock(),
			customGlobalConfig,
			mock(),
		);

		await customPrometheusMetricsService.init(app);

		// Cache metrics were enabled purely through config, without calling `enableMetric('cache')`.
		expect(promClient.Counter).toHaveBeenCalledWith({
			name: 'n8n_cache_hits_total',
			help: 'Total number of cache hits.',
			labelNames: ['cache'],
		});
	});
});
describe('init', () => {
it('should set up `n8n_version_info`', async () => {
const service = new PrometheusMetricsService(mock(), mock(), globalConfig);
await prometheusMetricsService.init(app);
await service.init(mock<express.Application>());
expect(promClient.Gauge).toHaveBeenCalledWith({
expect(promClient.Gauge).toHaveBeenNthCalledWith(1, {
name: 'n8n_version_info',
help: 'n8n version info.',
labelNames: ['version', 'major', 'minor', 'patch'],
@@ -50,48 +86,37 @@ describe('PrometheusMetricsService', () => {
});
it('should set up default metrics collection with `prom-client`', async () => {
const service = new PrometheusMetricsService(mock(), mock(), globalConfig);
await service.init(mock<express.Application>());
prometheusMetricsService.enableMetric('default');
await prometheusMetricsService.init(app);
expect(promClient.collectDefaultMetrics).toHaveBeenCalled();
});
it('should set up `n8n_cache_hits_total`', async () => {
config.set('endpoints.metrics.includeCacheMetrics', true);
const service = new PrometheusMetricsService(mock(), mock(), globalConfig);
await service.init(mock<express.Application>());
prometheusMetricsService.enableMetric('cache');
await prometheusMetricsService.init(app);
expect(promClient.Counter).toHaveBeenCalledWith({
name: 'n8n_cache_hits_total',
help: 'Total number of cache hits.',
labelNames: ['cache'],
});
// @ts-expect-error private field
expect(service.counters.cacheHitsTotal?.inc).toHaveBeenCalledWith(0);
});
it('should set up `n8n_cache_misses_total`', async () => {
config.set('endpoints.metrics.includeCacheMetrics', true);
const service = new PrometheusMetricsService(mock(), mock(), globalConfig);
await service.init(mock<express.Application>());
prometheusMetricsService.enableMetric('cache');
await prometheusMetricsService.init(app);
expect(promClient.Counter).toHaveBeenCalledWith({
name: 'n8n_cache_misses_total',
help: 'Total number of cache misses.',
labelNames: ['cache'],
});
// @ts-expect-error private field
expect(service.counters.cacheMissesTotal?.inc).toHaveBeenCalledWith(0);
});
it('should set up `n8n_cache_updates_total`', async () => {
config.set('endpoints.metrics.includeCacheMetrics', true);
const service = new PrometheusMetricsService(mock(), mock(), globalConfig);
await service.init(mock<express.Application>());
prometheusMetricsService.enableMetric('cache');
await prometheusMetricsService.init(app);
expect(promClient.Counter).toHaveBeenCalledWith({
name: 'n8n_cache_updates_total',
@@ -99,26 +124,19 @@ describe('PrometheusMetricsService', () => {
labelNames: ['cache'],
});
// @ts-expect-error private field
expect(service.counters.cacheUpdatesTotal?.inc).toHaveBeenCalledWith(0);
expect(prometheusMetricsService.counters.cacheUpdatesTotal?.inc).toHaveBeenCalledWith(0);
});
it('should set up route metrics with `express-prom-bundle`', async () => {
config.set('endpoints.metrics.includeApiEndpoints', true);
config.set('endpoints.metrics.includeApiPathLabel', true);
config.set('endpoints.metrics.includeApiMethodLabel', true);
config.set('endpoints.metrics.includeApiStatusCodeLabel', true);
const service = new PrometheusMetricsService(mock(), mock(), globalConfig);
const app = mock<express.Application>();
await service.init(app);
prometheusMetricsService.enableMetric('routes');
await prometheusMetricsService.init(app);
expect(promBundle).toHaveBeenCalledWith({
autoregister: false,
includeUp: false,
includePath: true,
includeMethod: true,
includeStatusCode: true,
includePath: false,
includeMethod: false,
includeStatusCode: false,
});
expect(app.use).toHaveBeenCalledWith(
@@ -137,12 +155,52 @@ describe('PrometheusMetricsService', () => {
});
it('should set up event bus metrics', async () => {
const eventBus = mock<MessageEventBus>();
const service = new PrometheusMetricsService(mock(), eventBus, globalConfig);
await service.init(mock<express.Application>());
prometheusMetricsService.enableMetric('logs');
await prometheusMetricsService.init(app);
expect(eventBus.on).toHaveBeenCalledWith('metrics.eventBus.event', expect.any(Function));
});
it('should set up queue metrics if enabled', async () => {
	config.set('executions.mode', 'queue');
	prometheusMetricsService.enableMetric('queue');

	await prometheusMetricsService.init(app);

	// Gauge call 1 is for `n8n_version_info` (always enabled), so the queue gauges
	// are expected as calls 2 and 3, in this order.
	const firstQueueGaugeCall = 2;
	const expectedGauges = [
		{
			name: 'n8n_scaling_mode_queue_jobs_waiting',
			help: 'Current number of enqueued jobs waiting for pickup in scaling mode.',
		},
		{
			name: 'n8n_scaling_mode_queue_jobs_active',
			help: 'Current number of jobs being processed across all workers in scaling mode.',
		},
	];
	expectedGauges.forEach((gaugeConfig, offset) => {
		expect(promClient.Gauge).toHaveBeenNthCalledWith(firstQueueGaugeCall + offset, gaugeConfig);
	});

	// No other counters are enabled in this suite's config, so the queue counters
	// are expected as calls 1 and 2, in this order.
	const expectedCounters = [
		{
			name: 'n8n_scaling_mode_queue_jobs_completed',
			help: 'Total number of jobs completed across all workers in scaling mode since instance start.',
		},
		{
			name: 'n8n_scaling_mode_queue_jobs_failed',
			help: 'Total number of jobs failed across all workers in scaling mode since instance start.',
		},
	];
	expectedCounters.forEach((counterConfig, offset) => {
		expect(promClient.Counter).toHaveBeenNthCalledWith(offset + 1, counterConfig);
	});

	expect(eventService.on).toHaveBeenCalledWith('job-counts-updated', expect.any(Function));
});
it('should not set up queue metrics if enabled but not on scaling mode', async () => {
	config.set('executions.mode', 'regular');
	prometheusMetricsService.enableMetric('queue');

	await prometheusMetricsService.init(app);

	// Only the always-on version gauge is created: no queue gauges, no queue counters,
	// and no subscription to job count updates.
	expect(promClient.Gauge).toHaveBeenCalledTimes(1);
	expect(promClient.Counter).not.toHaveBeenCalled();
	expect(eventService.on).toHaveBeenCalledTimes(0);
});
});
});

View File

@@ -1,7 +1,7 @@
import { N8N_VERSION } from '@/constants';
import type express from 'express';
import promBundle from 'express-prom-bundle';
import promClient, { type Counter } from 'prom-client';
import promClient, { type Counter, type Gauge } from 'prom-client';
import semverParse from 'semver/functions/parse';
import { Service } from 'typedi';
@@ -11,6 +11,8 @@ import { EventMessageTypeNames } from 'n8n-workflow';
import type { EventMessageTypes } from '@/eventbus';
import type { Includes, MetricCategory, MetricLabel } from './types';
import { GlobalConfig } from '@n8n/config';
import { EventService } from '@/events/event.service';
import config from '@/config';
@Service()
export class PrometheusMetricsService {
@@ -18,10 +20,13 @@ export class PrometheusMetricsService {
private readonly cacheService: CacheService,
private readonly eventBus: MessageEventBus,
private readonly globalConfig: GlobalConfig,
private readonly eventService: EventService,
) {}
private readonly counters: { [key: string]: Counter<string> | null } = {};
private readonly gauges: Record<string, Gauge<string>> = {};
private readonly prefix = this.globalConfig.endpoints.metrics.prefix;
private readonly includes: Includes = {
@@ -30,6 +35,7 @@ export class PrometheusMetricsService {
routes: this.globalConfig.endpoints.metrics.includeApiEndpoints,
cache: this.globalConfig.endpoints.metrics.includeCacheMetrics,
logs: this.globalConfig.endpoints.metrics.includeMessageEventBusMetrics,
queue: this.globalConfig.endpoints.metrics.includeQueueMetrics,
},
labels: {
credentialsType: this.globalConfig.endpoints.metrics.includeCredentialTypeLabel,
@@ -48,6 +54,7 @@ export class PrometheusMetricsService {
this.initCacheMetrics();
this.initEventBusMetrics();
this.initRouteMetrics(app);
this.initQueueMetrics();
this.mountMetricsEndpoint(app);
}
@@ -218,6 +225,42 @@ export class PrometheusMetricsService {
});
}
/**
 * Set up the scaling-mode queue metrics.
 *
 * Does nothing unless the `queue` metric category is enabled and
 * `executions.mode` is `queue`. Otherwise creates two gauges (waiting, active)
 * and two counters (completed, failed), initializes all of them with zero, and
 * subscribes to `job-counts-updated` events to keep them current: gauges are
 * overwritten with the event's values, counters are incremented by them.
 */
private initQueueMetrics() {
	if (!this.includes.metrics.queue) return;
	if (config.getEnv('executions.mode') !== 'queue') return;

	this.gauges.waiting = new promClient.Gauge({
		name: `${this.prefix}scaling_mode_queue_jobs_waiting`,
		help: 'Current number of enqueued jobs waiting for pickup in scaling mode.',
	});

	this.gauges.active = new promClient.Gauge({
		name: `${this.prefix}scaling_mode_queue_jobs_active`,
		help: 'Current number of jobs being processed across all workers in scaling mode.',
	});

	this.counters.completed = new promClient.Counter({
		name: `${this.prefix}scaling_mode_queue_jobs_completed`,
		help: 'Total number of jobs completed across all workers in scaling mode since instance start.',
	});

	this.counters.failed = new promClient.Counter({
		name: `${this.prefix}scaling_mode_queue_jobs_failed`,
		help: 'Total number of jobs failed across all workers in scaling mode since instance start.',
	});

	// Start every metric at zero before the first event arrives.
	this.gauges.waiting.set(0);
	this.gauges.active.set(0);
	this.counters.completed.inc(0);
	this.counters.failed.inc(0);

	this.eventService.on('job-counts-updated', ({ waiting, active, completed, failed }) => {
		// Gauges take the reported value as-is.
		this.gauges.waiting.set(waiting);
		this.gauges.active.set(active);

		// Counters add the reported value on top of their current total.
		this.counters.completed?.inc(completed);
		this.counters.failed?.inc(failed);
	});
}
private toLabels(event: EventMessageTypes): Record<string, string> {
const { __type, eventName, payload } = event;

View File

@@ -1,4 +1,4 @@
export type MetricCategory = 'default' | 'routes' | 'cache' | 'logs';
export type MetricCategory = 'default' | 'routes' | 'cache' | 'logs' | 'queue';
export type MetricLabel =
| 'credentialsType'