feat(core): Expose queue metrics for Prometheus (#10559)
This commit is contained in:
@@ -7,6 +7,7 @@ import type express from 'express';
|
||||
import type { MessageEventBus } from '@/eventbus/message-event-bus/message-event-bus';
|
||||
import { mockInstance } from '@test/mocking';
|
||||
import { GlobalConfig } from '@n8n/config';
|
||||
import type { EventService } from '@/events/event.service';
|
||||
|
||||
const mockMiddleware = (
|
||||
_req: express.Request,
|
||||
@@ -22,27 +23,62 @@ describe('PrometheusMetricsService', () => {
|
||||
endpoints: {
|
||||
metrics: {
|
||||
prefix: 'n8n_',
|
||||
includeDefaultMetrics: true,
|
||||
includeApiEndpoints: true,
|
||||
includeCacheMetrics: true,
|
||||
includeMessageEventBusMetrics: true,
|
||||
includeDefaultMetrics: false,
|
||||
includeApiEndpoints: false,
|
||||
includeCacheMetrics: false,
|
||||
includeMessageEventBusMetrics: false,
|
||||
includeCredentialTypeLabel: false,
|
||||
includeNodeTypeLabel: false,
|
||||
includeWorkflowIdLabel: false,
|
||||
includeApiPathLabel: true,
|
||||
includeApiMethodLabel: true,
|
||||
includeApiStatusCodeLabel: true,
|
||||
includeApiPathLabel: false,
|
||||
includeApiMethodLabel: false,
|
||||
includeApiStatusCodeLabel: false,
|
||||
includeQueueMetrics: false,
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
const app = mock<express.Application>();
|
||||
const eventBus = mock<MessageEventBus>();
|
||||
const eventService = mock<EventService>();
|
||||
const prometheusMetricsService = new PrometheusMetricsService(
|
||||
mock(),
|
||||
eventBus,
|
||||
globalConfig,
|
||||
eventService,
|
||||
);
|
||||
|
||||
afterEach(() => {
|
||||
jest.clearAllMocks();
|
||||
prometheusMetricsService.disableAllMetrics();
|
||||
});
|
||||
|
||||
describe('constructor', () => {
|
||||
it('should enable metrics based on global config', async () => {
|
||||
const customGlobalConfig = { ...globalConfig };
|
||||
customGlobalConfig.endpoints.metrics.includeCacheMetrics = true;
|
||||
const customPrometheusMetricsService = new PrometheusMetricsService(
|
||||
mock(),
|
||||
mock(),
|
||||
customGlobalConfig,
|
||||
mock(),
|
||||
);
|
||||
|
||||
await customPrometheusMetricsService.init(app);
|
||||
|
||||
expect(promClient.Counter).toHaveBeenCalledWith({
|
||||
name: 'n8n_cache_hits_total',
|
||||
help: 'Total number of cache hits.',
|
||||
labelNames: ['cache'],
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('init', () => {
|
||||
it('should set up `n8n_version_info`', async () => {
|
||||
const service = new PrometheusMetricsService(mock(), mock(), globalConfig);
|
||||
await prometheusMetricsService.init(app);
|
||||
|
||||
await service.init(mock<express.Application>());
|
||||
|
||||
expect(promClient.Gauge).toHaveBeenCalledWith({
|
||||
expect(promClient.Gauge).toHaveBeenNthCalledWith(1, {
|
||||
name: 'n8n_version_info',
|
||||
help: 'n8n version info.',
|
||||
labelNames: ['version', 'major', 'minor', 'patch'],
|
||||
@@ -50,48 +86,37 @@ describe('PrometheusMetricsService', () => {
|
||||
});
|
||||
|
||||
it('should set up default metrics collection with `prom-client`', async () => {
|
||||
const service = new PrometheusMetricsService(mock(), mock(), globalConfig);
|
||||
|
||||
await service.init(mock<express.Application>());
|
||||
prometheusMetricsService.enableMetric('default');
|
||||
await prometheusMetricsService.init(app);
|
||||
|
||||
expect(promClient.collectDefaultMetrics).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should set up `n8n_cache_hits_total`', async () => {
|
||||
config.set('endpoints.metrics.includeCacheMetrics', true);
|
||||
const service = new PrometheusMetricsService(mock(), mock(), globalConfig);
|
||||
|
||||
await service.init(mock<express.Application>());
|
||||
prometheusMetricsService.enableMetric('cache');
|
||||
await prometheusMetricsService.init(app);
|
||||
|
||||
expect(promClient.Counter).toHaveBeenCalledWith({
|
||||
name: 'n8n_cache_hits_total',
|
||||
help: 'Total number of cache hits.',
|
||||
labelNames: ['cache'],
|
||||
});
|
||||
// @ts-expect-error private field
|
||||
expect(service.counters.cacheHitsTotal?.inc).toHaveBeenCalledWith(0);
|
||||
});
|
||||
|
||||
it('should set up `n8n_cache_misses_total`', async () => {
|
||||
config.set('endpoints.metrics.includeCacheMetrics', true);
|
||||
const service = new PrometheusMetricsService(mock(), mock(), globalConfig);
|
||||
|
||||
await service.init(mock<express.Application>());
|
||||
prometheusMetricsService.enableMetric('cache');
|
||||
await prometheusMetricsService.init(app);
|
||||
|
||||
expect(promClient.Counter).toHaveBeenCalledWith({
|
||||
name: 'n8n_cache_misses_total',
|
||||
help: 'Total number of cache misses.',
|
||||
labelNames: ['cache'],
|
||||
});
|
||||
// @ts-expect-error private field
|
||||
expect(service.counters.cacheMissesTotal?.inc).toHaveBeenCalledWith(0);
|
||||
});
|
||||
|
||||
it('should set up `n8n_cache_updates_total`', async () => {
|
||||
config.set('endpoints.metrics.includeCacheMetrics', true);
|
||||
const service = new PrometheusMetricsService(mock(), mock(), globalConfig);
|
||||
|
||||
await service.init(mock<express.Application>());
|
||||
prometheusMetricsService.enableMetric('cache');
|
||||
await prometheusMetricsService.init(app);
|
||||
|
||||
expect(promClient.Counter).toHaveBeenCalledWith({
|
||||
name: 'n8n_cache_updates_total',
|
||||
@@ -99,26 +124,19 @@ describe('PrometheusMetricsService', () => {
|
||||
labelNames: ['cache'],
|
||||
});
|
||||
// @ts-expect-error private field
|
||||
expect(service.counters.cacheUpdatesTotal?.inc).toHaveBeenCalledWith(0);
|
||||
expect(prometheusMetricsService.counters.cacheUpdatesTotal?.inc).toHaveBeenCalledWith(0);
|
||||
});
|
||||
|
||||
it('should set up route metrics with `express-prom-bundle`', async () => {
|
||||
config.set('endpoints.metrics.includeApiEndpoints', true);
|
||||
config.set('endpoints.metrics.includeApiPathLabel', true);
|
||||
config.set('endpoints.metrics.includeApiMethodLabel', true);
|
||||
config.set('endpoints.metrics.includeApiStatusCodeLabel', true);
|
||||
const service = new PrometheusMetricsService(mock(), mock(), globalConfig);
|
||||
|
||||
const app = mock<express.Application>();
|
||||
|
||||
await service.init(app);
|
||||
prometheusMetricsService.enableMetric('routes');
|
||||
await prometheusMetricsService.init(app);
|
||||
|
||||
expect(promBundle).toHaveBeenCalledWith({
|
||||
autoregister: false,
|
||||
includeUp: false,
|
||||
includePath: true,
|
||||
includeMethod: true,
|
||||
includeStatusCode: true,
|
||||
includePath: false,
|
||||
includeMethod: false,
|
||||
includeStatusCode: false,
|
||||
});
|
||||
|
||||
expect(app.use).toHaveBeenCalledWith(
|
||||
@@ -137,12 +155,52 @@ describe('PrometheusMetricsService', () => {
|
||||
});
|
||||
|
||||
it('should set up event bus metrics', async () => {
|
||||
const eventBus = mock<MessageEventBus>();
|
||||
const service = new PrometheusMetricsService(mock(), eventBus, globalConfig);
|
||||
|
||||
await service.init(mock<express.Application>());
|
||||
prometheusMetricsService.enableMetric('logs');
|
||||
await prometheusMetricsService.init(app);
|
||||
|
||||
expect(eventBus.on).toHaveBeenCalledWith('metrics.eventBus.event', expect.any(Function));
|
||||
});
|
||||
|
||||
it('should set up queue metrics if enabled', async () => {
|
||||
config.set('executions.mode', 'queue');
|
||||
prometheusMetricsService.enableMetric('queue');
|
||||
|
||||
await prometheusMetricsService.init(app);
|
||||
|
||||
// call 1 is for `n8n_version_info` (always enabled)
|
||||
|
||||
expect(promClient.Gauge).toHaveBeenNthCalledWith(2, {
|
||||
name: 'n8n_scaling_mode_queue_jobs_waiting',
|
||||
help: 'Current number of enqueued jobs waiting for pickup in scaling mode.',
|
||||
});
|
||||
|
||||
expect(promClient.Gauge).toHaveBeenNthCalledWith(3, {
|
||||
name: 'n8n_scaling_mode_queue_jobs_active',
|
||||
help: 'Current number of jobs being processed across all workers in scaling mode.',
|
||||
});
|
||||
|
||||
expect(promClient.Counter).toHaveBeenNthCalledWith(1, {
|
||||
name: 'n8n_scaling_mode_queue_jobs_completed',
|
||||
help: 'Total number of jobs completed across all workers in scaling mode since instance start.',
|
||||
});
|
||||
|
||||
expect(promClient.Counter).toHaveBeenNthCalledWith(2, {
|
||||
name: 'n8n_scaling_mode_queue_jobs_failed',
|
||||
help: 'Total number of jobs failed across all workers in scaling mode since instance start.',
|
||||
});
|
||||
|
||||
expect(eventService.on).toHaveBeenCalledWith('job-counts-updated', expect.any(Function));
|
||||
});
|
||||
|
||||
it('should not set up queue metrics if enabled but not on scaling mode', async () => {
|
||||
config.set('executions.mode', 'regular');
|
||||
prometheusMetricsService.enableMetric('queue');
|
||||
|
||||
await prometheusMetricsService.init(app);
|
||||
|
||||
expect(promClient.Gauge).toHaveBeenCalledTimes(1); // version metric
|
||||
expect(promClient.Counter).toHaveBeenCalledTimes(0); // cache metrics
|
||||
expect(eventService.on).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { N8N_VERSION } from '@/constants';
|
||||
import type express from 'express';
|
||||
import promBundle from 'express-prom-bundle';
|
||||
import promClient, { type Counter } from 'prom-client';
|
||||
import promClient, { type Counter, type Gauge } from 'prom-client';
|
||||
import semverParse from 'semver/functions/parse';
|
||||
import { Service } from 'typedi';
|
||||
|
||||
@@ -11,6 +11,8 @@ import { EventMessageTypeNames } from 'n8n-workflow';
|
||||
import type { EventMessageTypes } from '@/eventbus';
|
||||
import type { Includes, MetricCategory, MetricLabel } from './types';
|
||||
import { GlobalConfig } from '@n8n/config';
|
||||
import { EventService } from '@/events/event.service';
|
||||
import config from '@/config';
|
||||
|
||||
@Service()
|
||||
export class PrometheusMetricsService {
|
||||
@@ -18,10 +20,13 @@ export class PrometheusMetricsService {
|
||||
private readonly cacheService: CacheService,
|
||||
private readonly eventBus: MessageEventBus,
|
||||
private readonly globalConfig: GlobalConfig,
|
||||
private readonly eventService: EventService,
|
||||
) {}
|
||||
|
||||
private readonly counters: { [key: string]: Counter<string> | null } = {};
|
||||
|
||||
private readonly gauges: Record<string, Gauge<string>> = {};
|
||||
|
||||
private readonly prefix = this.globalConfig.endpoints.metrics.prefix;
|
||||
|
||||
private readonly includes: Includes = {
|
||||
@@ -30,6 +35,7 @@ export class PrometheusMetricsService {
|
||||
routes: this.globalConfig.endpoints.metrics.includeApiEndpoints,
|
||||
cache: this.globalConfig.endpoints.metrics.includeCacheMetrics,
|
||||
logs: this.globalConfig.endpoints.metrics.includeMessageEventBusMetrics,
|
||||
queue: this.globalConfig.endpoints.metrics.includeQueueMetrics,
|
||||
},
|
||||
labels: {
|
||||
credentialsType: this.globalConfig.endpoints.metrics.includeCredentialTypeLabel,
|
||||
@@ -48,6 +54,7 @@ export class PrometheusMetricsService {
|
||||
this.initCacheMetrics();
|
||||
this.initEventBusMetrics();
|
||||
this.initRouteMetrics(app);
|
||||
this.initQueueMetrics();
|
||||
this.mountMetricsEndpoint(app);
|
||||
}
|
||||
|
||||
@@ -218,6 +225,42 @@ export class PrometheusMetricsService {
|
||||
});
|
||||
}
|
||||
|
||||
private initQueueMetrics() {
|
||||
if (!this.includes.metrics.queue || config.getEnv('executions.mode') !== 'queue') return;
|
||||
|
||||
this.gauges.waiting = new promClient.Gauge({
|
||||
name: this.prefix + 'scaling_mode_queue_jobs_waiting',
|
||||
help: 'Current number of enqueued jobs waiting for pickup in scaling mode.',
|
||||
});
|
||||
|
||||
this.gauges.active = new promClient.Gauge({
|
||||
name: this.prefix + 'scaling_mode_queue_jobs_active',
|
||||
help: 'Current number of jobs being processed across all workers in scaling mode.',
|
||||
});
|
||||
|
||||
this.counters.completed = new promClient.Counter({
|
||||
name: this.prefix + 'scaling_mode_queue_jobs_completed',
|
||||
help: 'Total number of jobs completed across all workers in scaling mode since instance start.',
|
||||
});
|
||||
|
||||
this.counters.failed = new promClient.Counter({
|
||||
name: this.prefix + 'scaling_mode_queue_jobs_failed',
|
||||
help: 'Total number of jobs failed across all workers in scaling mode since instance start.',
|
||||
});
|
||||
|
||||
this.gauges.waiting.set(0);
|
||||
this.gauges.active.set(0);
|
||||
this.counters.completed.inc(0);
|
||||
this.counters.failed.inc(0);
|
||||
|
||||
this.eventService.on('job-counts-updated', (jobCounts) => {
|
||||
this.gauges.waiting.set(jobCounts.waiting);
|
||||
this.gauges.active.set(jobCounts.active);
|
||||
this.counters.completed?.inc(jobCounts.completed);
|
||||
this.counters.failed?.inc(jobCounts.failed);
|
||||
});
|
||||
}
|
||||
|
||||
private toLabels(event: EventMessageTypes): Record<string, string> {
|
||||
const { __type, eventName, payload } = event;
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
export type MetricCategory = 'default' | 'routes' | 'cache' | 'logs';
|
||||
export type MetricCategory = 'default' | 'routes' | 'cache' | 'logs' | 'queue';
|
||||
|
||||
export type MetricLabel =
|
||||
| 'credentialsType'
|
||||
|
||||
Reference in New Issue
Block a user