refactor(core): Create pruning service (no-changelog) (#7564)

https://linear.app/n8n/issue/PAY-954/ensure-only-main-instance-leader-handles-pruning
This commit is contained in:
Iván Ovejero
2023-11-02 12:24:25 +01:00
committed by GitHub
parent be49778388
commit 921d213ae5
6 changed files with 243 additions and 188 deletions

View File

@@ -1,14 +1,5 @@
import { Service } from 'typedi';
import {
Brackets,
DataSource,
Not,
In,
IsNull,
LessThanOrEqual,
MoreThanOrEqual,
Repository,
} from 'typeorm';
import { DataSource, In, LessThanOrEqual, MoreThanOrEqual, Repository } from 'typeorm';
import { DateUtils } from 'typeorm/util/DateUtils';
import type {
FindManyOptions,
@@ -33,7 +24,6 @@ import type { ExecutionData } from '../entities/ExecutionData';
import { ExecutionEntity } from '../entities/ExecutionEntity';
import { ExecutionMetadata } from '../entities/ExecutionMetadata';
import { ExecutionDataRepository } from './executionData.repository';
import { TIME, inTest } from '@/constants';
import { Logger } from '@/Logger';
function parseFiltersToQueryBuilder(
@@ -79,19 +69,6 @@ function parseFiltersToQueryBuilder(
export class ExecutionRepository extends Repository<ExecutionEntity> {
private hardDeletionBatchSize = 100;
private rates: Record<string, number> = {
softDeletion: config.getEnv('executions.pruneDataIntervals.softDelete') * TIME.MINUTE,
hardDeletion: config.getEnv('executions.pruneDataIntervals.hardDelete') * TIME.MINUTE,
};
private softDeletionInterval: NodeJS.Timer | undefined;
private hardDeletionTimeout: NodeJS.Timeout | undefined;
private isMainInstance = config.get('generic.instanceType') === 'main';
private isPruningEnabled = config.getEnv('executions.pruneData');
constructor(
dataSource: DataSource,
private readonly logger: Logger,
@@ -99,43 +76,6 @@ export class ExecutionRepository extends Repository<ExecutionEntity> {
private readonly binaryDataService: BinaryDataService,
) {
super(ExecutionEntity, dataSource.manager);
if (!this.isMainInstance || inTest) return;
if (this.isPruningEnabled) this.setSoftDeletionInterval();
this.scheduleHardDeletion();
}
/**
 * Cancel the soft-deletion interval and the hard-deletion timeout of the
 * pruning cycle. Does nothing unless this process is the main instance.
 */
clearTimers() {
	if (this.isMainInstance) {
		this.logger.debug('Clearing soft-deletion interval and hard-deletion timeout (pruning cycle)');

		clearInterval(this.softDeletionInterval);
		clearTimeout(this.hardDeletionTimeout);
	}
}
/**
 * Start the recurring timer that runs `softDeleteOnPruningCycle`.
 *
 * @param rateMs Time between two soft-deletion runs, in milliseconds.
 * Defaults to `this.rates.softDeletion`.
 */
setSoftDeletionInterval(rateMs = this.rates.softDeletion) {
	const when = [(rateMs / TIME.MINUTE).toFixed(2), 'min'].join(' ');

	this.logger.debug(`Setting soft-deletion interval at every ${when} (pruning cycle)`);

	// Schedule with `rateMs` (not the configured default) so that the timer
	// honors the requested rate and matches what was just logged.
	this.softDeletionInterval = setInterval(async () => this.softDeleteOnPruningCycle(), rateMs);
}
/**
 * Schedule the next single run of `hardDeleteOnPruningCycle`.
 *
 * @param rateMs Delay before the next hard-deletion run, in milliseconds.
 * Defaults to `this.rates.hardDeletion`.
 */
scheduleHardDeletion(rateMs = this.rates.hardDeletion) {
	const when = [(rateMs / TIME.MINUTE).toFixed(2), 'min'].join(' ');

	this.logger.debug(`Scheduling hard-deletion for next ${when} (pruning cycle)`);

	// Schedule with `rateMs` (not the configured default): callers pass a
	// shorter delay to speed up the cycle, and that delay must take effect.
	this.hardDeletionTimeout = setTimeout(async () => this.hardDeleteOnPruningCycle(), rateMs);
}
async findMultipleExecutions(
@@ -478,115 +418,4 @@ export class ExecutionRepository extends Repository<ExecutionEntity> {
await this.delete(batch);
} while (executionIds.length > 0);
}
/**
 * Soft-delete executions in a pruning cycle: set `deletedAt` on every
 * not-yet-deleted execution in an end state that is either too old or
 * beyond the configured maximum count.
 */
async softDeleteOnPruningCycle() {
	this.logger.debug('Starting soft-deletion of executions (pruning cycle)');

	const maxAge = config.getEnv('executions.pruneDataMaxAge'); // in h
	const maxCount = config.getEnv('executions.pruneDataMaxCount');

	// Age criterion: executions that stopped longer than `maxAge` hours ago
	const cutoff = new Date();
	cutoff.setHours(cutoff.getHours() - maxAge);

	// date reformatting needed - see https://github.com/typeorm/typeorm/issues/2286
	const timeBasedWhere: FindOptionsWhere<ExecutionEntity> = {
		stoppedAt: LessThanOrEqual(DateUtils.mixedDateToUtcDatetimeString(cutoff)),
	};

	// Count criterion: skip the `maxCount` highest ids; the next id (if any)
	// and everything below it is over the limit
	let countBasedWhere: FindOptionsWhere<ExecutionEntity> | undefined;

	if (maxCount > 0) {
		const [firstOverLimit] = await this.find({
			select: ['id'],
			skip: maxCount,
			take: 1,
			order: { id: 'DESC' },
		});

		if (firstOverLimit) {
			countBasedWhere = { id: LessThanOrEqual(firstOverLimit.id) };
		}
	}

	const { affected } = await this.createQueryBuilder()
		.update(ExecutionEntity)
		.set({ deletedAt: new Date() })
		.where({
			deletedAt: IsNull(),
			// Only mark executions as deleted if they are in an end state
			status: Not(In(['new', 'running', 'waiting'])),
		})
		.andWhere(
			new Brackets((qb) => {
				if (countBasedWhere) {
					return qb.where(timeBasedWhere).orWhere(countBasedWhere);
				}

				return qb.where(timeBasedWhere);
			}),
		)
		.execute();

	if (affected === 0) {
		this.logger.debug('Found no executions to soft-delete (pruning cycle)');
	}
}
/**
 * Hard-delete one batch of executions in a pruning cycle: permanently remove
 * executions that were soft-deleted before the buffer cutoff, together with
 * their binary data, then schedule the next run.
 */
private async hardDeleteOnPruningCycle() {
	const cutoff = new Date();
	cutoff.setHours(cutoff.getHours() - config.getEnv('executions.pruneDataHardDeleteBuffer'));

	const softDeleted = await this.find({
		select: ['workflowId', 'id'],
		where: {
			deletedAt: LessThanOrEqual(DateUtils.mixedDateToUtcDatetimeString(cutoff)),
		},
		take: this.hardDeletionBatchSize,
		/**
		 * @important This ensures soft-deleted executions are included,
		 * else `@DeleteDateColumn()` at `deletedAt` will exclude them.
		 */
		withDeleted: true,
	});

	const workflowIdsAndExecutionIds = softDeleted.map((execution) => ({
		workflowId: execution.workflowId,
		executionId: execution.id,
	}));

	const executionIds = workflowIdsAndExecutionIds.map((pair) => pair.executionId);

	if (executionIds.length === 0) {
		this.logger.debug('Found no executions to hard-delete (pruning cycle)');
		this.scheduleHardDeletion();
		return;
	}

	try {
		this.logger.debug('Starting hard-deletion of executions (pruning cycle)', {
			executionIds,
		});

		await this.binaryDataService.deleteMany(workflowIdsAndExecutionIds);

		await this.delete({ id: In(executionIds) });
	} catch (error) {
		// A failed batch is logged only; the next run is still scheduled below.
		let message: string;

		if (error instanceof Error) {
			message = error.message;
		} else {
			message = `${error}`;
		}

		this.logger.error('Failed to hard-delete executions (pruning cycle)', {
			executionIds,
			error: message,
		});
	}

	/**
	 * For next batch, speed up hard-deletion cycle in high-volume case
	 * to prevent high concurrency from causing duplicate deletions.
	 */
	if (executionIds.length >= this.hardDeletionBatchSize) {
		this.scheduleHardDeletion(1 * TIME.SECOND);
	} else {
		this.scheduleHardDeletion(this.rates.hardDeletion);
	}
}
}