feat(core): Add execution runData recovery and status field (#5112)
* adds ExecutionEvents view modal to ExecutionList * fix time rendering and remove wf column * checks for unfinished executions and fails them * prevent re-setting stoppedAt for execution * some cleanup / manually create rundata after crash * quicksave * remove Threads lib, log worker rewrite * cleanup comment * fix sentry destination return value * test for tests... * run tests with single worker * fix tests * remove console log * add endpoint for execution data recovery * lint cleanup and some refactoring * fix accidental recursion * remove cyclic imports * add rundata recovery to Workflowrunner * remove comments * cleanup and refactor * adds a status field to executions * setExecutionStatus on queued worker * fix onWorkflowPostExecute * set waiting from worker * get crashed status into frontend * remove comment * merge fix * cleanup * catch empty rundata in recovery * refactor IExecutionsSummary and inject nodeExecution Errors * reduce default event log size to 10mb from 100mb * add per node execution status * lint fix * merge and lint fix * phrasing change * improve preview rendering and messaging * remove debug * Improve partial rundata recovery * fix labels * fix line through * send manual rundata to ui at crash * some type and msg push fixes * improve recovered item rendering in preview * update workflowStatistics on recover * merge fix * review fixes * merge fix * notify eventbus when ui is back up * add a small timeout to make sure the UI is back up * increase reconnect timeout to 30s * adjust recover timeout and ui connection lost msg * do not stop execution in editor after x reconnects * add executionRecovered push event * fix recovered connection not green * remove reconnect toast and merge existing rundata * merge editor and recovered data for own mode
This commit is contained in:
committed by
GitHub
parent
3a9c257f55
commit
d143f3f2ec
@@ -55,6 +55,8 @@ import { initErrorHandling } from '@/ErrorReporting';
|
||||
import { PermissionChecker } from '@/UserManagement/PermissionChecker';
|
||||
import type { Push } from '@/push';
|
||||
import { getPushInstance } from '@/push';
|
||||
import { eventBus } from './eventbus';
|
||||
import { recoverExecutionDataFromEventLogMessages } from './eventbus/MessageEventBus/recoverEvents';
|
||||
|
||||
export class WorkflowRunner {
|
||||
activeExecutions: ActiveExecutions.ActiveExecutions;
|
||||
@@ -103,8 +105,40 @@ export class WorkflowRunner {
|
||||
mode: executionMode,
|
||||
startedAt,
|
||||
stoppedAt: new Date(),
|
||||
status: 'error',
|
||||
};
|
||||
|
||||
// The following will attempt to recover runData from event logs
|
||||
// Note that this will only work as long as the event logs actually contain the events from this workflow execution
|
||||
// Since processError is run almost immediately after the workflow execution has failed, it is likely that the event logs
|
||||
// do contain those messages.
|
||||
try {
|
||||
// Search for messages for this executionId in event logs
|
||||
const eventLogMessages = await eventBus.getEventsByExecutionId(executionId);
|
||||
// Attempt to recover better runData from these messages (but don't update the execution db entry yet)
|
||||
if (eventLogMessages.length > 0) {
|
||||
const eventLogExecutionData = await recoverExecutionDataFromEventLogMessages(
|
||||
executionId,
|
||||
eventLogMessages,
|
||||
false,
|
||||
);
|
||||
if (eventLogExecutionData) {
|
||||
fullRunData.data.resultData.runData = eventLogExecutionData.resultData.runData;
|
||||
fullRunData.status = 'crashed';
|
||||
}
|
||||
}
|
||||
|
||||
const executionFlattedData = await Db.collections.Execution.findOneBy({ id: executionId });
|
||||
|
||||
void InternalHooksManager.getInstance().onWorkflowCrashed(
|
||||
executionId,
|
||||
executionMode,
|
||||
executionFlattedData?.workflowData,
|
||||
);
|
||||
} catch {
|
||||
// Ignore errors
|
||||
}
|
||||
|
||||
// Remove from active execution with empty data. That will
|
||||
// set the execution to failed.
|
||||
this.activeExecutions.remove(executionId, fullRunData);
|
||||
@@ -287,6 +321,10 @@ export class WorkflowRunner {
|
||||
},
|
||||
];
|
||||
|
||||
additionalData.setExecutionStatus = WorkflowExecuteAdditionalData.setExecutionStatus.bind({
|
||||
executionId,
|
||||
});
|
||||
|
||||
additionalData.sendMessageToUI = WorkflowExecuteAdditionalData.sendMessageToUI.bind({
|
||||
sessionId: data.sessionId,
|
||||
});
|
||||
@@ -354,6 +392,7 @@ export class WorkflowRunner {
|
||||
if (workflowExecution.isCanceled) {
|
||||
fullRunData.finished = false;
|
||||
}
|
||||
fullRunData.status = this.activeExecutions.getStatus(executionId);
|
||||
this.activeExecutions.remove(executionId, fullRunData);
|
||||
})
|
||||
.catch((error) => {
|
||||
@@ -708,7 +747,7 @@ export class WorkflowRunner {
|
||||
}
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/await-thenable
|
||||
await this.activeExecutions.remove(message.data.executionId, message.data.result);
|
||||
this.activeExecutions.remove(message.data.executionId, message.data.result);
|
||||
}
|
||||
});
|
||||
|
||||
@@ -733,7 +772,7 @@ export class WorkflowRunner {
|
||||
);
|
||||
// Process did exit with error code, so something went wrong.
|
||||
const executionError = new WorkflowOperationError(
|
||||
'Workflow execution process did crash for an unknown reason!',
|
||||
'Workflow execution process crashed for an unknown reason!',
|
||||
);
|
||||
|
||||
await this.processError(
|
||||
@@ -752,7 +791,7 @@ export class WorkflowRunner {
|
||||
// Instead of pending forever as executing when it
|
||||
// actually isn't anymore.
|
||||
// eslint-disable-next-line @typescript-eslint/await-thenable, no-await-in-loop
|
||||
await this.activeExecutions.remove(executionId);
|
||||
this.activeExecutions.remove(executionId);
|
||||
}
|
||||
|
||||
clearTimeout(executionTimeout);
|
||||
|
||||
Reference in New Issue
Block a user