feat(core): Add execution runData recovery and status field (#5112)

* adds ExecutionEvents view modal to ExecutionList

* fix time rendering and remove wf column

* checks for unfinished executions and fails them

* prevent re-setting stoppedAt for execution

* some cleanup / manually create rundata after crash

* quicksave

* remove Threads lib, log worker rewrite

* cleanup comment

* fix sentry destination return value

* test for tests...

* run tests with single worker

* fix tests

* remove console log

* add endpoint for execution data recovery

* lint cleanup and some refactoring

* fix accidental recursion

* remove cyclic imports

* add rundata recovery to Workflowrunner

* remove comments

* cleanup and refactor

* adds a status field to executions

* setExecutionStatus on queued worker

* fix onWorkflowPostExecute

* set waiting from worker

* get crashed status into frontend

* remove comment

* merge fix

* cleanup

* catch empty rundata in recovery

* refactor IExecutionsSummary and inject nodeExecution Errors

* reduce default event log size to 10mb from 100mb

* add per node execution status

* lint fix

* merge and lint fix

* phrasing change

* improve preview rendering and messaging

* remove debug

* Improve partial rundata recovery

* fix labels

* fix line through

* send manual rundata to ui at crash

* some type and msg push fixes

* improve recovered item rendering in preview

* update workflowStatistics on recover

* merge fix

* review fixes

* merge fix

* notify eventbus when ui is back up

* add a small timeout to make sure the UI is back up

* increase reconnect timeout to 30s

* adjust recover timeout and ui connection lost msg

* do not stop execution in editor after x reconnects

* add executionRecovered push event

* fix recovered connection not green

* remove reconnect toast and  merge existing rundata

* merge editor and recovered data for own mode
This commit is contained in:
Michael Auerswald
2023-02-17 10:54:07 +01:00
committed by GitHub
parent 3a9c257f55
commit d143f3f2ec
71 changed files with 1245 additions and 307 deletions

View File

@@ -55,6 +55,8 @@ import { initErrorHandling } from '@/ErrorReporting';
import { PermissionChecker } from '@/UserManagement/PermissionChecker';
import type { Push } from '@/push';
import { getPushInstance } from '@/push';
import { eventBus } from './eventbus';
import { recoverExecutionDataFromEventLogMessages } from './eventbus/MessageEventBus/recoverEvents';
export class WorkflowRunner {
activeExecutions: ActiveExecutions.ActiveExecutions;
@@ -103,8 +105,40 @@ export class WorkflowRunner {
mode: executionMode,
startedAt,
stoppedAt: new Date(),
status: 'error',
};
// The following will attempt to recover runData from event logs
// Note that this will only work as long as the event logs actually contain the events from this workflow execution
// Since processError is run almost immediately after the workflow execution has failed, it is likely that the event logs
// does contain those messages.
try {
// Search for messages for this executionId in event logs
const eventLogMessages = await eventBus.getEventsByExecutionId(executionId);
// Attempt to recover more better runData from these messages (but don't update the execution db entry yet)
if (eventLogMessages.length > 0) {
const eventLogExecutionData = await recoverExecutionDataFromEventLogMessages(
executionId,
eventLogMessages,
false,
);
if (eventLogExecutionData) {
fullRunData.data.resultData.runData = eventLogExecutionData.resultData.runData;
fullRunData.status = 'crashed';
}
}
const executionFlattedData = await Db.collections.Execution.findOneBy({ id: executionId });
void InternalHooksManager.getInstance().onWorkflowCrashed(
executionId,
executionMode,
executionFlattedData?.workflowData,
);
} catch {
// Ignore errors
}
// Remove from active execution with empty data. That will
// set the execution to failed.
this.activeExecutions.remove(executionId, fullRunData);
@@ -287,6 +321,10 @@ export class WorkflowRunner {
},
];
additionalData.setExecutionStatus = WorkflowExecuteAdditionalData.setExecutionStatus.bind({
executionId,
});
additionalData.sendMessageToUI = WorkflowExecuteAdditionalData.sendMessageToUI.bind({
sessionId: data.sessionId,
});
@@ -354,6 +392,7 @@ export class WorkflowRunner {
if (workflowExecution.isCanceled) {
fullRunData.finished = false;
}
fullRunData.status = this.activeExecutions.getStatus(executionId);
this.activeExecutions.remove(executionId, fullRunData);
})
.catch((error) => {
@@ -708,7 +747,7 @@ export class WorkflowRunner {
}
// eslint-disable-next-line @typescript-eslint/await-thenable
await this.activeExecutions.remove(message.data.executionId, message.data.result);
this.activeExecutions.remove(message.data.executionId, message.data.result);
}
});
@@ -733,7 +772,7 @@ export class WorkflowRunner {
);
// Process did exit with error code, so something went wrong.
const executionError = new WorkflowOperationError(
'Workflow execution process did crash for an unknown reason!',
'Workflow execution process crashed for an unknown reason!',
);
await this.processError(
@@ -752,7 +791,7 @@ export class WorkflowRunner {
// Instead of pending forever as executing when it
// actually isn't anymore.
// eslint-disable-next-line @typescript-eslint/await-thenable, no-await-in-loop
await this.activeExecutions.remove(executionId);
this.activeExecutions.remove(executionId);
}
clearTimeout(executionTimeout);