🐛 Fix "unknown" status, never-ending workflows and error messages not being displayed (#1978)
* Added try catch blocks to avoid endlessly running workflows * Added handling for subworkflows * ⚡ Fix one cause of "unknown" status of workflows with "main" mode * ⚡ Fix one cause of "unknown" status of workflows with "own" mode * ⚡ Fix one cause of "unknown" status of workflows with "queue" mode * Saving database recovery * 🐛 Fix issue that errors did not get saved correctly and were also not displayed * ⚡ Save workflow timeout correctly as error * Adding error capture to queued jobs * ⚡ Mark canceled executions as not finished consistently across all modes Co-authored-by: Jan Oberhauser <jan.oberhauser@gmail.com>
This commit is contained in:
@@ -387,9 +387,9 @@ function hookFunctionsSave(parentProcessMode?: string): IWorkflowExecuteHooks {
|
||||
}
|
||||
|
||||
// Leave log message before flatten as that operation increases memory usage a lot and the chance of a crash is highest here
|
||||
Logger.debug(`Save execution data to database for execution ID ${this.executionId}`, {
|
||||
executionId: this.executionId,
|
||||
workflowId: this.workflowData.id,
|
||||
Logger.debug(`Save execution data to database for execution ID ${this.executionId}`, {
|
||||
executionId: this.executionId,
|
||||
workflowId: this.workflowData.id,
|
||||
finished: fullExecutionData.finished,
|
||||
stoppedAt: fullExecutionData.stoppedAt,
|
||||
});
|
||||
@@ -409,12 +409,12 @@ function hookFunctionsSave(parentProcessMode?: string): IWorkflowExecuteHooks {
|
||||
executeErrorWorkflow(this.workflowData, fullRunData, this.mode, this.executionId, this.retryOf);
|
||||
}
|
||||
} catch (error) {
|
||||
Logger.error(`Failed saving execution data to DB on execution ID ${this.executionId}`, {
|
||||
executionId: this.executionId,
|
||||
Logger.error(`Failed saving execution data to DB on execution ID ${this.executionId}`, {
|
||||
executionId: this.executionId,
|
||||
workflowId: this.workflowData.id,
|
||||
error,
|
||||
});
|
||||
|
||||
|
||||
if (!isManualMode) {
|
||||
executeErrorWorkflow(this.workflowData, fullRunData, this.mode, undefined, this.retryOf);
|
||||
}
|
||||
@@ -608,44 +608,78 @@ export async function executeWorkflow(workflowInfo: IExecuteWorkflowInfo, additi
|
||||
executionId = parentExecutionId !== undefined ? parentExecutionId : await ActiveExecutions.getInstance().add(runData);
|
||||
}
|
||||
|
||||
const runExecutionData = runData.executionData as IRunExecutionData;
|
||||
let data;
|
||||
try {
|
||||
// Get the needed credentials for the current workflow as they will differ to the ones of the
|
||||
// calling workflow.
|
||||
const credentials = await WorkflowCredentials(workflowData!.nodes);
|
||||
|
||||
// Get the needed credentials for the current workflow as they will differ to the ones of the
|
||||
// calling workflow.
|
||||
const credentials = await WorkflowCredentials(workflowData!.nodes);
|
||||
// Create new additionalData to have different workflow loaded and to call
|
||||
// different webhooks
|
||||
const additionalDataIntegrated = await getBase(credentials);
|
||||
additionalDataIntegrated.hooks = getWorkflowHooksIntegrated(runData.executionMode, executionId, workflowData!, { parentProcessMode: additionalData.hooks!.mode });
|
||||
// Make sure we pass on the original executeWorkflow function we received
|
||||
// This one already contains changes to talk to parent process
|
||||
// and get executionID from `activeExecutions` running on main process
|
||||
additionalDataIntegrated.executeWorkflow = additionalData.executeWorkflow;
|
||||
|
||||
let subworkflowTimeout = additionalData.executionTimeoutTimestamp;
|
||||
if (workflowData.settings?.executionTimeout !== undefined && workflowData.settings.executionTimeout > 0) {
|
||||
// We might have received a max timeout timestamp from the parent workflow
|
||||
// If we did, then we get the minimum time between the two timeouts
|
||||
// If no timeout was given from the parent, then we use our timeout.
|
||||
subworkflowTimeout = Math.min(additionalData.executionTimeoutTimestamp || Number.MAX_SAFE_INTEGER, Date.now() + (workflowData.settings.executionTimeout as number * 1000));
|
||||
}
|
||||
|
||||
// Create new additionalData to have different workflow loaded and to call
|
||||
// different webhooks
|
||||
const additionalDataIntegrated = await getBase(credentials);
|
||||
additionalDataIntegrated.hooks = getWorkflowHooksIntegrated(runData.executionMode, executionId, workflowData!, { parentProcessMode: additionalData.hooks!.mode });
|
||||
// Make sure we pass on the original executeWorkflow function we received
|
||||
// This one already contains changes to talk to parent process
|
||||
// and get executionID from `activeExecutions` running on main process
|
||||
additionalDataIntegrated.executeWorkflow = additionalData.executeWorkflow;
|
||||
additionalDataIntegrated.executionTimeoutTimestamp = subworkflowTimeout;
|
||||
|
||||
let subworkflowTimeout = additionalData.executionTimeoutTimestamp;
|
||||
if (workflowData.settings?.executionTimeout !== undefined && workflowData.settings.executionTimeout > 0) {
|
||||
// We might have received a max timeout timestamp from the parent workflow
|
||||
// If we did, then we get the minimum time between the two timeouts
|
||||
// If no timeout was given from the parent, then we use our timeout.
|
||||
subworkflowTimeout = Math.min(additionalData.executionTimeoutTimestamp || Number.MAX_SAFE_INTEGER, Date.now() + (workflowData.settings.executionTimeout as number * 1000));
|
||||
}
|
||||
const runExecutionData = runData.executionData as IRunExecutionData;
|
||||
|
||||
additionalDataIntegrated.executionTimeoutTimestamp = subworkflowTimeout;
|
||||
|
||||
|
||||
// Execute the workflow
|
||||
const workflowExecute = new WorkflowExecute(additionalDataIntegrated, runData.executionMode, runExecutionData);
|
||||
if (parentExecutionId !== undefined) {
|
||||
// Must be changed to become typed
|
||||
return {
|
||||
// Execute the workflow
|
||||
const workflowExecute = new WorkflowExecute(additionalDataIntegrated, runData.executionMode, runExecutionData);
|
||||
if (parentExecutionId !== undefined) {
|
||||
// Must be changed to become typed
|
||||
return {
|
||||
startedAt: new Date(),
|
||||
workflow,
|
||||
workflowExecute,
|
||||
};
|
||||
}
|
||||
data = await workflowExecute.processRunExecutionData(workflow);
|
||||
} catch (error) {
|
||||
const fullRunData: IRun = {
|
||||
data: {
|
||||
resultData: {
|
||||
error,
|
||||
runData: {},
|
||||
},
|
||||
},
|
||||
finished: false,
|
||||
mode: 'integrated',
|
||||
startedAt: new Date(),
|
||||
workflow,
|
||||
workflowExecute,
|
||||
stoppedAt: new Date(),
|
||||
};
|
||||
// When failing, we might not have finished the execution
|
||||
// Therefore, database might not contain finished errors.
|
||||
// Force an update to db as there should be no harm doing this
|
||||
|
||||
const fullExecutionData: IExecutionDb = {
|
||||
data: fullRunData.data,
|
||||
mode: fullRunData.mode,
|
||||
finished: fullRunData.finished ? fullRunData.finished : false,
|
||||
startedAt: fullRunData.startedAt,
|
||||
stoppedAt: fullRunData.stoppedAt,
|
||||
workflowData,
|
||||
};
|
||||
|
||||
const executionData = ResponseHelper.flattenExecutionData(fullExecutionData);
|
||||
|
||||
await Db.collections.Execution!.update(executionId, executionData as IExecutionFlattedDb);
|
||||
throw {
|
||||
...error,
|
||||
stack: error!.stack,
|
||||
};
|
||||
}
|
||||
const data = await workflowExecute.processRunExecutionData(workflow);
|
||||
|
||||
await externalHooks.run('workflow.postExecute', [data, workflowData]);
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@ import {
|
||||
IBullJobResponse,
|
||||
ICredentialsOverwrite,
|
||||
ICredentialsTypeData,
|
||||
IExecutionDb,
|
||||
IExecutionFlattedDb,
|
||||
IExecutionResponse,
|
||||
IProcessMessageDataHook,
|
||||
@@ -29,6 +30,7 @@ import {
|
||||
import {
|
||||
ExecutionError,
|
||||
IRun,
|
||||
IWorkflowBase,
|
||||
LoggerProxy as Logger,
|
||||
Workflow,
|
||||
WorkflowExecuteMode,
|
||||
@@ -85,11 +87,15 @@ export class WorkflowRunner {
|
||||
* @param {string} executionId
|
||||
* @memberof WorkflowRunner
|
||||
*/
|
||||
processError(error: ExecutionError, startedAt: Date, executionMode: WorkflowExecuteMode, executionId: string) {
|
||||
async processError(error: ExecutionError, startedAt: Date, executionMode: WorkflowExecuteMode, executionId: string, hooks?: WorkflowHooks) {
|
||||
const fullRunData: IRun = {
|
||||
data: {
|
||||
resultData: {
|
||||
error,
|
||||
error: {
|
||||
...error,
|
||||
message: error.message,
|
||||
stack: error.stack,
|
||||
},
|
||||
runData: {},
|
||||
},
|
||||
},
|
||||
@@ -102,6 +108,10 @@ export class WorkflowRunner {
|
||||
// Remove from active execution with empty data. That will
|
||||
// set the execution to failed.
|
||||
this.activeExecutions.remove(executionId, fullRunData);
|
||||
|
||||
if (hooks) {
|
||||
await hooks.executeHookFunctions('workflowExecuteAfter', [fullRunData]);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -179,28 +189,34 @@ export class WorkflowRunner {
|
||||
// Register the active execution
|
||||
const executionId = await this.activeExecutions.add(data, undefined);
|
||||
Logger.verbose(`Execution for workflow ${data.workflowData.name} was assigned id ${executionId}`, {executionId});
|
||||
|
||||
let workflowExecution: PCancelable<IRun>;
|
||||
additionalData.hooks = WorkflowExecuteAdditionalData.getWorkflowHooksMain(data, executionId, true);
|
||||
|
||||
additionalData.sendMessageToUI = WorkflowExecuteAdditionalData.sendMessageToUI.bind({sessionId: data.sessionId});
|
||||
try {
|
||||
additionalData.sendMessageToUI = WorkflowExecuteAdditionalData.sendMessageToUI.bind({sessionId: data.sessionId});
|
||||
|
||||
let workflowExecution: PCancelable<IRun>;
|
||||
if (data.executionData !== undefined) {
|
||||
Logger.debug(`Execution ID ${executionId} had Execution data. Running with payload.`, {executionId});
|
||||
const workflowExecute = new WorkflowExecute(additionalData, data.executionMode, data.executionData);
|
||||
workflowExecution = workflowExecute.processRunExecutionData(workflow);
|
||||
} else if (data.runData === undefined || data.startNodes === undefined || data.startNodes.length === 0 || data.destinationNode === undefined) {
|
||||
Logger.debug(`Execution ID ${executionId} will run executing all nodes.`, {executionId});
|
||||
// Execute all nodes
|
||||
if (data.executionData !== undefined) {
|
||||
Logger.debug(`Execution ID ${executionId} had Execution data. Running with payload.`, {executionId});
|
||||
const workflowExecute = new WorkflowExecute(additionalData, data.executionMode, data.executionData);
|
||||
workflowExecution = workflowExecute.processRunExecutionData(workflow);
|
||||
} else if (data.runData === undefined || data.startNodes === undefined || data.startNodes.length === 0 || data.destinationNode === undefined) {
|
||||
Logger.debug(`Execution ID ${executionId} will run executing all nodes.`, {executionId});
|
||||
// Execute all nodes
|
||||
|
||||
// Can execute without webhook so go on
|
||||
const workflowExecute = new WorkflowExecute(additionalData, data.executionMode);
|
||||
workflowExecution = workflowExecute.run(workflow, undefined, data.destinationNode);
|
||||
} else {
|
||||
Logger.debug(`Execution ID ${executionId} is a partial execution.`, {executionId});
|
||||
// Execute only the nodes between start and destination nodes
|
||||
const workflowExecute = new WorkflowExecute(additionalData, data.executionMode);
|
||||
workflowExecution = workflowExecute.runPartialWorkflow(workflow, data.runData, data.startNodes, data.destinationNode);
|
||||
// Can execute without webhook so go on
|
||||
const workflowExecute = new WorkflowExecute(additionalData, data.executionMode);
|
||||
workflowExecution = workflowExecute.run(workflow, undefined, data.destinationNode);
|
||||
} else {
|
||||
Logger.debug(`Execution ID ${executionId} is a partial execution.`, {executionId});
|
||||
// Execute only the nodes between start and destination nodes
|
||||
const workflowExecute = new WorkflowExecute(additionalData, data.executionMode);
|
||||
workflowExecution = workflowExecute.runPartialWorkflow(workflow, data.runData, data.startNodes, data.destinationNode);
|
||||
}
|
||||
|
||||
} catch (error) {
|
||||
await this.processError(error, new Date(), data.executionMode, executionId, additionalData.hooks);
|
||||
|
||||
throw error;
|
||||
}
|
||||
|
||||
this.activeExecutions.attachWorkflowExecution(executionId, workflowExecution);
|
||||
@@ -247,7 +263,17 @@ export class WorkflowRunner {
|
||||
removeOnComplete: true,
|
||||
removeOnFail: true,
|
||||
};
|
||||
const job = await this.jobQueue.add(jobData, jobOptions);
|
||||
let job: Bull.Job;
|
||||
try {
|
||||
job = await this.jobQueue.add(jobData, jobOptions);
|
||||
} catch (error) {
|
||||
// We use "getWorkflowHooksIntegrated" here as we are just integrated in the "workflowExecuteAfter"
|
||||
// hook anyway, so the other hooks get ignored
|
||||
const hooks = WorkflowExecuteAdditionalData.getWorkflowHooksIntegrated(data.executionMode, executionId, data.workflowData, { retryOf: data.retryOf ? data.retryOf.toString() : undefined });
|
||||
await this.processError(error, new Date(), data.executionMode, executionId, hooks);
|
||||
return executionId;
|
||||
}
|
||||
|
||||
console.log('Started with ID: ' + job.id.toString());
|
||||
|
||||
const hooks = WorkflowExecuteAdditionalData.getWorkflowHooksWorkerMain(data.executionMode, executionId, data.workflowData, { retryOf: data.retryOf ? data.retryOf.toString() : undefined });
|
||||
@@ -264,7 +290,7 @@ export class WorkflowRunner {
|
||||
const fullRunData :IRun = {
|
||||
data: {
|
||||
resultData: {
|
||||
error: new WorkflowOperationError('Workflow has been canceled!'),
|
||||
error: new WorkflowOperationError('Workflow-Execution has been canceled!'),
|
||||
runData: {},
|
||||
},
|
||||
},
|
||||
@@ -280,6 +306,9 @@ export class WorkflowRunner {
|
||||
|
||||
const queueRecoveryInterval = config.get('queue.bull.queueRecoveryInterval') as number;
|
||||
|
||||
const racingPromises: Array<Promise<IBullJobResponse | object>> = [jobData];
|
||||
|
||||
let clearWatchdogInterval;
|
||||
if (queueRecoveryInterval > 0) {
|
||||
/*************************************************
|
||||
* Long explanation about what this solves: *
|
||||
@@ -295,7 +324,7 @@ export class WorkflowRunner {
|
||||
*************************************************/
|
||||
let watchDogInterval: NodeJS.Timeout | undefined;
|
||||
|
||||
const watchDog = new Promise((res) => {
|
||||
const watchDog: Promise<object> = new Promise((res) => {
|
||||
watchDogInterval = setInterval(async () => {
|
||||
const currentJob = await this.jobQueue.getJob(job.id);
|
||||
// When null means job is finished (not found in queue)
|
||||
@@ -306,19 +335,43 @@ export class WorkflowRunner {
|
||||
}, queueRecoveryInterval * 1000);
|
||||
});
|
||||
|
||||
racingPromises.push(watchDog);
|
||||
|
||||
const clearWatchdogInterval = () => {
|
||||
clearWatchdogInterval = () => {
|
||||
if (watchDogInterval) {
|
||||
clearInterval(watchDogInterval);
|
||||
watchDogInterval = undefined;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
await Promise.race([jobData, watchDog]);
|
||||
clearWatchdogInterval();
|
||||
try {
|
||||
await Promise.race(racingPromises);
|
||||
if (clearWatchdogInterval !== undefined) {
|
||||
clearWatchdogInterval();
|
||||
}
|
||||
} catch (error) {
|
||||
const hooks = WorkflowExecuteAdditionalData.getWorkflowHooksWorkerExecuter(data.executionMode, executionId, data.workflowData, { retryOf: data.retryOf ? data.retryOf.toString() : undefined });
|
||||
Logger.error(`Problem with execution ${executionId}: ${error.message}. Aborting.`);
|
||||
if (clearWatchdogInterval !== undefined) {
|
||||
clearWatchdogInterval();
|
||||
}
|
||||
await this.processError(error, new Date(), data.executionMode, executionId, hooks);
|
||||
|
||||
} else {
|
||||
await jobData;
|
||||
const fullRunData :IRun = {
|
||||
data: {
|
||||
resultData: {
|
||||
error,
|
||||
runData: {},
|
||||
},
|
||||
},
|
||||
mode: data.executionMode,
|
||||
startedAt: new Date(),
|
||||
stoppedAt: new Date(),
|
||||
};
|
||||
this.activeExecutions.remove(executionId, fullRunData);
|
||||
resolve(fullRunData);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -427,8 +480,13 @@ export class WorkflowRunner {
|
||||
|
||||
const workflowHooks = WorkflowExecuteAdditionalData.getWorkflowHooksMain(data, executionId);
|
||||
|
||||
// Send all data to subprocess it needs to run the workflow
|
||||
subprocess.send({ type: 'startWorkflow', data } as IProcessMessage);
|
||||
try {
|
||||
// Send all data to subprocess it needs to run the workflow
|
||||
subprocess.send({ type: 'startWorkflow', data } as IProcessMessage);
|
||||
} catch (error) {
|
||||
await this.processError(error, new Date(), data.executionMode, executionId, workflowHooks);
|
||||
return executionId;
|
||||
}
|
||||
|
||||
// Start timeout for the execution
|
||||
let executionTimeout: NodeJS.Timeout;
|
||||
@@ -476,14 +534,14 @@ export class WorkflowRunner {
|
||||
} else if (message.type === 'processError') {
|
||||
clearTimeout(executionTimeout);
|
||||
const executionError = message.data.executionError as ExecutionError;
|
||||
this.processError(executionError, startedAt, data.executionMode, executionId);
|
||||
|
||||
await this.processError(executionError, startedAt, data.executionMode, executionId, workflowHooks);
|
||||
} else if (message.type === 'processHook') {
|
||||
this.processHookMessage(workflowHooks, message.data as IProcessMessageDataHook);
|
||||
} else if (message.type === 'timeout') {
|
||||
// Execution timed out and its process has been terminated
|
||||
const timeoutError = new WorkflowOperationError('Workflow execution timed out!');
|
||||
|
||||
// No need to add hook here as the subprocess takes care of calling the hooks
|
||||
this.processError(timeoutError, startedAt, data.executionMode, executionId);
|
||||
} else if (message.type === 'startExecution') {
|
||||
const executionId = await this.activeExecutions.add(message.data.runData);
|
||||
@@ -506,13 +564,13 @@ export class WorkflowRunner {
|
||||
// Execution timed out and its process has been terminated
|
||||
const timeoutError = new WorkflowOperationError('Workflow execution timed out!');
|
||||
|
||||
this.processError(timeoutError, startedAt, data.executionMode, executionId);
|
||||
await this.processError(timeoutError, startedAt, data.executionMode, executionId, workflowHooks);
|
||||
} else if (code !== 0) {
|
||||
Logger.debug(`Subprocess for execution ID ${executionId} finished with error code ${code}.`, {executionId});
|
||||
// Process did exit with error code, so something went wrong.
|
||||
const executionError = new WorkflowOperationError('Workflow execution process did crash for an unknown reason!');
|
||||
|
||||
this.processError(executionError, startedAt, data.executionMode, executionId);
|
||||
await this.processError(executionError, startedAt, data.executionMode, executionId, workflowHooks);
|
||||
}
|
||||
|
||||
for(const executionId of childExecutionIds) {
|
||||
|
||||
@@ -30,6 +30,7 @@ import {
|
||||
IWorkflowExecuteHooks,
|
||||
LoggerProxy,
|
||||
Workflow,
|
||||
WorkflowExecuteMode,
|
||||
WorkflowHooks,
|
||||
WorkflowOperationError,
|
||||
} from 'n8n-workflow';
|
||||
@@ -315,7 +316,7 @@ process.on('message', async (message: IProcessMessage) => {
|
||||
for (const executionId of executionIds) {
|
||||
const childWorkflowExecute = workflowRunner.childExecutions[executionId];
|
||||
runData = childWorkflowExecute.workflowExecute.getFullRunData(workflowRunner.childExecutions[executionId].startedAt);
|
||||
const timeOutError = message.type === 'timeout' ? new WorkflowOperationError('Workflow execution timed out!') : undefined;
|
||||
const timeOutError = message.type === 'timeout' ? new WorkflowOperationError('Workflow execution timed out!') : new WorkflowOperationError('Workflow-Execution has been canceled!');
|
||||
|
||||
// If there is any data, send it to the parent process; if the execution timed out, add the error
|
||||
await childWorkflowExecute.workflowExecute.processSuccessExecution(workflowRunner.childExecutions[executionId].startedAt, childWorkflowExecute.workflow, timeOutError);
|
||||
@@ -324,7 +325,7 @@ process.on('message', async (message: IProcessMessage) => {
|
||||
// Workflow started already executing
|
||||
runData = workflowRunner.workflowExecute.getFullRunData(workflowRunner.startedAt);
|
||||
|
||||
const timeOutError = message.type === 'timeout' ? new WorkflowOperationError('Workflow execution timed out!') : undefined;
|
||||
const timeOutError = message.type === 'timeout' ? new WorkflowOperationError('Workflow execution timed out!') : new WorkflowOperationError('Workflow-Execution has been canceled!');
|
||||
|
||||
// If there is any data, send it to the parent process; if the execution timed out, add the error
|
||||
await workflowRunner.workflowExecute.processSuccessExecution(workflowRunner.startedAt, workflowRunner.workflow!, timeOutError);
|
||||
@@ -336,8 +337,8 @@ process.on('message', async (message: IProcessMessage) => {
|
||||
runData: {},
|
||||
},
|
||||
},
|
||||
finished: message.type !== 'timeout',
|
||||
mode: workflowRunner.data!.executionMode,
|
||||
finished: false,
|
||||
mode: workflowRunner.data ? workflowRunner.data!.executionMode : 'own' as WorkflowExecuteMode,
|
||||
startedAt: workflowRunner.startedAt,
|
||||
stoppedAt: new Date(),
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user