// import { createHash } from 'crypto';
import { titleCase } from 'title-case';
import * as ExpressionError from '../ExpressionError';
import type { ExtensionMap } from './Extensions';
import CryptoJS from 'crypto-js';
import { encode } from 'js-base64';
import { transliterate } from 'transliteration';

// Lookup table from lower-cased algorithm name to the CryptoJS hasher.
const hashFunctions: Record<string, typeof CryptoJS.MD5> = {
	md5: CryptoJS.MD5,
	sha1: CryptoJS.SHA1,
	sha224: CryptoJS.SHA224,
	sha256: CryptoJS.SHA256,
	sha384: CryptoJS.SHA384,
	sha512: CryptoJS.SHA512,
	sha3: CryptoJS.SHA3,
	ripemd160: CryptoJS.RIPEMD160,
};

// All symbols from https://www.xe.com/symbols/ as for 2022/11/09
const CURRENCY_REGEXP =
	/(\u004c\u0065\u006b|\u060b|\u0024|\u0192|\u20bc|\u0042\u0072|\u0042\u005a\u0024|\u0024\u0062|\u004b\u004d|\u0050|\u043b\u0432|\u0052\u0024|\u17db|\u00a5|\u20a1|\u006b\u006e|\u20b1|\u004b\u010d|\u006b\u0072|\u0052\u0044\u0024|\u00a3|\u20ac|\u00a2|\u0051|\u004c|\u0046\u0074|\u20b9|\u0052\u0070|\ufdfc|\u20aa|\u004a\u0024|\u20a9|\u20ad|\u0434\u0435\u043d|\u0052\u004d|\u20a8|\u20ae|\u004d\u0054|\u0043\u0024|\u20a6|\u0042\u002f\u002e|\u0047\u0073|\u0053\u002f\u002e|\u007a\u0142|\u006c\u0065\u0069|\u20bd|\u0414\u0438\u043d\u002e|\u0053|\u0052|\u0043\u0048\u0046|\u004e\u0054\u0024|\u0e3f|\u0054\u0054\u0024|\u20ba|\u20b4|\u0024\u0055|\u0042\u0073|\u20ab|\u005a\u0024)/gu;

/*
	Extract the domain part from various inputs, including URLs, email addresses, and plain domains.

	/^(?:(?:https?|ftp):\/\/)?                    // Match optional http, https, or ftp protocols
	(?:mailto:)?                                  // Match optional mailto:
	(?:\/\/)?                                     // Match optional double slashes
	(                                             // Capture the domain part
		(?:www\.)?                                // Match optional www prefix (part of the captured domain)
		(?:(?:[-\w]+\.)+                          // Match one or more subdomains
			(?:[a-zA-Z]{2,}|xn--[a-zA-Z0-9]+)     // Match top-level domain or Punycode encoded IDN(xn--80aswg.xn--p1ai)
			|localhost                            // Match localhost
			|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}   // Match IPv4 addresses
		)
	)
	(?::\d+)?                                     // Match optional port number
	(?:\/[^\s?]*)?                                // Match optional path
	(?:\?[^\s#]*)?                                // Match optional query string
	(?:#[^\s]*)?$/i;                              // Match optional hash fragment
*/
const DOMAIN_EXTRACT_REGEXP =
	/^(?:(?:https?|ftp):\/\/)?(?:mailto:)?(?:\/\/)?((?:www\.)?(?:(?:[-\w]+\.)+(?:[a-zA-Z]{2,}|xn--[a-zA-Z0-9]+)|localhost|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}))(?::\d+)?(?:\/[^\s?]*)?(?:\?[^\s#]*)?(?:#[^\s]*)?$/i;

/*
	Matches domain names without the protocol or optional subdomains

	/^(?:www\.)?                                  // Match optional www prefix
	(                                             // Capture the domain part
		(?:(?:[-\w]+\.)+                          // Match one or more subdomains
			(?:[a-zA-Z]{2,}|xn--[a-zA-Z0-9]+)     // Match top-level domain or Punycode encoded IDN
			|localhost                            // Match localhost
			|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}   // Match IPv4 addresses
		)
	)
	(?::\d+)?                                     // Match optional port number
	(?:\/[^\s?]*)?                                // Match optional path
	(?:\?[^\s#]*)?                                // Match optional query string
	(?:#[^\s]*)?$/i;                              // Match optional fragment at the end of the string
*/
const DOMAIN_REGEXP =
	/^(?:www\.)?((?:(?:[-\w]+\.)+(?:[a-zA-Z]{2,}|xn--[a-zA-Z0-9]+)|localhost|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}))(?::\d+)?(?:\/[^\s?]*)?(?:\?[^\s#]*)?(?:#[^\s]*)?$/i;

/*
	Matches email addresses

	/(
		(                                                           // Capture local part of the email address
			([^<>()\[\]\\.,;:\s@"]+(\.[^<>()\[\]\\.,;:\s@"]+)*)     // One or more characters not in the set, optionally followed by dot-separated runs of such characters
			|(".+")                                                 // Or one or more characters inside quotes
		)
	)
	@                                                               // Match @ symbol
	(?<domain>(                                                     // Capture the domain part of the email address (named group "domain")
		\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\]          // Match IPv4 address inside brackets
		|(([a-zA-Z\-0-9]+\.)+[a-zA-Z]{2,})                          // Or match one or more dot-terminated labels followed by a TLD
	))/;
*/
const EMAIL_REGEXP =
	/(([^<>()\[\]\\.,;:\s@"]+(\.[^<>()\[\]\\.,;:\s@"]+)*)|(".+"))@(?<domain>(\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}])|(([a-zA-Z\-0-9]+\.)+[a-zA-Z]{2,}))/;

/*
	Matches URLs with strict beginning and end of the string checks

	/^(?:(?:https?|ftp):\/\/)                     // Match http, https, or ftp protocols at the start of the string
	(?:www\.)?                                    // Match optional www prefix
	(                                             // Capture the domain part
		(?:(?:[-\w]+\.)+                          // Match one or more subdomains
			(?:[a-zA-Z]{2,}|xn--[a-zA-Z0-9]+)     // Match top-level domain or Punycode encoded IDN
			|localhost                            // Match localhost
			|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}   // Match IPv4 addresses
		)
	)
	(?::\d+)?                                     // Match optional port number
	(?:\/[^\s?#]*)?                               // Match optional path
	(?:\?[^\s#]*)?                                // Match optional query string
	(?=([^\s]+#.*)?)                              // Positive lookahead for the fragment identifier
	#?[^\s]*$/i;                                  // Match optional fragment at the end of the string
*/
const URL_REGEXP_EXACT =
	/^(?:(?:https?|ftp):\/\/)(?:www\.)?((?:(?:[-\w]+\.)+(?:[a-zA-Z]{2,}|xn--[a-zA-Z0-9]+)|localhost|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}))(?::\d+)?(?:\/[^\s?#]*)?(?:\?[^\s#]*)?(?=([^\s]+#.*)?)#?[^\s]*$/i;

/*
	Same as URL_REGEXP_EXACT but without the strict beginning and end of the string checks
	to allow for matching URLs in the middle of a string
*/
const URL_REGEXP =
	/(?:(?:https?|ftp):\/\/)(?:www\.)?((?:(?:[-\w]+\.)+(?:[a-zA-Z]{2,}|xn--[a-zA-Z0-9]+)|localhost|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}))(?::\d+)?(?:\/[^\s?#]*)?(?:\?[^\s#]*)?(?=([^\s]+#.*)?)#?[^\s]*/i;

// Any Unicode letter; used to find the first character of a sentence.
const CHAR_TEST_REGEXP = /\p{L}/u;
// Sentence-terminating punctuation.
const PUNC_TEST_REGEXP = /[!?.]/;

/**
 * Hashes `value` with the algorithm named in `extraArgs[0]` (default `MD5`),
 * or Base64-encodes it when the algorithm is `base64`. Algorithm names are
 * matched case-insensitively.
 *
 * @param value - The string to hash or encode.
 * @param extraArgs - Optional argument list; only the first entry is read.
 * @returns The stringified CryptoJS digest, or the Base64 encoding.
 * @throws ExpressionExtensionError when the algorithm is not supported.
 */
function hash(value: string, extraArgs?: unknown): string {
	// `extraArgs` is optional, so never destructure it unguarded.
	const [requested = 'MD5'] = Array.isArray(extraArgs) ? (extraArgs as unknown[]) : [];
	const algorithm = String(requested);
	const key = algorithm.toLowerCase();

	if (key === 'base64') {
		// We're using a library instead of btoa because btoa only
		// works on ASCII
		return encode(value);
	}

	// Own-property check so inherited keys such as "constructor" or
	// "tostring" are reported as unknown algorithms instead of being called.
	const hashFunction = Object.prototype.hasOwnProperty.call(hashFunctions, key)
		? hashFunctions[key]
		: undefined;
	if (!hashFunction) {
		throw new ExpressionError.ExpressionExtensionError(
			`Unknown algorithm ${algorithm}. Available algorithms are: ${Object.keys(hashFunctions)
				.map((s) => s.toUpperCase())
				.join(', ')}, and Base64.`,
		);
	}
	return hashFunction(value.toString()).toString();
	// return createHash(format).update(value.toString()).digest('hex');
}

/** Returns true only for the empty string. */
function isEmpty(value: string): boolean {
	return value === '';
}

/** Returns true for any string other than the empty string. */
function isNotEmpty(value: string): boolean {
	return !isEmpty(value);
}

/** Returns the `length` property of the string. */
function length(value: string): number {
	return value.length;
}

/**
 * Strips common Markdown syntax from a string using a sequence of regex
 * replacements. If any replacement throws, the input is returned unchanged.
 *
 * @param value - Markdown text.
 * @returns The text with Markdown formatting removed.
 */
function removeMarkdown(value: string): string {
	let output = value;
	try {
		// Strip list leaders (bullets and single-digit numbering), keeping indentation
		output = output.replace(/^([\s\t]*)([*\-+]|\d\.)\s+/gm, '$1');

		output = output
			// Header
			.replace(/\n={2,}/g, '\n')
			// Strikethrough
			.replace(/~~/g, '')
			// Fenced codeblocks
			.replace(/`{3}.*\n/g, '');

		output = output
			// Remove HTML tags
			.replace(/<[\w|\s|=|'|"|:|(|)|,|;|/|0-9|.|-]+[>|\\>]/g, '')
			// Remove setext-style headers
			.replace(/^[=-]{2,}\s*$/g, '')
			// Remove footnotes?
			.replace(/\[\^.+?\](: .*?$)?/g, '')
			.replace(/\s{0,2}\[.*?\]: .*?$/g, '')
			// Remove images
			.replace(/!\[.*?\][[(].*?[\])]/g, '')
			// Remove inline links
			.replace(/\[(.*?)\][[(].*?[\])]/g, '$1')
			// Remove Blockquotes
			.replace(/>/g, '')
			// Remove reference-style links?
			.replace(/^\s{1,2}\[(.*?)\]: (\S+)( ".*?")?\s*$/g, '')
			// Remove atx-style headers
			.replace(/^#{1,6}\s*([^#]*)\s*(#{1,6})?/gm, '$1')
			// Remove emphasis markers (*, _, **, __, ...)
			.replace(/([*_]{1,3})(\S.*?\S)\1/g, '$2')
			// Remove code fences that open and close on the same line
			.replace(/(`{3,})(.*?)\1/gm, '$2')
			// Remove horizontal rules
			.replace(/^-{3,}\s*$/g, '')
			// Remove inline code markers
			.replace(/`(.+?)`/g, '$1')
			// Collapse runs of blank lines
			.replace(/\n{2,}/g, '\n\n');
	} catch {
		// Best effort: fall back to the untouched input.
		return value;
	}
	return output;
}

/** Removes anything that looks like a tag (`<...>`, including an unterminated trailing `<...`). */
function removeTags(value: string): string {
	return value.replace(/<[^>]*>?/gm, '');
}

/**
 * Parses a string into a `Date` via `Date.parse`.
 *
 * @throws ExpressionExtensionError when the string cannot be parsed.
 */
function toDate(value: string): Date {
	const date = new Date(Date.parse(value));

	if (date.toString() === 'Invalid Date') {
		throw new ExpressionError.ExpressionExtensionError('cannot convert to date');
	}
	// If time component is not specified, force 00:00h
	if (!/:/.test(value)) {
		date.setHours(0, 0, 0);
	}
	return date;
}

/**
 * Decodes a percent-encoded string. Uses `decodeURI` when `extraArgs[0]` is
 * truthy, otherwise `decodeURIComponent`.
 */
function urlDecode(value: string, extraArgs: boolean[]): string {
	const [entireString = false] = extraArgs;
	if (entireString) {
		return decodeURI(value.toString());
	}
	return decodeURIComponent(value.toString());
}

/**
 * Percent-encodes a string. Uses `encodeURI` when `extraArgs[0]` is truthy,
 * otherwise `encodeURIComponent`.
 */
function urlEncode(value: string, extraArgs: boolean[]): string {
	const [entireString = false] = extraArgs;
	if (entireString) {
		return encodeURI(value.toString());
	}
	return encodeURIComponent(value.toString());
}

/**
 * Parses an integer after removing every `CURRENCY_REGEXP` match from the input.
 *
 * @param extraArgs - Optional radix as the first entry, forwarded to `parseInt`.
 * @throws ExpressionExtensionError when the result is `NaN`.
 */
function toInt(value: string, extraArgs: Array<number | undefined>) {
	const [radix] = extraArgs;
	const int = parseInt(value.replace(CURRENCY_REGEXP, ''), radix);

	if (isNaN(int)) {
		throw new ExpressionError.ExpressionExtensionError('cannot convert to integer');
	}

	return int;
}

/**
 * Parses a float after removing every `CURRENCY_REGEXP` match from the input.
 *
 * @throws ExpressionExtensionError when the input contains a comma or the
 * result is `NaN`.
 */
function toFloat(value: string) {
	if (value.includes(',')) {
		throw new ExpressionError.ExpressionExtensionError(
			'cannot convert to float, expected . as decimal separator',
		);
	}

	const float = parseFloat(value.replace(CURRENCY_REGEXP, ''));

	if (isNaN(float)) {
		throw new ExpressionError.ExpressionExtensionError('cannot convert to float');
	}

	return float;
}

/**
 * Wraps `value` in `extraArgs[0]` (default `"`), escaping backslashes and
 * occurrences of the quote mark with a backslash.
 */
function quote(value: string, extraArgs: string[]) {
	const [quoteChar = '"'] = extraArgs;
	return `${quoteChar}${value
		.replace(/\\/g, '\\\\')
		.replace(new RegExp(`\\${quoteChar}`, 'g'), `\\${quoteChar}`)}${quoteChar}`;
}

/**
 * Returns true when the string contains no space and is accepted as a number
 * by both `isNaN` coercion and `parseFloat`.
 */
function isNumeric(value: string) {
	if (value.includes(' ')) return false;

	return !isNaN(value as unknown as number) && !isNaN(parseFloat(value));
}

/** Tests the whole string against `URL_REGEXP_EXACT`. */
function isUrl(value: string) {
	return URL_REGEXP_EXACT.test(value);
}

/** Tests the whole string against `DOMAIN_REGEXP`. */
function isDomain(value: string) {
	return DOMAIN_REGEXP.test(value);
}

/** Returns true when the string contains an `EMAIL_REGEXP` match and no space. */
function isEmail(value: string) {
	const result = EMAIL_REGEXP.test(value);

	// email regex is loose so check manually for now
	if (result && value.includes(' ')) {
		return false;
	}

	return result;
}

/** Delegates to `titleCase` from the `title-case` package. */
function toTitleCase(value: string) {
	return titleCase(value);
}

/** Transliterates the string, substituting `?` for characters with no mapping. */
function replaceSpecialChars(value: string) {
	return transliterate(value, { unknown: '?' });
}

/**
 * Converts text to sentence case: within each segment ending at `!`, `?` or
 * `.`, the first letter is upper-cased and everything after it lower-cased.
 * Any trailing text that contains no letter is dropped.
 */
function toSentenceCase(value: string) {
	let current = value.slice();
	let buffer = '';

	while (CHAR_TEST_REGEXP.test(current)) {
		const charIndex = current.search(CHAR_TEST_REGEXP);
		// charIndex is valid here because the loop condition just matched.
		current =
			current.slice(0, charIndex) +
			current[charIndex]!.toLocaleUpperCase() +
			current.slice(charIndex + 1).toLocaleLowerCase();
		const puncIndex = current.search(PUNC_TEST_REGEXP);
		if (puncIndex === -1) {
			buffer += current;
			current = '';
			break;
		}
		// Emit the finished sentence and continue with the remainder.
		buffer += current.slice(0, puncIndex + 1);
		current = current.slice(puncIndex + 1);
	}

	return buffer;
}

/**
 * Lower-cases the string, turns spaces and hyphens into underscores, and
 * removes the listed punctuation characters.
 */
function toSnakeCase(value: string) {
	return value
		.toLocaleLowerCase()
		.replace(/[ \-]/g, '_')
		.replace(/[\u2000-\u206F\u2E00-\u2E7F\\'!"#$%&()*+,.\/:;<=>?@\[\]^`{|}~]/g, '');
}

/** Returns the first `EMAIL_REGEXP` match in the string, or `undefined`. */
function extractEmail(value: string) {
	const matched = EMAIL_REGEXP.exec(value);
	if (!matched) {
		return undefined;
	}
	return matched[0];
}

/**
 * Returns the domain of an email address (the `domain` named group of
 * `EMAIL_REGEXP`), or the first capture of `DOMAIN_EXTRACT_REGEXP` for other
 * inputs. Returns `undefined` when neither matches.
 */
function extractDomain(value: string) {
	if (isEmail(value)) {
		const matched = EMAIL_REGEXP.exec(value);
		// This shouldn't happen
		if (!matched) {
			return undefined;
		}
		return matched.groups?.domain;
	}

	const domainMatch = value.match(DOMAIN_EXTRACT_REGEXP);
	if (domainMatch) {
		return domainMatch[1];
	}
	return undefined;
}

/** Returns the first `URL_REGEXP` match in the string, or `undefined`. */
function extractUrl(value: string) {
	const matched = URL_REGEXP.exec(value);
	if (!matched) {
		return undefined;
	}
	return matched[0];
}

removeMarkdown.doc = {
	name: 'removeMarkdown',
	description: 'Removes Markdown formatting from a string.',
	returnType: 'string',
	docURL:
		'https://docs.n8n.io/code-examples/expressions/data-transformation-functions/strings/#string-removeMarkdown',
};

removeTags.doc = {
	name: 'removeTags',
	description: 'Removes tags, such as HTML or XML, from a string.',
	returnType: 'string',
	docURL:
		'https://docs.n8n.io/code-examples/expressions/data-transformation-functions/strings/#string-removeTags',
};

toDate.doc = {
	name: 'toDate',
	description: 'Converts a string to a date.',
	returnType: 'Date',
	docURL:
		'https://docs.n8n.io/code-examples/expressions/data-transformation-functions/strings/#string-toDate',
};

toFloat.doc = {
	name: 'toFloat',
	description: 'Converts a string to a decimal number.',
	returnType: 'number',
	aliases: ['toDecimalNumber'],
	docURL:
		'https://docs.n8n.io/code-examples/expressions/data-transformation-functions/strings/#string-toDecimalNumber',
};

toInt.doc = {
	name: 'toInt',
	description: 'Converts a string to an integer.',
	returnType: 'number',
	args: [{ name: 'radix?', type: 'number' }],
	aliases: ['toWholeNumber'],
	docURL:
		'https://docs.n8n.io/code-examples/expressions/data-transformation-functions/strings/#string-toInt',
};

toSentenceCase.doc = {
	name: 'toSentenceCase',
	description: 'Formats a string to sentence case. Example: "This is a sentence".',
	returnType: 'string',
	docURL:
		'https://docs.n8n.io/code-examples/expressions/data-transformation-functions/strings/#string-toSentenceCase',
};

toSnakeCase.doc = {
	name: 'toSnakeCase',
	description: 'Formats a string to snake case. Example: "this_is_snake_case".',
	returnType: 'string',
	docURL:
		'https://docs.n8n.io/code-examples/expressions/data-transformation-functions/strings/#string-toSnakeCase',
};

toTitleCase.doc = {
	name: 'toTitleCase',
	description: 'Formats a string to title case. Example: "This Is a Title".',
	returnType: 'string',
	docURL:
		'https://docs.n8n.io/code-examples/expressions/data-transformation-functions/strings/#string-toTitleCase',
};

urlEncode.doc = {
	name: 'urlEncode',
	description: 'Encodes a string to be used/included in a URL.',
	args: [{ name: 'entireString?', type: 'boolean' }],
	returnType: 'string',
	docURL:
		'https://docs.n8n.io/code-examples/expressions/data-transformation-functions/strings/#string-urlEncode',
};

urlDecode.doc = {
	name: 'urlDecode',
	description:
		'Decodes a URL-encoded string. It decodes any percent-encoded characters in the input string, and replaces them with their original characters.',
	returnType: 'string',
	docURL:
		'https://docs.n8n.io/code-examples/expressions/data-transformation-functions/strings/#string-urlDecode',
};

replaceSpecialChars.doc = {
	name: 'replaceSpecialChars',
	description: 'Replaces non-ASCII characters in a string with an ASCII representation.',
	returnType: 'string',
	docURL:
		'https://docs.n8n.io/code-examples/expressions/data-transformation-functions/strings/#string-replaceSpecialChars',
};

length.doc = {
	name: 'length',
	description: 'Returns the character count of a string.',
	returnType: 'number',
	docURL: 'https://docs.n8n.io/code-examples/expressions/data-transformation-functions/strings',
};

isDomain.doc = {
	name: 'isDomain',
	description: 'Checks if a string is a domain.',
	returnType: 'boolean',
	docURL:
		'https://docs.n8n.io/code-examples/expressions/data-transformation-functions/strings/#string-isDomain',
};

isEmail.doc = {
	name: 'isEmail',
	description: 'Checks if a string is an email.',
	returnType: 'boolean',
	docURL:
		'https://docs.n8n.io/code-examples/expressions/data-transformation-functions/strings/#string-isEmail',
};

isNumeric.doc = {
	// Was 'isEmail' (copy-paste slip); the entry documents isNumeric.
	name: 'isNumeric',
	description: 'Checks if a string only contains digits.',
	returnType: 'boolean',
	docURL:
		'https://docs.n8n.io/code-examples/expressions/data-transformation-functions/strings/#string-isNumeric',
};

isUrl.doc = {
	name: 'isUrl',
	description: 'Checks if a string is a valid URL.',
	returnType: 'boolean',
	docURL:
		'https://docs.n8n.io/code-examples/expressions/data-transformation-functions/strings/#string-isUrl',
};

isEmpty.doc = {
	name: 'isEmpty',
	description: 'Checks if a string is empty.',
	returnType: 'boolean',
	docURL:
		'https://docs.n8n.io/code-examples/expressions/data-transformation-functions/strings/#string-isEmpty',
};

isNotEmpty.doc = {
	name: 'isNotEmpty',
	description: 'Checks if a string has content.',
	returnType: 'boolean',
	docURL:
		'https://docs.n8n.io/code-examples/expressions/data-transformation-functions/strings/#string-isNotEmpty',
};

extractEmail.doc = {
	name: 'extractEmail',
	description: 'Extracts an email from a string. Returns undefined if none is found.',
	returnType: 'string',
	docURL:
		'https://docs.n8n.io/code-examples/expressions/data-transformation-functions/strings/#string-extractEmail',
};

extractDomain.doc = {
	name: 'extractDomain',
	description:
		'Extracts a domain from a string containing a valid URL. Returns undefined if none is found.',
	returnType: 'string',
	docURL:
		'https://docs.n8n.io/code-examples/expressions/data-transformation-functions/strings/#string-extractDomain',
};

extractUrl.doc = {
	name: 'extractUrl',
	description: 'Extracts a URL from a string. Returns undefined if none is found.',
	returnType: 'string',
	docURL:
		'https://docs.n8n.io/code-examples/expressions/data-transformation-functions/strings/#string-extractUrl',
};

hash.doc = {
	name: 'hash',
	description: 'Returns a string hashed with the given algorithm. Default algorithm is `md5`.',
	returnType: 'string',
	args: [{ name: 'algo?', type: 'Algorithm' }],
	docURL:
		'https://docs.n8n.io/code-examples/expressions/data-transformation-functions/strings/#string-hash',
};

quote.doc = {
	name: 'quote',
	description: 'Returns a string wrapped in the quotation marks. Default quotation is `"`.',
	returnType: 'string',
	args: [{ name: 'mark?', type: 'string' }],
	docURL:
		'https://docs.n8n.io/code-examples/expressions/data-transformation-functions/strings/#string-quote',
};

// Registry of string extension functions, keyed by the name exposed in expressions.
export const stringExtensions: ExtensionMap = {
	typeName: 'String',
	functions: {
		hash,
		removeMarkdown,
		removeTags,
		toDate,
		toDecimalNumber: toFloat,
		toFloat,
		toInt,
		toWholeNumber: toInt,
		toSentenceCase,
		toSnakeCase,
		toTitleCase,
		urlDecode,
		urlEncode,
		quote,
		replaceSpecialChars,
		length,
		isDomain,
		isEmail,
		isNumeric,
		isUrl,
		isEmpty,
		isNotEmpty,
		extractEmail,
		extractDomain,
		extractUrl,
	},
};