diff --git a/package-lock.json b/package-lock.json index aead053..739fa32 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,6 +1,6 @@ { "name": "cyberchef", - "version": "7.5.0", + "version": "7.5.1", "lockfileVersion": 1, "requires": true, "dependencies": { @@ -10256,6 +10256,11 @@ "resolved": "https://registry.npmjs.org/xpath/-/xpath-0.0.27.tgz", "integrity": "sha512-fg03WRxtkCV6ohClePNAECYsmpKKTv5L8y/X3Dn1hQrec3POx2jHZ/0P2qQ6HvsrU1BmeqXcof3NGGueG6LxwQ==" }, + "xregexp": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/xregexp/-/xregexp-4.0.0.tgz", + "integrity": "sha512-PHyM+sQouu7xspQQwELlGwwd05mXUFqwFYfqPO0cC7x4fxyHnnuetmQr6CjJiafIDoH4MogHb9dOoJzR/Y4rFg==" + }, "xtend": { "version": "4.0.1", "resolved": "https://registry.npmjs.org/xtend/-/xtend-4.0.1.tgz", diff --git a/package.json b/package.json index 1804e30..56dc55b 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "cyberchef", - "version": "7.5.0", + "version": "7.5.1", "description": "The Cyber Swiss Army Knife for encryption, encoding, compression and data analysis.", "author": "n1474335 ", "homepage": "https://gchq.github.io/CyberChef", @@ -103,6 +103,7 @@ "vkbeautify": "^0.99.3", "xmldom": "^0.1.27", "xpath": "0.0.27", + "xregexp": "^4.0.0", "zlibjs": "^0.3.1" }, "scripts": { diff --git a/src/core/config/OperationConfig.js b/src/core/config/OperationConfig.js index 41fbfe0..ec47384 100755 --- a/src/core/config/OperationConfig.js +++ b/src/core/config/OperationConfig.js @@ -30,6 +30,7 @@ import NetBIOS from "../operations/NetBIOS.js"; import PHP from "../operations/PHP.js"; import PublicKey from "../operations/PublicKey.js"; import Punycode from "../operations/Punycode.js"; +import Regex from "../operations/Regex.js"; import Rotate from "../operations/Rotate.js"; import SeqUtils from "../operations/SeqUtils.js"; import Shellcode from "../operations/Shellcode.js"; @@ -2058,9 +2059,8 @@ const OperationConfig = { args: [] }, "Find / Replace": { - module: "Default", + module: "Regex", description: "Replaces all occurrences of the first string with the second.

Includes support for regular expressions (regex), simple strings and extended strings (which support \\n, \\r, \\t, \\b, \\f and escaped hex bytes using \\x notation, e.g. \\x00 for a null byte).", - manualBake: true, inputType: "string", outputType: "string", args: [ @@ -2068,7 +2068,7 @@ const OperationConfig = { name: "Find", type: "toggleString", value: "", - toggleValues: StrUtils.SEARCH_TYPE + toggleValues: Regex.SEARCH_TYPE }, { name: "Replace", @@ -2078,17 +2078,17 @@ const OperationConfig = { { name: "Global match", type: "boolean", - value: StrUtils.FIND_REPLACE_GLOBAL, + value: Regex.FIND_REPLACE_GLOBAL, }, { name: "Case insensitive", type: "boolean", - value: StrUtils.FIND_REPLACE_CASE, + value: Regex.FIND_REPLACE_CASE, }, { name: "Multiline matching", type: "boolean", - value: StrUtils.FIND_REPLACE_MULTILINE, + value: Regex.FIND_REPLACE_MULTILINE, }, ] @@ -2125,20 +2125,19 @@ const OperationConfig = { args: [ { name: "Split delimiter", - type: "binaryShortString", - value: StrUtils.SPLIT_DELIM + type: "editableOption", + value: StrUtils.SPLIT_DELIM_OPTIONS }, { name: "Join delimiter", - type: "option", - value: StrUtils.DELIMITER_OPTIONS + type: "editableOption", + value: StrUtils.JOIN_DELIM_OPTIONS } ] }, "Filter": { module: "Default", description: "Splits up the input using the specified delimiter and then filters each branch based on a regular expression.", - manualBake: true, inputType: "string", outputType: "string", args: [ @@ -2160,16 +2159,26 @@ const OperationConfig = { ] }, "Strings": { - module: "Default", + module: "Regex", description: "Extracts all strings from the input.", inputType: "string", outputType: "string", args: [ + { + name: "Encoding", + type: "option", + value: Extract.ENCODING_LIST + }, { name: "Minimum length", type: "number", value: Extract.MIN_STRING_LEN }, + { + name: "Match", + type: "option", + value: Extract.STRING_MATCH_TYPE + }, { name: "Display total", type: "boolean", @@ -2178,7 +2187,7 @@ const OperationConfig = { ] }, "Extract IP addresses": { - module: "Default", + module: "Regex", description: "Extracts all IPv4 and IPv6 addresses.

Warning: Given a string 710.65.0.456, this will match 10.65.0.45 so always check the original input!", inputType: "string", outputType: "string", @@ -2206,7 +2215,7 @@ const OperationConfig = { ] }, "Extract email addresses": { - module: "Default", + module: "Regex", description: "Extracts all email addresses from the input.", inputType: "string", outputType: "string", @@ -2219,7 +2228,7 @@ const OperationConfig = { ] }, "Extract MAC addresses": { - module: "Default", + module: "Regex", description: "Extracts all Media Access Control (MAC) addresses from the input.", inputType: "string", outputType: "string", @@ -2232,7 +2241,7 @@ const OperationConfig = { ] }, "Extract URLs": { - module: "Default", + module: "Regex", description: "Extracts Uniform Resource Locators (URLs) from the input. The protocol (http, ftp etc.) is required otherwise there will be far too many false positives.", inputType: "string", outputType: "string", @@ -2245,7 +2254,7 @@ const OperationConfig = { ] }, "Extract domains": { - module: "Default", + module: "Regex", description: "Extracts domain names.
Note that this will not include paths. Use Extract URLs to find entire URLs.", inputType: "string", outputType: "string", @@ -2258,7 +2267,7 @@ const OperationConfig = { ] }, "Extract file paths": { - module: "Default", + module: "Regex", description: "Extracts anything that looks like a Windows or UNIX file path.

Note that if UNIX is selected, there will likely be a lot of false positives.", inputType: "string", outputType: "string", @@ -2281,7 +2290,7 @@ const OperationConfig = { ] }, "Extract dates": { - module: "Default", + module: "Regex", description: "Extracts dates in the following formatsDividers can be any of /, -, . or space", inputType: "string", outputType: "string", @@ -2294,16 +2303,15 @@ const OperationConfig = { ] }, "Regular expression": { - module: "Default", - description: "Define your own regular expression (regex) to search the input data with, optionally choosing from a list of pre-defined patterns.", - manualBake: true, + module: "Regex", + description: "Define your own regular expression (regex) to search the input data with, optionally choosing from a list of pre-defined patterns.

Supports extended regex syntax including the 'dot matches all' flag, named capture groups, full unicode coverage (including \\p{} categories and scripts as well as astral codes) and recursive matching.", inputType: "string", outputType: "html", args: [ { name: "Built in regexes", type: "populateOption", - value: StrUtils.REGEX_PRE_POPULATE, + value: Regex.REGEX_PRE_POPULATE, target: 1, }, { @@ -2314,22 +2322,37 @@ const OperationConfig = { { name: "Case insensitive", type: "boolean", - value: StrUtils.REGEX_CASE_INSENSITIVE + value: true }, { - name: "Multiline matching", + name: "^ and $ match at newlines", type: "boolean", - value: StrUtils.REGEX_MULTILINE_MATCHING + value: true + }, + { + name: "Dot matches all", + type: "boolean", + value: false + }, + { + name: "Unicode support", + type: "boolean", + value: false + }, + { + name: "Astral support", + type: "boolean", + value: false }, { name: "Display total", type: "boolean", - value: StrUtils.DISPLAY_TOTAL + value: Regex.DISPLAY_TOTAL }, { name: "Output format", type: "option", - value: StrUtils.OUTPUT_FORMAT + value: Regex.OUTPUT_FORMAT }, ] }, diff --git a/src/core/config/modules/Default.js b/src/core/config/modules/Default.js index 3963d94..1afb8bc 100644 --- a/src/core/config/modules/Default.js +++ b/src/core/config/modules/Default.js @@ -10,7 +10,6 @@ import Convert from "../../operations/Convert.js"; import DateTime from "../../operations/DateTime.js"; import Endian from "../../operations/Endian.js"; import Entropy from "../../operations/Entropy.js"; -import Extract from "../../operations/Extract.js"; import FileType from "../../operations/FileType.js"; import Hexdump from "../../operations/Hexdump.js"; import HTML from "../../operations/HTML.js"; @@ -99,11 +98,9 @@ OpModules.Default = { "Format MAC addresses": MAC.runFormat, "Encode NetBIOS Name": NetBIOS.runEncodeName, "Decode NetBIOS Name": NetBIOS.runDecodeName, - "Regular expression": StrUtils.runRegex, "Offset checker": StrUtils.runOffsetChecker, "To Upper case": StrUtils.runUpper, "To Lower case": StrUtils.runLower, - "Find / Replace": StrUtils.runFindReplace, "Split": StrUtils.runSplit, "Filter": StrUtils.runFilter, "Escape string": StrUtils.runEscape, @@ -133,14 +130,6 @@ OpModules.Default = { "Translate DateTime Format": DateTime.runTranslateFormat, "From UNIX Timestamp": DateTime.runFromUnixTimestamp, "To UNIX Timestamp": DateTime.runToUnixTimestamp, - "Strings": Extract.runStrings, - "Extract IP addresses": Extract.runIp, - "Extract email addresses": Extract.runEmail, - "Extract MAC addresses": Extract.runMac, - "Extract URLs": Extract.runUrls, - "Extract domains": Extract.runDomains, - "Extract file paths": Extract.runFilePaths, - "Extract dates": Extract.runDates, "Microsoft Script Decoder": MS.runDecodeScript, "Entropy": Entropy.runEntropy, "Frequency distribution": Entropy.runFreqDistrib, diff --git a/src/core/config/modules/OpModules.js b/src/core/config/modules/OpModules.js index 3f3963c..9a5e3ff 100644 --- a/src/core/config/modules/OpModules.js +++ b/src/core/config/modules/OpModules.js @@ -18,6 +18,7 @@ import HTTPModule from "./HTTP.js"; import ImageModule from "./Image.js"; import JSBNModule from "./JSBN.js"; import PublicKeyModule from "./PublicKey.js"; +import RegexModule from "./Regex.js"; import ShellcodeModule from "./Shellcode.js"; import URLModule from "./URL.js"; @@ -34,6 +35,7 @@ Object.assign( ImageModule, JSBNModule, PublicKeyModule, + RegexModule, ShellcodeModule, URLModule ); diff --git a/src/core/config/modules/Regex.js b/src/core/config/modules/Regex.js new file mode 100644 index 0000000..f7dc3d3 --- /dev/null +++ b/src/core/config/modules/Regex.js @@ -0,0 +1,30 @@ +import Extract from "../../operations/Extract.js"; +import Regex from "../../operations/Regex.js"; + + +/** + * Regex module. + * + * Libraries: + * - XRegExp + * + * @author n1474335 [n1474335@gmail.com] + * @copyright Crown Copyright 2018 + * @license Apache-2.0 + */ +let OpModules = typeof self === "undefined" ? {} : self.OpModules || {}; + +OpModules.Regex = { + "Regular expression": Regex.runRegex, + "Find / Replace": Regex.runFindReplace, + "Strings": Extract.runStrings, + "Extract IP addresses": Extract.runIp, + "Extract email addresses": Extract.runEmail, + "Extract MAC addresses": Extract.runMac, + "Extract URLs": Extract.runUrls, + "Extract domains": Extract.runDomains, + "Extract file paths": Extract.runFilePaths, + "Extract dates": Extract.runDates, +}; + +export default OpModules; diff --git a/src/core/operations/Cipher.js b/src/core/operations/Cipher.js index c253cfb..d47f956 100755 --- a/src/core/operations/Cipher.js +++ b/src/core/operations/Cipher.js @@ -521,6 +521,10 @@ DES uses a key length of 8 bytes (64 bits).`; * @default */ PRNG_BYTES: 32, + /** + * @constant + * @default + */ PRNG_OUTPUT: ["Hex", "Integer", "Byte array", "Raw"], /** diff --git a/src/core/operations/Extract.js b/src/core/operations/Extract.js index 54b25b3..221b282 100755 --- a/src/core/operations/Extract.js +++ b/src/core/operations/Extract.js @@ -1,3 +1,6 @@ +import XRegExp from "xregexp"; + + /** * Identifier extraction operations. * @@ -26,6 +29,11 @@ const Extract = { match; while ((match = searchRegex.exec(input))) { + // Moves pointer when an empty string is matched (prevents infinite loop) + if (match.index === searchRegex.lastIndex) { + searchRegex.lastIndex++; + } + if (removeRegex && removeRegex.test(match[0])) continue; total++; @@ -43,7 +51,20 @@ const Extract = { * @constant * @default */ - MIN_STRING_LEN: 3, + MIN_STRING_LEN: 4, + /** + * @constant + * @default + */ + STRING_MATCH_TYPE: [ + "[ASCII]", "Alphanumeric + punctuation (A)", "All printable chars (A)", "Null-terminated strings (A)", + "[Unicode]", "Alphanumeric + punctuation (U)", "All printable chars (U)", "Null-terminated strings (U)" + ], + /** + * @constant + * @default + */ + ENCODING_LIST: ["Single byte", "16-bit littleendian", "16-bit bigendian", "All"], /** * @constant * @default @@ -58,10 +79,59 @@ const Extract = { * @returns {string} */ runStrings: function(input, args) { - let minLen = args[0] || Extract.MIN_STRING_LEN, - displayTotal = args[1], - strings = "[A-Z\\d/\\-:.,_$%'\"()<>= !\\[\\]{}@]", - regex = new RegExp(strings + "{" + minLen + ",}", "ig"); + const encoding = args[0], + minLen = args[1], + matchType = args[2], + displayTotal = args[3], + alphanumeric = "A-Z\\d", + punctuation = "/\\-:.,_$%'\"()<>= !\\[\\]{}@", + printable = "\x20-\x7e", + uniAlphanumeric = "\\pL\\pN", + uniPunctuation = "\\pP\\pZ", + uniPrintable = "\\pL\\pM\\pZ\\pS\\pN\\pP"; + + let strings = ""; + + switch (matchType) { + case "Alphanumeric + punctuation (A)": + strings = `[${alphanumeric + punctuation}]`; + break; + case "All printable chars (A)": + case "Null-terminated strings (A)": + strings = `[${printable}]`; + break; + case "Alphanumeric + punctuation (U)": + strings = `[${uniAlphanumeric + uniPunctuation}]`; + break; + case "All printable chars (U)": + case "Null-terminated strings (U)": + strings = `[${uniPrintable}]`; + break; + } + + // UTF-16 support is hacked in by allowing null bytes on either side of the matched chars + switch (encoding) { + case "All": + strings = `(\x00?${strings}\x00?)`; + break; + case "16-bit littleendian": + strings = `(${strings}\x00)`; + break; + case "16-bit bigendian": + strings = `(\x00${strings})`; + break; + case "Single byte": + default: + break; + } + + strings = `${strings}{${minLen},}`; + + if (matchType.includes("Null-terminated")) { + strings += "\x00"; + } + + const regex = new XRegExp(strings, "ig"); return Extract._search(input, regex, null, displayTotal); }, diff --git a/src/core/operations/Regex.js b/src/core/operations/Regex.js new file mode 100644 index 0000000..9c6b2f8 --- /dev/null +++ b/src/core/operations/Regex.js @@ -0,0 +1,278 @@ +import XRegExp from "xregexp"; +import Utils from "../Utils.js"; + + +/** + * Regex operations. + * + * @author n1474335 [n1474335@gmail.com] + * @copyright Crown Copyright 2018 + * @license Apache-2.0 + * + * @namespace + */ +const Regex = { + + /** + * @constant + * @default + */ + REGEX_PRE_POPULATE: [ + { + name: "User defined", + value: "" + }, + { + name: "IPv4 address", + value: "(?:(?:\\d|[01]?\\d\\d|2[0-4]\\d|25[0-5])\\.){3}(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d|\\d)(?:\\/\\d{1,2})?" + }, + { + name: "IPv6 address", + value: "((?=.*::)(?!.*::.+::)(::)?([\\dA-Fa-f]{1,4}:(:|\\b)|){5}|([\\dA-Fa-f]{1,4}:){6})((([\\dA-Fa-f]{1,4}((?!\\3)::|:\\b|(?![\\dA-Fa-f])))|(?!\\2\\3)){2}|(((2[0-4]|1\\d|[1-9])?\\d|25[0-5])\\.?\\b){4})" + }, + { + name: "Email address", + value: "(\\w[-.\\w]*)@([-\\w]+(?:\\.[-\\w]+)*)\\.([A-Za-z]{2,4})" + }, + { + name: "URL", + value: "([A-Za-z]+://)([-\\w]+(?:\\.\\w[-\\w]*)+)(:\\d+)?(/[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]*(?:[.!,?]+[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]+)*)?" + }, + { + name: "Domain", + value: "\\b((?=[a-z0-9-]{1,63}\\.)(xn--)?[a-z0-9]+(-[a-z0-9]+)*\\.)+[a-z]{2,63}\\b" + }, + { + name: "Windows file path", + value: "([A-Za-z]):\\\\((?:[A-Za-z\\d][A-Za-z\\d\\- \\x27_\\(\\)]{0,61}\\\\?)*[A-Za-z\\d][A-Za-z\\d\\- \\x27_\\(\\)]{0,61})(\\.[A-Za-z\\d]{1,6})?" + }, + { + name: "UNIX file path", + value: "(?:/[A-Za-z\\d.][A-Za-z\\d\\-.]{0,61})+" + }, + { + name: "MAC address", + value: "[A-Fa-f\\d]{2}(?:[:-][A-Fa-f\\d]{2}){5}" + }, + { + name: "Date (yyyy-mm-dd)", + value: "((?:19|20)\\d\\d)[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])" + }, + { + name: "Date (dd/mm/yyyy)", + value: "(0[1-9]|[12][0-9]|3[01])[- /.](0[1-9]|1[012])[- /.]((?:19|20)\\d\\d)" + }, + { + name: "Date (mm/dd/yyyy)", + value: "(0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])[- /.]((?:19|20)\\d\\d)" + }, + { + name: "Strings", + value: "[A-Za-z\\d/\\-:.,_$%\\x27\"()<>= !\\[\\]{}@]{4,}" + }, + ], + /** + * @constant + * @default + */ + OUTPUT_FORMAT: ["Highlight matches", "List matches", "List capture groups", "List matches with capture groups"], + /** + * @constant + * @default + */ + DISPLAY_TOTAL: false, + + /** + * Regular expression operation. + * + * @param {string} input + * @param {Object[]} args + * @returns {html} + */ + runRegex: function(input, args) { + const userRegex = args[1], + i = args[2], + m = args[3], + s = args[4], + u = args[5], + a = args[6], + displayTotal = args[7], + outputFormat = args[8]; + let modifiers = "g"; + + if (i) modifiers += "i"; + if (m) modifiers += "m"; + if (s) modifiers += "s"; + if (u) modifiers += "u"; + if (a) modifiers += "A"; + + if (userRegex && userRegex !== "^" && userRegex !== "$") { + try { + const regex = new XRegExp(userRegex, modifiers); + + switch (outputFormat) { + case "Highlight matches": + return Regex._regexHighlight(input, regex, displayTotal); + case "List matches": + return Utils.escapeHtml(Regex._regexList(input, regex, displayTotal, true, false)); + case "List capture groups": + return Utils.escapeHtml(Regex._regexList(input, regex, displayTotal, false, true)); + case "List matches with capture groups": + return Utils.escapeHtml(Regex._regexList(input, regex, displayTotal, true, true)); + default: + return "Error: Invalid output format"; + } + } catch (err) { + return "Invalid regex. Details: " + err.message; + } + } else { + return Utils.escapeHtml(input); + } + }, + + + /** + * @constant + * @default + */ + SEARCH_TYPE: ["Regex", "Extended (\\n, \\t, \\x...)", "Simple string"], + /** + * @constant + * @default + */ + FIND_REPLACE_GLOBAL: true, + /** + * @constant + * @default + */ + FIND_REPLACE_CASE: false, + /** + * @constant + * @default + */ + FIND_REPLACE_MULTILINE: true, + + /** + * Find / Replace operation. + * + * @param {string} input + * @param {Object[]} args + * @returns {string} + */ + runFindReplace: function(input, args) { + let find = args[0].string, + type = args[0].option, + replace = args[1], + g = args[2], + i = args[3], + m = args[4], + modifiers = ""; + + if (g) modifiers += "g"; + if (i) modifiers += "i"; + if (m) modifiers += "m"; + + if (type === "Regex") { + find = new RegExp(find, modifiers); + return input.replace(find, replace); + } + + if (type.indexOf("Extended") === 0) { + find = Utils.parseEscapedChars(find); + } + + find = new RegExp(Utils.escapeRegex(find), modifiers); + + return input.replace(find, replace); + }, + + + /** + * Adds HTML highlights to matches within a string. + * + * @private + * @param {string} input + * @param {RegExp} regex + * @param {boolean} displayTotal + * @returns {string} + */ + _regexHighlight: function(input, regex, displayTotal) { + let output = "", + m, + hl = 1, + i = 0, + total = 0; + + while ((m = regex.exec(input))) { + // Moves pointer when an empty string is matched (prevents infinite loop) + if (m.index === regex.lastIndex) { + regex.lastIndex++; + } + + // Add up to match + output += Utils.escapeHtml(input.slice(i, m.index)); + + // Add match with highlighting + output += "" + Utils.escapeHtml(m[0]) + ""; + + // Switch highlight + hl = hl === 1 ? 2 : 1; + + i = regex.lastIndex; + total++; + } + + // Add all after final match + output += Utils.escapeHtml(input.slice(i, input.length)); + + if (displayTotal) + output = "Total found: " + total + "\n\n" + output; + + return output; + }, + + + /** + * Creates a string listing the matches within a string. + * + * @private + * @param {string} input + * @param {RegExp} regex + * @param {boolean} displayTotal + * @param {boolean} matches - Display full match + * @param {boolean} captureGroups - Display each of the capture groups separately + * @returns {string} + */ + _regexList: function(input, regex, displayTotal, matches, captureGroups) { + let output = "", + total = 0, + match; + + while ((match = regex.exec(input))) { + // Moves pointer when an empty string is matched (prevents infinite loop) + if (match.index === regex.lastIndex) { + regex.lastIndex++; + } + + total++; + if (matches) { + output += match[0] + "\n"; + } + if (captureGroups) { + for (let i = 1; i < match.length; i++) { + if (matches) { + output += " Group " + i + ": "; + } + output += match[i] + "\n"; + } + } + } + + if (displayTotal) + output = "Total found: " + total + "\n\n" + output; + + return output.slice(0, -1); + }, +}; + +export default Regex; diff --git a/src/core/operations/StrUtils.js b/src/core/operations/StrUtils.js index dd15b32..6ca9325 100755 --- a/src/core/operations/StrUtils.js +++ b/src/core/operations/StrUtils.js @@ -12,128 +12,6 @@ import Utils from "../Utils.js"; */ const StrUtils = { - /** - * @constant - * @default - */ - REGEX_PRE_POPULATE: [ - { - name: "User defined", - value: "" - }, - { - name: "IPv4 address", - value: "(?:(?:\\d|[01]?\\d\\d|2[0-4]\\d|25[0-5])\\.){3}(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d|\\d)(?:\\/\\d{1,2})?" - }, - { - name: "IPv6 address", - value: "((?=.*::)(?!.*::.+::)(::)?([\\dA-Fa-f]{1,4}:(:|\\b)|){5}|([\\dA-Fa-f]{1,4}:){6})((([\\dA-Fa-f]{1,4}((?!\\3)::|:\\b|(?![\\dA-Fa-f])))|(?!\\2\\3)){2}|(((2[0-4]|1\\d|[1-9])?\\d|25[0-5])\\.?\\b){4})" - }, - { - name: "Email address", - value: "(\\w[-.\\w]*)@([-\\w]+(?:\\.[-\\w]+)*)\\.([A-Za-z]{2,4})" - }, - { - name: "URL", - value: "([A-Za-z]+://)([-\\w]+(?:\\.\\w[-\\w]*)+)(:\\d+)?(/[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]*(?:[.!,?]+[^.!,?\"<>\\[\\]{}\\s\\x7F-\\xFF]+)*)?" - }, - { - name: "Domain", - value: "\\b((?=[a-z0-9-]{1,63}\\.)(xn--)?[a-z0-9]+(-[a-z0-9]+)*\\.)+[a-z]{2,63}\\b" - }, - { - name: "Windows file path", - value: "([A-Za-z]):\\\\((?:[A-Za-z\\d][A-Za-z\\d\\- \\x27_\\(\\)]{0,61}\\\\?)*[A-Za-z\\d][A-Za-z\\d\\- \\x27_\\(\\)]{0,61})(\\.[A-Za-z\\d]{1,6})?" - }, - { - name: "UNIX file path", - value: "(?:/[A-Za-z\\d.][A-Za-z\\d\\-.]{0,61})+" - }, - { - name: "MAC address", - value: "[A-Fa-f\\d]{2}(?:[:-][A-Fa-f\\d]{2}){5}" - }, - { - name: "Date (yyyy-mm-dd)", - value: "((?:19|20)\\d\\d)[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])" - }, - { - name: "Date (dd/mm/yyyy)", - value: "(0[1-9]|[12][0-9]|3[01])[- /.](0[1-9]|1[012])[- /.]((?:19|20)\\d\\d)" - }, - { - name: "Date (mm/dd/yyyy)", - value: "(0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])[- /.]((?:19|20)\\d\\d)" - }, - { - name: "Strings", - value: "[A-Za-z\\d/\\-:.,_$%\\x27\"()<>= !\\[\\]{}@]{4,}" - }, - ], - /** - * @constant - * @default - */ - REGEX_CASE_INSENSITIVE: true, - /** - * @constant - * @default - */ - REGEX_MULTILINE_MATCHING: true, - /** - * @constant - * @default - */ - OUTPUT_FORMAT: ["Highlight matches", "List matches", "List capture groups", "List matches with capture groups"], - /** - * @constant - * @default - */ - DISPLAY_TOTAL: false, - - /** - * Regular expression operation. - * - * @param {string} input - * @param {Object[]} args - * @returns {html} - */ - runRegex: function(input, args) { - let userRegex = args[1], - i = args[2], - m = args[3], - displayTotal = args[4], - outputFormat = args[5], - modifiers = "g"; - - if (i) modifiers += "i"; - if (m) modifiers += "m"; - - if (userRegex && userRegex !== "^" && userRegex !== "$") { - try { - const regex = new RegExp(userRegex, modifiers); - - switch (outputFormat) { - case "Highlight matches": - return StrUtils._regexHighlight(input, regex, displayTotal); - case "List matches": - return Utils.escapeHtml(StrUtils._regexList(input, regex, displayTotal, true, false)); - case "List capture groups": - return Utils.escapeHtml(StrUtils._regexList(input, regex, displayTotal, false, true)); - case "List matches with capture groups": - return Utils.escapeHtml(StrUtils._regexList(input, regex, displayTotal, true, true)); - default: - return "Error: Invalid output format"; - } - } catch (err) { - return "Invalid regex. Details: " + err.message; - } - } else { - return Utils.escapeHtml(input); - } - }, - - /** * @constant * @default @@ -187,68 +65,28 @@ const StrUtils = { * @constant * @default */ - SEARCH_TYPE: ["Regex", "Extended (\\n, \\t, \\x...)", "Simple string"], + SPLIT_DELIM_OPTIONS: [ + {name: "Comma", value: ","}, + {name: "Space", value: " "}, + {name: "Line feed", value: "\\n"}, + {name: "CRLF", value: "\\r\\n"}, + {name: "Semi-colon", value: ";"}, + {name: "Colon", value: ":"}, + {name: "Nothing (separate chars)", value: ""} + ], /** * @constant * @default */ - FIND_REPLACE_GLOBAL: true, - /** - * @constant - * @default - */ - FIND_REPLACE_CASE: false, - /** - * @constant - * @default - */ - FIND_REPLACE_MULTILINE: true, - - /** - * Find / Replace operation. - * - * @param {string} input - * @param {Object[]} args - * @returns {string} - */ - runFindReplace: function(input, args) { - let find = args[0].string, - type = args[0].option, - replace = args[1], - g = args[2], - i = args[3], - m = args[4], - modifiers = ""; - - if (g) modifiers += "g"; - if (i) modifiers += "i"; - if (m) modifiers += "m"; - - if (type === "Regex") { - find = new RegExp(find, modifiers); - return input.replace(find, replace); - } - - if (type.indexOf("Extended") === 0) { - find = Utils.parseEscapedChars(find); - } - - find = new RegExp(Utils.escapeRegex(find), modifiers); - - return input.replace(find, replace); - }, - - - /** - * @constant - * @default - */ - SPLIT_DELIM: ",", - /** - * @constant - * @default - */ - DELIMITER_OPTIONS: ["Line feed", "CRLF", "Space", "Comma", "Semi-colon", "Colon", "Nothing (separate chars)"], + JOIN_DELIM_OPTIONS: [ + {name: "Line feed", value: "\\n"}, + {name: "CRLF", value: "\\r\\n"}, + {name: "Space", value: " "}, + {name: "Comma", value: ","}, + {name: "Semi-colon", value: ";"}, + {name: "Colon", value: ":"}, + {name: "Nothing (join chars)", value: ""} + ], /** * Split operation. @@ -258,8 +96,8 @@ const StrUtils = { * @returns {string} */ runSplit: function(input, args) { - let splitDelim = args[0] || StrUtils.SPLIT_DELIM, - joinDelim = Utils.charRep[args[1]], + let splitDelim = args[0], + joinDelim = args[1], sections = input.split(splitDelim); return sections.join(joinDelim); @@ -576,84 +414,6 @@ const StrUtils = { return dist.toString(); }, - - - /** - * Adds HTML highlights to matches within a string. - * - * @private - * @param {string} input - * @param {RegExp} regex - * @param {boolean} displayTotal - * @returns {string} - */ - _regexHighlight: function(input, regex, displayTotal) { - let output = "", - m, - hl = 1, - i = 0, - total = 0; - - while ((m = regex.exec(input))) { - // Add up to match - output += Utils.escapeHtml(input.slice(i, m.index)); - - // Add match with highlighting - output += "" + Utils.escapeHtml(m[0]) + ""; - - // Switch highlight - hl = hl === 1 ? 2 : 1; - - i = regex.lastIndex; - total++; - } - - // Add all after final match - output += Utils.escapeHtml(input.slice(i, input.length)); - - if (displayTotal) - output = "Total found: " + total + "\n\n" + output; - - return output; - }, - - - /** - * Creates a string listing the matches within a string. - * - * @private - * @param {string} input - * @param {RegExp} regex - * @param {boolean} displayTotal - * @param {boolean} matches - Display full match - * @param {boolean} captureGroups - Display each of the capture groups separately - * @returns {string} - */ - _regexList: function(input, regex, displayTotal, matches, captureGroups) { - let output = "", - total = 0, - match; - - while ((match = regex.exec(input))) { - total++; - if (matches) { - output += match[0] + "\n"; - } - if (captureGroups) { - for (let i = 1; i < match.length; i++) { - if (matches) { - output += " Group " + i + ": "; - } - output += match[i] + "\n"; - } - } - } - - if (displayTotal) - output = "Total found: " + total + "\n\n" + output; - - return output; - }, }; export default StrUtils; diff --git a/test/index.js b/test/index.js index 5c397de..e58d7e2 100644 --- a/test/index.js +++ b/test/index.js @@ -30,6 +30,7 @@ import "./tests/operations/MS.js"; import "./tests/operations/PHP.js"; import "./tests/operations/NetBIOS.js"; import "./tests/operations/OTP.js"; +import "./tests/operations/Regex.js"; import "./tests/operations/StrUtils.js"; import "./tests/operations/SeqUtils.js"; diff --git a/test/tests/operations/Regex.js b/test/tests/operations/Regex.js new file mode 100644 index 0000000..dc16910 --- /dev/null +++ b/test/tests/operations/Regex.js @@ -0,0 +1,59 @@ +/** + * StrUtils tests. + * + * @author n1474335 [n1474335@gmail.com] + * @copyright Crown Copyright 2017 + * @license Apache-2.0 + */ +import TestRegister from "../../TestRegister.js"; + +TestRegister.addTests([ + { + name: "Regex: non-HTML op", + input: "/<>", + expectedOutput: "/<>", + recipeConfig: [ + { + "op": "Regular expression", + "args": ["User defined", "", true, true, false, false, false, false, "Highlight matches"] + }, + { + "op": "Remove whitespace", + "args": [true, true, true, true, true, false] + } + ], + }, + { + name: "Regex: Dot matches all", + input: "Hello\nWorld", + expectedOutput: "Hello\nWorld", + recipeConfig: [ + { + "op": "Regular expression", + "args": ["User defined", ".+", true, true, true, false, false, false, "List matches"] + } + ], + }, + { + name: "Regex: Astral off", + input: "šŒ†šŸ˜†", + expectedOutput: "", + recipeConfig: [ + { + "op": "Regular expression", + "args": ["User defined", "\\pS", true, true, false, false, false, false, "List matches"] + } + ], + }, + { + name: "Regex: Astral on", + input: "šŒ†šŸ˜†", + expectedOutput: "šŒ†\nšŸ˜†", + recipeConfig: [ + { + "op": "Regular expression", + "args": ["User defined", "\\pS", true, true, false, false, true, false, "List matches"] + } + ], + } +]); diff --git a/test/tests/operations/StrUtils.js b/test/tests/operations/StrUtils.js index 6e66b26..8110d06 100644 --- a/test/tests/operations/StrUtils.js +++ b/test/tests/operations/StrUtils.js @@ -8,21 +8,6 @@ import TestRegister from "../../TestRegister.js"; TestRegister.addTests([ - { - name: "Regex, non-HTML op", - input: "/<>", - expectedOutput: "/<>", - recipeConfig: [ - { - "op": "Regular expression", - "args": ["User defined", "", true, true, false, "Highlight matches"] - }, - { - "op": "Remove whitespace", - "args": [true, true, true, true, true, false] - } - ], - }, { name: "Diff, basic usage", input: "testing23\n\ntesting123",