Merge branch 'feature-hamming'

feature-extract-files
n1474335 2018-01-12 18:28:55 +00:00
commit 8e5d43dfa8
4 changed files with 94 additions and 0 deletions

View File

@ -189,6 +189,7 @@ const Categories = [
"Find / Replace",
"Regular expression",
"Offset checker",
"Hamming Distance",
"Convert distance",
"Convert area",
"Convert mass",

View File

@ -3913,6 +3913,29 @@ const OperationConfig = {
}
]
},
// Configuration entry for the "Hamming Distance" operation: takes a string, produces a
// string, and belongs to the "Default" module. The three args below are read
// positionally by StrUtils.runHamming as args[0], args[1] and args[2].
"Hamming Distance": {
module: "Default",
description: "In information theory, the Hamming distance between two strings of equal length is the number of positions at which the corresponding symbols are different. In other words, it measures the minimum number of substitutions required to change one string into the other, or the minimum number of errors that could have transformed one string into the other. In a more general context, the Hamming distance is one of several string metrics for measuring the edit distance between two sequences.",
inputType: "string",
outputType: "string",
args: [
// args[0]: separator that splits the input into the two samples to compare.
{
name: "Delimiter",
type: "binaryShortString",
value: StrUtils.HAMMING_DELIM
},
// args[1]: whether differences are counted per byte or per bit.
{
name: "Unit",
type: "option",
value: StrUtils.HAMMING_UNIT
},
// args[2]: how each sample is turned into bytes before comparison (raw text or hex).
{
name: "Input type",
type: "option",
value: StrUtils.HAMMING_INPUT_TYPE
}
]
}
};

View File

@ -110,6 +110,7 @@ OpModules.Default = {
"Unescape string": StrUtils.runUnescape,
"Head": StrUtils.runHead,
"Tail": StrUtils.runTail,
"Hamming Distance": StrUtils.runHamming,
"Remove whitespace": Tidy.runRemoveWhitespace,
"Remove null bytes": Tidy.runRemoveNulls,
"Drop bytes": Tidy.runDropBytes,

View File

@ -509,6 +509,75 @@ const StrUtils = {
},
/**
 * Default value of the Hamming Distance "Delimiter" argument.
 * The literal holds the four characters backslash-n-backslash-n (an escaped form),
 * not two actual newline characters.
 * NOTE(review): presumably the argument handling unescapes this before runHamming
 * receives it - confirm against the "binaryShortString" argument type.
 *
 * @constant
 * @default
 */
HAMMING_DELIM: "\\n\\n",
/**
 * Choices offered for the Hamming Distance "Input type" argument.
 * runHamming decodes samples with Utils.fromHex when "Hex" is selected and with
 * Utils.strToByteArray for any other value.
 *
 * @constant
 * @default
 */
HAMMING_INPUT_TYPE: ["Raw string", "Hex"],
/**
 * Choices offered for the Hamming Distance "Unit" argument.
 * runHamming counts differing bytes when "Byte" is selected and differing bits
 * for any other value.
 *
 * @constant
 * @default
 */
HAMMING_UNIT: ["Byte", "Bit"],
/**
* Hamming Distance operation.
*
* @author GCHQ Contributor [2]
*
* @param {string} input
* @param {Object[]} args
* @returns {string}
*/
runHamming: function(input, args) {
const delim = args[0],
byByte = args[1] === "Byte",
inputType = args[2],
samples = input.split(delim);
if (samples.length !== 2) {
return "Error: You can only calculae the edit distance between 2 strings. Please ensure exactly two inputs are provided, separated by the specified delimiter.";
}
if (samples[0].length !== samples[1].length) {
return "Error: Both inputs must be of the same length.";
}
if (inputType === "Hex") {
samples[0] = Utils.fromHex(samples[0]);
samples[1] = Utils.fromHex(samples[1]);
} else {
samples[0] = Utils.strToByteArray(samples[0]);
samples[1] = Utils.strToByteArray(samples[1]);
}
let dist = 0;
for (let i = 0; i < samples[0].length; i++) {
const lhs = samples[0][i],
rhs = samples[1][i];
if (byByte && lhs !== rhs) {
dist++;
} else if (!byByte) {
let xord = lhs ^ rhs;
while (xord) {
dist++;
xord &= xord - 1;
}
}
}
return dist.toString();
},
/**
* Adds HTML highlights to matches within a string.
*