Added speculative execution of recipes to determine the most likely decodings.

feature-extract-files
n1474335 2018-01-22 22:06:26 +00:00
parent 57314b77e5
commit 28abd00d82
3 changed files with 71 additions and 8 deletions

View File

@ -171,7 +171,7 @@ const FlowControl = {
* @returns {Object} The updated state of the recipe.
*/
runJump: function(state) {
const ings = state.opList[state.progress].getIngValues(),
const ings = state.opList[state.progress].getIngValues(),
label = ings[0],
maxJumps = ings[1],
jmpIndex = FlowControl._getLabelIndex(label, state);
@ -199,7 +199,7 @@ const FlowControl = {
* @returns {Object} The updated state of the recipe.
*/
runCondJump: function(state) {
const ings = state.opList[state.progress].getIngValues(),
const ings = state.opList[state.progress].getIngValues(),
dish = state.dish,
regexStr = ings[0],
invert = ings[1],
@ -263,13 +263,14 @@ const FlowControl = {
* @param {Operation[]} state.opList - The list of operations in the recipe.
* @returns {Object} The updated state of the recipe.
*/
runMagic: function(state) {
const dish = state.dish,
runMagic: async function(state) {
const ings = state.opList[state.progress].getIngValues(),
depth = ings[0],
dish = state.dish,
magic = new Magic(dish.get(Dish.ARRAY_BUFFER));
const output = JSON.stringify(magic.findMatchingOps(), null, 2) + "\n\n" +
JSON.stringify(magic.detectFileType(), null, 2) + "\n\n" +
JSON.stringify(magic.detectLanguage(), null, 2);
const specExec = await magic.speculativeExecution(depth+1);
const output = JSON.stringify(specExec, null, 2);
dish.set(output, Dish.STRING);
return state;

View File

@ -87,7 +87,13 @@ const OperationConfig = {
inputType: "ArrayBuffer",
outputType: "string",
flowControl: true,
args: []
args: [
{
name: "Depth",
type: "number",
value: 3
}
]
},
"Fork": {
module: "Default",

View File

@ -1,5 +1,7 @@
import OperationConfig from "../config/MetaConfig.js";
import Utils from "../Utils.js";
import Recipe from "../Recipe.js";
import Dish from "../Dish.js";
import FileType from "../operations/FileType.js";
@ -90,6 +92,60 @@ class Magic {
return FileType.magicType(this.inputBuffer);
}
/**
* Speculatively executes matching operations, recording metadata of each result.
*
* @param {number} [depth=1] - How many levels to try to execute
* @param {Object[]} [recipeConfig=[]] - The recipe configuration up to this point
* @returns {Object[]} A sorted list of the recipes most likely to result in correct decoding
*/
async speculativeExecution(depth = 1, recipeConfig = []) {
if (depth === 0) return [];
let results = [];
// Record the properties of the current data
results.push({
recipe: recipeConfig,
data: this.inputStr.slice(0, 100),
languageScores: this.detectLanguage(),
fileType: this.detectFileType(),
});
// Find any operations that can be run on this data
const matchingOps = this.findMatchingOps();
// Execute each of those operations, then recursively call the speculativeExecution() method
// on the resulting data, recording the properties of each option.
await Promise.all(matchingOps.map(async op => {
const dish = new Dish(this.inputBuffer, Dish.ARRAY_BUFFER),
opConfig = {
op: op.op,
args: op.args
},
recipe = new Recipe([opConfig]);
await recipe.execute(dish, 0);
const magic = new Magic(dish.get(Dish.ARRAY_BUFFER)),
speculativeResults = await magic.speculativeExecution(depth-1, [...recipeConfig, opConfig]);
results = results.concat(speculativeResults);
}));
// Return a sorted list of possible recipes along with their properties
return results.sort((a, b) => {
// Each option is sorted based on its most likely language (lower is better)
let aScore = a.languageScores[0].chiSqr,
bScore = b.languageScores[0].chiSqr;
// If a recipe results in a file being detected, it receives a relatively good score
if (a.fileType) aScore = 500;
if (b.fileType) bScore = 500;
return aScore - bScore;
});
}
/**
* Calculates the number of times each byte appears in the input
*