Added ELF extractor. You can now specific which categories to search for in file type operations.

feature-extract-files
n1474335 2019-01-14 18:55:10 +00:00
parent 2307325af8
commit cd2c8078c8
5 changed files with 111 additions and 38 deletions

View File

@ -678,7 +678,7 @@ export const FILE_SIGNATURES = {
2: 0x4c,
3: 0x46
},
extractor: null
extractor: extractELF
},
{
name: "Adobe Flash",
@ -1474,6 +1474,50 @@ export function extractZlib(bytes, offset) {
}
/**
* ELF extractor.
*
* @param {Uint8Array} bytes
* @param {number} offset
* @returns {Uint8Array}
*/
export function extractELF(bytes, offset) {
const stream = new Stream(bytes.slice(offset));
// Skip over magic number
stream.moveForwardsBy(4);
// Read architecture (x86 == 1, x64 == 2)
const x86 = stream.readInt(1) === 1;
// Read endianness (1 == little, 2 == big)
const endian = stream.readInt(1) === 1 ? "le" : "be";
// Skip over header values
stream.moveForwardsBy(x86 ? 26 : 34);
// Read section header table offset
const shoff = x86 ? stream.readInt(4, endian) : stream.readInt(8, endian);
// Skip over flags, header size and program header size and entries
stream.moveForwardsBy(10);
// Read section header table entry size
const shentsize = stream.readInt(2, endian);
// Read number of entries in the section header table
const shnum = stream.readInt(2, endian);
// Jump to section header table
stream.moveTo(shoff);
// Move past each section header
stream.moveForwardsBy(shentsize * shnum);
return stream.carve();
}
/**
* Steps through a DEFLATE stream
*

View File

@ -75,22 +75,29 @@ function bytesMatch(sig, buf, offset=0) {
* extension and mime type.
*
* @param {Uint8Array} buf
* @param {string[]} [categories=All] - Which categories of file to look for
* @returns {Object[]} types
* @returns {string} type.name - Name of file type
* @returns {string} type.ext - File extension
* @returns {string} type.mime - Mime type
* @returns {string} [type.desc] - Description
*/
export function detectFileType(buf) {
export function detectFileType(buf, categories=Object.keys(FILE_SIGNATURES)) {
if (!(buf && buf.length > 1)) {
return [];
}
const matchingFiles = [];
const signatures = {};
// TODO allow user to select which categories to check
for (const cat in FILE_SIGNATURES) {
const category = FILE_SIGNATURES[cat];
if (categories.includes(cat)) {
signatures[cat] = FILE_SIGNATURES[cat];
}
}
for (const cat in signatures) {
const category = signatures[cat];
category.forEach(filetype => {
if (signatureMatches(filetype.signature, buf)) {
@ -107,6 +114,7 @@ export function detectFileType(buf) {
* the extensions and mime types.
*
* @param {Uint8Array} buf
* @param {string[]} [categories=All] - Which categories of file to look for
* @returns {Object[]} foundFiles
* @returns {number} foundFiles.offset - The position in the buffer at which this file was found
* @returns {Object} foundFiles.fileDetails
@ -115,16 +123,22 @@ export function detectFileType(buf) {
* @returns {string} foundFiles.fileDetails.mime - Mime type
* @returns {string} [foundFiles.fileDetails.desc] - Description
*/
export function scanForFileTypes(buf) {
export function scanForFileTypes(buf, categories=Object.keys(FILE_SIGNATURES)) {
if (!(buf && buf.length > 1)) {
return [];
}
const foundFiles = [];
const signatures = {};
// TODO allow user to select which categories to check
for (const cat in FILE_SIGNATURES) {
const category = FILE_SIGNATURES[cat];
if (categories.includes(cat)) {
signatures[cat] = FILE_SIGNATURES[cat];
}
}
for (const cat in signatures) {
const category = signatures[cat];
for (let i = 0; i < category.length; i++) {
const filetype = category[i];

View File

@ -6,6 +6,7 @@
import Operation from "../Operation";
import {detectFileType} from "../lib/FileType";
import {FILE_SIGNATURES} from "../lib/FileSignatures";
/**
* Detect File Type operation
@ -24,7 +25,13 @@ class DetectFileType extends Operation {
this.infoURL = "https://wikipedia.org/wiki/List_of_file_signatures";
this.inputType = "ArrayBuffer";
this.outputType = "string";
this.args = [];
this.args = Object.keys(FILE_SIGNATURES).map(cat => {
return {
name: cat,
type: "boolean",
value: true
};
});
}
/**
@ -34,7 +41,13 @@ class DetectFileType extends Operation {
*/
run(input, args) {
const data = new Uint8Array(input),
types = detectFileType(data);
categories = [];
args.forEach((cat, i) => {
if (cat) categories.push(Object.keys(FILE_SIGNATURES)[i]);
});
const types = detectFileType(data, categories);
if (!types.length) {
return "Unknown file type. Have you tried checking the entropy of this data to determine whether it might be encrypted or compressed?";

View File

@ -8,6 +8,7 @@ import Operation from "../Operation";
// import OperationError from "../errors/OperationError";
import Utils from "../Utils";
import {scanForFileTypes, extractFile} from "../lib/FileType";
import {FILE_SIGNATURES} from "../lib/FileSignatures";
/**
* Extract Files operation
@ -27,7 +28,13 @@ class ExtractFiles extends Operation {
this.inputType = "ArrayBuffer";
this.outputType = "List<File>";
this.presentType = "html";
this.args = [];
this.args = Object.keys(FILE_SIGNATURES).map(cat => {
return {
name: cat,
type: "boolean",
value: cat === "Miscellaneous" ? false : true
};
});
}
/**
@ -36,10 +43,15 @@ class ExtractFiles extends Operation {
* @returns {List<File>}
*/
run(input, args) {
const bytes = new Uint8Array(input);
const bytes = new Uint8Array(input),
categories = [];
args.forEach((cat, i) => {
if (cat) categories.push(Object.keys(FILE_SIGNATURES)[i]);
});
// Scan for embedded files
const detectedFiles = scanForFileTypes(bytes);
const detectedFiles = scanForFileTypes(bytes, categories);
// Extract each file that we support
const files = [];

View File

@ -7,6 +7,7 @@
import Operation from "../Operation";
import Utils from "../Utils";
import {scanForFileTypes} from "../lib/FileType";
import {FILE_SIGNATURES} from "../lib/FileSignatures";
/**
* Scan for Embedded Files operation
@ -25,13 +26,13 @@ class ScanForEmbeddedFiles extends Operation {
this.infoURL = "https://wikipedia.org/wiki/List_of_file_signatures";
this.inputType = "ArrayBuffer";
this.outputType = "string";
this.args = [
{
"name": "Ignore common byte sequences",
"type": "boolean",
"value": true
}
];
this.args = Object.keys(FILE_SIGNATURES).map(cat => {
return {
name: cat,
type: "boolean",
value: cat === "Miscellaneous" ? false : true
};
});
}
/**
@ -41,21 +42,18 @@ class ScanForEmbeddedFiles extends Operation {
*/
run(input, args) {
let output = "Scanning data for 'magic bytes' which may indicate embedded files. The following results may be false positives and should not be treat as reliable. Any suffiently long file is likely to contain these magic bytes coincidentally.\n",
numFound = 0,
numCommonFound = 0;
const ignoreCommon = args[0],
commonExts = ["ttf", "utf16le", ""],
data = new Uint8Array(input),
types = scanForFileTypes(data);
numFound = 0;
const categories = [],
data = new Uint8Array(input);
args.forEach((cat, i) => {
if (cat) categories.push(Object.keys(FILE_SIGNATURES)[i]);
});
const types = scanForFileTypes(data, categories);
if (types.length) {
types.forEach(type => {
if (ignoreCommon && commonExts.indexOf(type.fileDetails.extension) > -1) {
numCommonFound++;
return;
}
numFound++;
output += "\nOffset " + type.offset + " (0x" + Utils.hex(type.offset) + "):\n" +
" File extension: " + type.fileDetails.extension + "\n" +
@ -71,14 +69,6 @@ class ScanForEmbeddedFiles extends Operation {
output += "\nNo embedded files were found.";
}
if (numCommonFound > 0) {
output += "\n\n" + numCommonFound;
output += numCommonFound === 1 ?
" file type was detected that has a common byte sequence. This is likely to be a false positive." :
" file types were detected that have common byte sequences. These are likely to be false positives.";
output += " Run this operation with the 'Ignore common byte sequences' option unchecked to see details.";
}
return output;
}