Tweaks to 'XPath expression' and 'CSS selector' operations. Closes #13.

feature-extract-files
n1474335 2016-12-20 18:49:25 +00:00
parent 650fd9a940
commit 39d50093ae
9 changed files with 189 additions and 169 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -243,6 +243,8 @@ const Categories = [
"SQL Minify",
"CSS Beautify",
"CSS Minify",
"XPath expression",
"CSS selector",
"Strip HTML tags",
"Diff",
]

View File

@ -1939,38 +1939,38 @@ const OperationConfig = {
]
},
"XPath expression": {
description: "Extract information from an xml document with an XPath query",
run: Extract.run_xpath,
description: "Extract information from an XML document with an XPath query",
run: Code.run_xpath,
input_type: "string",
output_type: "string",
args: [
{
name: "XPath",
type: "string",
value: Extract.XPATH_INITIAL
value: Code.XPATH_INITIAL
},
{
name: "Result delimiter",
type: "binary_short_string",
value: Extract.XPATH_DELIMITER
value: Code.XPATH_DELIMITER
}
]
},
"CSS selector": {
description: "Extract information from an HTML document with an CSS selector",
run: Extract.run_css_query,
description: "Extract information from an HTML document with a CSS selector",
run: Code.run_css_query,
input_type: "string",
output_type: "string",
args: [
{
name: "CSS selector",
type: "string",
value: Extract.SELECTOR_INITIAL
value: Code.CSS_SELECTOR_INITIAL
},
{
name: "Delimiter",
type: "binary_short_string",
value: Extract.CSS_QUERY_DELIMITER
value: Code.CSS_QUERY_DELIMITER
},
]
},

14
src/js/lib/xpath.js Normal file → Executable file
View File

@ -1,11 +1,11 @@
(function(){/*
* XPath.js - Pure JavaScript implementation of XPath 2.0 parser and evaluator
*
* Copyright (c) 2012 Sergey Ilinsky
* Dual licensed under the MIT and GPL licenses.
*
*
/** @license
========================================================================
XPath.js - Pure JavaScript implementation of XPath 2.0 parser and evaluator
Copyright (c) 2012 Sergey Ilinsky
Dual licensed under the MIT and GPL licenses.
*/
(function(){
// Javascript objects
var cString = window.String,

View File

@ -1,4 +1,4 @@
/* globals prettyPrintOne, vkbeautify */
/* globals prettyPrintOne, vkbeautify, xpath */
/**
* Code operations.
@ -304,4 +304,119 @@ var Code = {
}
},
/**
* @constant
* @default
*/
XPATH_INITIAL: "",
/**
* @constant
* @default
*/
XPATH_DELIMITER: "\\n",
/**
* XPath expression operation.
*
* @author Mikescher (https://github.com/Mikescher | https://mikescher.com)
* @param {string} input
* @param {Object[]} args
* @returns {string}
*/
run_xpath:function(input, args) {
const query = args[0],
delimiter = args[1];
var xml;
try {
xml = $.parseXML(input);
} catch (err) {
return "Invalid input XML.";
}
var result;
try {
result = xpath.evaluate(xml, query);
} catch (err) {
return "Invalid XPath. Details:\n" + err.message;
}
const serializer = new XMLSerializer();
const node_to_string = function(node) {
switch (node.nodeType) {
case Node.ELEMENT_NODE: return serializer.serializeToString(node);
case Node.ATTRIBUTE_NODE: return node.value;
case Node.COMMENT_NODE: return node.data;
case Node.DOCUMENT_NODE: return serializer.serializeToString(node);
default: throw new Error("Unknown Node Type: " + node.nodeType);
}
};
return Object.keys(result).map(function(key) {
return result[key];
}).slice(0, -1) // all values except last (length)
.map(node_to_string)
.join(delimiter);
},
/**
* @constant
* @default
*/
CSS_SELECTOR_INITIAL: "",
/**
* @constant
* @default
*/
CSS_QUERY_DELIMITER: "\\n",
/**
* CSS selector operation.
*
* @author Mikescher (https://github.com/Mikescher | https://mikescher.com)
* @param {string} input
* @param {Object[]} args
* @returns {string}
*/
run_css_query: function(input, args) {
const query = args[0],
delimiter = args[1];
var html;
try {
html = $.parseHTML(input);
} catch (err) {
return "Invalid input HTML.";
}
var result;
try {
result = $(html).find(query);
} catch (err) {
return "Invalid CSS Selector. Details:\n" + err.message;
}
const node_to_string = function(node) {
switch (node.nodeType) {
case Node.ELEMENT_NODE: return node.outerHTML;
case Node.ATTRIBUTE_NODE: return node.value;
case Node.COMMENT_NODE: return node.data;
case Node.TEXT_NODE: return node.wholeText;
case Node.DOCUMENT_NODE: return node.outerHTML;
default: throw new Error("Unknown Node Type: " + node.nodeType);
}
};
return Array.apply(null, Array(result.length))
.map(function(_, i) {
return result[i];
})
.map(node_to_string)
.join(delimiter);
},
};

View File

@ -1,5 +1,3 @@
/* globals xpath */
/**
* Identifier extraction operations.
*
@ -296,117 +294,4 @@ var Extract = {
return output;
},
/**
* @constant
* @default
*/
XPATH_INITIAL: "",
/**
* @constant
* @default
*/
XPATH_DELIMITER: "\\n",
/**
* Extract information (from an xml document) with an XPath query
*
* @author Mikescher (https://github.com/Mikescher | https://mikescher.com)
*
* @param {string} input
* @param {Object[]} args
* @returns {string}
*/
run_xpath:function(input, args) {
const query = args[0];
const delimiter = args[1];
var xml;
try {
xml = $.parseXML(input);
} catch (err) {
return "Invalid input XML.";
}
var result;
try {
result = xpath.evaluate(xml, query);
} catch (err) {
return "Invalid XPath. Details:\n" + err.message;
}
const serializer = new XMLSerializer();
const nodeToString = function(node) {
switch (node.nodeType) {
case Node.ELEMENT_NODE: return serializer.serializeToString(node);
case Node.ATTRIBUTE_NODE: return node.value;
case Node.COMMENT_NODE: return node.data;
case Node.DOCUMENT_NODE: return serializer.serializeToString(node);
default: throw new Error("Unknown Node Type: " + node.nodeType);
}
};
return Object.values(result).slice(0, -1) // all values except last (length)
.map(nodeToString)
.join(delimiter);
},
/**
* @constant
* @default
*/
SELECTOR_INITIAL: "",
/**
* @constant
* @default
*/
CSS_QUERY_DELIMITER: "\\n",
/**
* Extract information (from an HTML document) with a CSS selector
*
* @author Mikescher (https://github.com/Mikescher | https://mikescher.com)
*
* @param {string} input
* @param {Object[]} args
* @returns {string}
*/
run_css_query: function(input, args) {
const query = args[0];
const delimiter = args[1];
var html;
try {
html = $.parseHTML(input);
} catch (err) {
return "Invalid input HTML.";
}
var result;
try {
result = $(html).find(query);
} catch (err) {
return "Invalid CSS Selector. Details:\n" + err.message;
}
const nodeToString = function(node) {
switch (node.nodeType) {
case Node.ELEMENT_NODE: return node.outerHTML;
case Node.ATTRIBUTE_NODE: return node.value;
case Node.COMMENT_NODE: return node.data;
case Node.TEXT_NODE: return node.wholeText;
case Node.DOCUMENT_NODE: return node.outerHTML;
default: throw new Error("Unknown Node Type: " + node.nodeType);
}
};
return Array.apply(null, Array(result.length))
.map(function(_, i) {
return result[i];
})
.map(nodeToString)
.join(delimiter);
},
};

View File

@ -1,21 +1,21 @@
203 source files
104466 lines
4.0M size
204 source files
113086 lines
4.2M size
136 JavaScript source files
95316 lines
3.4M size
137 JavaScript source files
103936 lines
3.7M size
78 third party JavaScript source files
76377 lines
2.7M size
79 third party JavaScript source files
84842 lines
3.0M size
58 first party JavaScript source files
18939 lines
19094 lines
724K size
3.2M uncompressed JavaScript size
3.4M uncompressed JavaScript size
1.7M compressed JavaScript size
15 categories
155 operations
157 operations