From ec488e910c46dbec7ca7029f19359b60da71283e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Ord=C3=A1s?= <3125580+davorpa@users.noreply.github.com> Date: Sat, 17 Sep 2022 11:51:24 +0200 Subject: [PATCH 01/10] cleanup comments --- index.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/index.js b/index.js index f1394a4..9ecf241 100644 --- a/index.js +++ b/index.js @@ -31,9 +31,10 @@ const excludes = [ * @returns {string} an string with the name of the section related with the input heading */ function getSectionNameFromHeadingContent(children) { + // visit nodes in depth const walk = (children, depth) => children.reduce((text, node, index) => { - if (!node || !node.type) return text; + if (!node || !node.type) return text; // not AST, maybe plain text switch (node.type) { // // meaningfull nodes From 74d932fd214625b972ade57275008f6f9fd0b4e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Ord=C3=A1s?= <3125580+davorpa@users.noreply.github.com> Date: Sat, 17 Sep 2022 12:08:38 +0200 Subject: [PATCH 02/10] introduce wrapText function --- index.js | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/index.js b/index.js index 9ecf241..7eb5ac7 100644 --- a/index.js +++ b/index.js @@ -22,6 +22,18 @@ const excludes = [ "SUMMARY.md", ]; +/** + * Wraps a string between other that acts as token. + * @param {string} text - the text to wrap + * @param {string} token - the text to wrap with between + * @returns a string in the form `${token}${text}${token}` + */ +function wrapText(text, token = "") { + // avoid mix concatenate/sum string/numbers using array join hack + //return `${token}${text}${token}`; + return [token, token].join(String(text)); +} + /** * Parses the contents of a heading from remark-parse into a readable format. * @@ -40,13 +52,13 @@ function getSectionNameFromHeadingContent(children) { // meaningfull nodes // case "emphasis": - text += "_" + walk(node.children, depth + 1) + "_"; + text += wrapText(walk(node.children, depth + 1), "_"); break; case "inlineCode": - text += "`" + node.value + "`"; + text += wrapText(node.value, "`"); break; case "strong": - text += "**" + walk(node.children, depth + 1) + "**"; + text += wrapText(walk(node.children, depth + 1), "**"); break; case "text": text += node.value; From db5f8992ef07b9efefff6f8b94d21f682089b9c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Ord=C3=A1s?= <3125580+davorpa@users.noreply.github.com> Date: Sat, 17 Sep 2022 12:38:25 +0200 Subject: [PATCH 03/10] extract algorithm to `getLinkTextFromLinkNodes` preserving current behaviour --- index.js | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/index.js b/index.js index 7eb5ac7..a041565 100644 --- a/index.js +++ b/index.js @@ -80,6 +80,18 @@ function getSectionNameFromHeadingContent(children) { return walk(children, 0); } +/** + * Parses the contents of a heading from remark-parse into a readable format. + * + * @param {Array} children - an array of AST items defined by remark-parse for + * the content of headings (H1..H7) + * + * @returns {string} an string with the name of the section related with the input heading + */ +function getLinkTextFromLinkNodes(children) { + return children[0].value; +} + /** * Parses a list item generated from remark-parse into a readable format. * @@ -103,8 +115,8 @@ function parseListItem(listItem) { let leftParen, rightParen = -1; // If we need to parse parenthesized text const [link, ...otherStuff] = listItem; // head of listItem = url, the rest is "other stuff" + entry.title = getLinkTextFromLinkNodes(link.children); entry.url = link.url; - entry.title = link.children[0].value; // remember to get OTHER STUFF!! remember there may be multiple links! for (let i of otherStuff) { if (s === "") { @@ -130,7 +142,7 @@ function parseListItem(listItem) { // other links found if (entry.otherLinks === undefined) entry.otherLinks = []; entry.otherLinks.push({ - title: stripParens(i.children[0].value), + title: stripParens(getLinkTextFromLinkNodes(i.children)), url: i.url, }); // entry.otherLinks = [...entry.otherLinks, {title: i.children[0].value, url: i.url}]; // <-- i wish i could get this syntax to work with arrays From 8f9bd348ea4aa90605356a48f950da23663d59bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Ord=C3=A1s?= <3125580+davorpa@users.noreply.github.com> Date: Sat, 17 Sep 2022 12:43:21 +0200 Subject: [PATCH 04/10] refactor: move `stripParens` to next upper closure --- index.js | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/index.js b/index.js index a041565..2a25784 100644 --- a/index.js +++ b/index.js @@ -22,6 +22,16 @@ const excludes = [ "SUMMARY.md", ]; +/** + * Strip wrapped parenthesis from a string. + * @param {string} s - the string to process + * @returns {string} the stripped string if parens found, the input string if don't + */ +function stripParens(s) { + if (s.slice(0, 1) === "(" && s.slice(-1) === ")") return s.slice(1, -1); + return s; +} + /** * Wraps a string between other that acts as token. * @param {string} text - the text to wrap @@ -106,10 +116,6 @@ function getLinkTextFromLinkNodes(children) { * @return {Object} Returns an Object containing details about the piece of media. */ function parseListItem(listItem) { - let stripParens = function (s) { - if (s.slice(0, 1) === "(" && s.slice(-1) === ")") return s.slice(1, -1); - return s; - }; let entry = {}; let s = ""; // If we need to build up a string over multiple listItem elements let leftParen, From 8cab19ca3d2cf9901356e2f147a540bb7ea1b1d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Ord=C3=A1s?= <3125580+davorpa@users.noreply.github.com> Date: Sat, 17 Sep 2022 14:44:17 +0200 Subject: [PATCH 05/10] implements `getLinkTextFromLinkNodes` Resolves #8 --- index.js | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 67 insertions(+), 3 deletions(-) diff --git a/index.js b/index.js index 2a25784..01e53d6 100644 --- a/index.js +++ b/index.js @@ -32,6 +32,21 @@ function stripParens(s) { return s; } +/** + * To String + * @param {any} o - the object to get it text representation + * @returns the `o` as string + */ +function toString(o) { + // null or undefined + if (o === null || o === void 0) return o; + if (typeof o === "string") return o; + // has a toString function in their prototype + if (typeof o.toString === "function") return o.toString(); + // as string in the latest intent + return String(o); +} + /** * Wraps a string between other that acts as token. * @param {string} text - the text to wrap @@ -99,7 +114,54 @@ function getSectionNameFromHeadingContent(children) { * @returns {string} an string with the name of the section related with the input heading */ function getLinkTextFromLinkNodes(children) { - return children[0].value; + // visit nodes in depth + const walk = (children, depth) => { + // not AST, maybe plain text + if (!Array.isArray(children)) return toString(children); + // AST children array nodes + return children.reduce((text, node, index) => { + if (!node || !node.type) return text; // not AST, maybe plain text + switch (node.type) { + // + // rebuild meaningfull nodes + // + case "emphasis": + // {type: 'emphasis', children: [...], position: {...}} + text += wrapText(walk(node.children, depth + 1), "_"); + break; + case "image": + // {type: 'image', title: '...', url: '...', alt: '...', position: {...}} + text += `![${node.alt || node.title}](${node.url})`; + break; + case "inlineCode": + // {type: 'inlineCode', value: '...', position: {...}} + text += wrapText(node.value, "`"); + break; + case "strong": + // {type: 'strong', children: [...], position: {...}} + text += wrapText(walk(node.children, depth + 1), "**"); + break; + case "text": + // {type: 'text', value: '...', position: {...}} + text += node.value; + break; + // + // skipped nodes + // + default: + console.log( + "getLinkTextFromLinkNodes::skipped", + depth, + node.type, + node + ); + break; + } + return text; + }, ""); + }; + + return walk(children, 0); } /** @@ -120,8 +182,10 @@ function parseListItem(listItem) { let s = ""; // If we need to build up a string over multiple listItem elements let leftParen, rightParen = -1; // If we need to parse parenthesized text - const [link, ...otherStuff] = listItem; // head of listItem = url, the rest is "other stuff" - entry.title = getLinkTextFromLinkNodes(link.children); + // head of listItem = url, the rest is "other stuff" + const [link, ...otherStuff] = listItem; + // link.children || link.value => weak way to check if link.type === "link" + entry.title = getLinkTextFromLinkNodes(link.children || link.value); entry.url = link.url; // remember to get OTHER STUFF!! remember there may be multiple links! for (let i of otherStuff) { From bdd243733a3660774a4fac51850216279ed46e05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Ord=C3=A1s?= <3125580+davorpa@users.noreply.github.com> Date: Sat, 17 Sep 2022 14:47:01 +0200 Subject: [PATCH 06/10] revert swap url-title in json --- index.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index.js b/index.js index 01e53d6..4104178 100644 --- a/index.js +++ b/index.js @@ -184,9 +184,9 @@ function parseListItem(listItem) { rightParen = -1; // If we need to parse parenthesized text // head of listItem = url, the rest is "other stuff" const [link, ...otherStuff] = listItem; + entry.url = link.url; // link.children || link.value => weak way to check if link.type === "link" entry.title = getLinkTextFromLinkNodes(link.children || link.value); - entry.url = link.url; // remember to get OTHER STUFF!! remember there may be multiple links! for (let i of otherStuff) { if (s === "") { From 42040f180047add9b68d48e91a355306a6727d93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Ord=C3=A1s?= <3125580+davorpa@users.noreply.github.com> Date: Sun, 18 Sep 2022 09:20:59 +0200 Subject: [PATCH 07/10] cleanup + refactor + modularize - cleanup: use `remarkTokenAST` to get the node template - extract String functions to a new file under `lib/functions` - move `stripParens`, `toString`, `wrapText` to it. - Use `Strings.templater` to expand AST template with values --- index.js | 122 +++++++++++++++++++-------------------- lib/functions/Strings.js | 61 ++++++++++++++++++++ lib/functions/index.js | 5 ++ 3 files changed, 126 insertions(+), 62 deletions(-) create mode 100644 lib/functions/Strings.js create mode 100644 lib/functions/index.js diff --git a/index.js b/index.js index 4104178..6226625 100644 --- a/index.js +++ b/index.js @@ -3,6 +3,7 @@ const fs = require("fs"); const path = require("path"); const remark = require("remark"); +const { Strings } = require("./lib/functions"); const languages = require("./languages"); const commandLineArgs = require("command-line-args"); @@ -22,43 +23,6 @@ const excludes = [ "SUMMARY.md", ]; -/** - * Strip wrapped parenthesis from a string. - * @param {string} s - the string to process - * @returns {string} the stripped string if parens found, the input string if don't - */ -function stripParens(s) { - if (s.slice(0, 1) === "(" && s.slice(-1) === ")") return s.slice(1, -1); - return s; -} - -/** - * To String - * @param {any} o - the object to get it text representation - * @returns the `o` as string - */ -function toString(o) { - // null or undefined - if (o === null || o === void 0) return o; - if (typeof o === "string") return o; - // has a toString function in their prototype - if (typeof o.toString === "function") return o.toString(); - // as string in the latest intent - return String(o); -} - -/** - * Wraps a string between other that acts as token. - * @param {string} text - the text to wrap - * @param {string} token - the text to wrap with between - * @returns a string in the form `${token}${text}${token}` - */ -function wrapText(text, token = "") { - // avoid mix concatenate/sum string/numbers using array join hack - //return `${token}${text}${token}`; - return [token, token].join(String(text)); -} - /** * Parses the contents of a heading from remark-parse into a readable format. * @@ -77,16 +41,16 @@ function getSectionNameFromHeadingContent(children) { // meaningfull nodes // case "emphasis": - text += wrapText(walk(node.children, depth + 1), "_"); + case "strong": + text += Strings.templater(remarkTokenAST(node), { + text: walk(node.children, depth + 1), + }); break; case "inlineCode": - text += wrapText(node.value, "`"); - break; - case "strong": - text += wrapText(walk(node.children, depth + 1), "**"); - break; case "text": - text += node.value; + text += Strings.templater(remarkTokenAST(node), { + text: node.value, + }); break; // // skipped nodes @@ -117,7 +81,7 @@ function getLinkTextFromLinkNodes(children) { // visit nodes in depth const walk = (children, depth) => { // not AST, maybe plain text - if (!Array.isArray(children)) return toString(children); + if (!Array.isArray(children)) return Strings.toString(children); // AST children array nodes return children.reduce((text, node, index) => { if (!node || !node.type) return text; // not AST, maybe plain text @@ -125,25 +89,23 @@ function getLinkTextFromLinkNodes(children) { // // rebuild meaningfull nodes // - case "emphasis": - // {type: 'emphasis', children: [...], position: {...}} - text += wrapText(walk(node.children, depth + 1), "_"); - break; case "image": - // {type: 'image', title: '...', url: '...', alt: '...', position: {...}} - text += `![${node.alt || node.title}](${node.url})`; + text += Strings.templater(remarkTokenAST(node), { + text: node.alt || node.title, + url: node.url, + }); break; case "inlineCode": - // {type: 'inlineCode', value: '...', position: {...}} - text += wrapText(node.value, "`"); - break; - case "strong": - // {type: 'strong', children: [...], position: {...}} - text += wrapText(walk(node.children, depth + 1), "**"); - break; case "text": - // {type: 'text', value: '...', position: {...}} - text += node.value; + text += Strings.templater(remarkTokenAST(node), { + text: node.value, + }); + break; + case "emphasis": + case "strong": + text += Strings.templater(remarkTokenAST(node), { + text: walk(node.children, depth + 1), + }); break; // // skipped nodes @@ -164,6 +126,40 @@ function getLinkTextFromLinkNodes(children) { return walk(children, 0); } +/** + * Gets the template related with AST remark-parse node. + * @param {Object} node - AST node defined by remark-parse + * @returns {string} - the template string + */ +function remarkTokenAST(node) { + if (node && node.type) { + switch (node.type) { + case "emphasis": // {type: 'emphasis', children: [...], position: {...}} + return Strings.wrap("{{text}}", "_"); + case "heading": // {type: 'heading', depth: 1, children: [...], position: {...}} + return ["#".repeat(item.depth || 0), "{{text}}"].join(""); + case "image": // {type: 'image', title: '...', url: '...', alt: '...', position: {...}} + return "![{{text}}]({{url}})"; + case "inlineCode": // {type: 'inlineCode', value: '...', position: {...}} + return Strings.wrap("{{text}}", "`"); + case "link": // {type: 'link', title: '...', url: '...', children: [...], position: {...}} + return "[{{text}}]({{url}})"; + case "list": // {type: 'list', ordered: false, start: null, spread: false, children: [...], position: {...}} + case "listItem": // {type: 'listItem', spread: false, checked: null, children: [...], position: {...}} + // TODO: generate token for list/listItem + break; + case "strong": // {type: 'strong', children: [...], position: {...}} + return Strings.wrap("{{text}}", "**"); + case "html": // {type: 'html', value: '...', position: {...}} + case "text": // {type: 'text', value: '...', position: {...}} + return Strings.wrap("{{text}}"); // identity + default: + break; + } + } + throw new Error("Unrecognized remark node type: " + (node && node.type)); +} + /** * Parses a list item generated from remark-parse into a readable format. * @@ -212,7 +208,7 @@ function parseListItem(listItem) { // other links found if (entry.otherLinks === undefined) entry.otherLinks = []; entry.otherLinks.push({ - title: stripParens(getLinkTextFromLinkNodes(i.children)), + title: Strings.stripParens(getLinkTextFromLinkNodes(i.children)), url: i.url, }); // entry.otherLinks = [...entry.otherLinks, {title: i.children[0].value, url: i.url}]; // <-- i wish i could get this syntax to work with arrays @@ -249,7 +245,9 @@ function parseListItem(listItem) { s += i.value; } else { // finally, we have reached the end of the note - entry.notes.push(stripParens(s + i.value.slice(0, rightParen + 1))); + entry.notes.push( + Strings.stripParens(s + i.value.slice(0, rightParen + 1)) + ); s = ""; // this is a copypaste of another block of code. probably not a good thing tbh. leftParen = i.value.indexOf("("); diff --git a/lib/functions/Strings.js b/lib/functions/Strings.js new file mode 100644 index 0000000..71fddd0 --- /dev/null +++ b/lib/functions/Strings.js @@ -0,0 +1,61 @@ +/** + * Strip wrapped parenthesis from a string. + * @param {string} s - the string to process + * @returns {string} the stripped string if parens found, the input string if don't + */ +function stripParens(s) { + // null or undefined + if (s === null || s === void 0) return s; + // is wrapped by ( and )?, then unwrap + if (s.slice(0, 1) === "(" && s.slice(-1) === ")") return s.slice(1, -1); + // leave as it is + return s; +} + +/** + * Replaces a data tokens in a template string. + * @param {string} template - the template string + * @param {object} context - the data used to replace the tokens with + * @returns string replace + */ +function templater(template, context = {}) { + // replaceAll using a replacer function + return template.replace( + /{{([^{}]+)}}/g, // {{key}} + (matchedText, key) => context[key] || "" + ); +} + +/** + * To string + * @param {any} o - the object to get it text representation + * @returns {string} the `o` as string + */ +function toString(o) { + // null or undefined + if (o === null || o === void 0) return o; + if (typeof o === "string") return o; + // has a toString function in their prototype + if (typeof o.toString === "function") return o.toString(); + // as string in the latest intent + return String(o); +} + +/** + * Wraps a string between other that acts as token. + * @param {string} s - the text to wrap + * @param {string} token - the text to wrap with between + * @returns a string in the form `${token}${s}${token}` + */ +function wrap(s, token = "") { + // avoid mix concatenate/sum string/numbers using array join hack + //return `${token}${s}${token}`; + return [token, token].join(s); +} + +module.exports = { + stripParens, + templater, + toString, + wrap, +}; diff --git a/lib/functions/index.js b/lib/functions/index.js new file mode 100644 index 0000000..293aedc --- /dev/null +++ b/lib/functions/index.js @@ -0,0 +1,5 @@ +const Strings = require("./Strings"); + +module.exports = { + Strings, +}; From af804ee32958414dacd9019ac174958c153c179e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Ord=C3=A1s?= <3125580+davorpa@users.noreply.github.com> Date: Sun, 18 Sep 2022 14:30:45 +0200 Subject: [PATCH 08/10] register break and paragraph as valid AST types --- index.js | 3 +++ 1 file changed, 3 insertions(+) diff --git a/index.js b/index.js index 6226625..f2e3437 100644 --- a/index.js +++ b/index.js @@ -134,6 +134,8 @@ function getLinkTextFromLinkNodes(children) { function remarkTokenAST(node) { if (node && node.type) { switch (node.type) { + case "break": // {type: 'break', position: {...}} + return "
"; case "emphasis": // {type: 'emphasis', children: [...], position: {...}} return Strings.wrap("{{text}}", "_"); case "heading": // {type: 'heading', depth: 1, children: [...], position: {...}} @@ -151,6 +153,7 @@ function remarkTokenAST(node) { case "strong": // {type: 'strong', children: [...], position: {...}} return Strings.wrap("{{text}}", "**"); case "html": // {type: 'html', value: '...', position: {...}} + case "paragraph": // {type: 'paragraph', children: [...], position: {...}} case "text": // {type: 'text', value: '...', position: {...}} return Strings.wrap("{{text}}"); // identity default: From d9dacf2dd1167ddfe2341190b68e789e18593455 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Ord=C3=A1s?= <3125580+davorpa@users.noreply.github.com> Date: Sun, 18 Sep 2022 14:36:10 +0200 Subject: [PATCH 09/10] docs: fix jsdocs --- index.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/index.js b/index.js index f2e3437..96a44dc 100644 --- a/index.js +++ b/index.js @@ -70,12 +70,12 @@ function getSectionNameFromHeadingContent(children) { } /** - * Parses the contents of a heading from remark-parse into a readable format. + * Parses the contents of a link from remark-parse into a readable format. * * @param {Array} children - an array of AST items defined by remark-parse for - * the content of headings (H1..H7) + * the content of a link (A) * - * @returns {string} an string with the name of the section related with the input heading + * @returns {string} an string with the text of the related input link */ function getLinkTextFromLinkNodes(children) { // visit nodes in depth From f4b24bdefebe86f70b6c71ab5bb673524400a71a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Ord=C3=A1s?= <3125580+davorpa@users.noreply.github.com> Date: Tue, 20 Sep 2022 17:55:32 +0200 Subject: [PATCH 10/10] refactor: move `String.toString` -> `Objects.toString` --- index.js | 4 ++-- lib/functions/Objects.js | 19 +++++++++++++++++++ lib/functions/Strings.js | 16 ---------------- lib/functions/index.js | 2 ++ 4 files changed, 23 insertions(+), 18 deletions(-) create mode 100644 lib/functions/Objects.js diff --git a/index.js b/index.js index 96a44dc..25ec128 100644 --- a/index.js +++ b/index.js @@ -3,7 +3,7 @@ const fs = require("fs"); const path = require("path"); const remark = require("remark"); -const { Strings } = require("./lib/functions"); +const { Objects, Strings } = require("./lib/functions"); const languages = require("./languages"); const commandLineArgs = require("command-line-args"); @@ -81,7 +81,7 @@ function getLinkTextFromLinkNodes(children) { // visit nodes in depth const walk = (children, depth) => { // not AST, maybe plain text - if (!Array.isArray(children)) return Strings.toString(children); + if (!Array.isArray(children)) return Objects.toString(children); // AST children array nodes return children.reduce((text, node, index) => { if (!node || !node.type) return text; // not AST, maybe plain text diff --git a/lib/functions/Objects.js b/lib/functions/Objects.js new file mode 100644 index 0000000..9c1fa12 --- /dev/null +++ b/lib/functions/Objects.js @@ -0,0 +1,19 @@ +/** + * To string + * @param {any} o - the object to get it text representation + * @returns {string} the `o` as string + */ +function toString(o) { + // null or undefined + if (o === null || o === void 0) return o; + // is string + if (typeof o === "string") return o; + // has a toString function in their prototype + if (typeof o.toString === "function") return o.toString(); + // as string in the latest intent + return String(o); +} + +module.exports = { + toString, +}; diff --git a/lib/functions/Strings.js b/lib/functions/Strings.js index 71fddd0..a660b8b 100644 --- a/lib/functions/Strings.js +++ b/lib/functions/Strings.js @@ -26,21 +26,6 @@ function templater(template, context = {}) { ); } -/** - * To string - * @param {any} o - the object to get it text representation - * @returns {string} the `o` as string - */ -function toString(o) { - // null or undefined - if (o === null || o === void 0) return o; - if (typeof o === "string") return o; - // has a toString function in their prototype - if (typeof o.toString === "function") return o.toString(); - // as string in the latest intent - return String(o); -} - /** * Wraps a string between other that acts as token. * @param {string} s - the text to wrap @@ -56,6 +41,5 @@ function wrap(s, token = "") { module.exports = { stripParens, templater, - toString, wrap, }; diff --git a/lib/functions/index.js b/lib/functions/index.js index 293aedc..8664237 100644 --- a/lib/functions/index.js +++ b/lib/functions/index.js @@ -1,5 +1,7 @@ +const Objects = require("./Objects"); const Strings = require("./Strings"); module.exports = { + Objects, Strings, };