Run formatter and make correction to improve English listings

pull/3/head
Brogan Clements 2022-03-31 23:56:02 -04:00
parent ecbf40aa4c
commit 21dd110fe6
1 changed files with 242 additions and 258 deletions

500
index.js
View File

@ -7,16 +7,11 @@ const languages = require("./languages");
const commandLineArgs = require("command-line-args"); const commandLineArgs = require("command-line-args");
const optionDefinitions = [ const optionDefinitions = [
{ name: "input", multiple: true, defaultValue: ["./fpb/books"] }, { name: "input", multiple: true, defaultValue: ["./fpb/books"] },
{ name: "output", defaultValue: "./parser/fpb.json" }, { name: "output", defaultValue: "./parser/fpb.json" },
]; ];
const excludes = [ const excludes = ["README.md", "CONTRIBUTING.md", "CODE_OF_CONDUCT.md", "SUMMARY.md"];
"README.md",
"CONTRIBUTING.md",
"CODE_OF_CONDUCT.md",
"SUMMARY.md",
];
/** /**
* Parses a list item generated from remark-parse into a readable format. * Parses a list item generated from remark-parse into a readable format.
@ -32,102 +27,94 @@ const excludes = [
* @return {Object} Returns an Object containing details about the piece of media. * @return {Object} Returns an Object containing details about the piece of media.
*/ */
function parseListItem(listItem) { function parseListItem(listItem) {
let stripParens = function (s) { let stripParens = function (s) {
if (s.slice(0, 1) === "(" && s.slice(-1) === ")") return s.slice(1, -1); if (s.slice(0, 1) === "(" && s.slice(-1) === ")") return s.slice(1, -1);
return s; return s;
}; };
let entry = {}; let entry = {};
let s = ""; // If we need to build up a string over multiple listItem elements let s = ""; // If we need to build up a string over multiple listItem elements
let leftParen, let leftParen,
rightParen = -1; // If we need to parse parenthesized text rightParen = -1; // If we need to parse parenthesized text
const [link, ...otherStuff] = listItem; // head of listItem = url, the rest is "other stuff" const [link, ...otherStuff] = listItem; // head of listItem = url, the rest is "other stuff"
entry.url = link.url; entry.url = link.url;
entry.title = link.children[0].value; entry.title = link.children[0].value;
// remember to get OTHER STUFF!! remember there may be multiple links! // remember to get OTHER STUFF!! remember there may be multiple links!
for (let i of otherStuff) { for (let i of otherStuff) {
if (s === "") { if (s === "") {
// this is almost always, except for when we are parsing a multi-element note // this is almost always, except for when we are parsing a multi-element note
if (i.type === "text" && i.value.slice(0, 3) === " - ") { if (i.type === "text" && i.value.slice(0, 3) === " - ") {
// author found // author found
let parenIndex = i.value.indexOf("("); let parenIndex = i.value.indexOf("(");
if (parenIndex === -1) { if (parenIndex === -1) {
entry.author = i.value.slice(3).trim(); entry.author = i.value.slice(3).trim();
} else {
entry.author = i.value.slice(3, parenIndex).trim(); // go from " - " until the first "("
}
}
if (
i.type === "emphasis" &&
i.children[0].value.slice(0, 1) === "(" &&
i.children[0].value.slice(-1) === ")"
) {
// access notes found (currently assumes exactly one child, so far this is always the case)
entry.accessNotes = i.children[0].value.slice(1, -1);
}
if (i.type === "link") {
// other links found
if (entry.otherLinks === undefined) entry.otherLinks = [];
entry.otherLinks.push({
title: stripParens(i.children[0].value),
url: i.url,
});
// entry.otherLinks = [...entry.otherLinks, {title: i.children[0].value, url: i.url}]; // <-- i wish i could get this syntax to work with arrays
}
if (i.type === "text" && i.value.indexOf("(") !== -1) {
// notes found (currently assumes no nested parentheses)
if (entry.notes === undefined) entry.notes = [];
leftParen = i.value.indexOf("(");
while (leftParen != -1) {
rightParen = i.value.indexOf(")", leftParen);
if (rightParen === -1) {
// there must be some *emphasis* found
s += i.value.slice(leftParen);
break;
}
entry.notes.push(i.value.slice(leftParen + 1, rightParen));
leftParen = i.value.indexOf("(", rightParen);
}
}
} else { } else {
// for now we assume that all previous ifs are mutually exclusive with this, may polish later entry.author = i.value.slice(3, parenIndex).trim(); // go from " - " until the first "("
if (i.type === "emphasis") {
// this is the emphasis, add it in boldface and move on
s += "*" + i.children[0].value + "*";
} else if (i.type === "link") {
// something has gone terribly wrong. this book must be viewed and edited manually.
entry.manualReviewRequired = true;
break;
} else {
// hopefully this is the end of the note
let rightParen = i.value.indexOf(")");
if (rightParen === -1) {
// we have to go AGAIN
s += i.value;
} else {
// finally, we have reached the end of the note
entry.notes.push(
stripParens(s + i.value.slice(0, rightParen + 1))
);
s = "";
// this is a copypaste of another block of code. probably not a good thing tbh.
leftParen = i.value.indexOf("(");
while (leftParen != -1) {
rightParen = i.value.indexOf(")", leftParen);
if (rightParen === -1) {
// there must be some *emphasis* found
s += i.value.slice(leftParen);
break;
}
entry.notes.push(
i.value.slice(leftParen + 1, rightParen)
);
leftParen = i.value.indexOf("(", rightParen);
}
}
}
} }
}
if (i.type === "emphasis" && i.children[0].value.slice(0, 1) === "(" && i.children[0].value.slice(-1) === ")") {
// access notes found (currently assumes exactly one child, so far this is always the case)
entry.accessNotes = i.children[0].value.slice(1, -1);
}
if (i.type === "link") {
// other links found
if (entry.otherLinks === undefined) entry.otherLinks = [];
entry.otherLinks.push({
title: stripParens(i.children[0].value),
url: i.url,
});
// entry.otherLinks = [...entry.otherLinks, {title: i.children[0].value, url: i.url}]; // <-- i wish i could get this syntax to work with arrays
}
if (i.type === "text" && i.value.indexOf("(") !== -1) {
// notes found (currently assumes no nested parentheses)
if (entry.notes === undefined) entry.notes = [];
leftParen = i.value.indexOf("(");
while (leftParen != -1) {
rightParen = i.value.indexOf(")", leftParen);
if (rightParen === -1) {
// there must be some *emphasis* found
s += i.value.slice(leftParen);
break;
}
entry.notes.push(i.value.slice(leftParen + 1, rightParen));
leftParen = i.value.indexOf("(", rightParen);
}
}
} else {
// for now we assume that all previous ifs are mutually exclusive with this, may polish later
if (i.type === "emphasis") {
// this is the emphasis, add it in boldface and move on
s += "*" + i.children[0].value + "*";
} else if (i.type === "link") {
// something has gone terribly wrong. this book must be viewed and edited manually.
entry.manualReviewRequired = true;
break;
} else {
// hopefully this is the end of the note
let rightParen = i.value.indexOf(")");
if (rightParen === -1) {
// we have to go AGAIN
s += i.value;
} else {
// finally, we have reached the end of the note
entry.notes.push(stripParens(s + i.value.slice(0, rightParen + 1)));
s = "";
// this is a copypaste of another block of code. probably not a good thing tbh.
leftParen = i.value.indexOf("(");
while (leftParen != -1) {
rightParen = i.value.indexOf(")", leftParen);
if (rightParen === -1) {
// there must be some *emphasis* found
s += i.value.slice(leftParen);
break;
}
entry.notes.push(i.value.slice(leftParen + 1, rightParen));
leftParen = i.value.indexOf("(", rightParen);
}
}
}
} }
return entry; }
return entry;
} }
/** /**
@ -137,16 +124,21 @@ function parseListItem(listItem) {
* @returns {String} The language of the file * @returns {String} The language of the file
*/ */
function getLangFromFilename(filename) { function getLangFromFilename(filename) {
const dash = filename.lastIndexOf("-"); const dash = filename.lastIndexOf("-");
const dot = filename.lastIndexOf("."); const dot = filename.lastIndexOf(".");
let lang = filename.slice(dash + 1, dot).replace(/_/, "-"); let lang = filename.slice(dash + 1, dot).replace(/_/, "-");
if (!languages.hasOwnProperty(lang)) { let isSubject = false;
if (/^[a-z]{2}$/.test(lang) || /^[a-z]{2}-[A-Z]{2}$/.test(lang)) { if (!languages.hasOwnProperty(lang)) {
return ""; if (/^[a-z]{2}$/.test(lang) || /^[a-z]{2}-[A-Z]{2}$/.test(lang)) {
} return "";
lang = "en-US";
} }
return lang; // console.log(lang);
if (lang === "subjects") {
isSubject = true;
}
lang = "en";
}
return { lang: lang, isSubject: isSubject };
} }
/** /**
@ -155,13 +147,10 @@ function getLangFromFilename(filename) {
* @returns A list of all md files in a directory, excluding those in the excludes array * @returns A list of all md files in a directory, excluding those in the excludes array
*/ */
function getFilesFromDir(dir) { function getFilesFromDir(dir) {
return fs return fs
.readdirSync(dir) .readdirSync(dir)
.filter( .filter((file) => path.extname(file) === ".md" && excludes.indexOf(file) === -1)
(file) => .map((file) => path.join(dir, file));
path.extname(file) === ".md" && excludes.indexOf(file) === -1
)
.map((file) => path.join(dir, file));
} }
/** /**
@ -170,9 +159,9 @@ function getFilesFromDir(dir) {
* @returns {String} The extracted directory name * @returns {String} The extracted directory name
*/ */
function getMediaFromDirectory(dir) { function getMediaFromDirectory(dir) {
const slash = dir.lastIndexOf("/"); const slash = dir.lastIndexOf("/");
let mediaType = dir.slice(2, slash); let mediaType = dir.slice(2, slash);
return mediaType; return mediaType;
} }
/** /**
@ -181,77 +170,71 @@ function getMediaFromDirectory(dir) {
* @returns {object} Json object of entries in the md file * @returns {object} Json object of entries in the md file
*/ */
function parseMarkdown(doc) { function parseMarkdown(doc) {
let tree = remark.parse(doc).children; let tree = remark.parse(doc).children;
let sections = []; // This will go into root object later let sections = []; // This will go into root object later
let errors = []; let errors = [];
let currentDepth = 3; // used to determine if the last heading was an h4 or h3 let currentDepth = 3; // used to determine if the last heading was an h4 or h3
// find where Index ends // find where Index ends
// probably could be done better, review later // probably could be done better, review later
let i = 0, let i = 0,
count = 0; count = 0;
for (i; i < tree.length; i++) { for (i; i < tree.length; i++) {
if (tree[i].type == "heading" && tree[i].depth == "3") count++; if (tree[i].type == "heading" && tree[i].depth == "3") count++;
if (count == 2) break; if (count == 2) break;
} }
tree.slice(i).forEach((item) => { tree.slice(i).forEach((item) => {
// Start iterating after Index // Start iterating after Index
try { try {
if (item.type == "heading" && item.children[0].value == "Index") if (item.type == "heading" && item.children[0].value == "Index") return;
return;
if (item.type == "heading") { if (item.type == "heading") {
if (item.depth == 3) { if (item.depth == 3) {
// Heading is an h3 // Heading is an h3
currentDepth = 3; currentDepth = 3;
let newSection = { let newSection = {
section: item.children[0].value, // Get the name of the section section: item.children[0].value, // Get the name of the section
entries: [], entries: [],
subsections: [], subsections: [],
}; };
sections.push(newSection); // Push the section to the output array sections.push(newSection); // Push the section to the output array
} else if (item.depth == 4) { } else if (item.depth == 4) {
// Heading is an h4 // Heading is an h4
currentDepth = 4; currentDepth = 4;
let newSubsection = { let newSubsection = {
section: item.children[0].value, // Get the name of the subsection section: item.children[0].value, // Get the name of the subsection
entries: [], entries: [],
}; };
sections[sections.length - 1].subsections.push( sections[sections.length - 1].subsections.push(newSubsection); // Add to subsection array of most recent h3
newSubsection
); // Add to subsection array of most recent h3
}
} else if (item.type == "list") {
item.children.forEach((listItem) => {
let content = listItem.children[0].children; // gets array containing a remark-link and a remark-paragraph
// if(content[0].type !== 'link'){ // SKIPS OVER bad formatting
// return;
// }
if (currentDepth == 3) {
let contentJson = parseListItem(content);
sections[sections.length - 1].entries.push(contentJson); // add the entry to most recent h3
} else if (currentDepth == 4) {
let lastSection = sections.length - 1;
let lastSubSec =
sections[lastSection].subsections.length - 1;
let contentJson = parseListItem(content);
sections[lastSection].subsections[
lastSubSec
].entries.push(contentJson); // add entry to most recent h4
}
});
}
} catch (e) {
// if there was an error while parsing, print the error to an error log
// looks really ugly, maybe try to refine output later
let errStart = JSON.stringify(item.position.start.line);
let errEnd = JSON.stringify(item.position.end.line);
str = `Error at line ${errStart} - line ${errEnd}.`;
errors.push(str);
} }
}); } else if (item.type == "list") {
return { sections: sections, errors: errors }; item.children.forEach((listItem) => {
let content = listItem.children[0].children; // gets array containing a remark-link and a remark-paragraph
// if(content[0].type !== 'link'){ // SKIPS OVER bad formatting
// return;
// }
if (currentDepth == 3) {
let contentJson = parseListItem(content);
sections[sections.length - 1].entries.push(contentJson); // add the entry to most recent h3
} else if (currentDepth == 4) {
let lastSection = sections.length - 1;
let lastSubSec = sections[lastSection].subsections.length - 1;
let contentJson = parseListItem(content);
sections[lastSection].subsections[lastSubSec].entries.push(contentJson); // add entry to most recent h4
}
});
}
} catch (e) {
// if there was an error while parsing, print the error to an error log
// looks really ugly, maybe try to refine output later
let errStart = JSON.stringify(item.position.start.line);
let errEnd = JSON.stringify(item.position.end.line);
str = `Error at line ${errStart} - line ${errEnd}.`;
errors.push(str);
}
});
return { sections: sections, errors: errors };
} }
/** /**
@ -263,47 +246,48 @@ function parseMarkdown(doc) {
* an error occur while parsing. * an error occur while parsing.
*/ */
function parseDirectory(directory) { function parseDirectory(directory) {
let dirChildren = []; // this will hold the output of each markdown doc let dirChildren = []; // this will hold the output of each markdown doc
let dirErrors = []; // contains errors for a given directory let dirErrors = []; // contains errors for a given directory
let mediaType = getMediaFromDirectory(directory); let mediaType = getMediaFromDirectory(directory);
const filenames = getFilesFromDir(path.resolve(directory)); const filenames = getFilesFromDir(path.resolve(directory));
filenames.forEach((filename) => { filenames.forEach((filename) => {
const doc = fs.readFileSync(filename); const doc = fs.readFileSync(filename);
let { sections, errors } = parseMarkdown(doc); // parse the markdown document let { sections, errors } = parseMarkdown(doc); // parse the markdown document
const langCode = getLangFromFilename(filename); const { lang, isSubject } = getLangFromFilename(filename);
// Entries // Entries
let docJson = { let docJson = {
language: { language: {
code: langCode, code: lang,
name: languages[langCode], name: languages[lang],
}, },
index: {}, index: {},
sections: sections, sections: sections,
};
dirChildren.push(docJson);
// Errors
if (errors.length !== 0) {
let docErrors = {
file: path.basename(filename),
errors: errors,
};
dirErrors.push(docErrors);
}
});
// File entries
let dirJson = {
type: mediaType,
index: {},
children: dirChildren,
}; };
if (lang === "en") docJson.language.isSubject = isSubject;
dirChildren.push(docJson);
// Errors // Errors
if (errors.length !== 0) {
let docErrors = {
file: path.basename(filename),
errors: errors,
};
dirErrors.push(docErrors);
}
});
return { dirJson: dirJson, dirErrors: dirErrors }; // File entries
let dirJson = {
type: mediaType,
index: {},
children: dirChildren,
};
// Errors
return { dirJson: dirJson, dirErrors: dirErrors };
} }
/** /**
@ -313,45 +297,45 @@ function parseDirectory(directory) {
* @param {String} output A string for the path that the output should be placed in * @param {String} output A string for the path that the output should be placed in
*/ */
function parseAll(directories, output) { function parseAll(directories, output) {
let rootChildren = []; // this will hold the output of each directory let rootChildren = []; // this will hold the output of each directory
let rootErrors = []; let rootErrors = [];
directories.forEach((directory) => { directories.forEach((directory) => {
let { dirJson, dirErrors } = parseDirectory(directory); let { dirJson, dirErrors } = parseDirectory(directory);
rootChildren.push(dirJson); rootChildren.push(dirJson);
if (dirErrors.length !== 0) { if (dirErrors.length !== 0) {
rootErrors.push({ rootErrors.push({
directory: path.basename(directory), directory: path.basename(directory),
files: dirErrors, files: dirErrors,
}); });
} }
}); });
// All entries // All entries
let rootJson = { let rootJson = {
type: "root", type: "root",
children: rootChildren, children: rootChildren,
}; };
// Errors // Errors
let allErrors = { let allErrors = {
type: "root", type: "root",
directories: rootErrors, directories: rootErrors,
}; };
fs.writeFileSync(output, JSON.stringify(rootJson, null, 3), function (err) { fs.writeFileSync(output, JSON.stringify(rootJson, null, 3), function (err) {
if (err) { if (err) {
console.log(err); console.log(err);
} }
}); });
fs.writeFileSync( // fs.writeFileSync(
"./parser/fpb.log", // "./parser/fpb.log",
JSON.stringify(allErrors, null, 3), // JSON.stringify(allErrors, null, 3),
function (err) { // function (err) {
if (err) { // if (err) {
console.log(err); // console.log(err);
} // }
} // }
); // );
} }
let { input, output } = commandLineArgs(optionDefinitions); let { input, output } = commandLineArgs(optionDefinitions);