Run formatter and make corrections to improve English listings

2022-03-31 23:56:02 -04:00 · 2022-03-31 23:56:02 -04:00 · 21dd110fe6
parent ecbf40aa4c
commit 21dd110fe6
1 changed files with 242 additions and 258 deletions
--- a/index.js
+++ b/index.js
@ -7,16 +7,11 @@ const languages = require("./languages");
 const commandLineArgs = require("command-line-args");
 const optionDefinitions = [
-    { name: "input", multiple: true, defaultValue: ["./fpb/books"] },
+  { name: "input", multiple: true, defaultValue: ["./fpb/books"] },
-    { name: "output", defaultValue: "./parser/fpb.json" },
+  { name: "output", defaultValue: "./parser/fpb.json" },
 ];
-const excludes = [
+const excludes = ["README.md", "CONTRIBUTING.md", "CODE_OF_CONDUCT.md", "SUMMARY.md"];
    "README.md",
    "CONTRIBUTING.md",
    "CODE_OF_CONDUCT.md",
    "SUMMARY.md",
 ];
 /**
 * Parses a list item generated from remark-parse into a readable format.
@ -32,102 +27,94 @@ const excludes = [
 * @return {Object} Returns an Object containing details about the piece of media.
 */
 function parseListItem(listItem) {
-    let stripParens = function (s) {
+  let stripParens = function (s) {
-        if (s.slice(0, 1) === "(" && s.slice(-1) === ")") return s.slice(1, -1);
+    if (s.slice(0, 1) === "(" && s.slice(-1) === ")") return s.slice(1, -1);
-        return s;
+    return s;
-    };
+  };
-    let entry = {};
+  let entry = {};
-    let s = ""; // If we need to build up a string over multiple listItem elements
+  let s = ""; // If we need to build up a string over multiple listItem elements
-    let leftParen,
+  let leftParen,
-        rightParen = -1; // If we need to parse parenthesized text
+    rightParen = -1; // If we need to parse parenthesized text
-    const [link, ...otherStuff] = listItem; // head of listItem = url, the rest is "other stuff"
+  const [link, ...otherStuff] = listItem; // head of listItem = url, the rest is "other stuff"
-    entry.url = link.url;
+  entry.url = link.url;
-    entry.title = link.children[0].value;
+  entry.title = link.children[0].value;
-    // remember to get OTHER STUFF!! remember there may be multiple links!
+  // remember to get OTHER STUFF!! remember there may be multiple links!
-    for (let i of otherStuff) {
+  for (let i of otherStuff) {
-        if (s === "") {
+    if (s === "") {
-            // this is almost always, except for when we are parsing a multi-element note
+      // this is almost always, except for when we are parsing a multi-element note
-            if (i.type === "text" && i.value.slice(0, 3) === " - ") {
+      if (i.type === "text" && i.value.slice(0, 3) === " - ") {
-                // author found
+        // author found
-                let parenIndex = i.value.indexOf("(");
+        let parenIndex = i.value.indexOf("(");
-                if (parenIndex === -1) {
+        if (parenIndex === -1) {
-                    entry.author = i.value.slice(3).trim();
+          entry.author = i.value.slice(3).trim();
                } else {
                    entry.author = i.value.slice(3, parenIndex).trim(); // go from " - " until the first "("
                }
            }
            if (
                i.type === "emphasis" &&
                i.children[0].value.slice(0, 1) === "(" &&
                i.children[0].value.slice(-1) === ")"
            ) {
                // access notes found (currently assumes exactly one child, so far this is always the case)
                entry.accessNotes = i.children[0].value.slice(1, -1);
            }
            if (i.type === "link") {
                // other links found
                if (entry.otherLinks === undefined) entry.otherLinks = [];
                entry.otherLinks.push({
                    title: stripParens(i.children[0].value),
                    url: i.url,
                });
                // entry.otherLinks = [...entry.otherLinks, {title: i.children[0].value, url: i.url}];      // <-- i wish i could get this syntax to work with arrays
            }
            if (i.type === "text" && i.value.indexOf("(") !== -1) {
                // notes found (currently assumes no nested parentheses)
                if (entry.notes === undefined) entry.notes = [];
                leftParen = i.value.indexOf("(");
                while (leftParen != -1) {
                    rightParen = i.value.indexOf(")", leftParen);
                    if (rightParen === -1) {
                        // there must be some *emphasis* found
                        s += i.value.slice(leftParen);
                        break;
                    }
                    entry.notes.push(i.value.slice(leftParen + 1, rightParen));
                    leftParen = i.value.indexOf("(", rightParen);
                }
            }
        } else {
-            // for now we assume that all previous ifs are mutually exclusive with this, may polish later
+          entry.author = i.value.slice(3, parenIndex).trim(); // go from " - " until the first "("
            if (i.type === "emphasis") {
                // this is the emphasis, add it in boldface and move on
                s += "*" + i.children[0].value + "*";
            } else if (i.type === "link") {
                // something has gone terribly wrong. this book must be viewed and edited manually.
                entry.manualReviewRequired = true;
                break;
            } else {
                // hopefully this is the end of the note
                let rightParen = i.value.indexOf(")");
                if (rightParen === -1) {
                    // we have to go AGAIN
                    s += i.value;
                } else {
                    // finally, we have reached the end of the note
                    entry.notes.push(
                        stripParens(s + i.value.slice(0, rightParen + 1))
                    );
                    s = "";
                    // this is a copypaste of another block of code. probably not a good thing tbh.
                    leftParen = i.value.indexOf("(");
                    while (leftParen != -1) {
                        rightParen = i.value.indexOf(")", leftParen);
                        if (rightParen === -1) {
                            // there must be some *emphasis* found
                            s += i.value.slice(leftParen);
                            break;
                        }
                        entry.notes.push(
                            i.value.slice(leftParen + 1, rightParen)
                        );
                        leftParen = i.value.indexOf("(", rightParen);
                    }
                }
            }
        }
      }
      if (i.type === "emphasis" && i.children[0].value.slice(0, 1) === "(" && i.children[0].value.slice(-1) === ")") {
        // access notes found (currently assumes exactly one child, so far this is always the case)
        entry.accessNotes = i.children[0].value.slice(1, -1);
      }
      if (i.type === "link") {
        // other links found
        if (entry.otherLinks === undefined) entry.otherLinks = [];
        entry.otherLinks.push({
          title: stripParens(i.children[0].value),
          url: i.url,
        });
        // entry.otherLinks = [...entry.otherLinks, {title: i.children[0].value, url: i.url}];      // <-- i wish i could get this syntax to work with arrays
      }
      if (i.type === "text" && i.value.indexOf("(") !== -1) {
        // notes found (currently assumes no nested parentheses)
        if (entry.notes === undefined) entry.notes = [];
        leftParen = i.value.indexOf("(");
        while (leftParen != -1) {
          rightParen = i.value.indexOf(")", leftParen);
          if (rightParen === -1) {
            // there must be some *emphasis* found
            s += i.value.slice(leftParen);
            break;
          }
          entry.notes.push(i.value.slice(leftParen + 1, rightParen));
          leftParen = i.value.indexOf("(", rightParen);
        }
      }
    } else {
      // for now we assume that all previous ifs are mutually exclusive with this, may polish later
      if (i.type === "emphasis") {
        // this is the emphasis, add it in boldface and move on
        s += "*" + i.children[0].value + "*";
      } else if (i.type === "link") {
        // something has gone terribly wrong. this book must be viewed and edited manually.
        entry.manualReviewRequired = true;
        break;
      } else {
        // hopefully this is the end of the note
        let rightParen = i.value.indexOf(")");
        if (rightParen === -1) {
          // we have to go AGAIN
          s += i.value;
        } else {
          // finally, we have reached the end of the note
          entry.notes.push(stripParens(s + i.value.slice(0, rightParen + 1)));
          s = "";
          // this is a copypaste of another block of code. probably not a good thing tbh.
          leftParen = i.value.indexOf("(");
          while (leftParen != -1) {
            rightParen = i.value.indexOf(")", leftParen);
            if (rightParen === -1) {
              // there must be some *emphasis* found
              s += i.value.slice(leftParen);
              break;
            }
            entry.notes.push(i.value.slice(leftParen + 1, rightParen));
            leftParen = i.value.indexOf("(", rightParen);
          }
        }
      }
    }
-    return entry;
+  }
  return entry;
 }
 /**
@ -137,16 +124,21 @@ function parseListItem(listItem) {
 * @returns {{lang: String, isSubject: Boolean}} The language code derived from the filename, and whether the file is a subjects listing
 */
 function getLangFromFilename(filename) {
-    const dash = filename.lastIndexOf("-");
+  const dash = filename.lastIndexOf("-");
-    const dot = filename.lastIndexOf(".");
+  const dot = filename.lastIndexOf(".");
-    let lang = filename.slice(dash + 1, dot).replace(/_/, "-");
+  let lang = filename.slice(dash + 1, dot).replace(/_/, "-");
-    if (!languages.hasOwnProperty(lang)) {
+  let isSubject = false;
-        if (/^[a-z]{2}$/.test(lang) || /^[a-z]{2}-[A-Z]{2}$/.test(lang)) {
+  if (!languages.hasOwnProperty(lang)) {
-            return "";
+    if (/^[a-z]{2}$/.test(lang) || /^[a-z]{2}-[A-Z]{2}$/.test(lang)) {
-        }
+      return "";
        lang = "en-US";
    }
-    return lang;
+    // console.log(lang);
    if (lang === "subjects") {
      isSubject = true;
    }
    lang = "en";
  }
  return { lang: lang, isSubject: isSubject };
 }
 /**
@ -155,13 +147,10 @@ function getLangFromFilename(filename) {
 * @returns A list of all md files in a directory, excluding those in the excludes array
 */
 function getFilesFromDir(dir) {
-    return fs
+  return fs
-        .readdirSync(dir)
+    .readdirSync(dir)
-        .filter(
+    .filter((file) => path.extname(file) === ".md" && excludes.indexOf(file) === -1)
-            (file) =>
+    .map((file) => path.join(dir, file));
                path.extname(file) === ".md" && excludes.indexOf(file) === -1
        )
        .map((file) => path.join(dir, file));
 }
 /**
@ -170,9 +159,9 @@ function getFilesFromDir(dir) {
 * @returns {String} The extracted directory name
 */
 function getMediaFromDirectory(dir) {
-    const slash = dir.lastIndexOf("/");
+  const slash = dir.lastIndexOf("/");
-    let mediaType = dir.slice(2, slash);
+  let mediaType = dir.slice(2, slash);
-    return mediaType;
+  return mediaType;
 }
 /**
@ -181,77 +170,71 @@ function getMediaFromDirectory(dir) {
 * @returns {object} Json object of entries in the md file
 */
 function parseMarkdown(doc) {
-    let tree = remark.parse(doc).children;
+  let tree = remark.parse(doc).children;
-    let sections = []; // This will go into root object later
+  let sections = []; // This will go into root object later
-    let errors = [];
+  let errors = [];
-    let currentDepth = 3; // used to determine if the last heading was an h4 or h3
+  let currentDepth = 3; // used to determine if the last heading was an h4 or h3
-    // find where Index ends
+  // find where Index ends
-    // probably could be done better, review later
+  // probably could be done better, review later
-    let i = 0,
+  let i = 0,
-        count = 0;
+    count = 0;
-    for (i; i < tree.length; i++) {
+  for (i; i < tree.length; i++) {
-        if (tree[i].type == "heading" && tree[i].depth == "3") count++;
+    if (tree[i].type == "heading" && tree[i].depth == "3") count++;
-        if (count == 2) break;
+    if (count == 2) break;
-    }
+  }
-    tree.slice(i).forEach((item) => {
+  tree.slice(i).forEach((item) => {
-        // Start iterating after Index
+    // Start iterating after Index
-        try {
+    try {
-            if (item.type == "heading" && item.children[0].value == "Index")
+      if (item.type == "heading" && item.children[0].value == "Index") return;
                return;
-            if (item.type == "heading") {
+      if (item.type == "heading") {
-                if (item.depth == 3) {
+        if (item.depth == 3) {
-                    // Heading is an h3
+          // Heading is an h3
-                    currentDepth = 3;
+          currentDepth = 3;
-                    let newSection = {
+          let newSection = {
-                        section: item.children[0].value, // Get the name of the section
+            section: item.children[0].value, // Get the name of the section
-                        entries: [],
+            entries: [],
-                        subsections: [],
+            subsections: [],
-                    };
+          };
-                    sections.push(newSection); // Push the section to the output array
+          sections.push(newSection); // Push the section to the output array
-                } else if (item.depth == 4) {
+        } else if (item.depth == 4) {
-                    // Heading is an h4
+          // Heading is an h4
-                    currentDepth = 4;
+          currentDepth = 4;
-                    let newSubsection = {
+          let newSubsection = {
-                        section: item.children[0].value, // Get the name of the subsection
+            section: item.children[0].value, // Get the name of the subsection
-                        entries: [],
+            entries: [],
-                    };
+          };
-                    sections[sections.length - 1].subsections.push(
+          sections[sections.length - 1].subsections.push(newSubsection); // Add to subsection array of most recent h3
                        newSubsection
                    ); // Add to subsection array of most recent h3
                }
            } else if (item.type == "list") {
                item.children.forEach((listItem) => {
                    let content = listItem.children[0].children; // gets array containing a remark-link and a remark-paragraph
                    // if(content[0].type !== 'link'){ // SKIPS OVER bad formatting
                    //     return;
                    // }
                    if (currentDepth == 3) {
                        let contentJson = parseListItem(content);
                        sections[sections.length - 1].entries.push(contentJson); // add the entry to most recent h3
                    } else if (currentDepth == 4) {
                        let lastSection = sections.length - 1;
                        let lastSubSec =
                            sections[lastSection].subsections.length - 1;
                        let contentJson = parseListItem(content);
                        sections[lastSection].subsections[
                            lastSubSec
                        ].entries.push(contentJson); // add entry to most recent h4
                    }
                });
            }
        } catch (e) {
            // if there was an error while parsing, print the error to an error log
            // looks really ugly, maybe try to refine output later
            let errStart = JSON.stringify(item.position.start.line);
            let errEnd = JSON.stringify(item.position.end.line);
            str = `Error at line ${errStart} - line ${errEnd}.`;
            errors.push(str);
        }
-    });
+      } else if (item.type == "list") {
-    return { sections: sections, errors: errors };
+        item.children.forEach((listItem) => {
          let content = listItem.children[0].children; // gets array containing a remark-link and a remark-paragraph
          // if(content[0].type !== 'link'){ // SKIPS OVER bad formatting
          //     return;
          // }
          if (currentDepth == 3) {
            let contentJson = parseListItem(content);
            sections[sections.length - 1].entries.push(contentJson); // add the entry to most recent h3
          } else if (currentDepth == 4) {
            let lastSection = sections.length - 1;
            let lastSubSec = sections[lastSection].subsections.length - 1;
            let contentJson = parseListItem(content);
            sections[lastSection].subsections[lastSubSec].entries.push(contentJson); // add entry to most recent h4
          }
        });
      }
    } catch (e) {
      // if there was an error while parsing, print the error to an error log
      // looks really ugly, maybe try to refine output later
      let errStart = JSON.stringify(item.position.start.line);
      let errEnd = JSON.stringify(item.position.end.line);
      str = `Error at line ${errStart} - line ${errEnd}.`;
      errors.push(str);
    }
  });
  return { sections: sections, errors: errors };
 }
 /**
@ -263,47 +246,48 @@ function parseMarkdown(doc) {
 *                   an error occur while parsing.
 */
 function parseDirectory(directory) {
-    let dirChildren = []; // this will hold the output each markdown doc
+  let dirChildren = []; // this will hold the output each markdown doc
-    let dirErrors = []; //contains error for a given directory
+  let dirErrors = []; //contains error for a given directory
-    let mediaType = getMediaFromDirectory(directory);
+  let mediaType = getMediaFromDirectory(directory);
-    const filenames = getFilesFromDir(path.resolve(directory));
+  const filenames = getFilesFromDir(path.resolve(directory));
-    filenames.forEach((filename) => {
+  filenames.forEach((filename) => {
-        const doc = fs.readFileSync(filename);
+    const doc = fs.readFileSync(filename);
-        let { sections, errors } = parseMarkdown(doc); // parse the markdown document
+    let { sections, errors } = parseMarkdown(doc); // parse the markdown document
-        const langCode = getLangFromFilename(filename);
+    const { lang, isSubject } = getLangFromFilename(filename);
-        // Entries
+    // Entries
-        let docJson = {
+    let docJson = {
-            language: {
+      language: {
-                code: langCode,
+        code: lang,
-                name: languages[langCode],
+        name: languages[lang],
-            },
+      },
-            index: {},
+      index: {},
-            sections: sections,
+      sections: sections,
        };
        dirChildren.push(docJson);
        // Errors
        if (errors.length !== 0) {
            let docErrors = {
                file: path.basename(filename),
                errors: errors,
            };
            dirErrors.push(docErrors);
        }
    });
    // File entries
    let dirJson = {
        type: mediaType,
        index: {},
        children: dirChildren,
    };
    if (lang === "en") docJson.language.isSubject = isSubject;
    dirChildren.push(docJson);
    // Errors
    if (errors.length !== 0) {
      let docErrors = {
        file: path.basename(filename),
        errors: errors,
      };
      dirErrors.push(docErrors);
    }
  });
-    return { dirJson: dirJson, dirErrors: dirErrors };
+  // File entries
  let dirJson = {
    type: mediaType,
    index: {},
    children: dirChildren,
  };
  // Errors
  return { dirJson: dirJson, dirErrors: dirErrors };
 }
 /**
@ -313,45 +297,45 @@ function parseDirectory(directory) {
 * @param {String} output A string for the path that the output should be placed in
 */
 function parseAll(directories, output) {
-    let rootChildren = []; // this will hold the output of each directory
+  let rootChildren = []; // this will hold the output of each directory
-    let rootErrors = [];
+  let rootErrors = [];
-    directories.forEach((directory) => {
+  directories.forEach((directory) => {
-        let { dirJson, dirErrors } = parseDirectory(directory);
+    let { dirJson, dirErrors } = parseDirectory(directory);
-        rootChildren.push(dirJson);
+    rootChildren.push(dirJson);
-        if (dirErrors.length !== 0) {
+    if (dirErrors.length !== 0) {
-            rootErrors.push({
+      rootErrors.push({
-                directory: path.basename(directory),
+        directory: path.basename(directory),
-                files: dirErrors,
+        files: dirErrors,
-            });
+      });
-        }
+    }
-    });
+  });
-    // All entries
+  // All entries
-    let rootJson = {
+  let rootJson = {
-        type: "root",
+    type: "root",
-        children: rootChildren,
+    children: rootChildren,
-    };
+  };
-    // Errors
+  // Errors
-    let allErrors = {
+  let allErrors = {
-        type: "root",
+    type: "root",
-        directories: rootErrors,
+    directories: rootErrors,
-    };
+  };
-    fs.writeFileSync(output, JSON.stringify(rootJson, null, 3), function (err) {
+  fs.writeFileSync(output, JSON.stringify(rootJson, null, 3), function (err) {
-        if (err) {
+    if (err) {
-            console.log(err);
+      console.log(err);
-        }
+    }
-    });
+  });
-    fs.writeFileSync(
+  // fs.writeFileSync(
-        "./parser/fpb.log",
+  //     "./parser/fpb.log",
-        JSON.stringify(allErrors, null, 3),
+  //     JSON.stringify(allErrors, null, 3),
-        function (err) {
+  //     function (err) {
-            if (err) {
+  //         if (err) {
-                console.log(err);
+  //             console.log(err);
-            }
+  //         }
-        }
+  //     }
-    );
+  // );
 }
 let { input, output } = commandLineArgs(optionDefinitions);