
// (file-viewer residue, not part of the program: "155 lines", "5.8 KiB")

const fs = require('fs');
const path = require('path');
const remark = require('remark');
const languages = require('./languages')
/**
 * Parses markdown from a file and returns an AST.
 *
 * Reads the markdown document, parses it with `remark.parse`, and additionally
 * writes the complete AST to a text file as JSON so it can be inspected by hand.
 * @see remark.parse
 * @param {string} filename - The markdown file to parse; joined onto this script's directory (`__dirname`).
 * @param {string} [outputFile=tree.json] - The file the JSON dump of the AST is written to.
 *     It is passed to `fs.writeFileSync` as-is, so a relative path resolves against the
 *     current working directory.
 * @return {Object[]} The `children` of the parsed root node, i.e. the document's top-level nodes.
 * @throws {Error} If the input file cannot be read.
 */
let getAST = function(filename, outputFile="tree.json"){
    // Read the markdown source. Every read failure is reported with the same
    // user-facing message; the underlying error is kept as `cause` for debugging.
    let mk;
    try {
        mk = fs.readFileSync(path.join(__dirname, filename), "utf8");
    } catch (err) {
        throw new Error("Could not open input file. Make sure the file exists.", { cause: err });
    }

    // Parse into an AST with remark.
    const tree = remark.parse(mk);

    // Write the AST to a file for human readability. writeFileSync reports failure by
    // throwing (it never invokes a callback), so the error has to be caught here.
    // NOTE(review): the original error handler's body was not visible; the dump is
    // treated as best-effort and a failure is only logged — confirm this is intended.
    try {
        fs.writeFileSync(outputFile, JSON.stringify(tree, null, 3));
    } catch (err) {
        console.error(err);
    }

    // The root node is only a wrapper; the actual content starts at its children.
    return tree.children;
};
/**
 * Given an array of JSON objects, adds it to a root object and exports it to a text file.
 *
 * The root object records the language code, the language name looked up in
 * `languages`, and the media type alongside the children. The file is written
 * asynchronously; the root object is returned immediately without waiting for the write.
 * @param {Object[]} children - The array of objects to insert into the root object.
 * @param {string} langCode - Language code; also used as the key into `languages`.
 * @param {string} mediaType - Media type label stored on the root object.
 * @param {string} [outputFile=root.json] - The name of the text file to write to.
 * @return {Object} The root object: `{type, langCode, language, mediaType, children}`.
 *     `language` is `undefined` when `langCode` is not a key of `languages`.
 */
let exportJSON = function(children, langCode, mediaType, outputFile="root.json"){
    // This will be the JSON to export
    const rootJSON = {
        type: 'root',
        langCode,
        language: languages[langCode],
        mediaType,
        children,
    };

    // NOTE(review): the original error handler's body was not visible; a failed
    // write is logged rather than thrown — confirm this is the intended handling.
    fs.writeFile(outputFile, JSON.stringify(rootJSON, null, 3), function(err) {
        if (err) {
            console.error(err);
        }
    });

    return rootJSON;
};
// TODO: define the entry format and implement the actual parsing.
/**
 * Placeholder for turning one list item into an entry object.
 *
 * The final entry format (title, author, link, ...) is still to be decided, so
 * for now the input is handed back unchanged.
 * @param {Object} listItem - a listItem (or its content) in the AST format defined by remark-parse
 * @return {Object} Currently the same value that was passed in. Exact format TBD.
 */
let parseListItem = function(listItem){
    // NOTE(review): no body was visible for this function in the reviewed source.
    // Returning the input keeps the raw nodes in the exported JSON instead of
    // `undefined` until the real parser is written — confirm this is acceptable.
    return listItem;
};
/**
 * Converts a markdown listing into grouped JSON and exports it.
 *
 * Everything before the second level-3 heading (index and meta-lists) is skipped.
 * From there on, each level-3 heading opens a new group, each level-4 heading
 * opens a subsection of the most recent group, and every item of a list is
 * parsed with `parseListItem` and appended to the entries of whichever group or
 * subsection was opened last.
 * @param {string} inputFile - Markdown file to read, passed to `getAST`.
 * @param {string} langCode - Language code, passed through to `exportJSON`.
 * @param {string} mediaType - Media type label, passed through to `exportJSON`.
 * @param {string} outputFile - File the resulting JSON is written to by `exportJSON`.
 * @return {Object} The root object returned by `exportJSON`.
 */
let main = function(inputFile, langCode, mediaType, outputFile){
    const tree = getAST(inputFile);
    const children = []; // This will go into root object later
    let currentDepth = 3; // Depth of the most recent heading (3 = group, 4 = subsection)

    // Find the index of the second level-3 heading to skip the index and meta-lists.
    // If there are fewer than two such headings, i ends at tree.length and nothing is collected.
    let i = 0;
    let count = 0;
    for (; i < tree.length; i++) {
        if (tree[i].type === 'heading' && tree[i].depth === 3) {
            count++;
        }
        if (count === 2) {
            break;
        }
    }

    for (; i < tree.length; i++) {
        const node = tree[i];
        if (node.type === "heading") { // If any kind of section heading
            if (node.depth === 3) { // Make a new child of the root
                currentDepth = 3;
                children.push({group: node.children[0].value, entries: [], subsections: []});
            } else if (node.depth === 4) { // Make a subsection of the last group
                currentDepth = 4;
                children[children.length - 1].subsections.push({group: node.children[0].value, entries: []});
            }
        } else if (node.type === "list") {
            // The loop above stops on a level-3 heading, so a group always exists by the time a list is reached.
            const lastGroup = children[children.length - 1];
            for (const listItem of node.children) {
                // This starts at "type: link" for most entries; parseListItem is expected to turn it into an entry.
                const content = listItem.children[0].children;
                if (currentDepth === 3) {
                    // NOTE(review): the reviewed source showed a bare `return 1;` here with its
                    // surrounding lines missing. Taken literally it would abort before the export,
                    // so entries are pushed onto the group, mirroring the level-4 branch — confirm.
                    lastGroup.entries.push(parseListItem(content));
                } else if (currentDepth === 4) {
                    const lastSubsection = lastGroup.subsections[lastGroup.subsections.length - 1];
                    lastSubsection.entries.push(parseListItem(content));
                }
            }
        }
    }

    return exportJSON(children, langCode, mediaType, outputFile);
};
// ---- Command-line entry point ----
// Validates the arguments, derives the language code and media type from the
// input filename, then runs the conversion.
const usageMessage = "Usage:\n node index.js [input_file] [output_file]\n";

const args = process.argv.slice(2); // drop the node executable and script path
if (args.length < 1) {
    throw new Error(`Please provide an input filename.\n${usageMessage}`);
}
if (args.length > 2) {
    throw new Error(`Too many arguments.\n${usageMessage}`);
}
if (args.length == 1) {
    args[1] = "root.json"; // default output file
}

// The input filename must look like "<media-type>-<lang-code>.md": the code below
// splits on the last "-" and strips a three-character extension.
// NOTE(review): the original pattern was truncated in the reviewed source; this one is
// reconstructed from how the filename is sliced below — confirm against the real inputs.
const fileRegex = /^\S+-\S+\.md$/;
if (!fileRegex.test(args[0])) {
    throw new Error("Wrong filename format. Input must be in the format '<media-type>-<lang-code>.md'.");
}

const splitPoint = args[0].lastIndexOf("-");
// Everything between the last "-" and the ".md" extension is the language code.
const langCode = args[0].substring(splitPoint + 1, args[0].length-3);
// Own-property check so inherited names such as "constructor" are not accepted as codes.
if (!Object.prototype.hasOwnProperty.call(languages, langCode)) {
    throw new Error('The language code must be in the ISO 639-1 set.');
}
// Everything before the last "-" is the media type.
const mediaType = args[0].substring(0, splitPoint);

main(args[0], langCode, mediaType, args[1]);