2021-11-05 00:29:01 +00:00
|
|
|
#!/usr/bin/env node
|
|
|
|
|
2021-10-07 18:56:44 +00:00
|
|
|
const fs = require('fs');
|
|
|
|
const path = require('path');
|
|
|
|
const remark = require('remark');
|
2021-10-15 22:06:27 +00:00
|
|
|
const languages = require('./languages')
|
2021-10-07 18:56:44 +00:00
|
|
|
|
2021-10-22 19:04:23 +00:00
|
|
|
// File names that getFilesFromDir skips even though they end in `.md`.
const excludes = ['README.md', 'CONTRIBUTING.md', 'CODE_OF_CONDUCT.md', 'SUMMARY.md'];
|
2021-10-07 18:56:44 +00:00
|
|
|
|
|
|
|
// TODO: capture the remaining details of an entry (text after the link,
// additional links on the same line).
/**
 * Turns the inline content of one markdown list item into an entry object.
 *
 * Only the first inline node is inspected, and it is treated as a link: its
 * `url` becomes the entry's url and the value of its first child becomes the
 * entry's title. Any further nodes are currently ignored.
 *
 * @param {Object[]} listItem - inline AST nodes (as produced by remark-parse)
 *     of a list item; the first node is expected to be a link node
 *
 * @return {Object} an object of the form `{url, title}`
 *
 * @throws {TypeError} when the array is empty or the first node has no children
 */
let parseListItem = function(listItem){
    const firstNode = listItem[0];
    return {
        url: firstNode.url,
        title: firstNode.children[0].value
    };
};
|
|
|
|
|
2021-10-22 19:04:23 +00:00
|
|
|
// from free-programming-books-lint
/**
 * Derives the language code of a listing from its file name.
 *
 * The code is whatever sits between the last `-` and the last `.` of the
 * name, with the first `_` turned into `-` (e.g. `...-pt_BR.md` -> `pt-BR`).
 *
 * @param {string} filename - file name or path of a markdown listing
 * @return {string} the code when it is a key of `languages`; an empty string
 *     when the suffix looks like a language code (`xx` or `xx-YY`) but is not
 *     a known key; `'en-US'` for every other file name
 */
function getLangFromFilename (filename) {
    const dash = filename.lastIndexOf('-');
    const dot = filename.lastIndexOf('.');
    const lang = filename.slice(dash + 1, dot).replace(/_/, '-');

    // Go through Object.prototype so the lookup does not depend on the
    // `languages` table inheriting (or not shadowing) hasOwnProperty.
    if (Object.prototype.hasOwnProperty.call(languages, lang)) {
        return lang;
    }

    // Shaped like a language code, but not one we know about.
    if (/^[a-z]{2}$/.test(lang) || /^[a-z]{2}-[A-Z]{2}$/.test(lang)) {
        return '';
    }

    // No language suffix at all: fall back to en-US.
    return 'en-US';
}
|
|
|
|
|
|
|
|
// from free-programming-books-lint
/**
 * Lists the markdown files of a directory, leaving out the names in `excludes`.
 *
 * @param {string} dir - directory to read (not recursive)
 * @return {string[]} `dir` joined with the name of every `.md` file that is
 *     not listed in `excludes`
 */
function getFilesFromDir (dir) {
    const markdownPaths = [];
    for (const name of fs.readdirSync(dir)) {
        if (path.extname(name) !== '.md') {
            continue;
        }
        if (excludes.includes(name)) {
            continue;
        }
        markdownPaths.push(path.join(dir, name));
    }
    return markdownPaths;
}
|
|
|
|
|
|
|
|
/**
 * Derives the media type of a directory from its name.
 *
 * The media type is the last segment of the path, so `./books/` and
 * `./fpb/books` both yield `books`. (Slicing between a fixed offset of 2 and
 * the last `/` only worked for paths shaped exactly like `./name/`.)
 *
 * @param {string} dir - path of a directory, with or without a trailing slash
 * @return {string} the name of the directory
 */
function getMediaFromDirectory(dir){
    // path.basename ignores trailing separators.
    return path.basename(dir);
}
|
|
|
|
|
|
|
|
/**
 * Describes where an AST node sits in the source document, using the
 * `position` object of the node when it has one.
 *
 * @param {Object} node - AST node
 * @return {string} a phrase such as `starting at line 3, column 1 and ending
 *     at line 3, column 40`, or `at an unknown position`
 */
function describeNodePosition(node){
    const position = node && node.position;
    if (!position || !position.start || !position.end) {
        return 'at an unknown position';
    }
    return `starting at line ${position.start.line}, column ${position.start.column}` +
        ` and ending at line ${position.end.line}, column ${position.end.column}`;
}

/**
 * Parses one markdown listing into groups of entries.
 *
 * Everything before the second level-3 heading is skipped (the first level-3
 * section is taken to be the index). From there on:
 *   - a level-3 heading opens a new group `{group, entries, subsections}`;
 *   - a level-4 heading opens a new subsection `{group, entries}` inside the
 *     most recent group;
 *   - every item of a list is passed to `parseListItem` and stored in the
 *     most recent group or subsection, whichever heading came last.
 * Headings of other depths and all other node types are ignored.
 *
 * A node that cannot be processed is skipped and described in `errors`; a
 * malformed list item does not affect the other items of the same list.
 *
 * @param {string|Buffer} doc - markdown source, handed to `remark.parse` as is
 * @return {{children: Object[], errors: string[]}} the parsed groups and one
 *     message per node that had to be skipped
 */
let parseMarkdown = function(doc){
    const tree = remark.parse(doc).children;
    const children = []; // This will go into root object later
    const errors = [];
    let currentDepth = 3;

    const recordError = (node, error) => {
        errors.push(
            `Parser had an error while parsing the document ${describeNodePosition(node)}: ${error.message}`
        );
    };

    // Find where the index ends: the listing starts at the second level-3
    // heading. When there is none, `start` ends up at tree.length and nothing
    // is parsed.
    let start = 0;
    let headingCount = 0;
    for (; start < tree.length; start++) {
        if (tree[start].type === 'heading' && tree[start].depth === 3) {
            headingCount++;
        }
        if (headingCount === 2) {
            break;
        }
    }

    tree.slice(start).forEach((item) => {
        try {
            if (item.type === 'heading') {
                const title = item.children[0].value;
                if (title === 'Index') {
                    return;
                }
                if (item.depth === 3) {
                    currentDepth = 3;
                    children.push({group: title, entries: [], subsections: []});
                }
                else if (item.depth === 4) {
                    currentDepth = 4;
                    children[children.length - 1].subsections.push({group: title, entries: []});
                }
            }
            else if (item.type === 'list') {
                // Entries belong to the section opened by the last heading.
                const group = children[children.length - 1];
                const target = currentDepth === 3
                    ? group.entries
                    : group.subsections[group.subsections.length - 1].entries;

                item.children.forEach((listItem) => {
                    // Guard each item on its own so that one malformed entry
                    // does not discard the rest of the list.
                    try {
                        const content = listItem.children[0].children;
                        target.push(parseListItem(content));
                    } catch (e) {
                        recordError(listItem, e);
                    }
                });
            }
        } catch (e) {
            recordError(item, e);
        }
    });

    return {children: children, errors: errors};
};

/**
 * Parses every markdown listing of a directory.
 *
 * Each file returned by `getFilesFromDir` is read, parsed with
 * `parseMarkdown` and wrapped together with its language (taken from the
 * file name). Nodes that `parseMarkdown` had to skip are reported with
 * `console.warn`.
 *
 * @param {string} directory - directory that holds the markdown listings
 * @return {Object} `{type, index, children}` where `type` is the media type
 *     of the directory and `children` holds one
 *     `{language: {code, name}, index, children}` object per file
 */
function parseDirectory(directory){
    const dirChildren = [];

    const mediaType = getMediaFromDirectory(directory);
    const filenames = getFilesFromDir(path.resolve(directory));
    filenames.forEach((filename) => {
        console.log(filename);
        const doc = fs.readFileSync(filename);
        const parsed = parseMarkdown(doc);
        parsed.errors.forEach((message) => {
            console.warn(`${filename}: ${message}`);
        });
        const langCode = getLangFromFilename(filename);
        dirChildren.push({
            language: {
                code: langCode,
                name: languages[langCode],
            },
            index: {

            },
            children: parsed.children
        });
    });

    return {
        type: mediaType,
        index: {

        },
        children: dirChildren
    };
}
|
|
|
|
|
|
|
|
/**
 * Parses every given directory and writes the combined result as JSON.
 *
 * The output is a root object `{type: 'root', children}` with one child per
 * directory (see `parseDirectory`), serialized with an indentation of 3.
 *
 * @param {string[]} dirArray - directories to parse
 * @param {string} [outputPath='./parser/fpb.json'] - file the JSON is written to
 * @throws {Error} when the output file cannot be written; `fs.writeFileSync`
 *     reports failures by throwing and never invokes a callback
 */
function parseAll(dirArray, outputPath = './parser/fpb.json'){
    const rootJson = {
        type: 'root',
        children: dirArray.map((directory) => parseDirectory(directory))
    };

    fs.writeFileSync(outputPath, JSON.stringify(rootJson, null, 3));
}
|
|
|
|
|
2021-10-22 19:04:23 +00:00
|
|
|
// Script entry point: parse the books listings and report how long it took.
const parseTimerLabel = 'Parse Time';
console.time(parseTimerLabel);
parseAll(['./fpb/books']);
console.timeEnd(parseTimerLabel);
|