verify/parse language codes

pull/3/head
nrfq 2021-10-15 18:06:27 -04:00
parent 8095c60067
commit 8a450948c6
2 changed files with 209 additions and 8 deletions

View File

@ -1,6 +1,7 @@
const fs = require('fs');
const path = require('path');
const remark = require('remark');
const languages = require('./languages')
/**
* Parses markdown from a file and returns an AST.
@ -17,8 +18,9 @@ const remark = require('remark');
*/
let getAST = function(filename, outputFile="tree.json"){
//import test markdown file
let mk;
try{
let mk = fs.readFileSync(path.join(__dirname, filename), "utf8");
mk = fs.readFileSync(path.join(__dirname, filename), "utf8");
}
catch(e){
throw new Error("Could not open input file. Make sure the file exists.");
@ -48,10 +50,13 @@ let getAST = function(filename, outputFile="tree.json"){
*
* @return {Object} Returns a JSON object, exact format still in progress.
*/
let exportJSON = function(children, outputFile="root.json"){
let exportJSON = function(children, langCode, mediaType, outputFile="root.json"){
// This will be the JSON to export
let rootJSON = {
type: 'root',
langCode: langCode,
language: languages[langCode],
mediaType: mediaType,
children: children
};
fs.writeFile(outputFile, JSON.stringify(rootJSON, null, 3), function(err) {
@ -77,8 +82,8 @@ let parseListItem = function(listItem){
return "NEEDS TO BE IMPLEMENTED";
}
let main = function(args){
let tree = getAST(args[0]);
let main = function(inputFile, langCode, mediaType, outputFile){
let tree = getAST(inputFile);
let children = []; // This will go into root object later
let currentDepth = 3;
@ -128,15 +133,23 @@ let main = function(args){
}
}
}
let rootJSON = exportJSON(children, args[1]);
let rootJSON = exportJSON(children, langCode, mediaType, outputFile);
}
let usageMessage = "Usage:\n node index.js [input_file] [output_file]\n"
let args = process.argv.slice(2);
if(args.length < 1)
throw new SyntaxError(`Please provide an input filename.\n${usageMessage}`);
throw new Error(`Please provide an input filename.\n${usageMessage}`);
if(args.length > 2)
throw new SyntaxError(`Too many arguments.\n${usageMessage}`);
throw new Error(`Too many arguments.\n${usageMessage}`);
if(args.length == 1)
args[1] = "root.json";
main(args);
let fileRegex= /\S+-\S+.md/;
if(!fileRegex.test(args[0]))
throw new Error(`Wrong filename format. Input must be in the format 'free-media-type-languageCode.md`);
let splitPoint = args[0].lastIndexOf("-");
let langCode = args[0].substring(splitPoint + 1, args[0].length-3);
if(!languages.hasOwnProperty(langCode))
throw new Error('The language code must be in the ISO 639-1 set.');
let mediaType = args[0].substring(0, splitPoint);
main(args[0], langCode, mediaType, args[1]);

188
languages.js Normal file
View File

@ -0,0 +1,188 @@
/**
 * Lookup table from two-letter language code to its English language name.
 *
 * Keys are lowercase two-letter codes (the caller's validation error refers to
 * them as the ISO 639-1 set); values are the display names. Consumers use it
 * both for membership checks (`languages.hasOwnProperty(code)`) and to resolve
 * the human-readable name (`languages[code]`) written into the exported JSON.
 *
 * Names containing several comma-separated alternatives are kept as a single
 * string exactly as listed.
 */
const languages = {
  'ab': 'Abkhazian',
  'aa': 'Afar',
  'af': 'Afrikaans',
  'ak': 'Akan',
  'sq': 'Albanian',
  'am': 'Amharic',
  'ar': 'Arabic',
  'an': 'Aragonese',
  'hy': 'Armenian',
  'as': 'Assamese',
  'av': 'Avaric',
  'ae': 'Avestan',
  'ay': 'Aymara',
  'az': 'Azerbaijani',
  'bm': 'Bambara',
  'ba': 'Bashkir',
  'eu': 'Basque',
  'be': 'Belarusian',
  'bn': 'Bengali',
  'bh': 'Bihari languages',
  'bi': 'Bislama',
  'bs': 'Bosnian',
  'br': 'Breton',
  'bg': 'Bulgarian',
  'my': 'Burmese',
  'ca': 'Catalan, Valencian',
  'km': 'Central Khmer',
  'ch': 'Chamorro',
  'ce': 'Chechen',
  'ny': 'Chichewa, Chewa, Nyanja',
  'zh': 'Chinese',
  'cu': 'Church Slavonic, Old Bulgarian, Old Church Slavonic',
  'cv': 'Chuvash',
  'kw': 'Cornish',
  'co': 'Corsican',
  'cr': 'Cree',
  'hr': 'Croatian',
  'cs': 'Czech',
  'da': 'Danish',
  'dv': 'Divehi, Dhivehi, Maldivian',
  'nl': 'Dutch, Flemish',
  'dz': 'Dzongkha',
  'en': 'English',
  'eo': 'Esperanto',
  'et': 'Estonian',
  'ee': 'Ewe',
  'fo': 'Faroese',
  'fj': 'Fijian',
  'fi': 'Finnish',
  'fr': 'French',
  'ff': 'Fulah',
  'gd': 'Gaelic, Scottish Gaelic',
  'gl': 'Galician',
  'lg': 'Ganda',
  'ka': 'Georgian',
  'de': 'German',
  'ki': 'Gikuyu, Kikuyu',
  'el': 'Greek (Modern)',
  'kl': 'Greenlandic, Kalaallisut',
  'gn': 'Guarani',
  'gu': 'Gujarati',
  'ht': 'Haitian, Haitian Creole',
  'ha': 'Hausa',
  'he': 'Hebrew',
  'hz': 'Herero',
  'hi': 'Hindi',
  'ho': 'Hiri Motu',
  'hu': 'Hungarian',
  'is': 'Icelandic',
  'io': 'Ido',
  'ig': 'Igbo',
  'id': 'Indonesian',
  'ia': 'Interlingua (International Auxiliary Language Association)',
  'ie': 'Interlingue',
  'iu': 'Inuktitut',
  'ik': 'Inupiaq',
  'ga': 'Irish',
  'it': 'Italian',
  'ja': 'Japanese',
  'jv': 'Javanese',
  'kn': 'Kannada',
  'kr': 'Kanuri',
  'ks': 'Kashmiri',
  'kk': 'Kazakh',
  'rw': 'Kinyarwanda',
  'kv': 'Komi',
  'kg': 'Kongo',
  'ko': 'Korean',
  'kj': 'Kwanyama, Kuanyama',
  'ku': 'Kurdish',
  'ky': 'Kyrgyz',
  'lo': 'Lao',
  'la': 'Latin',
  'lv': 'Latvian',
  'lb': 'Letzeburgesch, Luxembourgish',
  'li': 'Limburgish, Limburgan, Limburger',
  'ln': 'Lingala',
  'lt': 'Lithuanian',
  'lu': 'Luba-Katanga',
  'mk': 'Macedonian',
  'mg': 'Malagasy',
  'ms': 'Malay',
  'ml': 'Malayalam',
  'mt': 'Maltese',
  'gv': 'Manx',
  'mi': 'Maori',
  'mr': 'Marathi',
  'mh': 'Marshallese',
  'ro': 'Moldovan, Moldavian, Romanian',
  'mn': 'Mongolian',
  'na': 'Nauru',
  'nv': 'Navajo, Navaho',
  'nd': 'Northern Ndebele',
  'ng': 'Ndonga',
  'ne': 'Nepali',
  'se': 'Northern Sami',
  'no': 'Norwegian',
  'nb': 'Norwegian Bokmål',
  'nn': 'Norwegian Nynorsk',
  'ii': 'Nuosu, Sichuan Yi',
  'oc': 'Occitan (post 1500)',
  'oj': 'Ojibwa',
  'or': 'Oriya',
  'om': 'Oromo',
  'os': 'Ossetian, Ossetic',
  'pi': 'Pali',
  'pa': 'Panjabi, Punjabi',
  'ps': 'Pashto, Pushto',
  'fa': 'Persian',
  'pl': 'Polish',
  'pt': 'Portuguese',
  'qu': 'Quechua',
  'rm': 'Romansh',
  'rn': 'Rundi',
  'ru': 'Russian',
  'sm': 'Samoan',
  'sg': 'Sango',
  'sa': 'Sanskrit',
  'sc': 'Sardinian',
  'sr': 'Serbian',
  'sn': 'Shona',
  'sd': 'Sindhi',
  'si': 'Sinhala, Sinhalese',
  'sk': 'Slovak',
  'sl': 'Slovenian',
  'so': 'Somali',
  'st': 'Sotho, Southern',
  'nr': 'South Ndebele',
  'es': 'Spanish, Castilian',
  'su': 'Sundanese',
  'sw': 'Swahili',
  'ss': 'Swati',
  'sv': 'Swedish',
  'tl': 'Tagalog',
  'ty': 'Tahitian',
  'tg': 'Tajik',
  'ta': 'Tamil',
  'tt': 'Tatar',
  'te': 'Telugu',
  'th': 'Thai',
  'bo': 'Tibetan',
  'ti': 'Tigrinya',
  'to': 'Tonga (Tonga Islands)',
  'ts': 'Tsonga',
  'tn': 'Tswana',
  'tr': 'Turkish',
  'tk': 'Turkmen',
  'tw': 'Twi',
  'ug': 'Uighur, Uyghur',
  'uk': 'Ukrainian',
  'ur': 'Urdu',
  'uz': 'Uzbek',
  've': 'Venda',
  'vi': 'Vietnamese',
  // Previously the mis-encoded 'Volap_k'; the "ü" had been lost.
  'vo': 'Volapük',
  'wa': 'Walloon',
  'cy': 'Welsh',
  'fy': 'Western Frisian',
  'wo': 'Wolof',
  'xh': 'Xhosa',
  'yi': 'Yiddish',
  'yo': 'Yoruba',
  'za': 'Zhuang, Chuang',
  'zu': 'Zulu',
};
module.exports = languages;