2019-03-28 01:22:37 +00:00
|
|
|
import re
|
2017-12-23 23:29:16 +00:00
|
|
|
from django.conf.global_settings import LANGUAGES
|
|
|
|
|
2019-03-28 01:46:25 +00:00
|
|
|
# Lookup tables built from the LANGUAGES setting imported from Django.
# lang2code: lower-cased language name -> language code
lang2code = {entry[1].lower(): entry[0] for entry in LANGUAGES}
# code2lang: language code -> language name
code2lang = dict(LANGUAGES)
# Loose shape check for a bare two- or three-letter lowercase code.
iso639 = re.compile(r'^[a-z][a-z][a-z]?$')
|
|
|
|
|
2017-12-23 23:29:16 +00:00
|
|
|
|
|
|
|
def get_language_code(language):
    """Normalize a free-form language string to a language code.

    The input is lower-cased and stripped, then reduced to its first
    non-empty token as delimited by ``sep``.  That token is resolved, in
    order, against:

    1. the codes in ``code2lang``;
    2. the lower-cased English language names in ``lang2code``;
    3. the misspellings and alternate names in ``EXTRA_LANG_MAP``;
    4. the ``iso639`` pattern (any bare two- or three-letter lowercase
       string is accepted as-is).

    Returns the matching code, or '' when ``language`` is None/empty or
    nothing matches.
    """
    if not language:
        return ''
    language = language.lower().strip()

    # Use the first NON-EMPTY token.  re.split yields a leading '' when the
    # string starts with a separator (e.g. "['en']" or "'english'"), so
    # blindly taking element [0] would discard such values.
    tokens = (token for token in sep.split(language) if token)
    language = next(tokens, '').strip()

    if language in code2lang:
        return language

    # language names (english)
    if language in lang2code:
        return lang2code[language]

    # misspellings and language names
    if language in EXTRA_LANG_MAP:
        return EXTRA_LANG_MAP[language]

    # accept 2 and 3 letter codes
    if iso639.match(language):
        return language
    return ''
|
|
|
|
|
2019-03-28 01:46:25 +00:00
|
|
|
# Map the language strings found in DOAB records onto the language codes used here.
# Mostly, this just handles misspellings;
# also null -> xx
|
|
|
|
# Delimiters between the entries of a multi-valued language string: space,
# semicolon, caret, comma, slash, pipe, single/double quote, square brackets,
# tab, newline, carriage return and hyphen.  A run of delimiters counts as one.
sep = re.compile(
    r'[ ;^,/\|\'\"\]\[\t\n\r\-]+'
)
# Language-plus-region tags such as "en-US": two lowercase letters, a hyphen,
# then two uppercase letters.
lang_and_locale = re.compile(
    r'^[a-z][a-z]\-[A-Z][A-Z]$'
)
|
|
|
|
|
|
|
|
|
|
|
|
# Misspellings, three-letter abbreviations and non-English language names,
# keyed by lower-cased input and mapped to a two-letter language code.
# 'un' maps to the placeholder code 'xx'.
EXTRA_LANG_MAP = {
    u'chinese': 'zh',  # was 'de' (German); 'zh' is the ISO 639-1 code for Chinese
    u'deutsch': 'de',
    u'eng': 'en',
    u'engli': 'en',
    u'englilsh': 'en',
    u'englisch': 'en',
    u'espanol': 'es',
    u'ger': 'de',
    u'fra': 'fr',
    u'fre': 'fr',
    u'francese': 'fr',
    u'ita': 'it',
    u'itali': 'it',
    u'italiano': 'it',
    u'norwegian': 'no',
    u'por': 'pt',
    u'portugese': 'pt',
    u'slovene': 'sl',
    u'spa': 'es',
    u'spagnolo': 'es',
    u'un': 'xx',
}
|
|
|
|
|
|
|
|
def lang_to_language_code(lang):
    """Convert a raw language value into a language code.

    ``lang`` may be None, a language-plus-region tag such as ``en-US``, a
    single language name or code, or several values joined by any of the
    delimiters in ``sep``.

    Returns:
        ''   if ``lang`` is None or cannot be resolved;
        the stripped input unchanged if it matches ``lang_and_locale``;
        otherwise the result of ``get_language_code`` applied to the first
        non-empty item of the delimited value.
    """
    if lang is None:
        return ''
    lang = lang.strip()

    # get codes like en-US
    if lang_and_locale.match(lang):
        return lang

    # get first item of lists
    for langitem in sep.split(lang):
        if langitem:
            lang = langitem
            # Stop at the first non-empty item; carrying on would leave
            # ``lang`` bound to the LAST item instead.
            break

    return get_language_code(lang) or ''
|
|
|
|
|