lint
parent
e837dd6ff2
commit
6dfa1bccb4
|
@ -4,41 +4,43 @@ methods to validate and clean identifiers
|
|||
'''
|
||||
import re
|
||||
import datetime
|
||||
from dateutil.parser import parse
|
||||
|
||||
from dateutil.parser import parse
|
||||
from PyPDF2 import PdfFileReader
|
||||
|
||||
from django.forms import ValidationError
|
||||
from django.utils.translation import ugettext_lazy as _
|
||||
|
||||
from regluit.pyepub import EPUB
|
||||
from regluit.mobi import Mobi
|
||||
from .isbn import ISBN
|
||||
|
||||
# Maps an identifier type code to a ``(pattern, error_message)`` pair.
#
# The pattern is either a compiled regex ('http') or a pattern string (all
# other entries). Every string pattern also accepts the literal value
# 'delete'. The error message is the text paired with the pattern for
# reporting a value that does not match.
ID_VALIDATION = {
    'http': (
        re.compile(
            r"(https?|ftp)://(-\.)?([^\s/?\.#]+\.?)+(/[^\s]*)?$",
            flags=re.IGNORECASE | re.S,
        ),
        "The Web Address must be a valid http(s) URL.",
    ),
    'isbn': (
        r'^([\dxX\-–— ]+|delete)$',
        "The ISBN must be a valid ISBN-13.",
    ),
    'doab': (
        r'^(\d{1,6}|delete)$',
        "The value must be 1-6 digits.",
    ),
    'gtbg': (
        r'^(\d{1,6}|delete)$',
        "The Gutenberg number must be 1-6 digits.",
    ),
    'doi': (
        r'^(https?://dx\.doi\.org/|https?://doi\.org/)?(10\.\d+/\S+|delete)$',
        "The DOI value must be a valid DOI.",
    ),
    'oclc': (
        r'^(\d{8,12}|delete)$',
        "The OCLCnum must be 8 or more digits.",
    ),
    'goog': (
        r'^([a-zA-Z0-9\-_]{12}|delete)$',
        "The Google id must be 12 alphanumeric characters, dash or underscore.",
    ),
    'gdrd': (
        r'^(\d{1,8}|delete)$',
        "The Goodreads ID must be 1-8 digits.",
    ),
    # The '^' anchor sits outside the group so that it applies to both
    # alternatives, matching the form used by the other entries.
    'thng': (
        r'^(\d{1,8}|delete)$',
        "The LibraryThing ID must be 1-8 digits.",
    ),
    # Accepts 'OL<digits>W' with an optional '/works/' prefix. The previous
    # pattern escaped the closing paren and dropped the backslash from '\d',
    # so it could never match a real work ID such as 'OL12345W'.
    'olwk': (
        r'^((?:/works/)?OL\d{1,8}W|delete)$',
        "The Open Library Work ID looks like 'OL####W'.",
    ),
    'glue': (
        r'^(\d{1,6}|delete)$',
        "The Unglue.it ID must be 1-6 digits.",
    ),
    'ltwk': (
        r'^(\d{1,8}|delete)$',
        "The LibraryThing work ID must be 1-8 digits.",
    ),
}
|
||||
|
||||
def isbn_cleaner(value):
|
||||
|
@ -48,7 +50,7 @@ def isbn_cleaner(value):
|
|||
raise ValidationError('no identifier value found')
|
||||
elif value == 'delete':
|
||||
return value
|
||||
isbn=ISBN(value)
|
||||
isbn = ISBN(value)
|
||||
if isbn.error:
|
||||
raise ValidationError(isbn.error)
|
||||
isbn.validate()
|
||||
|
@ -59,7 +61,7 @@ def olwk_cleaner(value):
|
|||
value = '/works/{}'.format(value)
|
||||
return value
|
||||
|
||||
doi_match = re.compile( r'10\.\d+/\S+')
|
||||
# Compiled, unanchored pattern for a DOI-shaped substring: "10.", one or more
# digits, a slash, then one or more non-whitespace characters.
doi_match = re.compile(r'10\.\d+/\S+')
|
||||
|
||||
def doi_cleaner(value):
|
||||
if not value == 'delete' and not value.startswith('10.'):
|
||||
|
@ -68,7 +70,7 @@ def doi_cleaner(value):
|
|||
except AttributeError:
|
||||
return ''
|
||||
return value
|
||||
|
||||
|
||||
ID_MORE_VALIDATION = {
|
||||
'isbn': isbn_cleaner,
|
||||
'olwk': olwk_cleaner,
|
||||
|
@ -105,18 +107,18 @@ def test_file(the_file, fformat):
|
|||
try:
|
||||
book = EPUB(the_file.file)
|
||||
except Exception as e:
|
||||
raise ValidationError(_('Are you sure this is an EPUB file?: %s' % e) )
|
||||
raise ValidationError(_('Are you sure this is an EPUB file?: %s' % e))
|
||||
elif fformat == 'mobi':
|
||||
try:
|
||||
book = Mobi(the_file.file)
|
||||
book.parse()
|
||||
except Exception as e:
|
||||
raise ValidationError(_('Are you sure this is a MOBI file?: %s' % e) )
|
||||
raise ValidationError(_('Are you sure this is a MOBI file?: %s' % e))
|
||||
elif fformat == 'pdf':
|
||||
try:
|
||||
doc = PdfFileReader(the_file.file)
|
||||
PdfFileReader(the_file.file)
|
||||
except Exception, e:
|
||||
raise ValidationError(_('%s is not a valid PDF file' % the_file.name) )
|
||||
raise ValidationError(_('%s is not a valid PDF file' % the_file.name))
|
||||
return True
|
||||
|
||||
def valid_xml_char_ordinal(c):
|
||||
|
@ -129,7 +131,7 @@ def valid_xml_char_ordinal(c):
|
|||
0x10000 <= codepoint <= 0x10FFFF
|
||||
)
|
||||
|
||||
def valid_subject( subject_name ):
|
||||
def valid_subject(subject_name):
|
||||
num_commas = 0
|
||||
for c in subject_name:
|
||||
if not valid_xml_char_ordinal(c):
|
||||
|
@ -176,7 +178,7 @@ def auth_cleaner(auth):
|
|||
is not a list of author names'''
|
||||
cleaned = []
|
||||
if ';' in auth or reversed_name.match(auth):
|
||||
authlist = semicolon_list_delim.split(auth)
|
||||
authlist = semicolon_list_delim.split(auth)
|
||||
authlist = [unreverse_name(name) for name in authlist]
|
||||
else:
|
||||
auth = _and_.sub(',', auth)
|
||||
|
@ -193,12 +195,11 @@ def validate_date(date_string):
|
|||
if ymd:
|
||||
return ymd.group(0)
|
||||
try:
|
||||
date = parse(date_string.strip(), default=datetime.date(999,1,1))
|
||||
date = parse(date_string.strip(), default=datetime.date(999, 1, 1))
|
||||
if date.year != 999:
|
||||
return date.strftime('%Y')
|
||||
except ValueError:
|
||||
year = MATCHYEAR.search(date_string)
|
||||
if year:
|
||||
return year.group(0)
|
||||
else:
|
||||
return ''
|
||||
return ''
|
||||
|
|
Loading…
Reference in New Issue