add get_role to scraper

pull/46/head
eric 2017-12-07 17:35:52 -05:00
parent a3f1509cc2
commit 5ccd7a0a47
2 changed files with 13 additions and 4 deletions

View File

@ -242,20 +242,24 @@ class BaseScraper(object):
return []
return value_list
def get_role(self):
    """Return the contributor role used when storing scraped names.

    This implementation always returns ``'author'``. ``get_authors`` uses
    the returned string as the key (or, pluralised, the list key) inside
    the ``'creator'`` entry.
    """
    return 'author'
def get_authors(self):
    """Store the scraped contributor names under the ``'creator'`` key.

    Names come from ``self.get_author_list()`` and are passed through
    ``authlist_cleaner``. The role string from ``self.get_role()`` decides
    the key that is written:

    * no names left after cleaning -- nothing is stored;
    * exactly one name -- ``{role: {'agent_name': name}}``;
    * several names -- ``{role + 's': [{'agent_name': name}, ...]}``.
    """
    role = self.get_role()
    value_list = authlist_cleaner(self.get_author_list())
    if not value_list:
        return
    if len(value_list) == 1:
        self.set('creator', {role: {'agent_name': value_list[0]}})
        return
    # Plural key is formed by appending "s" to the role ("authors", "editors").
    creator_list = [{'agent_name': auth} for auth in value_list]
    self.set('creator', {'{}s'.format(role): creator_list})
def get_cover(self):
image_url = self.check_metas(['og.image', 'image', 'twitter:image'])
if not image_url:

View File

@ -75,6 +75,11 @@ class SpringerScraper(BaseScraper):
if not value:
(SpringerScraper, self).get_title()
def get_role(self):
    """Return ``'editor'`` or ``'author'`` depending on the page markup.

    The role is ``'editor'`` when ``self.doc.select_one('#editors')``
    returns a truthy value, and ``'author'`` otherwise.
    """
    return 'editor' if self.doc.select_one('#editors') else 'author'
def get_author_list(self):
for el in self.doc.select('.authors__name'):
yield el.text.strip().replace(u'\xa0', u' ')