add get_role to scraper
parent
a3f1509cc2
commit
5ccd7a0a47
|
@ -242,20 +242,24 @@ class BaseScraper(object):
|
|||
return []
|
||||
return value_list
|
||||
|
||||
def get_role(self):
    """Return the key under which scraped contributors are recorded.

    The base implementation always answers ``'author'``. Subclasses can
    override this method to report a different role.
    """
    return 'author'
|
||||
|
||||
def get_authors(self):
    """Store the scraped contributor names under the ``creator`` key.

    The role returned by ``self.get_role()`` chooses the dictionary key:

    * no names   -> nothing is stored;
    * one name   -> ``{role: {'agent_name': name}}``;
    * many names -> ``{role + 's': [{'agent_name': name}, ...]}``.

    Names come from ``self.get_author_list()`` and are passed through
    ``authlist_cleaner`` first.
    NOTE(review): assumes ``authlist_cleaner`` returns a sequence that
    supports ``len()`` and indexing -- confirm against its definition.
    """
    role = self.get_role()
    names = authlist_cleaner(self.get_author_list())

    # Nothing scraped: leave any existing 'creator' value untouched.
    if not names:
        return

    # A single contributor is stored as one mapping, not a one-item list.
    if len(names) == 1:
        self.set('creator', {role: {'agent_name': names[0]}})
        return

    # Several contributors: naive plural of the role ('authors', 'editors').
    creator_list = [{'agent_name': name} for name in names]
    self.set('creator', {'{}s'.format(role): creator_list})
|
||||
|
||||
def get_cover(self):
|
||||
image_url = self.check_metas(['og.image', 'image', 'twitter:image'])
|
||||
if not image_url:
|
||||
|
|
|
@ -75,6 +75,11 @@ class SpringerScraper(BaseScraper):
|
|||
if not value:
|
||||
(SpringerScraper, self).get_title()
|
||||
|
||||
def get_role(self):
    """Return ``'editor'`` when the page has an ``#editors`` element.

    Looks up the ``#editors`` CSS selector on ``self.doc``. If it matches,
    the role is ``'editor'``; otherwise the role is ``'author'``.
    """
    editors_section = self.doc.select_one('#editors')
    return 'editor' if editors_section else 'author'
|
||||
|
||||
def get_author_list(self):
    """Yield the text of every ``.authors__name`` element in ``self.doc``.

    Each name is stripped of surrounding whitespace, and any remaining
    non-breaking space (U+00A0) is replaced by an ordinary space.
    """
    for name_el in self.doc.select('.authors__name'):
        name = name_el.text.strip()
        yield name.replace(u'\xa0', u' ')
|
||||
|
|
Loading…
Reference in New Issue