Merge pull request #742 from Gluejar/fix-scraper

fix can_scrape
pull/46/head
eshellman 2018-01-01 19:25:26 -05:00 committed by GitHub
commit 8c6800e626
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 2 additions and 2 deletions

View File

@@ -22,7 +22,7 @@ class BaseScraper(object):
designed to make at least a decent guess for webpages that embed metadata
'''
can_scrape_hosts = False
-can_scrape_strings = [''] #should always return true
+can_scrape_strings = False
@classmethod
def can_scrape(cls, url):
''' return True if the class can scrape the URL '''

View File

@@ -22,7 +22,7 @@ class UbiquityScraper(BaseScraper):
def get_language(self):
langlabel = self.doc.find(string='Language')
-lang = langlabel.parent.parent.find_next_sibling()
+lang = langlabel.parent.parent.find_next_sibling() if langlabel else ''
lang = lang.get_text() if lang else ''
lang = get_language_code(lang) if lang else ''
if lang: