Merge pull request #745 from Gluejar/teak-scrapers

stricter RE
pull/46/head
eshellman 2018-01-04 16:56:15 -05:00 committed by GitHub
commit b73e41e92e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 1 addition and 1 deletion

View File

@@ -91,7 +91,7 @@ class BaseScraper(object):
value = '' value = ''
list_mode = attrs.pop('list_mode', 'longest') list_mode = attrs.pop('list_mode', 'longest')
for meta_name in meta_list: for meta_name in meta_list:
attrs['name'] = re.compile(meta_name, flags=re.I) attrs['name'] = re.compile('^{}$'.format(meta_name), flags=re.I)
metas = self.doc.find_all('meta', attrs=attrs) metas = self.doc.find_all('meta', attrs=attrs)
if len(metas) == 0: if len(metas) == 0:
# some sites put schema.org metadata in metas # some sites put schema.org metadata in metas