Merge pull request #748 from Gluejar/disallow-search

Disallow search
pull/46/head
eshellman 2018-01-12 12:09:40 -05:00 committed by GitHub
commit 029c507c66
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 67 additions and 4 deletions

View File

@ -4,6 +4,7 @@ User-agent: *
Disallow: /accounts/
Disallow: /feedback/
Disallow: /socialauth/
Disallow: /search/
Disallow: /googlebooks/
{% else %}
User-agent: *

View File

@ -0,0 +1,45 @@
{% extends 'base.html' %}
{% load humanize %}
{% comment %}
Page title suffix. When work.is_free is true it announces the work as a free
ebook and lists every entry of work.formats in square brackets; otherwise it
asks for help making the work free. " All Editions" is appended only when the
context variable `action` equals 'editions'.
{% endcomment %}
{% block title %}—
{% if work.is_free %}
{{ work.title }} is a Free eBook. {% for fmt in work.formats %}[{{ fmt }}]{% endfor %}
{% else %}
Help us make {{ work.title }} a Free eBook!
{% endif %}{% if action == 'editions' %} All Editions{% endif %}
{% endblock %}
{% comment %}
Open Graph metadata for the work: title, type "book", canonical unglue.it URL
(built from the 'work' URL pattern and work.id), cover thumbnail and site name.
One book:author tag is emitted per entry in work.relators; book:isbn is emitted
only when work.first_isbn_13 is truthy.
{% endcomment %}
{% block extra_meta %}
<meta property="og:title" content="{{ work.title }}" />
<meta property="og:type" content="book" />
<meta property="og:url" content="https://unglue.it{% url 'work' work.id %}" />
<meta property="og:image" content="{{ work.cover_image_thumbnail }}" />
<meta property="og:site_name" content="Unglue.it" />
{% for author in work.relators %}<meta property="book:author" content="{{ author.name }}" />{% endfor %}
{% if work.first_isbn_13 %}<meta property="book:isbn" content="{{ work.first_isbn_13 }}" />{% endif %}
{% endblock %}
{% comment %}
Summary header for a work: title, author links (each pointing at an author
search), optional publisher link, publication date and schema.org microdata
for language and age range.
NOTE(review): four <div> elements are opened below but only three are closed;
confirm whether the base layout supplies the missing closing tag.
NOTE(review): the two-author test uses work.authors.count while the
more-than-two test uses work.relators.count; confirm these always agree.
{% endcomment %}
{% block topsection %}
<div class="book-detail-info">
<div class="layout">
<h2 class="book-name" itemprop="name">{{ work.title }}</h2>
<div>
<div class="pubinfo">
<h3 class="book-author">
<span itemprop="author"><a href="{% url 'search' %}?q={{ work.relators.0.author.name|urlencode }}&amp;ty=au" >{{ work.relators.0.name }}</a></span>{% if work.authors.count == 2 %}
and <span itemprop="author"><a href="{% url 'search' %}?q={{ work.relators.1.author.name|urlencode }}&amp;ty=au" >{{ work.relators.1.name }}</a></span>
{% endif %}{% if work.relators.count > 2 %}{% for author in work.relators %}{% if not forloop.first %}, <span itemprop="author"><a href="{% url 'search' %}?q={{ author.author.name|urlencode }}&amp;ty=au" >{{ author.name }}</a></span>{% endif %}{% endfor %}
{% endif %}
</h3>
<h3 class="book-year">
{% if work.last_campaign.publisher %}
<span itemprop="publisher"><a href="{% url 'bypubname_list' work.last_campaign.publisher.name.id %}">{{ work.last_campaign.publisher }}</a></span>
{% endif %}
<span itemprop="datePublished">{{ work.publication_date }}</span>
{# The content attributes must interpolate the variables; previously the literal text "work.language" / "work.age_range" was emitted. #}
<meta itemprop="inLanguage" content="{{ work.language }}" />
<meta itemprop="typicalAgeRange" content="{{ work.age_range }}" />
</h3>
</div>
</div>
</div>
{% endblock %}

View File

@ -63,6 +63,7 @@ class RhPageTests(TestCase):
def test_anonymous(self):
anon_client = Client()
r = anon_client.get("/work/{}/".format(self.work.id))
r = anon_client.head("/work/{}/".format(self.work.id))
self.assertEqual(r.status_code, 200)
csrfmatch = re.search("name='csrfmiddlewaretoken' value='([^']*)'", r.content)
self.assertFalse(csrfmatch)

View File

@ -300,7 +300,8 @@ def acks(request, work):
def work(request, work_id, action='display'):
work = safe_get_work(work_id)
alert = ''
if request.method == "HEAD":
return render(request, 'worksummary.html', {'work': work,})
formset = None
if action == "acks":
return acks(request, work)
@ -524,11 +525,20 @@ def manage_ebooks(request, edition_id, by=None):
})
# Case-sensitive User-Agent substrings identifying robots to be refused.
BAD_ROBOTS = [u'memoryBot']


def is_bad_robot(request):
    """Return True when the request's User-Agent names a blocked robot.

    The ``HTTP_USER_AGENT`` entry of ``request.META`` is checked for each
    substring in ``BAD_ROBOTS``. The comparison is case-sensitive, and a
    request without a User-Agent header is treated as an empty string, so
    it is never classified as a bad robot.

    Args:
        request: an object exposing a ``META`` mapping of request headers.

    Returns:
        bool: True if any ``BAD_ROBOTS`` entry occurs in the User-Agent.
    """
    user_agent = request.META.get('HTTP_USER_AGENT', '')
    return any(robot in user_agent for robot in BAD_ROBOTS)
def googlebooks(request, googlebooks_id):
try:
edition = models.Identifier.objects.get(type='goog', value=googlebooks_id).edition
except models.Identifier.DoesNotExist:
if is_bad_robot(request):
return HttpResponseNotFound("failed looking up googlebooks id %s" % googlebooks_id)
try:
edition = bookloader.add_by_googlebooks_id(googlebooks_id)
if edition.new:
@ -1916,12 +1926,18 @@ def search(request):
results = models.Work.objects.none()
break
else:
results = gluejar_search(q, user_ip=request.META['REMOTE_ADDR'], page=1)
gbo = 'y'
if is_bad_robot(request):
results = models.Work.objects.none()
else:
results = gluejar_search(q, user_ip=request.META['REMOTE_ADDR'], page=1)
gbo = 'y'
else:
if gbo == 'n':
page = page-1 # because page=1 is the unglue.it results
results = gluejar_search(q, user_ip=request.META['REMOTE_ADDR'], page=page)
if is_bad_robot(request):
results = models.Work.objects.none()
else:
results = gluejar_search(q, user_ip=request.META['REMOTE_ADDR'], page=page)
campaign_works = None
# flag search result as on wishlist as appropriate