Merge remote branch 'origin/master' into search
Conflicts: readthedocs/restapi/views.py
commit
ab871ad889
|
@ -14,6 +14,7 @@ django-profiles==0.2
|
|||
django-secure==0.1.2
|
||||
django==1.4.8
|
||||
docutils==0.8.1
|
||||
elasticsearch==0.4.3
|
||||
github2==0.5.2
|
||||
httplib2==0.7.2
|
||||
mercurial==2.6.3
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
import json
|
||||
import hashlib
|
||||
|
||||
from django.shortcuts import get_object_or_404
|
||||
from django.template import Template, Context
|
||||
from django.conf import settings
|
||||
|
@ -7,15 +10,13 @@ from elasticsearch import Elasticsearch
|
|||
from rest_framework import decorators, permissions, viewsets, status
|
||||
from rest_framework.renderers import JSONPRenderer, JSONRenderer, BrowsableAPIRenderer
|
||||
from rest_framework.response import Response
|
||||
import json
|
||||
import hashlib
|
||||
import requests
|
||||
|
||||
from betterversion.better import version_windows, BetterVersion
|
||||
from builds.models import Version
|
||||
from djangome import views as djangome
|
||||
from search.indexes import Page as PageIndex, Project as ProjectIndex
|
||||
from projects.models import Project, EmailHook
|
||||
from search.indexes import Page
|
||||
|
||||
from .serializers import ProjectSerializer
|
||||
from .permissions import RelatedProjectIsOwner
|
||||
|
@ -181,23 +182,6 @@ def quick_search(request):
|
|||
ret_dict[key] = value
|
||||
return Response({"results": ret_dict})
|
||||
|
||||
@decorators.api_view(['GET'])
|
||||
@decorators.permission_classes((permissions.AllowAny,))
|
||||
@decorators.renderer_classes((JSONRenderer, JSONPRenderer, BrowsableAPIRenderer))
|
||||
def search(request):
|
||||
project_slug = request.GET.get('project', None)
|
||||
if not project_slug:
|
||||
return Response({'error': 'project GET argument required'}, status=status.HTTP_400_BAD_REQUEST)
|
||||
version_slug = request.GET.get('version', 'latest')
|
||||
query = request.GET.get('q', None)
|
||||
es = Elasticsearch(settings.ES_HOSTS)
|
||||
ret_dict = {}
|
||||
results = es.query({'project': project_slug, 'version': version_slug, 'query': query})
|
||||
for result in results:
|
||||
#ret_dict[result['key']] = result['url']
|
||||
pass
|
||||
return Response({"results": ret_dict})
|
||||
|
||||
@decorators.api_view(['POST'])
|
||||
@decorators.permission_classes((permissions.IsAdminUser,))
|
||||
@decorators.renderer_classes((JSONRenderer, JSONPRenderer, BrowsableAPIRenderer))
|
||||
|
@ -233,3 +217,45 @@ def index_search(request):
|
|||
index_list.append(page)
|
||||
page_obj.bulk_index(index_list, parent=project_pk)
|
||||
return Response({'indexed': True})
|
||||
|
||||
@decorators.api_view(['GET'])
@decorators.permission_classes((permissions.AllowAny,))
@decorators.renderer_classes((JSONRenderer, JSONPRenderer, BrowsableAPIRenderer))
def search(request):
    """
    Search API endpoint.

    GET parameters:
        project: optional project id; when given, a Page search scoped to
            that project is performed, otherwise a global Project search.
        version: version slug to search within (default: 'latest').
        q: the query string.

    Returns a Response with the raw Elasticsearch results under 'results'.
    """
    project_id = request.GET.get('project', None)
    version_slug = request.GET.get('version', 'latest')
    query = request.GET.get('q', None)

    if project_id:
        # This is a search within a project -- do a Page search.
        # BUG FIX: the original dict literal repeated the key 'term', so the
        # project filter was silently overwritten by the version filter and
        # results leaked across projects. Combine both term filters instead.
        body = {
            'filter': {
                'and': [
                    {'term': {'project': project_id}},
                    {'term': {'version': version_slug}},
                ],
            },
            'query': {
                'bool': {
                    'should': [
                        {'match': {'title': {'query': query, 'boost': 10}}},
                        {'match': {'headers': {'query': query, 'boost': 5}}},
                        {'match': {'content': {'query': query}}},
                    ]
                }
            }
        }
        # Route to the project's shard since pages are parented to projects.
        results = PageIndex().search(body, routing=project_id)

    else:
        # Global search across all projects.
        body = {
            'query': {
                'bool': {
                    'should': [
                        {'match': {'name': {'query': query, 'boost': 10}}},
                        {'match': {'description': {'query': query}}},
                    ]
                }
            }
        }
        results = ProjectIndex().search(body)

    return Response({'results': results})
|
||||
|
|
|
@ -0,0 +1,267 @@
|
|||
"""
|
||||
Search indexing classes to index into Elasticsearch.
|
||||
|
||||
Django settings that should be defined:
|
||||
|
||||
`ES_HOSTS`: A list of hosts where Elasticsearch lives. E.g.
|
||||
['192.168.1.1:9200', '192.168.2.1:9200']
|
||||
|
||||
`ES_DEFAULT_NUM_REPLICAS`: An integer of the number of replicas.
|
||||
|
||||
`ES_DEFAULT_NUM_SHARDS`: An integer of the number of shards.
|
||||
|
||||
|
||||
TODO: Handle page removal case in Page.
|
||||
|
||||
"""
|
||||
import datetime
|
||||
|
||||
from elasticsearch import Elasticsearch, exceptions
|
||||
from elasticsearch.helpers import bulk_index
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
|
||||
class Index(object):
    """
    Base class to define some common methods across indexes.

    Subclasses set `_type` and implement `get_mapping` and
    `extract_document`.
    """
    # The _index and _type define the URL path to Elasticsearch, e.g.:
    # http://localhost:9200/{_index}/{_type}/_search
    _index = 'readthedocs'
    _type = None

    def __init__(self):
        self.es = Elasticsearch(settings.ES_HOSTS)

    def get_settings(self, settings_override=None):
        """
        Returns settings to be passed to ES create_index.

        If `settings_override` is provided, this will use `settings_override`
        to override the defaults defined here.

        """
        default_settings = {
            'number_of_replicas': settings.ES_DEFAULT_NUM_REPLICAS,
            'number_of_shards': settings.ES_DEFAULT_NUM_SHARDS,
            'refresh_interval': '5s',
            'store.compress.tv': True,
            'store.compress.stored': True,
            'analysis': self.get_analysis(),
        }
        if settings_override:
            default_settings.update(settings_override)

        return default_settings

    def get_analysis(self):
        """
        Returns the analysis dict to be used in settings for create_index.

        For languages that ES supports we define either the minimal or light
        stemming, which isn't as aggresive as the snowball stemmer. We also
        define the stopwords for that language.

        For all languages we've customized we're using the ICU plugin.

        """
        analyzers = {}
        filters = {}

        # The default is used for fields that need ICU but are composed of
        # many languages.
        analyzers['default_icu'] = {
            'type': 'custom',
            'tokenizer': 'icu_tokenizer',
            'filter': ['word_delimiter', 'icu_folding', 'icu_normalizer'],
        }

        # Customize the word_delimiter filter to set various options.
        filters['custom_word_delimiter'] = {
            'type': 'word_delimiter',
            'preserve_original': True,
        }

        return {
            'analyzer': analyzers,
            'filter': filters,
        }

    def timestamped_index(self):
        """Return a unique index name suffixed with the current timestamp."""
        return '{0}-{1}'.format(
            self._index, datetime.datetime.now().strftime('%Y%m%d%H%M%S'))

    def create_index(self, index=None):
        """
        Creates index.

        This uses `get_settings` and `get_mappings` to define the index.

        """
        index = index or self._index
        body = {
            'settings': self.get_settings(),
        }
        self.es.indices.create(index=index, body=body)

    def put_mapping(self, index=None):
        """Push this type's mapping to the given (or default) index."""
        index = index or self._index
        self.es.indices.put_mapping(index, self._type, self.get_mapping())

    def bulk_index(self, data, index=None, chunk_size=500, parent=None):
        """
        Given a list of documents, uses Elasticsearch bulk indexing.

        For each doc this calls `extract_document`, then indexes.

        `chunk_size` defaults to the elasticsearch lib's default. Override per
        your document size as needed.

        """
        index = index or self._index
        docs = []
        for d in data:
            source = self.extract_document(d)
            doc = {
                '_index': index,
                '_type': self._type,
                '_id': source['id'],
                '_source': source,
            }
            if parent:
                doc['_parent'] = parent
            docs.append(doc)

        bulk_index(self.es, docs, chunk_size=chunk_size)

    def index_document(self, data, index=None, parent=None):
        """Index a single document extracted from `data`."""
        index = index or self._index
        doc = self.extract_document(data)
        self.es.index(index=index, doc_type=self._type, body=doc, id=doc['id'],
                      parent=parent)

    def get_mapping(self):
        """
        Returns the mapping for this _index and _type.
        """
        # BUG FIX: `raise NotImplemented` raises the NotImplemented constant,
        # which is a TypeError on Python 3 and misleading on Python 2.
        raise NotImplementedError

    def extract_document(self, data):
        """
        Extracts the Elasticsearch document for this object instance.
        """
        # BUG FIX: signature changed from (pk, obj) to (data) to match how
        # every caller (bulk_index, index_document) and both subclasses
        # already use it.
        raise NotImplementedError

    def update_aliases(self, new_index, delete=True):
        """
        Points `_index` to `new_index` and deletes `_index` if delete=True.

        The ES `update_aliases` is atomic.
        """
        old_index = None

        # Get current alias, if any.
        try:
            aliases = self.es.indices.get_alias(name=self._index)
            if aliases:
                # BUG FIX: dict.keys()[0] is not subscriptable on Python 3;
                # list(aliases)[0] works on both 2 and 3.
                old_index = list(aliases)[0]
        except exceptions.NotFoundError:
            pass

        actions = []
        if old_index:
            actions.append({'remove': {'index': old_index,
                                       'alias': self._index}})
        actions.append({'add': {'index': new_index, 'alias': self._index}})

        self.es.indices.update_aliases(body={'actions': actions})

        # Delete old index if any and if specified.
        if delete and old_index:
            self.es.indices.delete(index=old_index)

    def search(self, body, **kwargs):
        """Run a search against this index/type with the given query body."""
        return self.es.search(index=self._index, doc_type=self._type,
                              body=body, **kwargs)
|
||||
|
||||
|
||||
class Project(Index):
    """Index of project-level documents (one doc per project)."""

    _type = 'project'

    def get_mapping(self):
        """Return the Elasticsearch mapping for the project type."""
        return {
            self._type: {
                # Disable _all field to reduce index size.
                '_all': {'enabled': False},
                # Add a boost field to enhance relevancy of a document.
                '_boost': {'name': '_boost', 'null_value': 1.0},
                'properties': {
                    'id': {'type': 'long'},
                    'name': {'type': 'string', 'analyzer': 'default_icu'},
                    'slug': {'type': 'string', 'index': 'not_analyzed'},
                    'description': {'type': 'string',
                                    'analyzer': 'default_icu'},
                    'lang': {'type': 'string', 'index': 'not_analyzed'},
                    'author': {'type': 'string', 'analyzer': 'default_icu'},
                    'url': {'type': 'string', 'index': 'not_analyzed'},
                }
            }
        }

    def extract_document(self, data):
        """Build the ES source dict for a project from the `data` mapping."""
        fields = ('id', 'name', 'description', 'author', 'url')
        document = {field: data.get(field, '') for field in fields}

        # Add project boost.
        document['_boost'] = data.get('_boost', 1.0)

        return document
|
||||
|
||||
|
||||
class Page(Index):
    """Index of page-level documents, parented to their project."""

    _type = 'page'
    _parent = 'project'

    def get_mapping(self):
        """Return the Elasticsearch mapping for the page type."""
        return {
            self._type: {
                # Disable _all field to reduce index size.
                '_all': {'enabled': False},
                # Add a boost field to enhance relevancy of a document.
                '_boost': {'name': '_boost', 'null_value': 1.0},
                # Associate a page with a project.
                '_parent': {'type': self._parent},
                'properties': {
                    'id': {'type': 'string', 'index': 'not_analyzed'},
                    'project': {'type': 'long'},
                    'title': {'type': 'string', 'analyzer': 'default_icu'},
                    'headers': {'type': 'string', 'analyzer': 'default_icu'},
                    'version': {'type': 'string', 'index': 'not_analyzed'},
                    'path': {'type': 'string', 'index': 'not_analyzed'},
                    'content': {'type': 'string', 'analyzer': 'default_icu'},
                }
            }
        }

    def extract_document(self, data):
        """Build the ES source dict for a page from the `data` mapping."""
        fields = ('id', 'project', 'title', 'headers', 'version', 'path',
                  'content')
        document = {field: data.get(field, '') for field in fields}

        # Add page boost.
        document['_boost'] = data.get('_boost', 1.0)

        return document
|
|
@ -173,6 +173,10 @@ HAYSTACK_CONNECTIONS = {
|
|||
},
|
||||
}
|
||||
|
||||
# Elasticsearch settings.
|
||||
ES_HOSTS = ['127.0.0.1:9200']
|
||||
ES_DEFAULT_NUM_REPLICAS = 0
|
||||
ES_DEFAULT_NUM_SHARDS = 5
|
||||
|
||||
AUTH_PROFILE_MODULE = "core.UserProfile"
|
||||
SOUTH_TESTS_MIGRATE = False
|
||||
|
|
Loading…
Reference in New Issue