Elasticsearch 1.5.0 Upgrade, including:
- Many thanks to @robhudson, who wrote the original 0.90 -> 1.0+ Elasticsearch query code.
- Updated the pip elasticsearch requirement to 1.5.0 for the new server.
- Refactored argument order to match the new API.
- Updated relevance boosting to work the Elasticsearch >= 1.0 way.
- Removed the score script and added field_value_factor, per @robhudson's insight.
- Added default project_scale and page_scale values.
- Found the source of the 400 error on _bulk, and noted it.

front-end-standardization
parent
c5aababca0
commit
7b78791166
|
@ -42,6 +42,6 @@ class Command(BaseCommand):
|
|||
commit = None
|
||||
try:
|
||||
page_list = parse_json.process_all_json_files(version, build_dir=False)
|
||||
index_search_request(version=version, page_list=page_list, commit=commit)
|
||||
index_search_request(version=version, page_list=page_list, commit=commit, project_scale=0, page_scale=0)
|
||||
except Exception:
|
||||
log.error('Build failed for %s' % version, exc_info=True)
|
||||
|
|
|
@ -686,7 +686,7 @@ def update_search(version_pk, commit):
|
|||
|
||||
log_msg = ' '.join([page['path'] for page in page_list])
|
||||
log.info("(Search Index) Sending Data: %s [%s]" % (version.project.slug, log_msg))
|
||||
index_search_request(version=version, page_list=page_list, commit=commit)
|
||||
index_search_request(version=version, page_list=page_list, commit=commit, project_scale=0, page_scale=0)
|
||||
|
||||
|
||||
@task(queue='web')
|
||||
|
|
|
@ -86,13 +86,13 @@ def delete_versions(project, version_data):
|
|||
return set()
|
||||
|
||||
|
||||
def index_search_request(version, page_list, commit):
|
||||
def index_search_request(version, page_list, commit, project_scale, page_scale):
|
||||
log_msg = ' '.join([page['path'] for page in page_list])
|
||||
log.info("(Server Search) Indexing Pages: %s [%s]" % (
|
||||
version.project.slug, log_msg))
|
||||
project = version.project
|
||||
page_obj = PageIndex()
|
||||
project_scale = 1
|
||||
section_obj = SectionIndex()
|
||||
|
||||
#tags = [tag.name for tag in project.tags.all()]
|
||||
|
||||
|
@ -106,13 +106,13 @@ def index_search_request(version, page_list, commit):
|
|||
'author': [user.username for user in project.users.all()],
|
||||
'url': project.get_absolute_url(),
|
||||
'tags': None,
|
||||
'_boost': project_scale,
|
||||
'weight': project_scale,
|
||||
})
|
||||
|
||||
index_list = []
|
||||
section_index_list = []
|
||||
for page in page_list:
|
||||
log.debug("(API Index) %s:%s" % (project.slug, page['path']))
|
||||
page_scale = 1
|
||||
page_id = hashlib.md5('%s-%s-%s' % (project.slug, version.slug, page['path'])).hexdigest()
|
||||
index_list.append({
|
||||
'id': page_id,
|
||||
|
@ -124,27 +124,38 @@ def index_search_request(version, page_list, commit):
|
|||
'content': page['content'],
|
||||
'taxonomy': None,
|
||||
'commit': commit,
|
||||
'_boost': page_scale + project_scale,
|
||||
'weight': page_scale + project_scale,
|
||||
})
|
||||
for section in page['sections']:
|
||||
section_index_list.append({
|
||||
'id': hashlib.md5('%s-%s-%s-%s' % (project.slug, version.slug, page['path'], section['id'])).hexdigest(),
|
||||
'project': project.slug,
|
||||
'version': version.slug,
|
||||
'path': page['path'],
|
||||
'page_id': section['id'],
|
||||
'title': section['title'],
|
||||
'content': section['content'],
|
||||
'weight': page_scale,
|
||||
})
|
||||
section_obj.bulk_index(section_index_list, parent=page_id, routing=project.slug)
|
||||
|
||||
page_obj.bulk_index(index_list, parent=project.slug)
|
||||
|
||||
log.info("(Server Search) Deleting files not in commit: %s" % commit)
|
||||
# Figure this out later
|
||||
# TODO: AK Make sure this works
|
||||
delete_query = {
|
||||
# ES .90 doesn't wrap this
|
||||
#"query": {
|
||||
"bool": {
|
||||
"must": [
|
||||
{"term": {"project": project.slug, }},
|
||||
{"term": {"version": version.slug, }},
|
||||
],
|
||||
"must_not": {
|
||||
"term": {
|
||||
"commit": commit
|
||||
"query": {
|
||||
"bool": {
|
||||
"must": [
|
||||
{"term": {"project": project.slug, }},
|
||||
{"term": {"version": version.slug, }},
|
||||
],
|
||||
"must_not": {
|
||||
"term": {
|
||||
"commit": commit
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#}
|
||||
}
|
||||
page_obj.delete_document(body=delete_query)
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
import logging
|
||||
|
||||
from rest_framework import decorators, permissions, viewsets, status
|
||||
from rest_framework import decorators, permissions, status
|
||||
from rest_framework.renderers import JSONPRenderer, JSONRenderer, BrowsableAPIRenderer
|
||||
from rest_framework.response import Response
|
||||
import requests
|
||||
|
@ -11,8 +11,10 @@ from search.indexes import PageIndex, ProjectIndex, SectionIndex
|
|||
from projects.models import Project
|
||||
from restapi import utils
|
||||
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@decorators.api_view(['GET'])
|
||||
@decorators.permission_classes((permissions.AllowAny,))
|
||||
@decorators.renderer_classes((JSONRenderer, JSONPRenderer, BrowsableAPIRenderer))
|
||||
|
@ -43,7 +45,14 @@ def index_search(request):
|
|||
commit = data.get('commit')
|
||||
project = Project.objects.get(pk=project_pk)
|
||||
version = Version.objects.get(pk=version_pk)
|
||||
utils.index_search_request(version=version, page_list=data['page_list'], commit=commit)
|
||||
|
||||
resp = requests.get('https://api.grokthedocs.com/api/v1/index/1/heatmap/', params={'project': project.slug, 'compare': True})
|
||||
ret_json = resp.json()
|
||||
project_scale = ret_json.get('scaled_project', {}).get(project.slug)
|
||||
page_scale = ret_json.get('scaled_page', {}).get(page['path'], 1)
|
||||
|
||||
utils.index_search_request(version=version, page_list=data['page_list'], commit=commit, project_scale=project_scale, page_scale=page_scale)
|
||||
|
||||
return Response({'indexed': True})
|
||||
|
||||
|
||||
|
@ -59,12 +68,17 @@ def search(request):
|
|||
kwargs = {}
|
||||
body = {
|
||||
"query": {
|
||||
"bool": {
|
||||
"should": [
|
||||
{"match": {"title": {"query": query, "boost": 10}}},
|
||||
{"match": {"headers": {"query": query, "boost": 5}}},
|
||||
{"match": {"content": {"query": query}}},
|
||||
]
|
||||
"function_score": {
|
||||
"field_value_factor": {"field": "weight"},
|
||||
"query": {
|
||||
"bool": {
|
||||
"should": [
|
||||
{"match": {"title": {"query": query, "boost": 10}}},
|
||||
{"match": {"headers": {"query": query, "boost": 5}}},
|
||||
{"match": {"content": {"query": query}}},
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"highlight": {
|
||||
|
@ -102,12 +116,17 @@ def project_search(request):
|
|||
log.debug("(API Project Search) %s" % (query))
|
||||
body = {
|
||||
"query": {
|
||||
"bool": {
|
||||
"should": [
|
||||
{"match": {"name": {"query": query, "boost": 10}}},
|
||||
{"match": {"description": {"query": query}}},
|
||||
]
|
||||
},
|
||||
"function_score": {
|
||||
"field_value_factor": {"field": "weight"},
|
||||
"query": {
|
||||
"bool": {
|
||||
"should": [
|
||||
{"match": {"name": {"query": query, "boost": 10}}},
|
||||
{"match": {"description": {"query": query}}},
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"fields": ["name", "slug", "description", "lang"]
|
||||
}
|
||||
|
@ -115,6 +134,7 @@ def project_search(request):
|
|||
|
||||
return Response({'results': results})
|
||||
|
||||
|
||||
@decorators.api_view(['GET'])
|
||||
@decorators.permission_classes((permissions.AllowAny,))
|
||||
@decorators.renderer_classes((JSONRenderer, JSONPRenderer, BrowsableAPIRenderer))
|
||||
|
@ -165,11 +185,16 @@ def section_search(request):
|
|||
kwargs = {}
|
||||
body = {
|
||||
"query": {
|
||||
"bool": {
|
||||
"should": [
|
||||
{"match": {"title": {"query": query, "boost": 10}}},
|
||||
{"match": {"content": {"query": query}}},
|
||||
]
|
||||
"function_score": {
|
||||
"field_value_factor": {"field": "weight"},
|
||||
"query": {
|
||||
"bool": {
|
||||
"should": [
|
||||
{"match": {"title": {"query": query, "boost": 10}}},
|
||||
{"match": {"content": {"query": query}}},
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"facets": {
|
||||
|
@ -177,7 +202,7 @@ def section_search(request):
|
|||
"terms": {"field": "project"},
|
||||
"facet_filter": {
|
||||
"term": {"version": version_slug},
|
||||
}
|
||||
}
|
||||
},
|
||||
},
|
||||
"highlight": {
|
||||
|
@ -197,11 +222,11 @@ def section_search(request):
|
|||
{"term": {"version": version_slug}},
|
||||
]
|
||||
}
|
||||
body["facets"]['path'] = {
|
||||
body['facets']['path'] = {
|
||||
"terms": {"field": "path"},
|
||||
"facet_filter": {
|
||||
"term": {"project": project_slug},
|
||||
}
|
||||
}
|
||||
},
|
||||
# Add routing to optimize search by hitting the right shard.
|
||||
kwargs['routing'] = project_slug
|
||||
|
@ -212,14 +237,13 @@ def section_search(request):
|
|||
{"term": {"path": path_slug}},
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
if path_slug and not project_slug:
|
||||
# Show facets when we only have a path
|
||||
body["facets"]['path'] = {
|
||||
body['facets']['path'] = {
|
||||
"terms": {"field": "path"}
|
||||
}
|
||||
|
||||
|
||||
results = SectionIndex().search(body, **kwargs)
|
||||
|
||||
return Response({'results': results})
|
||||
|
|
|
@ -107,7 +107,7 @@ class Index(object):
|
|||
|
||||
def put_mapping(self, index=None):
|
||||
index = index or self._index
|
||||
self.es.indices.put_mapping(index, self._type, self.get_mapping())
|
||||
self.es.indices.put_mapping(self._type, self.get_mapping(), index)
|
||||
|
||||
def bulk_index(self, data, index=None, chunk_size=500, parent=None,
|
||||
routing=None):
|
||||
|
@ -136,6 +136,7 @@ class Index(object):
|
|||
doc['_routing'] = routing
|
||||
docs.append(doc)
|
||||
|
||||
# TODO: This doesn't work with the new ES setup.
|
||||
bulk_index(self.es, docs, chunk_size=chunk_size)
|
||||
|
||||
def index_document(self, data, index=None, parent=None, routing=None):
|
||||
|
@ -218,8 +219,6 @@ class ProjectIndex(Index):
|
|||
self._type: {
|
||||
# Disable _all field to reduce index size.
|
||||
'_all': {'enabled': False},
|
||||
# Add a boost field to enhance relevancy of a document.
|
||||
'_boost': {'name': '_boost', 'null_value': 1.0},
|
||||
'properties': {
|
||||
'id': {'type': 'long'},
|
||||
'name': {'type': 'string', 'analyzer': 'default_icu'},
|
||||
|
@ -240,6 +239,8 @@ class ProjectIndex(Index):
|
|||
},
|
||||
},
|
||||
'url': {'type': 'string', 'index': 'not_analyzed'},
|
||||
# Add a weight field to enhance relevancy scoring.
|
||||
'weight': {'type': 'float'},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -254,7 +255,7 @@ class ProjectIndex(Index):
|
|||
doc[attr] = data.get(attr, '')
|
||||
|
||||
# Add project boost.
|
||||
doc['_boost'] = data.get('_boost', 1.0)
|
||||
doc['weight'] = data.get('weight', 1.0)
|
||||
|
||||
return doc
|
||||
|
||||
|
@ -269,8 +270,6 @@ class PageIndex(Index):
|
|||
self._type: {
|
||||
# Disable _all field to reduce index size.
|
||||
'_all': {'enabled': False},
|
||||
# Add a boost field to enhance relevancy of a document.
|
||||
'_boost': {'name': '_boost', 'null_value': 1.0},
|
||||
# Associate a page with a project.
|
||||
'_parent': {'type': self._parent},
|
||||
'properties': {
|
||||
|
@ -285,6 +284,8 @@ class PageIndex(Index):
|
|||
'title': {'type': 'string', 'analyzer': 'default_icu'},
|
||||
'headers': {'type': 'string', 'analyzer': 'default_icu'},
|
||||
'content': {'type': 'string', 'analyzer': 'default_icu'},
|
||||
# Add a weight field to enhance relevancy scoring.
|
||||
'weight': {'type': 'float'},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -299,7 +300,7 @@ class PageIndex(Index):
|
|||
doc[attr] = data.get(attr, '')
|
||||
|
||||
# Add page boost.
|
||||
doc['_boost'] = data.get('_boost', 1.0)
|
||||
doc['weight'] = data.get('weight', 1.0)
|
||||
|
||||
return doc
|
||||
|
||||
|
@ -314,8 +315,6 @@ class SectionIndex(Index):
|
|||
self._type: {
|
||||
# Disable _all field to reduce index size.
|
||||
'_all': {'enabled': False},
|
||||
# Add a boost field to enhance relevancy of a document.
|
||||
'_boost': {'name': '_boost', 'null_value': 1.0},
|
||||
# Associate a section with a page.
|
||||
'_parent': {'type': self._parent},
|
||||
'suggest': {
|
||||
|
@ -338,7 +337,9 @@ class SectionIndex(Index):
|
|||
'properties': {
|
||||
'code': {'type': 'string', 'analyzer': 'default_icu'}
|
||||
}
|
||||
}
|
||||
},
|
||||
# Add a weight field to enhance relevancy scoring.
|
||||
'weight': {'type': 'float'},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -353,6 +354,6 @@ class SectionIndex(Index):
|
|||
doc[attr] = data.get(attr, '')
|
||||
|
||||
# Add page boost.
|
||||
doc['_boost'] = data.get('_boost', 1.0)
|
||||
doc['weight'] = data.get('weight', 1.0)
|
||||
|
||||
return doc
|
||||
|
|
|
@ -43,7 +43,7 @@ github2==0.5.2
|
|||
httplib2==0.7.7
|
||||
|
||||
# Search
|
||||
elasticsearch==0.4.3
|
||||
elasticsearch==1.5.0
|
||||
pyelasticsearch==0.7.1
|
||||
pyquery==1.2.2
|
||||
|
||||
|
|
Loading…
Reference in New Issue