diff --git a/readthedocs/doc_builder/backends/sphinx_search.py b/readthedocs/doc_builder/backends/sphinx_search.py index 69eccb7aa..70474b121 100644 --- a/readthedocs/doc_builder/backends/sphinx_search.py +++ b/readthedocs/doc_builder/backends/sphinx_search.py @@ -27,9 +27,12 @@ class Builder(HtmlBuilder): return build_results def upload(self, **kwargs): + page_list = process_all_json_files(self.version) data = { - 'page_list': process_all_json_files(self.version), + 'page_list': page_list, 'version_pk': self.version.pk, 'project_pk': self.version.project.pk } + log_msg = ' '.join([page['path'] for page in page_list]) + log.info("(Search Index) Sending Data: %s [%s]" % (self.version.project.slug, log_msg)) apiv2.index_search.post({'data': data}) diff --git a/readthedocs/projects/search_indexes.py b/readthedocs/projects/search_indexes.py index c75587fcf..a118f541d 100644 --- a/readthedocs/projects/search_indexes.py +++ b/readthedocs/projects/search_indexes.py @@ -72,9 +72,9 @@ class ImportedFileIndex(celery_indexes.CelerySearchIndex, indexes.Indexable): with codecs.open(file_path, encoding='utf-8', mode='r') as f: content = f.read() except IOError as e: - log.info('Unable to index file: %s, error :%s' % (file_path, e)) + log.info('(Search Index) Unable to index file: %s, error :%s' % (file_path, e)) return - log.debug('Indexing %s:%s' % (obj.project, obj.path)) + log.debug('(Search Index) Indexing %s:%s' % (obj.project, obj.path)) DOCUMENT_PYQUERY_PATH = getattr(settings, 'DOCUMENT_PYQUERY_PATH', 'div.document') try: @@ -84,10 +84,10 @@ class ImportedFileIndex(celery_indexes.CelerySearchIndex, indexes.Indexable): #Pyquery returns ValueError if div.document doesn't exist. return if not to_index: - log.info('Unable to index file: %s:%s, empty file' % (obj.project, + log.info('(Search Index) Unable to index file: %s:%s, empty file' % (obj.project, file_path)) else: - log.debug('%s:%s length: %s' % (obj.project, file_path, + log.debug('(Search Index) %s:%s length: %s' % (obj.project, file_path, len(to_index))) return to_index diff --git a/readthedocs/search/parse_json.py b/readthedocs/search/parse_json.py index 116f57ed2..2fd7e5221 100644 --- a/readthedocs/search/parse_json.py +++ b/readthedocs/search/parse_json.py @@ -12,6 +12,9 @@ log = logging.getLogger(__name__) def process_all_json_files(version): + """ + Return a list of pages to index + """ full_path = version.project.full_json_path(version.slug) html_files = [] for root, dirs, files in os.walk(full_path):