From 1bec15f97fdbd529ff8b3f109311c9267c138dcd Mon Sep 17 00:00:00 2001 From: Rachel Kim Date: Tue, 5 May 2020 22:32:28 -0400 Subject: [PATCH 01/11] Added new search functionality --- api/app.py | 11 +-- api/elastic.py | 46 +++++++++++- api/prints/search.py | 126 +++++++++++++++++++++++++++++++++ api/prints/swagger/swag.py | 141 +++++++++++++++++++++++++++++++++++++ api/response.py | 3 +- builder.py | 7 ++ config.yaml-dist | 26 +++---- esIndexer.py | 3 +- main.py | 20 +++--- model/elastic.py | 11 +-- renBuilder.py | 3 +- 11 files changed, 359 insertions(+), 38 deletions(-) diff --git a/api/app.py b/api/app.py index 2e6f002..83b6d18 100644 --- a/api/app.py +++ b/api/app.py @@ -2,13 +2,13 @@ import os import yaml from flask import Flask, jsonify from flasgger import Swagger -from api.prints.swagger.swag import SwaggerDoc -from api.db import db -from api.elastic import elastic -from api.prints import base, search, uuid +from prints.swagger.swag import SwaggerDoc +from db import db +from elastic import elastic +from prints import base, search, uuid def loadConfig(): - with open('config.yaml', 'r') as yamlFile: + with open('config.yaml-dist', 'r') as yamlFile: config = yaml.safe_load(yamlFile) for section in config: sectionDict = config[section] @@ -37,6 +37,7 @@ application.config['ELASTICSEARCH_INDEX_URI'] = '{}:{}'.format( os.environ['ES_HOST'], os.environ['ES_PORT'] ) +# print(application.config['ELASTICSEARCH_INDEX_URI']) application.config['SWAGGER'] = {'title': 'CCE Search'} db.init_app(application) elastic.init_app(application) diff --git a/api/elastic.py b/api/elastic.py index 2a21ee8..d062786 100644 --- a/api/elastic.py +++ b/api/elastic.py @@ -3,13 +3,18 @@ from elasticsearch_dsl import Search, Q class Elastic(): def __init__(self): - self.client = None + self.client = Elasticsearch() def init_app(self, app): - self.client = Elasticsearch(app.config['ELASTICSEARCH_INDEX_URI']) + try: + self.client = Elasticsearch(hosts=app.config['ELASTICSEARCH_INDEX_URI']) + except ConnectionError as err: + print('Failed to connect to ElasticSearch instance') + raise err def create_search(self, index): - return Search(using=self.client, index=index) + s = Search(using=self.client, index=index) + return s def query_regnum(self, regnum, page=0, perPage=10): startPos, endPos = Elastic.getFromSize(page, perPage) @@ -30,6 +35,41 @@ class Elastic(): search = self.create_search('cce,ccr') renewalSearch = search.query('query_string', query=queryText)[startPos:endPos] return renewalSearch.execute() + + #New Query Types + def query_title(self, queryText,page=0, perPage=10): + startPos, endPos = Elastic.getFromSize(page, perPage) + print(startPos, endPos) + search = self.create_search('cce,ccr') + titleSearch = search.query('match', title=queryText)[startPos:endPos] + print(titleSearch.to_dict()) + return titleSearch.execute() + + def query_author(self, queryText,page=0, perPage=10): + startPos, endPos = Elastic.getFromSize(page, perPage) + print(startPos, endPos) + search = self.create_search('cce,ccr') + titleSearch = search.query('match', authors=queryText)[startPos:endPos] + print(titleSearch.to_dict()) + return titleSearch.execute() + + + # If query is given for publisher field, don't check renewals? + def query_multifields(self, params, page=0, perPage=10): + startPos, endPos = Elastic.getFromSize(page, perPage) + print(startPos, endPos) + if "publishers" in params: + search = self.create_search('cce') + search = search.query('match', publishers=params["publishers"]) + else: + search = self.create_search('cce,ccr') + if "title" in params: + search = search.query('match', title=params['title']) + if "authors" in params: + search = search.query('match', authors=params['authors']) + titleSearch = search[startPos:endPos] + return titleSearch.execute() + @staticmethod def getFromSize(page, perPage): diff --git a/api/prints/search.py b/api/prints/search.py index e347279..2574f5d 100644 --- a/api/prints/search.py +++ b/api/prints/search.py @@ -9,6 +9,132 @@ from api.response import MultiResponse search = Blueprint('search', __name__, url_prefix='/search') +@search.route('/multi', methods=['GET']) +def multiQuery(): + title = request.args.get('title', '') + authors = request.args.get('authors', '') + publishers = request.args.get('publishers','') + sourceReturn = request.args.get('source', False) + page, perPage = MultiResponse.parsePaging(request.args) + queries = {} + if title!="*" and title!="": + queries["title"]=title + if authors!="*" and authors!="": + queries["authors"]=authors + if publishers!="*" and publishers!="": + queries["publishers"]=publishers + print(queries) + matchingDocs = elastic.query_multifields(queries, page=page, perPage=perPage) + textResponse = MultiResponse( + 'text', + matchingDocs.hits.total, + request.base_url, + queries, + page, + perPage + ) + qManager = QueryManager(db.session) + for entry in matchingDocs: + if entry.meta.index == 'cce': + dbEntry = qManager.registrationQuery(entry.uuid) + textResponse.addResult(MultiResponse.parseEntry( + dbEntry, + xml=sourceReturn + )) + else: + try: + dbRenewal = qManager.renewalQuery(entry.uuid) + textResponse.addResult(MultiResponse.parseRenewal( + dbRenewal, + source=sourceReturn + )) + except NoResultFound: + dbRenewal = qManager.orphanRenewalQuery(entry.uuid) + textResponse.addResult(MultiResponse.parseRenewal( + dbRenewal, + source=sourceReturn + )) + + textResponse.createDataBlock() + return jsonify(textResponse.createResponse(200)) + +@search.route('/author', methods=['GET']) +def authorQuery(): + queryText = request.args.get('query', '') + sourceReturn = request.args.get('source', False) + page, perPage = MultiResponse.parsePaging(request.args) + matchingDocs = elastic.query_author(queryText, page=page, perPage=perPage) + textResponse = MultiResponse( + 'text', + matchingDocs.hits.total, + request.base_url, + queryText, + page, + perPage + ) + qManager = QueryManager(db.session) + for entry in matchingDocs: + if entry.meta.index == 'cce': + dbEntry = qManager.registrationQuery(entry.uuid) + textResponse.addResult(MultiResponse.parseEntry( + dbEntry, + xml=sourceReturn + )) + else: + try: + dbRenewal = qManager.renewalQuery(entry.uuid) + textResponse.addResult(MultiResponse.parseRenewal( + dbRenewal, + source=sourceReturn + )) + except NoResultFound: + dbRenewal = qManager.orphanRenewalQuery(entry.uuid) + textResponse.addResult(MultiResponse.parseRenewal( + dbRenewal, + source=sourceReturn + )) + + textResponse.createDataBlock() + return jsonify(textResponse.createResponse(200)) + +@search.route('/title', methods=['GET']) +def titleQuery(): + queryText = request.args.get('query', '') + sourceReturn = request.args.get('source', False) + page, perPage = MultiResponse.parsePaging(request.args) + matchingDocs = elastic.query_title(queryText, page=page, perPage=perPage) + textResponse = MultiResponse( + 'text', + matchingDocs.hits.total, + request.base_url, + queryText, + page, + perPage + ) + qManager = QueryManager(db.session) + for entry in matchingDocs: + if entry.meta.index == 'cce': + dbEntry = qManager.registrationQuery(entry.uuid) + textResponse.addResult(MultiResponse.parseEntry( + dbEntry, + xml=sourceReturn + )) + else: + try: + dbRenewal = qManager.renewalQuery(entry.uuid) + textResponse.addResult(MultiResponse.parseRenewal( + dbRenewal, + source=sourceReturn + )) + except NoResultFound: + dbRenewal = qManager.orphanRenewalQuery(entry.uuid) + textResponse.addResult(MultiResponse.parseRenewal( + dbRenewal, + source=sourceReturn + )) + + textResponse.createDataBlock() + return jsonify(textResponse.createResponse(200)) @search.route('/fulltext', methods=['GET']) def fullTextQuery(): diff --git a/api/prints/swagger/swag.py b/api/prints/swagger/swag.py index d044e2c..3b2c919 100644 --- a/api/prints/swagger/swag.py +++ b/api/prints/swagger/swag.py @@ -23,6 +23,147 @@ class SwaggerDoc(): "https" ], "paths": { + "/search/multi": { + "get": { + "tags": ["Search"], + "summary": "Returns a set of registration and renewal objects", + "description": "Accepts a query string to search across both registration and renewal records in the author field", + "parameters": [ + { + "name": "title", + "in": "query", + "type": "string", + "required": False, + "default": "*" + },{ + "name": "authors", + "in": "query", + "type": "string", + "required": False, + "default": "*" + },{ + "name": "publishers", + "in": "query", + "type": "string", + "required": False, + "default": "*" + },{ + "name": "source", + "in": "query", + "type": "boolean", + "required": False, + "default": False, + "description": "Return source XML/CSV data" + },{ + "name": "page", + "in": "query", + "type": "number", + "required": False, + "default": 0 + },{ + "name": "per_page", + "in": "query", + "type": "number", + "required": False, + "default": 10 + } + ], + "responses": { + 200: { + "description": "A list of copyright registrations and renewals", + "schema": { + "$ref": "#/definitions/MultiResponse" + } + } + } + } + }, + "/search/author": { + "get": { + "tags": ["Search"], + "summary": "Returns a set of registration and renewal objects", + "description": "Accepts a query string to search across both registration and renewal records in the author field", + "parameters": [ + { + "name": "query", + "in": "query", + "type": "string", + "required": True, + "default": "*" + },{ + "name": "source", + "in": "query", + "type": "boolean", + "required": False, + "default": False, + "description": "Return source XML/CSV data" + },{ + "name": "page", + "in": "query", + "type": "number", + "required": False, + "default": 0 + },{ + "name": "per_page", + "in": "query", + "type": "number", + "required": False, + "default": 10 + } + ], + "responses": { + 200: { + "description": "A list of copyright registrations and renewals", + "schema": { + "$ref": "#/definitions/MultiResponse" + } + } + } + } + }, + "/search/title": { + "get": { + "tags": ["Search"], + "summary": "Returns a set of registration and renewal objects", + "description": "Accepts a query string to search across both registration and renewal records in the title fiel", + "parameters": [ + { + "name": "query", + "in": "query", + "type": "string", + "required": True, + "default": "*" + },{ + "name": "source", + "in": "query", + "type": "boolean", + "required": False, + "default": False, + "description": "Return source XML/CSV data" + },{ + "name": "page", + "in": "query", + "type": "number", + "required": False, + "default": 0 + },{ + "name": "per_page", + "in": "query", + "type": "number", + "required": False, + "default": 10 + } + ], + "responses": { + 200: { + "description": "A list of copyright registrations and renewals", + "schema": { + "$ref": "#/definitions/MultiResponse" + } + } + } + } + }, "/search/fulltext": { "get": { "tags": ["Search"], diff --git a/api/response.py b/api/response.py index dfe2b34..b52e8ca 100644 --- a/api/response.py +++ b/api/response.py @@ -1,3 +1,4 @@ +import math class Response(): def __init__(self, queryType, endpoint): @@ -151,7 +152,7 @@ class MultiResponse(Response): else: paging['next'] = None - lastPage = int((self.total - self.perPage) / self.perPage) + lastPage = math.ceil(((self.total - self.perPage) / self.perPage)) if ( self.page * self.perPage < self.total and self.total > self.perPage diff --git a/builder.py b/builder.py index 6b4cc23..e70fa21 100644 --- a/builder.py +++ b/builder.py @@ -6,6 +6,13 @@ from lxml import etree import os import re import traceback +import sys + +import io + +sys.stdout = io.TextIOWrapper(sys.stdout.detach(), encoding = 'utf-8') + +sys.stderr = io.TextIOWrapper(sys.stderr.detach(), encoding = 'utf-8') from model.cce import CCE from model.errorCCE import ErrorCCE diff --git a/config.yaml-dist b/config.yaml-dist index dc73a65..e3b7331 100644 --- a/config.yaml-dist +++ b/config.yaml-dist @@ -1,18 +1,18 @@ DATABASE: - DB_USER: - DB_PSWD: - DB_HOST: - DB_PORT: - DB_NAME: + DB_USER: postgres + DB_PSWD: "9903" + DB_HOST: localhost + DB_PORT: "5432" + DB_NAME: ccesearch GITHUB: - ACCESS_TOKEN: - CCE_REPO: - CCR_REPO: + ACCESS_TOKEN: 218124e9bf09a9b3f379cb5ee1ab0a8756ee3b3c + CCE_REPO: nypl/catalog_of_copyright_entries_project + CCR_REPO: nypl/cce-renewals ELASTICSEARCH: - ES_CCE_INDEX: - ES_CCR_INDEX: - ES_HOST: - ES_PORT: - ES_TIMEOUT: \ No newline at end of file + ES_CCE_INDEX: cce + ES_CCR_INDEX: ccr + ES_HOST: localhost + ES_PORT: '9200' + ES_TIMEOUT: "10000" \ No newline at end of file diff --git a/esIndexer.py b/esIndexer.py index 5631ca4..4dfd53b 100644 --- a/esIndexer.py +++ b/esIndexer.py @@ -58,7 +58,7 @@ class ESIndexer(): if self.client.indices.exists(index=self.ccr_index) is False: Renewal.init() - def indexRecords(self, recType='cce'): + def indexRecords(self, recType='ccr'): """Process the current batch of updating records. This utilizes the elasticsearch-py bulk helper to import records in chunks of the provided size. If a record in the batch errors that is reported and @@ -148,6 +148,7 @@ class ESRen(): self.renewal.rennum = self.dbRen.renewal_num self.renewal.rendate = self.dbRen.renewal_date self.renewal.title = self.dbRen.title + self.renewal.authors = self.dbRen.author self.renewal.claimants = [ Claimant(name=c.name, claim_type=c.claimant_type) for c in self.dbRen.claimants diff --git a/main.py b/main.py index 733c4b8..8451fdf 100644 --- a/main.py +++ b/main.py @@ -14,12 +14,10 @@ def main(secondsAgo=None, year=None, exclude=None, reinit=False): startTime = datetime.now() if secondsAgo is not None: loadFromTime = startTime - timedelta(seconds=secondsAgo) - - if exclude != 'cce': - loadCCE(manager, loadFromTime, year) - if exclude != 'ccr': - loadCCR(manager, loadFromTime, year) - + # if exclude != 'cce': + # loadCCE(manager, loadFromTime, year) + # if exclude != 'ccr': + # loadCCR(manager, loadFromTime, year) indexUpdates(manager, loadFromTime) manager.closeConnection() @@ -39,7 +37,7 @@ def loadCCR(manager, loadFromTime, selectedYear): def indexUpdates(manager, loadFromTime): esIndexer = ESIndexer(manager, None) - esIndexer.indexRecords(recType='cce') + # esIndexer.indexRecords(recType='cce') esIndexer.indexRecords(recType='ccr') @@ -62,7 +60,7 @@ def parseArgs(): def loadConfig(): - with open('config.yaml', 'r') as yamlFile: + with open('config.yaml-dist', 'r') as yamlFile: config = yaml.safe_load(yamlFile) for section in config: sectionDict = config[section] @@ -75,13 +73,17 @@ if __name__ == '__main__': try: loadConfig() except FileNotFoundError: + print("Unable to set environment variables") pass from sessionManager import SessionManager from builder import CCEReader, CCEFile from renBuilder import CCRReader, CCRFile from esIndexer import ESIndexer - + print(args.time) + print(args.year) + print(args.exclude) + print(args.REINITIALIZE) main( secondsAgo=args.time, year=args.year, diff --git a/model/elastic.py b/model/elastic.py index 559f9f0..522bc7c 100644 --- a/model/elastic.py +++ b/model/elastic.py @@ -1,5 +1,6 @@ import os import yaml +import pprint from elasticsearch_dsl import ( Index, Document, @@ -16,14 +17,14 @@ class BaseDoc(Document): date_modified = Date() def save(self, **kwargs): - return super(BaseDoc, self).save(**kwargs) + return super(BaseDoc, self).save(** kwargs) class BaseInner(InnerDoc): date_created = Date() date_modified = Date() def save(self, **kwargs): - return super(BaseInner, self).save(**kwargs) + return super(BaseInner, self).save(** kwargs) class Registration(BaseInner): @@ -41,9 +42,10 @@ class Renewal(BaseDoc): rennum = Keyword() rendate = Date() title = Text(fields={'keyword': Keyword()}) - - claimants = Nested(Claimant) + authors = Text() + claimants = Nested(Claimant) + # pprint.pprint(dict(os.environ), width = 1) class Index: name = os.environ['ES_CCR_INDEX'] @@ -54,7 +56,6 @@ class CCE(BaseDoc): authors = Text(multi=True) publishers = Text(multi=True) lccns = Keyword(multi=True) - registrations = Nested(Registration) class Index: diff --git a/renBuilder.py b/renBuilder.py index ef76553..88a1c2d 100644 --- a/renBuilder.py +++ b/renBuilder.py @@ -14,9 +14,10 @@ from model.registration import Registration class CCRReader(): def __init__(self, manager): self.git = Github(os.environ['ACCESS_TOKEN']) + print(self.git) self.repo = self.git.get_repo(os.environ['CCR_REPO']) + print(self.repo) self.ccrYears = {} - self.dbManager = manager def loadYears(self, selectedYear, loadFromTime): From cefda1b5f92f89edb8c525f591fc057c1f0ba0c8 Mon Sep 17 00:00:00 2001 From: Rachel Kim Date: Wed, 6 May 2020 00:06:29 -0400 Subject: [PATCH 02/11] Fixed bug with cceid search --- api/prints/search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/prints/search.py b/api/prints/search.py index 2574f5d..0ea617a 100644 --- a/api/prints/search.py +++ b/api/prints/search.py @@ -226,7 +226,7 @@ def renQuery(rennum): return jsonify(renResponse.createResponse(200)) -def parseRetRenewal(dbRenewal): +def parseRetRenewal(dbRenewal, source): if len(dbRenewal.registrations) == 0: return [MultiResponse.parseRenewal(dbRenewal)] From 4c11e5d6dc63017de7aec1dd4c7bba481d86ec76 Mon Sep 17 00:00:00 2001 From: Rachel Kim <32397590+RSK9903@users.noreply.github.com> Date: Thu, 7 May 2020 22:00:42 -0400 Subject: [PATCH 03/11] Made edit to . notation per Eric's comment --- api/app.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/api/app.py b/api/app.py index 83b6d18..885e7a3 100644 --- a/api/app.py +++ b/api/app.py @@ -2,10 +2,10 @@ import os import yaml from flask import Flask, jsonify from flasgger import Swagger -from prints.swagger.swag import SwaggerDoc -from db import db -from elastic import elastic -from prints import base, search, uuid +from .prints.swagger.swag import SwaggerDoc +from .db import db +from .elastic import elastic +from .prints import base, search, uuid def loadConfig(): with open('config.yaml-dist', 'r') as yamlFile: From cc4c4e1a240de7f2694e3719cd5fb3b4a80bcddb Mon Sep 17 00:00:00 2001 From: Rachel Kim <32397590+RSK9903@users.noreply.github.com> Date: Thu, 7 May 2020 22:11:06 -0400 Subject: [PATCH 04/11] Remove print statements Remove print statements per Eric's comment since they are not needed --- api/elastic.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/api/elastic.py b/api/elastic.py index d062786..c0888af 100644 --- a/api/elastic.py +++ b/api/elastic.py @@ -31,7 +31,6 @@ class Elastic(): def query_fulltext(self, queryText, page=0, perPage=10): startPos, endPos = Elastic.getFromSize(page, perPage) - print(startPos, endPos) search = self.create_search('cce,ccr') renewalSearch = search.query('query_string', query=queryText)[startPos:endPos] return renewalSearch.execute() @@ -39,25 +38,20 @@ class Elastic(): #New Query Types def query_title(self, queryText,page=0, perPage=10): startPos, endPos = Elastic.getFromSize(page, perPage) - print(startPos, endPos) search = self.create_search('cce,ccr') titleSearch = search.query('match', title=queryText)[startPos:endPos] - print(titleSearch.to_dict()) return titleSearch.execute() def query_author(self, queryText,page=0, perPage=10): startPos, endPos = Elastic.getFromSize(page, perPage) - print(startPos, endPos) search = self.create_search('cce,ccr') titleSearch = search.query('match', authors=queryText)[startPos:endPos] - print(titleSearch.to_dict()) return titleSearch.execute() # If query is given for publisher field, don't check renewals? def query_multifields(self, params, page=0, perPage=10): startPos, endPos = Elastic.getFromSize(page, perPage) - print(startPos, endPos) if "publishers" in params: search = self.create_search('cce') search = search.query('match', publishers=params["publishers"]) @@ -77,4 +71,4 @@ class Elastic(): endPos = startPos + perPage return startPos, endPos -elastic = Elastic() \ No newline at end of file +elastic = Elastic() From cecb4ae76405892af1040f997f88f0f698c64007 Mon Sep 17 00:00:00 2001 From: Rachel Kim <32397590+RSK9903@users.noreply.github.com> Date: Thu, 7 May 2020 22:13:45 -0400 Subject: [PATCH 05/11] Removed information from config --- config.yaml-dist | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/config.yaml-dist b/config.yaml-dist index e3b7331..efabe59 100644 --- a/config.yaml-dist +++ b/config.yaml-dist @@ -1,18 +1,18 @@ DATABASE: - DB_USER: postgres - DB_PSWD: "9903" - DB_HOST: localhost - DB_PORT: "5432" - DB_NAME: ccesearch + DB_USER: + DB_PSWD: + DB_HOST: + DB_PORT: + DB_NAME: GITHUB: - ACCESS_TOKEN: 218124e9bf09a9b3f379cb5ee1ab0a8756ee3b3c - CCE_REPO: nypl/catalog_of_copyright_entries_project - CCR_REPO: nypl/cce-renewals + ACCESS_TOKEN: + CCE_REPO: + CCR_REPO: ELASTICSEARCH: - ES_CCE_INDEX: cce - ES_CCR_INDEX: ccr - ES_HOST: localhost - ES_PORT: '9200' - ES_TIMEOUT: "10000" \ No newline at end of file + ES_CCE_INDEX: + ES_CCR_INDEX: + ES_HOST: + ES_PORT: + ES_TIMEOUT: From c0f7fb32d1ed85913209a7c4c3f4c44b48f1a397 Mon Sep 17 00:00:00 2001 From: Rachel Kim <32397590+RSK9903@users.noreply.github.com> Date: Thu, 7 May 2020 22:14:37 -0400 Subject: [PATCH 06/11] Update esIndexer.py --- esIndexer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esIndexer.py b/esIndexer.py index 4dfd53b..155410f 100644 --- a/esIndexer.py +++ b/esIndexer.py @@ -58,7 +58,7 @@ class ESIndexer(): if self.client.indices.exists(index=self.ccr_index) is False: Renewal.init() - def indexRecords(self, recType='ccr'): + def indexRecords(self, recType='cce'): """Process the current batch of updating records. This utilizes the elasticsearch-py bulk helper to import records in chunks of the provided size. If a record in the batch errors that is reported and From 6c313b0b7544ea6c6ba237e7283a9eed059b1e89 Mon Sep 17 00:00:00 2001 From: Rachel Kim <32397590+RSK9903@users.noreply.github.com> Date: Thu, 7 May 2020 22:15:35 -0400 Subject: [PATCH 07/11] Uncommented code --- main.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/main.py b/main.py index 8451fdf..922b049 100644 --- a/main.py +++ b/main.py @@ -14,10 +14,10 @@ def main(secondsAgo=None, year=None, exclude=None, reinit=False): startTime = datetime.now() if secondsAgo is not None: loadFromTime = startTime - timedelta(seconds=secondsAgo) - # if exclude != 'cce': - # loadCCE(manager, loadFromTime, year) - # if exclude != 'ccr': - # loadCCR(manager, loadFromTime, year) + if exclude != 'cce': + loadCCE(manager, loadFromTime, year) + if exclude != 'ccr': + loadCCR(manager, loadFromTime, year) indexUpdates(manager, loadFromTime) manager.closeConnection() @@ -89,4 +89,4 @@ if __name__ == '__main__': year=args.year, exclude=args.exclude, reinit=args.REINITIALIZE - ) \ No newline at end of file + ) From 3b32ae8291b11484e6b6aa30587612c26d0a0b81 Mon Sep 17 00:00:00 2001 From: Rachel Kim <32397590+RSK9903@users.noreply.github.com> Date: Thu, 7 May 2020 22:16:23 -0400 Subject: [PATCH 08/11] Update main.py --- main.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/main.py b/main.py index 922b049..42be9fa 100644 --- a/main.py +++ b/main.py @@ -37,7 +37,7 @@ def loadCCR(manager, loadFromTime, selectedYear): def indexUpdates(manager, loadFromTime): esIndexer = ESIndexer(manager, None) - # esIndexer.indexRecords(recType='cce') + esIndexer.indexRecords(recType='cce') esIndexer.indexRecords(recType='ccr') @@ -80,10 +80,7 @@ if __name__ == '__main__': from builder import CCEReader, CCEFile from renBuilder import CCRReader, CCRFile from esIndexer import ESIndexer - print(args.time) - print(args.year) - print(args.exclude) - print(args.REINITIALIZE) + main( secondsAgo=args.time, year=args.year, From f796a7cd95970e3e8915e260d03e1e2eb63de0c3 Mon Sep 17 00:00:00 2001 From: Rachel Kim <32397590+RSK9903@users.noreply.github.com> Date: Thu, 7 May 2020 22:18:38 -0400 Subject: [PATCH 09/11] Removed unnecessary lines Removed pprint and changed kwargs arguments --- model/elastic.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/model/elastic.py b/model/elastic.py index 522bc7c..8e2e632 100644 --- a/model/elastic.py +++ b/model/elastic.py @@ -1,6 +1,5 @@ import os import yaml -import pprint from elasticsearch_dsl import ( Index, Document, @@ -17,14 +16,14 @@ class BaseDoc(Document): date_modified = Date() def save(self, **kwargs): - return super(BaseDoc, self).save(** kwargs) + return super(BaseDoc, self).save(**kwargs) class BaseInner(InnerDoc): date_created = Date() date_modified = Date() def save(self, **kwargs): - return super(BaseInner, self).save(** kwargs) + return super(BaseInner, self).save(**kwargs) class Registration(BaseInner): @@ -45,7 +44,6 @@ class Renewal(BaseDoc): authors = Text() claimants = Nested(Claimant) - # pprint.pprint(dict(os.environ), width = 1) class Index: name = os.environ['ES_CCR_INDEX'] From 30908e8cb7a75cfea3addfe5923c2119a3d95324 Mon Sep 17 00:00:00 2001 From: Rachel Kim <32397590+RSK9903@users.noreply.github.com> Date: Thu, 7 May 2020 22:19:21 -0400 Subject: [PATCH 10/11] Removed print statements --- renBuilder.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/renBuilder.py b/renBuilder.py index 88a1c2d..266de56 100644 --- a/renBuilder.py +++ b/renBuilder.py @@ -14,9 +14,7 @@ from model.registration import Registration class CCRReader(): def __init__(self, manager): self.git = Github(os.environ['ACCESS_TOKEN']) - print(self.git) self.repo = self.git.get_repo(os.environ['CCR_REPO']) - print(self.repo) self.ccrYears = {} self.dbManager = manager @@ -191,4 +189,4 @@ class CCRFile(): except KeyError: pass print('No matching field found!') - raise KeyError \ No newline at end of file + raise KeyError From 6efe4397f15f67914cebf125676e33f0aa032c05 Mon Sep 17 00:00:00 2001 From: Rachel Kim <32397590+RSK9903@users.noreply.github.com> Date: Thu, 7 May 2020 22:22:30 -0400 Subject: [PATCH 11/11] Removed reference to source Also removed unnecessary print statement --- api/prints/search.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/api/prints/search.py b/api/prints/search.py index 0ea617a..9d71c28 100644 --- a/api/prints/search.py +++ b/api/prints/search.py @@ -23,7 +23,6 @@ def multiQuery(): queries["authors"]=authors if publishers!="*" and publishers!="": queries["publishers"]=publishers - print(queries) matchingDocs = elastic.query_multifields(queries, page=page, perPage=perPage) textResponse = MultiResponse( 'text', @@ -218,15 +217,14 @@ def renQuery(rennum): for entry in matchingDocs: dbRenewal = qManager.renewalQuery(entry.uuid) renResponse.extendResults(parseRetRenewal( - dbRenewal, - source=sourceReturn + dbRenewal )) renResponse.createDataBlock() return jsonify(renResponse.createResponse(200)) -def parseRetRenewal(dbRenewal, source): +def parseRetRenewal(dbRenewal): if len(dbRenewal.registrations) == 0: return [MultiResponse.parseRenewal(dbRenewal)]