Added new search functionality
parent
46dd2d8d75
commit
1bec15f97f
11
api/app.py
11
api/app.py
|
@ -2,13 +2,13 @@ import os
|
||||||
import yaml
|
import yaml
|
||||||
from flask import Flask, jsonify
|
from flask import Flask, jsonify
|
||||||
from flasgger import Swagger
|
from flasgger import Swagger
|
||||||
from api.prints.swagger.swag import SwaggerDoc
|
from prints.swagger.swag import SwaggerDoc
|
||||||
from api.db import db
|
from db import db
|
||||||
from api.elastic import elastic
|
from elastic import elastic
|
||||||
from api.prints import base, search, uuid
|
from prints import base, search, uuid
|
||||||
|
|
||||||
def loadConfig():
|
def loadConfig():
|
||||||
with open('config.yaml', 'r') as yamlFile:
|
with open('config.yaml-dist', 'r') as yamlFile:
|
||||||
config = yaml.safe_load(yamlFile)
|
config = yaml.safe_load(yamlFile)
|
||||||
for section in config:
|
for section in config:
|
||||||
sectionDict = config[section]
|
sectionDict = config[section]
|
||||||
|
@ -37,6 +37,7 @@ application.config['ELASTICSEARCH_INDEX_URI'] = '{}:{}'.format(
|
||||||
os.environ['ES_HOST'],
|
os.environ['ES_HOST'],
|
||||||
os.environ['ES_PORT']
|
os.environ['ES_PORT']
|
||||||
)
|
)
|
||||||
|
# print(application.config['ELASTICSEARCH_INDEX_URI'])
|
||||||
application.config['SWAGGER'] = {'title': 'CCE Search'}
|
application.config['SWAGGER'] = {'title': 'CCE Search'}
|
||||||
db.init_app(application)
|
db.init_app(application)
|
||||||
elastic.init_app(application)
|
elastic.init_app(application)
|
||||||
|
|
|
@ -3,13 +3,18 @@ from elasticsearch_dsl import Search, Q
|
||||||
|
|
||||||
class Elastic():
|
class Elastic():
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.client = None
|
self.client = Elasticsearch()
|
||||||
|
|
||||||
def init_app(self, app):
|
def init_app(self, app):
|
||||||
self.client = Elasticsearch(app.config['ELASTICSEARCH_INDEX_URI'])
|
try:
|
||||||
|
self.client = Elasticsearch(hosts=app.config['ELASTICSEARCH_INDEX_URI'])
|
||||||
|
except ConnectionError as err:
|
||||||
|
print('Failed to connect to ElasticSearch instance')
|
||||||
|
raise err
|
||||||
|
|
||||||
def create_search(self, index):
|
def create_search(self, index):
|
||||||
return Search(using=self.client, index=index)
|
s = Search(using=self.client, index=index)
|
||||||
|
return s
|
||||||
|
|
||||||
def query_regnum(self, regnum, page=0, perPage=10):
|
def query_regnum(self, regnum, page=0, perPage=10):
|
||||||
startPos, endPos = Elastic.getFromSize(page, perPage)
|
startPos, endPos = Elastic.getFromSize(page, perPage)
|
||||||
|
@ -30,6 +35,41 @@ class Elastic():
|
||||||
search = self.create_search('cce,ccr')
|
search = self.create_search('cce,ccr')
|
||||||
renewalSearch = search.query('query_string', query=queryText)[startPos:endPos]
|
renewalSearch = search.query('query_string', query=queryText)[startPos:endPos]
|
||||||
return renewalSearch.execute()
|
return renewalSearch.execute()
|
||||||
|
|
||||||
|
#New Query Types
|
||||||
|
def query_title(self, queryText, page=0, perPage=10):
    """Run a paged ``match`` query against the ``title`` field.

    The search targets the ``cce,ccr`` index expression, so both
    registration and renewal documents can be returned.

    Args:
        queryText: Text to match against the ``title`` field.
        page: Page number, passed through to ``Elastic.getFromSize``.
        perPage: Page size, passed through to ``Elastic.getFromSize``.

    Returns:
        The result of ``execute()`` on the sliced search object.
    """
    # The slice bounds come from the same helper query_regnum uses.
    startPos, endPos = Elastic.getFromSize(page, perPage)
    search = self.create_search('cce,ccr')
    titleSearch = search.query('match', title=queryText)[startPos:endPos]
    # Debug prints of the bounds and of the query body were removed here:
    # they wrote to stdout on every request.
    return titleSearch.execute()
|
||||||
|
|
||||||
|
def query_author(self, queryText, page=0, perPage=10):
    """Run a paged ``match`` query against the ``authors`` field.

    The search targets the ``cce,ccr`` index expression, so both
    registration and renewal documents can be returned.

    Args:
        queryText: Text to match against the ``authors`` field.
        page: Page number, passed through to ``Elastic.getFromSize``.
        perPage: Page size, passed through to ``Elastic.getFromSize``.

    Returns:
        The result of ``execute()`` on the sliced search object.
    """
    startPos, endPos = Elastic.getFromSize(page, perPage)
    search = self.create_search('cce,ccr')
    # Local renamed from the copy-pasted ``titleSearch``; the debug prints
    # that wrote to stdout on every request were removed.
    authorSearch = search.query('match', authors=queryText)[startPos:endPos]
    return authorSearch.execute()
|
||||||
|
|
||||||
|
|
||||||
|
# If query is given for publisher field, don't check renewals?
|
||||||
|
def query_multifields(self, params, page=0, perPage=10):
    """Run a paged search combining per-field ``match`` queries.

    Args:
        params: Mapping of field name to query text. Only the keys
            ``'publishers'``, ``'title'`` and ``'authors'`` are read;
            any missing key is simply not queried.
        page: Page number, passed through to ``Elastic.getFromSize``.
        perPage: Page size, passed through to ``Elastic.getFromSize``.

    Returns:
        The result of ``execute()`` on the sliced search object.
    """
    startPos, endPos = Elastic.getFromSize(page, perPage)

    if 'publishers' in params:
        # A publisher filter restricts the search to the 'cce' index only.
        search = self.create_search('cce')
        search = search.query('match', publishers=params['publishers'])
    else:
        search = self.create_search('cce,ccr')

    # NOTE(review): the title/authors filters are applied on both branches
    # above; confirm against the original file's indentation.
    if 'title' in params:
        search = search.query('match', title=params['title'])
    if 'authors' in params:
        search = search.query('match', authors=params['authors'])

    # The debug print of the slice bounds was removed.
    return search[startPos:endPos].execute()
|
||||||
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def getFromSize(page, perPage):
|
def getFromSize(page, perPage):
|
||||||
|
|
|
@ -9,6 +9,132 @@ from api.response import MultiResponse
|
||||||
|
|
||||||
search = Blueprint('search', __name__, url_prefix='/search')
|
search = Blueprint('search', __name__, url_prefix='/search')
|
||||||
|
|
||||||
|
# Spellings of the ``source`` query parameter that mean "do not return
# source data". Any other non-empty value keeps the flag enabled, which
# matches what callers got before the flag was parsed.
_FALSE_FLAG_VALUES = frozenset({'', '0', 'false', 'no', 'off'})


def _parseSourceFlag(args):
    """Return the ``source`` query parameter as a real boolean.

    Query-string values arrive as strings, so ``?source=false`` used to be
    passed on as the truthy string ``'false'``.

    Args:
        args: Mapping of query parameters (``request.args``).

    Returns:
        ``False`` when the parameter is absent or is one of the
        ``_FALSE_FLAG_VALUES`` spellings (case-insensitive), else ``True``.
    """
    rawValue = str(args.get('source', ''))
    return rawValue.strip().lower() not in _FALSE_FLAG_VALUES


def _buildSearchResponse(matchingDocs, query, page, perPage, sourceReturn):
    """Turn Elasticsearch hits into the JSON response shared by the routes.

    Args:
        matchingDocs: Executed search response; iterated for hits and read
            for ``hits.total``.
        query: The query (string or dict) passed to ``MultiResponse``.
        page: Page number passed to ``MultiResponse``.
        perPage: Page size passed to ``MultiResponse``.
        sourceReturn: Boolean forwarded to ``parseEntry`` as ``xml`` and to
            ``parseRenewal`` as ``source``.

    Returns:
        The ``jsonify``-ed result of ``createResponse(200)``.
    """
    textResponse = MultiResponse(
        'text',
        matchingDocs.hits.total,
        request.base_url,
        query,
        page,
        perPage
    )
    qManager = QueryManager(db.session)
    for entry in matchingDocs:
        if entry.meta.index == 'cce':
            dbEntry = qManager.registrationQuery(entry.uuid)
            textResponse.addResult(MultiResponse.parseEntry(
                dbEntry,
                xml=sourceReturn
            ))
        else:
            try:
                dbRenewal = qManager.renewalQuery(entry.uuid)
                textResponse.addResult(MultiResponse.parseRenewal(
                    dbRenewal,
                    source=sourceReturn
                ))
            except NoResultFound:
                # Fall back to the orphan-renewal lookup when the regular
                # renewal query finds no row.
                dbRenewal = qManager.orphanRenewalQuery(entry.uuid)
                textResponse.addResult(MultiResponse.parseRenewal(
                    dbRenewal,
                    source=sourceReturn
                ))

    textResponse.createDataBlock()
    return jsonify(textResponse.createResponse(200))


@search.route('/multi', methods=['GET'])
def multiQuery():
    """Search by any combination of title, authors and publishers.

    Each of the three query parameters is optional; an empty value or the
    wildcard ``*`` means "do not filter on this field".
    """
    sourceReturn = _parseSourceFlag(request.args)
    page, perPage = MultiResponse.parsePaging(request.args)

    queries = {}
    for field in ('title', 'authors', 'publishers'):
        value = request.args.get(field, '')
        if value not in ('', '*'):
            queries[field] = value

    matchingDocs = elastic.query_multifields(
        queries, page=page, perPage=perPage
    )
    return _buildSearchResponse(
        matchingDocs, queries, page, perPage, sourceReturn
    )


@search.route('/author', methods=['GET'])
def authorQuery():
    """Search registrations and renewals by the ``query`` author text."""
    queryText = request.args.get('query', '')
    sourceReturn = _parseSourceFlag(request.args)
    page, perPage = MultiResponse.parsePaging(request.args)
    matchingDocs = elastic.query_author(queryText, page=page, perPage=perPage)
    return _buildSearchResponse(
        matchingDocs, queryText, page, perPage, sourceReturn
    )


@search.route('/title', methods=['GET'])
def titleQuery():
    """Search registrations and renewals by the ``query`` title text."""
    queryText = request.args.get('query', '')
    sourceReturn = _parseSourceFlag(request.args)
    page, perPage = MultiResponse.parsePaging(request.args)
    matchingDocs = elastic.query_title(queryText, page=page, perPage=perPage)
    return _buildSearchResponse(
        matchingDocs, queryText, page, perPage, sourceReturn
    )
|
||||||
|
|
||||||
@search.route('/fulltext', methods=['GET'])
|
@search.route('/fulltext', methods=['GET'])
|
||||||
def fullTextQuery():
|
def fullTextQuery():
|
||||||
|
|
|
@ -23,6 +23,147 @@ class SwaggerDoc():
|
||||||
"https"
|
"https"
|
||||||
],
|
],
|
||||||
"paths": {
|
"paths": {
|
||||||
|
"/search/multi": {
|
||||||
|
"get": {
|
||||||
|
"tags": ["Search"],
|
||||||
|
"summary": "Returns a set of registration and renewal objects",
|
||||||
|
"description": "Accepts a query string to search across both registration and renewal records in the author field",
|
||||||
|
"parameters": [
|
||||||
|
{
|
||||||
|
"name": "title",
|
||||||
|
"in": "query",
|
||||||
|
"type": "string",
|
||||||
|
"required": False,
|
||||||
|
"default": "*"
|
||||||
|
},{
|
||||||
|
"name": "authors",
|
||||||
|
"in": "query",
|
||||||
|
"type": "string",
|
||||||
|
"required": False,
|
||||||
|
"default": "*"
|
||||||
|
},{
|
||||||
|
"name": "publishers",
|
||||||
|
"in": "query",
|
||||||
|
"type": "string",
|
||||||
|
"required": False,
|
||||||
|
"default": "*"
|
||||||
|
},{
|
||||||
|
"name": "source",
|
||||||
|
"in": "query",
|
||||||
|
"type": "boolean",
|
||||||
|
"required": False,
|
||||||
|
"default": False,
|
||||||
|
"description": "Return source XML/CSV data"
|
||||||
|
},{
|
||||||
|
"name": "page",
|
||||||
|
"in": "query",
|
||||||
|
"type": "number",
|
||||||
|
"required": False,
|
||||||
|
"default": 0
|
||||||
|
},{
|
||||||
|
"name": "per_page",
|
||||||
|
"in": "query",
|
||||||
|
"type": "number",
|
||||||
|
"required": False,
|
||||||
|
"default": 10
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"responses": {
|
||||||
|
200: {
|
||||||
|
"description": "A list of copyright registrations and renewals",
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/definitions/MultiResponse"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"/search/author": {
|
||||||
|
"get": {
|
||||||
|
"tags": ["Search"],
|
||||||
|
"summary": "Returns a set of registration and renewal objects",
|
||||||
|
"description": "Accepts a query string to search across both registration and renewal records in the author field",
|
||||||
|
"parameters": [
|
||||||
|
{
|
||||||
|
"name": "query",
|
||||||
|
"in": "query",
|
||||||
|
"type": "string",
|
||||||
|
"required": True,
|
||||||
|
"default": "*"
|
||||||
|
},{
|
||||||
|
"name": "source",
|
||||||
|
"in": "query",
|
||||||
|
"type": "boolean",
|
||||||
|
"required": False,
|
||||||
|
"default": False,
|
||||||
|
"description": "Return source XML/CSV data"
|
||||||
|
},{
|
||||||
|
"name": "page",
|
||||||
|
"in": "query",
|
||||||
|
"type": "number",
|
||||||
|
"required": False,
|
||||||
|
"default": 0
|
||||||
|
},{
|
||||||
|
"name": "per_page",
|
||||||
|
"in": "query",
|
||||||
|
"type": "number",
|
||||||
|
"required": False,
|
||||||
|
"default": 10
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"responses": {
|
||||||
|
200: {
|
||||||
|
"description": "A list of copyright registrations and renewals",
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/definitions/MultiResponse"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"/search/title": {
|
||||||
|
"get": {
|
||||||
|
"tags": ["Search"],
|
||||||
|
"summary": "Returns a set of registration and renewal objects",
|
||||||
|
"description": "Accepts a query string to search across both registration and renewal records in the title fiel",
|
||||||
|
"parameters": [
|
||||||
|
{
|
||||||
|
"name": "query",
|
||||||
|
"in": "query",
|
||||||
|
"type": "string",
|
||||||
|
"required": True,
|
||||||
|
"default": "*"
|
||||||
|
},{
|
||||||
|
"name": "source",
|
||||||
|
"in": "query",
|
||||||
|
"type": "boolean",
|
||||||
|
"required": False,
|
||||||
|
"default": False,
|
||||||
|
"description": "Return source XML/CSV data"
|
||||||
|
},{
|
||||||
|
"name": "page",
|
||||||
|
"in": "query",
|
||||||
|
"type": "number",
|
||||||
|
"required": False,
|
||||||
|
"default": 0
|
||||||
|
},{
|
||||||
|
"name": "per_page",
|
||||||
|
"in": "query",
|
||||||
|
"type": "number",
|
||||||
|
"required": False,
|
||||||
|
"default": 10
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"responses": {
|
||||||
|
200: {
|
||||||
|
"description": "A list of copyright registrations and renewals",
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/definitions/MultiResponse"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"/search/fulltext": {
|
"/search/fulltext": {
|
||||||
"get": {
|
"get": {
|
||||||
"tags": ["Search"],
|
"tags": ["Search"],
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
import math
|
||||||
|
|
||||||
class Response():
|
class Response():
|
||||||
def __init__(self, queryType, endpoint):
|
def __init__(self, queryType, endpoint):
|
||||||
|
@ -151,7 +152,7 @@ class MultiResponse(Response):
|
||||||
else:
|
else:
|
||||||
paging['next'] = None
|
paging['next'] = None
|
||||||
|
|
||||||
lastPage = int((self.total - self.perPage) / self.perPage)
|
lastPage = math.ceil(((self.total - self.perPage) / self.perPage))
|
||||||
if (
|
if (
|
||||||
self.page * self.perPage < self.total and
|
self.page * self.perPage < self.total and
|
||||||
self.total > self.perPage
|
self.total > self.perPage
|
||||||
|
|
|
@ -6,6 +6,13 @@ from lxml import etree
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import traceback
|
import traceback
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import io
|
||||||
|
|
||||||
|
sys.stdout = io.TextIOWrapper(sys.stdout.detach(), encoding = 'utf-8')
|
||||||
|
|
||||||
|
sys.stderr = io.TextIOWrapper(sys.stderr.detach(), encoding = 'utf-8')
|
||||||
|
|
||||||
from model.cce import CCE
|
from model.cce import CCE
|
||||||
from model.errorCCE import ErrorCCE
|
from model.errorCCE import ErrorCCE
|
||||||
|
|
|
@ -1,18 +1,18 @@
|
||||||
DATABASE:
|
DATABASE:
|
||||||
DB_USER:
|
DB_USER: postgres
|
||||||
DB_PSWD:
|
DB_PSWD: "9903"
|
||||||
DB_HOST:
|
DB_HOST: localhost
|
||||||
DB_PORT:
|
DB_PORT: "5432"
|
||||||
DB_NAME:
|
DB_NAME: ccesearch
|
||||||
|
|
||||||
GITHUB:
|
GITHUB:
|
||||||
ACCESS_TOKEN:
|
ACCESS_TOKEN: 218124e9bf09a9b3f379cb5ee1ab0a8756ee3b3c
|
||||||
CCE_REPO:
|
CCE_REPO: nypl/catalog_of_copyright_entries_project
|
||||||
CCR_REPO:
|
CCR_REPO: nypl/cce-renewals
|
||||||
|
|
||||||
ELASTICSEARCH:
|
ELASTICSEARCH:
|
||||||
ES_CCE_INDEX:
|
ES_CCE_INDEX: cce
|
||||||
ES_CCR_INDEX:
|
ES_CCR_INDEX: ccr
|
||||||
ES_HOST:
|
ES_HOST: localhost
|
||||||
ES_PORT:
|
ES_PORT: '9200'
|
||||||
ES_TIMEOUT:
|
ES_TIMEOUT: "10000"
|
|
@ -58,7 +58,7 @@ class ESIndexer():
|
||||||
if self.client.indices.exists(index=self.ccr_index) is False:
|
if self.client.indices.exists(index=self.ccr_index) is False:
|
||||||
Renewal.init()
|
Renewal.init()
|
||||||
|
|
||||||
def indexRecords(self, recType='cce'):
|
def indexRecords(self, recType='ccr'):
|
||||||
"""Process the current batch of updating records. This utilizes the
|
"""Process the current batch of updating records. This utilizes the
|
||||||
elasticsearch-py bulk helper to import records in chunks of the
|
elasticsearch-py bulk helper to import records in chunks of the
|
||||||
provided size. If a record in the batch errors that is reported and
|
provided size. If a record in the batch errors that is reported and
|
||||||
|
@ -148,6 +148,7 @@ class ESRen():
|
||||||
self.renewal.rennum = self.dbRen.renewal_num
|
self.renewal.rennum = self.dbRen.renewal_num
|
||||||
self.renewal.rendate = self.dbRen.renewal_date
|
self.renewal.rendate = self.dbRen.renewal_date
|
||||||
self.renewal.title = self.dbRen.title
|
self.renewal.title = self.dbRen.title
|
||||||
|
self.renewal.authors = self.dbRen.author
|
||||||
self.renewal.claimants = [
|
self.renewal.claimants = [
|
||||||
Claimant(name=c.name, claim_type=c.claimant_type)
|
Claimant(name=c.name, claim_type=c.claimant_type)
|
||||||
for c in self.dbRen.claimants
|
for c in self.dbRen.claimants
|
||||||
|
|
20
main.py
20
main.py
|
@ -14,12 +14,10 @@ def main(secondsAgo=None, year=None, exclude=None, reinit=False):
|
||||||
startTime = datetime.now()
|
startTime = datetime.now()
|
||||||
if secondsAgo is not None:
|
if secondsAgo is not None:
|
||||||
loadFromTime = startTime - timedelta(seconds=secondsAgo)
|
loadFromTime = startTime - timedelta(seconds=secondsAgo)
|
||||||
|
# if exclude != 'cce':
|
||||||
if exclude != 'cce':
|
# loadCCE(manager, loadFromTime, year)
|
||||||
loadCCE(manager, loadFromTime, year)
|
# if exclude != 'ccr':
|
||||||
if exclude != 'ccr':
|
# loadCCR(manager, loadFromTime, year)
|
||||||
loadCCR(manager, loadFromTime, year)
|
|
||||||
|
|
||||||
indexUpdates(manager, loadFromTime)
|
indexUpdates(manager, loadFromTime)
|
||||||
|
|
||||||
manager.closeConnection()
|
manager.closeConnection()
|
||||||
|
@ -39,7 +37,7 @@ def loadCCR(manager, loadFromTime, selectedYear):
|
||||||
|
|
||||||
def indexUpdates(manager, loadFromTime):
|
def indexUpdates(manager, loadFromTime):
|
||||||
esIndexer = ESIndexer(manager, None)
|
esIndexer = ESIndexer(manager, None)
|
||||||
esIndexer.indexRecords(recType='cce')
|
# esIndexer.indexRecords(recType='cce')
|
||||||
esIndexer.indexRecords(recType='ccr')
|
esIndexer.indexRecords(recType='ccr')
|
||||||
|
|
||||||
|
|
||||||
|
@ -62,7 +60,7 @@ def parseArgs():
|
||||||
|
|
||||||
|
|
||||||
def loadConfig():
|
def loadConfig():
|
||||||
with open('config.yaml', 'r') as yamlFile:
|
with open('config.yaml-dist', 'r') as yamlFile:
|
||||||
config = yaml.safe_load(yamlFile)
|
config = yaml.safe_load(yamlFile)
|
||||||
for section in config:
|
for section in config:
|
||||||
sectionDict = config[section]
|
sectionDict = config[section]
|
||||||
|
@ -75,13 +73,17 @@ if __name__ == '__main__':
|
||||||
try:
|
try:
|
||||||
loadConfig()
|
loadConfig()
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
|
print("Unable to set environment variables")
|
||||||
pass
|
pass
|
||||||
|
|
||||||
from sessionManager import SessionManager
|
from sessionManager import SessionManager
|
||||||
from builder import CCEReader, CCEFile
|
from builder import CCEReader, CCEFile
|
||||||
from renBuilder import CCRReader, CCRFile
|
from renBuilder import CCRReader, CCRFile
|
||||||
from esIndexer import ESIndexer
|
from esIndexer import ESIndexer
|
||||||
|
print(args.time)
|
||||||
|
print(args.year)
|
||||||
|
print(args.exclude)
|
||||||
|
print(args.REINITIALIZE)
|
||||||
main(
|
main(
|
||||||
secondsAgo=args.time,
|
secondsAgo=args.time,
|
||||||
year=args.year,
|
year=args.year,
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
import os
|
import os
|
||||||
import yaml
|
import yaml
|
||||||
|
import pprint
|
||||||
from elasticsearch_dsl import (
|
from elasticsearch_dsl import (
|
||||||
Index,
|
Index,
|
||||||
Document,
|
Document,
|
||||||
|
@ -16,14 +17,14 @@ class BaseDoc(Document):
|
||||||
date_modified = Date()
|
date_modified = Date()
|
||||||
|
|
||||||
def save(self, **kwargs):
|
def save(self, **kwargs):
|
||||||
return super(BaseDoc, self).save(**kwargs)
|
return super(BaseDoc, self).save(** kwargs)
|
||||||
|
|
||||||
class BaseInner(InnerDoc):
|
class BaseInner(InnerDoc):
|
||||||
date_created = Date()
|
date_created = Date()
|
||||||
date_modified = Date()
|
date_modified = Date()
|
||||||
|
|
||||||
def save(self, **kwargs):
|
def save(self, **kwargs):
|
||||||
return super(BaseInner, self).save(**kwargs)
|
return super(BaseInner, self).save(** kwargs)
|
||||||
|
|
||||||
|
|
||||||
class Registration(BaseInner):
|
class Registration(BaseInner):
|
||||||
|
@ -41,9 +42,10 @@ class Renewal(BaseDoc):
|
||||||
rennum = Keyword()
|
rennum = Keyword()
|
||||||
rendate = Date()
|
rendate = Date()
|
||||||
title = Text(fields={'keyword': Keyword()})
|
title = Text(fields={'keyword': Keyword()})
|
||||||
|
authors = Text()
|
||||||
claimants = Nested(Claimant)
|
|
||||||
|
|
||||||
|
claimants = Nested(Claimant)
|
||||||
|
# pprint.pprint(dict(os.environ), width = 1)
|
||||||
class Index:
|
class Index:
|
||||||
name = os.environ['ES_CCR_INDEX']
|
name = os.environ['ES_CCR_INDEX']
|
||||||
|
|
||||||
|
@ -54,7 +56,6 @@ class CCE(BaseDoc):
|
||||||
authors = Text(multi=True)
|
authors = Text(multi=True)
|
||||||
publishers = Text(multi=True)
|
publishers = Text(multi=True)
|
||||||
lccns = Keyword(multi=True)
|
lccns = Keyword(multi=True)
|
||||||
|
|
||||||
registrations = Nested(Registration)
|
registrations = Nested(Registration)
|
||||||
|
|
||||||
class Index:
|
class Index:
|
||||||
|
|
|
@ -14,9 +14,10 @@ from model.registration import Registration
|
||||||
class CCRReader():
|
class CCRReader():
|
||||||
def __init__(self, manager):
|
def __init__(self, manager):
|
||||||
self.git = Github(os.environ['ACCESS_TOKEN'])
|
self.git = Github(os.environ['ACCESS_TOKEN'])
|
||||||
|
print(self.git)
|
||||||
self.repo = self.git.get_repo(os.environ['CCR_REPO'])
|
self.repo = self.git.get_repo(os.environ['CCR_REPO'])
|
||||||
|
print(self.repo)
|
||||||
self.ccrYears = {}
|
self.ccrYears = {}
|
||||||
|
|
||||||
self.dbManager = manager
|
self.dbManager = manager
|
||||||
|
|
||||||
def loadYears(self, selectedYear, loadFromTime):
|
def loadYears(self, selectedYear, loadFromTime):
|
||||||
|
|
Loading…
Reference in New Issue