add schedule package to daemon (#48)
parent 966c336b68
commit b80d745b30

@@ -10,6 +10,7 @@ psycopg2 = "2.9.3"
 pandas = "*"
 scikit-learn = "*"
 lxml = "*"
+schedule = "*"
 charset_normalizer = "*"
 idna = "*"
 certifi = "*"
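The only Pipfile change is the new schedule dependency. For context, a minimal sketch of how that package is typically driven; the job() function here is a placeholder, while the times mirror the registrations the daemon adds further down:

import time

import schedule


def job():
    # Placeholder task; the daemon registers refresh() and harvest() instead.
    print("running scheduled job")


# Register recurring jobs on the library's default scheduler.
schedule.every().day.at("20:00").do(job)
schedule.every().sunday.at("22:00").do(job)

while True:
    # Run whatever is due, then idle; the sleep only sets the polling rate.
    schedule.run_pending()
    time.sleep(60)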

@@ -71,16 +71,13 @@ def drop_schema(connection) -> None:
     cursor.close()


-def seed_endpoints(connection):
+def get_endpoints():
     collections = OapenAPI.get_all_collections()

     if collections is None:
         logger.error("Could not fetch collections from OAPEN server. Is it down?")
         sys.exit(1)

-    db = OapenDB(connection)
-
     endpoints = []

     COLLECTION_IMPORT_LIMIT = int(os.environ["COLLECTION_IMPORT_LIMIT"])

@@ -108,6 +105,12 @@ def seed_endpoints(connection):
         )
         endpoints.append(x)

+    return endpoints
+
+
+def seed_endpoints(connection):
+    db = OapenDB(connection)
+    endpoints = get_endpoints()
+    db.add_urls(endpoints)
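Pieced together, the two hunks above split the old seed_endpoints into a query step and a persistence step. A rough sketch of the resulting code follows; the loop that turns each collection into endpoint URLs is not visible in this excerpt, so that part is a placeholder:

import os
import sys

# OapenAPI, OapenDB and logger come from the module's existing imports (not shown in this diff).


def get_endpoints():
    # Ask the OAPEN server for every collection; abort if it is unreachable.
    collections = OapenAPI.get_all_collections()

    if collections is None:
        logger.error("Could not fetch collections from OAPEN server. Is it down?")
        sys.exit(1)

    endpoints = []

    COLLECTION_IMPORT_LIMIT = int(os.environ["COLLECTION_IMPORT_LIMIT"])

    # ... build one import endpoint per collection here (elided in the diff) ...

    return endpoints


def seed_endpoints(connection):
    # Thin wrapper: resolve the endpoints, then persist them through OapenDB.
    db = OapenDB(connection)
    endpoints = get_endpoints()
    db.add_urls(endpoints)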

@@ -21,4 +21,4 @@ SUGGESTIONS_MAX_ITEMS = 50

 # Update items that were modified since X days ago
 UPDATE_DAYS_BEFORE = 30
-REFRESH_IMPORT_LIMIT = 50
+REFRESH_IMPORT_LIMIT = 0

@@ -4,7 +4,9 @@ import signal
 import sys
 import time

+import schedule
 from clean import run as run_clean
+from clean import seed_endpoints
 from data.connection import get_connection
 from data.oapen_db import OapenDB
 from generate_suggestions import run as run_generate_suggestions

@@ -12,8 +14,15 @@ from logger.base_logger import logger
 from refresh_items import run as run_refresh_items
 from seed import run as run_seed

+conn = get_connection()
+db = OapenDB(conn)
+logger.info("Daemon up")
+

 def harvest():
+    seed_endpoints()
     urls = db.get_incomplete_urls()
     if len(urls) > 0:
         run_seed()
+        run_generate_suggestions()

@@ -23,12 +32,6 @@ def refresh():
     run_generate_suggestions()


-logger.info("Daemon up")
-
-conn = get_connection()
-db = OapenDB(conn)
-
-
 def signal_handler(signal, frame):
     conn.close()
     logger.info("Daemon exiting.")
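Assembled from this hunk and the previous one, the daemon's three top-level jobs read roughly as below; all names come from the imports and assignments shown above. Two caveats: refresh()'s first line is not visible, so run_refresh_items() is an assumption based on the import, and the diff calls seed_endpoints() with no argument even though clean.seed_endpoints(connection) takes one, so the sketch passes conn explicitly:

def harvest():
    # Re-seed the endpoint list, import whatever is still incomplete, then rebuild suggestions.
    seed_endpoints(conn)  # the diff itself calls seed_endpoints() without an argument
    urls = db.get_incomplete_urls()
    if len(urls) > 0:
        run_seed()
        run_generate_suggestions()


def refresh():
    run_refresh_items()  # assumed; only the next line is visible in the hunk
    run_generate_suggestions()


def signal_handler(signal, frame):
    # Close the shared connection before the process exits.
    conn.close()
    logger.info("Daemon exiting.")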

@@ -46,22 +49,11 @@ if int(os.environ["RUN_CLEAN"]) == 1 or (

     harvest()

-harvest_acc = 0
-refresh_acc = 0
+schedule.every().day.at("20:00").do(refresh)
+schedule.every().sunday.at("22:00").do(harvest)

 while True:
-    if harvest_acc >= int(os.environ["HARVEST_PERIOD"]):
-        urls = db.get_incomplete_urls()
-        if len(urls) > 0:
-            harvest()
-        harvest_acc = 0
-
-    if refresh_acc >= int(os.environ["REFRESH_PERIOD"]):
-        refresh()
-        refresh_acc = 0
-
+    schedule.run_pending()
     time.sleep(60)
-    refresh_acc += 60
-    harvest_acc += 60

 logger.info("Daemon down")
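Net effect of the last hunk: the hand-rolled HARVEST_PERIOD/REFRESH_PERIOD accumulators are gone and the schedule package decides when each job runs. A sketch of the resulting tail of the daemon, assuming the signal.signal wiring elsewhere in the file is unchanged (it is not shown in this diff):

# Register the recurring jobs: refresh daily at 20:00, harvest on Sundays at 22:00.
schedule.every().day.at("20:00").do(refresh)
schedule.every().sunday.at("22:00").do(harvest)

while True:
    # The schedule library tracks each job's next run time; run_pending() only
    # executes jobs that are due, so the 60-second sleep just sets the polling rate.
    schedule.run_pending()
    time.sleep(60)

Since the loop has no break, the closing logger.info("Daemon down") line is effectively unreachable as written; shutdown happens through the signal handler instead.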