diff --git a/Pipfile b/Pipfile index 079e7ae..b68d0ee 100644 --- a/Pipfile +++ b/Pipfile @@ -9,6 +9,7 @@ psycopg2 = "*" pyoai = "*" requests = "*" gunicorn = "*" +pytz = "*" doab-check = {editable = true, path = "."} [dev-packages] diff --git a/Pipfile.lock b/Pipfile.lock index c4ea5f7..bdbbccb 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,11 +1,11 @@ { "_meta": { "hash": { - "sha256": "4a11e64a7ef8724e60a36f766844c65796fce7aecee9ca371b1e6db3109afb45" + "sha256": "88319ac8e3e7a9d3d28deb1837d53c845d2d5f340c903f6c60de1f8b9a86f55a" }, "pipfile-spec": 6, "requires": { - "python_version": "3.9" + "python_version": ">=3.9" }, "sources": [ { @@ -250,6 +250,14 @@ "index": "pypi", "version": "==2.5.0" }, + "pytz": { + "hashes": [ + "sha256:1d8ce29db189191fb55338ee6d0387d82ab59f3d00eac103412d64e0ebd0c588", + "sha256:a151b3abb88eda1d4e34a9814df37de2a80e301e68ba0fd856fb9b46bfbbbffb" + ], + "index": "pypi", + "version": "==2023.3" + }, "requests": { "hashes": [ "sha256:64299f4909223da747622c030b781c0d7811e359c37124b4bd368fb8c6518baa", @@ -260,11 +268,11 @@ }, "setuptools": { "hashes": [ - "sha256:2ee892cd5f29f3373097f5a814697e397cf3ce313616df0af11231e2ad118077", - "sha256:b78aaa36f6b90a074c1fa651168723acbf45d14cb1196b6f02c0fd07f17623b2" + "sha256:6f0839fbdb7e3cfef1fc38d7954f5c1c26bf4eebb155a55c9bf8faf997b9fb67", + "sha256:bb16732e8eb928922eabaa022f881ae2b7cdcfaf9993ef1f5e841a96d32b8e0c" ], "markers": "python_full_version >= '3.7.0'", - "version": "==67.6.0" + "version": "==67.7.1" }, "six": { "hashes": [ @@ -276,11 +284,11 @@ }, "sqlparse": { "hashes": [ - "sha256:0323c0ec29cd52bceabc1b4d9d579e311f3e4961b98d174201d5622a23b85e34", - "sha256:69ca804846bb114d2ec380e4360a8a340db83f0ccf3afceeb1404df028f57268" + "sha256:5430a4fe2ac7d0f93e66f1efc6e1338a41884b7ddf2a350cedd20ccc4d9d28f3", + "sha256:d446183e84b8349fa3061f0fe7f06ca94ba65b426946ffebe6e3e8295332420c" ], "markers": "python_version >= '3.5'", - "version": "==0.4.3" + "version": "==0.4.4" }, "urllib3": { "hashes": [ diff --git a/doab_check/doab_oai.py b/doab_check/doab_oai.py index f1d3f89..09637b1 100644 --- a/doab_check/doab_oai.py +++ b/doab_check/doab_oai.py @@ -5,6 +5,10 @@ import datetime import logging import re +import pytz +from dateutil.parser import isoparse +from dateutil.utils import default_tzinfo + from oaipmh.client import Client from oaipmh.error import IdDoesNotExistError, NoRecordsMatchError from oaipmh.metadata import MetadataRegistry @@ -84,6 +88,7 @@ def load_doab_record(doab_id, title, publisher_name, item_type, urls, timestamps new_item.resource_type = item_type new_item.save() for timestamp in timestamps: + timestamp = default_tzinfo(isoparse(timestamp), pytz.UTC) (new_timestamp, created) = Timestamp.objects.get_or_create( datetime=timestamp, item=new_item) @@ -120,12 +125,12 @@ def load_doab_oai(from_date, until_date, limit=100): ''' use oai feed to get oai updates ''' - start = datetime.datetime.now() + start = datetime.datetime.now(pytz.UTC) if from_date: from_ = from_date else: # last 15 days - from_ = datetime.datetime.now() - datetime.timedelta(days=15) + from_ = datetime.datetime.now(pytz.UTC) - datetime.timedelta(days=15) num_doabs = 0 new_doabs = 0 lasttime = datetime.datetime(2000, 1, 1) @@ -147,7 +152,7 @@ def load_doab_oai(from_date, until_date, limit=100): if not item: logger.error('error for doab #%s', doab) continue - if lasttime > start: + if item.created > start: new_doabs += 1 title = item.title logger.info(u'updated:\t%s\t%s', doab, title) diff --git a/doab_check/management/commands/check_items.py b/doab_check/management/commands/check_items.py index e6ccbaf..790305f 100644 --- a/doab_check/management/commands/check_items.py +++ b/doab_check/management/commands/check_items.py @@ -41,7 +41,6 @@ class Command(BaseCommand): except Item.DoesNotExist: continue - self.stdout.write(f'checked {n_checked} links') end_time = datetime.datetime.now() logger.info(f'checked {l_checked} links in {end_time - start_time}') self.stdout.write(f'checked {l_checked} links for {n_checked} items in {end_time - start_time}') diff --git a/doab_check/management/commands/check_some_links.py b/doab_check/management/commands/check_some_links.py index a9dd529..de43c68 100644 --- a/doab_check/management/commands/check_some_links.py +++ b/doab_check/management/commands/check_some_links.py @@ -22,7 +22,6 @@ class Command(BaseCommand): n_checked += 1 if n_checked >= max: break - self.stdout.write(f'checked {n_checked} links') end_time = datetime.datetime.now() logger.info(f'checked {n_checked} links in {end_time - start_time}') self.stdout.write(f'checked {n_checked} links in {end_time - start_time}') diff --git a/setup.cfg b/setup.cfg index d21e7a7..2585a26 100644 --- a/setup.cfg +++ b/setup.cfg @@ -29,4 +29,6 @@ install_requires = Django == 4.1.7 psycopg2 == 2.9.5 pyoai == 2.5.0 - requests == 2.28.2 \ No newline at end of file + requests == 2.28.2 + pytz > 2021 + python-dateutil >= 2.8.0 \ No newline at end of file