Fixed issue with naive dates: timestamps are now timezone-aware (defaulting to UTC)

main
eric 2023-04-21 17:17:04 -04:00
parent c50e9cd8be
commit 53e3f655c6
6 changed files with 28 additions and 14 deletions

View File

@ -9,6 +9,7 @@ psycopg2 = "*"
pyoai = "*" pyoai = "*"
requests = "*" requests = "*"
gunicorn = "*" gunicorn = "*"
pytz = "*"
doab-check = {editable = true, path = "."} doab-check = {editable = true, path = "."}
[dev-packages] [dev-packages]

24
Pipfile.lock generated
View File

@ -1,11 +1,11 @@
{ {
"_meta": { "_meta": {
"hash": { "hash": {
"sha256": "4a11e64a7ef8724e60a36f766844c65796fce7aecee9ca371b1e6db3109afb45" "sha256": "88319ac8e3e7a9d3d28deb1837d53c845d2d5f340c903f6c60de1f8b9a86f55a"
}, },
"pipfile-spec": 6, "pipfile-spec": 6,
"requires": { "requires": {
"python_version": "3.9" "python_version": ">=3.9"
}, },
"sources": [ "sources": [
{ {
@ -250,6 +250,14 @@
"index": "pypi", "index": "pypi",
"version": "==2.5.0" "version": "==2.5.0"
}, },
"pytz": {
"hashes": [
"sha256:1d8ce29db189191fb55338ee6d0387d82ab59f3d00eac103412d64e0ebd0c588",
"sha256:a151b3abb88eda1d4e34a9814df37de2a80e301e68ba0fd856fb9b46bfbbbffb"
],
"index": "pypi",
"version": "==2023.3"
},
"requests": { "requests": {
"hashes": [ "hashes": [
"sha256:64299f4909223da747622c030b781c0d7811e359c37124b4bd368fb8c6518baa", "sha256:64299f4909223da747622c030b781c0d7811e359c37124b4bd368fb8c6518baa",
@ -260,11 +268,11 @@
}, },
"setuptools": { "setuptools": {
"hashes": [ "hashes": [
"sha256:2ee892cd5f29f3373097f5a814697e397cf3ce313616df0af11231e2ad118077", "sha256:6f0839fbdb7e3cfef1fc38d7954f5c1c26bf4eebb155a55c9bf8faf997b9fb67",
"sha256:b78aaa36f6b90a074c1fa651168723acbf45d14cb1196b6f02c0fd07f17623b2" "sha256:bb16732e8eb928922eabaa022f881ae2b7cdcfaf9993ef1f5e841a96d32b8e0c"
], ],
"markers": "python_full_version >= '3.7.0'", "markers": "python_full_version >= '3.7.0'",
"version": "==67.6.0" "version": "==67.7.1"
}, },
"six": { "six": {
"hashes": [ "hashes": [
@ -276,11 +284,11 @@
}, },
"sqlparse": { "sqlparse": {
"hashes": [ "hashes": [
"sha256:0323c0ec29cd52bceabc1b4d9d579e311f3e4961b98d174201d5622a23b85e34", "sha256:5430a4fe2ac7d0f93e66f1efc6e1338a41884b7ddf2a350cedd20ccc4d9d28f3",
"sha256:69ca804846bb114d2ec380e4360a8a340db83f0ccf3afceeb1404df028f57268" "sha256:d446183e84b8349fa3061f0fe7f06ca94ba65b426946ffebe6e3e8295332420c"
], ],
"markers": "python_version >= '3.5'", "markers": "python_version >= '3.5'",
"version": "==0.4.3" "version": "==0.4.4"
}, },
"urllib3": { "urllib3": {
"hashes": [ "hashes": [

View File

@ -5,6 +5,10 @@ import datetime
import logging import logging
import re import re
import pytz
from dateutil.parser import isoparse
from dateutil.utils import default_tzinfo
from oaipmh.client import Client from oaipmh.client import Client
from oaipmh.error import IdDoesNotExistError, NoRecordsMatchError from oaipmh.error import IdDoesNotExistError, NoRecordsMatchError
from oaipmh.metadata import MetadataRegistry from oaipmh.metadata import MetadataRegistry
@ -84,6 +88,7 @@ def load_doab_record(doab_id, title, publisher_name, item_type, urls, timestamps
new_item.resource_type = item_type new_item.resource_type = item_type
new_item.save() new_item.save()
for timestamp in timestamps: for timestamp in timestamps:
timestamp = default_tzinfo(isoparse(timestamp), pytz.UTC)
(new_timestamp, created) = Timestamp.objects.get_or_create( (new_timestamp, created) = Timestamp.objects.get_or_create(
datetime=timestamp, datetime=timestamp,
item=new_item) item=new_item)
@ -120,12 +125,12 @@ def load_doab_oai(from_date, until_date, limit=100):
''' '''
use oai feed to get oai updates use oai feed to get oai updates
''' '''
start = datetime.datetime.now() start = datetime.datetime.now(pytz.UTC)
if from_date: if from_date:
from_ = from_date from_ = from_date
else: else:
# last 15 days # last 15 days
from_ = datetime.datetime.now() - datetime.timedelta(days=15) from_ = datetime.datetime.now(pytz.UTC) - datetime.timedelta(days=15)
num_doabs = 0 num_doabs = 0
new_doabs = 0 new_doabs = 0
lasttime = datetime.datetime(2000, 1, 1) lasttime = datetime.datetime(2000, 1, 1)
@ -147,7 +152,7 @@ def load_doab_oai(from_date, until_date, limit=100):
if not item: if not item:
logger.error('error for doab #%s', doab) logger.error('error for doab #%s', doab)
continue continue
if lasttime > start: if item.created > start:
new_doabs += 1 new_doabs += 1
title = item.title title = item.title
logger.info(u'updated:\t%s\t%s', doab, title) logger.info(u'updated:\t%s\t%s', doab, title)

View File

@ -41,7 +41,6 @@ class Command(BaseCommand):
except Item.DoesNotExist: except Item.DoesNotExist:
continue continue
self.stdout.write(f'checked {n_checked} links')
end_time = datetime.datetime.now() end_time = datetime.datetime.now()
logger.info(f'checked {l_checked} links in {end_time - start_time}') logger.info(f'checked {l_checked} links in {end_time - start_time}')
self.stdout.write(f'checked {l_checked} links for {n_checked} items in {end_time - start_time}') self.stdout.write(f'checked {l_checked} links for {n_checked} items in {end_time - start_time}')

View File

@ -22,7 +22,6 @@ class Command(BaseCommand):
n_checked += 1 n_checked += 1
if n_checked >= max: if n_checked >= max:
break break
self.stdout.write(f'checked {n_checked} links')
end_time = datetime.datetime.now() end_time = datetime.datetime.now()
logger.info(f'checked {n_checked} links in {end_time - start_time}') logger.info(f'checked {n_checked} links in {end_time - start_time}')
self.stdout.write(f'checked {n_checked} links in {end_time - start_time}') self.stdout.write(f'checked {n_checked} links in {end_time - start_time}')

View File

@ -30,3 +30,5 @@ install_requires =
psycopg2 == 2.9.5 psycopg2 == 2.9.5
pyoai == 2.5.0 pyoai == 2.5.0
requests == 2.28.2 requests == 2.28.2
pytz > 2021
python-dateutil >= 2.8.0