gutenbergsite/catalog/admin/make-rdf.php

385 lines
11 KiB
PHP

<?php
// This generator is a command-line tool: under any other SAPI stop at once
// without producing output.
$cli = ("cli" == php_sapi_name ());
if (!$cli) {
    exit ();
}

// Append the private library directory to the include path and load
// pgcat.phh from it.
// NOTE(review): $config, the SQL* constants and the Format*/friendlytitle
// helpers used further down are not defined in this file; presumably they
// come from pgcat.phh -- confirm.
set_include_path (
    get_include_path () . PATH_SEPARATOR . "/public/vhost/g/gutenberg/dev/private/lib/php"
);
include ("pgcat.phh");
/**
 * Progress-logging hook.
 *
 * Deliberately a no-op at the moment: the write to STDERR is commented out,
 * so calls cost nothing and the generated RDF on stdout stays clean.
 * Re-enable the fwrite() line to trace the run.
 *
 * @param string $s Message to log (currently ignored).
 * @return void
 */
function _log ($s) {
    // fwrite (STDERR, "$s\n");
    return;
}
/**
 * Render one catalog field as RDF literal markup.
 *
 * A single value becomes one <$tag rdf:parseType="Literal"> element; several
 * values are wrapped in an <rdf:Bag> with one <rdf:li> per value.
 * Values are inserted verbatim, so the caller must have XML-escaped them.
 *
 * @param array  $book Book record (taken by reference for historical reasons;
 *                     it is never modified here).
 * @param string $name Key of the field inside $book.
 * @param string $tag  Qualified element name to emit, e.g. "dc:title".
 * @return string XML fragment, or "" when the field is unset or holds no values.
 */
function lout (&$book, $name, $tag) {
    if (!isset ($book[$name])) {
        return "";
    }

    // Normalise scalars and arrays to a 0-indexed list. The previous code read
    // $a[0] directly, which raised "undefined offset" and emitted an empty
    // element for an empty array or a one-element array not keyed at 0.
    $values = is_array ($book[$name]) ? array_values ($book[$name]) : array ($book[$name]);
    $count = count ($values);

    if ($count == 0) {
        return "";
    }

    if ($count == 1) {
        $val = $values[0];
        return " <$tag rdf:parseType=\"Literal\">$val</$tag>\n";
    }

    $s = " <$tag>\n <rdf:Bag>\n";
    foreach ($values as $val) {
        $s .= " <rdf:li rdf:parseType=\"Literal\">$val</rdf:li>\n";
    }
    $s .= " </rdf:Bag>\n </$tag>\n";
    return $s;
}
/**
 * Render one catalog field as a qualified RDF value.
 *
 * Each value is emitted as <$tag2><rdf:value>...</rdf:value></$tag2>. A single
 * value sits directly inside <$tag>; several values are wrapped in an
 * <rdf:Bag> with one <rdf:li> per value.
 * Values are inserted verbatim, so the caller must have XML-escaped them.
 *
 * @param array  $book Book record (taken by reference for historical reasons;
 *                     it is never modified here).
 * @param string $name Key of the field inside $book.
 * @param string $tag  Outer qualified element name, e.g. "dc:language".
 * @param string $tag2 Inner element name that qualifies the value,
 *                     e.g. "dcterms:ISO639-2".
 * @return string XML fragment, or "" when the field is unset or holds no values.
 */
function qout (&$book, $name, $tag, $tag2) {
    if (!isset ($book[$name])) {
        return "";
    }

    // Normalise scalars and arrays to a 0-indexed list. The previous code read
    // $a[0] directly, which raised "undefined offset" and emitted an empty
    // value for an empty array or a one-element array not keyed at 0.
    $values = is_array ($book[$name]) ? array_values ($book[$name]) : array ($book[$name]);
    $count = count ($values);

    if ($count == 0) {
        return "";
    }

    if ($count == 1) {
        $val = $values[0];
        return " <$tag><$tag2><rdf:value>$val</rdf:value></$tag2></$tag>\n";
    }

    $s = " <$tag>\n <rdf:Bag>\n";
    foreach ($values as $val) {
        $s .= " <rdf:li><$tag2><rdf:value>$val</rdf:value></$tag2></rdf:li>\n";
    }
    $s .= " </rdf:Bag>\n </$tag>\n";
    return $s;
}
// Start-up: database handle, URL roots, generation date and the in-memory catalog.
_log ("Initializing ...");

// NOTE(review): $config is not defined in this file; presumably it is set up
// by the pgcat.phh include -- confirm.
$db = $config->db ();
_log (" Connected to Database ...");

// Site root; file URLs are built on the very same root.
$base_url = "http://www.gutenberg.org";
$file_base = $base_url;

// Generation date (Y-m-d) written into the document header.
$now = date ("Y-m-d");

// Per-book records keyed by book id, filled in by the loaders that follow.
$books = array ();

_log (" Done\n");
_log ("Loading data from database ...");
_log (" Books");

// Walk every row of the books table. A release date is stored only when the
// column yields a truthy value, and the download counter is stored together
// with it; the copyright flag is stored only when set.
$db->exec ("select * from books");
for ($has_row = $db->FirstRow (); $has_row; $has_row = $db->NextRow ()) {
    $pk = $db->get ("pk", SQLINT);

    $reldate = $db->get ("release_date", SQLDATE);
    if ($reldate) {
        $books[$pk]['release_date'] = date ("Y-m-d", $reldate);
        $books[$pk]['downloads'] = $db->get ("downloads", SQLINT);
    }

    if ($db->get ("copyrighted", SQLINT)) {
        $books[$pk]['copyrighted'] = 1;
    }
}
_log (" Authors");

// Sort each person attached to a book into 'creators' (role Creator/Author)
// or 'contributors' (any other role). The formatted text is XML-escaped here
// because the output helpers insert it verbatim.
$db->exec ("select * from v_books_authors order by fk_books");
for ($has_row = $db->FirstRow (); $has_row; $has_row = $db->NextRow ()) {
    $fk_books = $db->get ("fk_books", SQLINT);
    $role = $db->get ("role", SQLCHAR);

    if (in_array ($role, array ("Creator", "Author"))) {
        $val = htmlspecialchars (FormatAuthorDate ($db));
        $books[$fk_books]['creators'][] = $val;
        continue;
    }

    $val = htmlspecialchars (FormatAuthorDateRole ($db));
    $books[$fk_books]['contributors'][] = $val;
}
_log (" FriendlyTitles");

// Give every book collected so far an XML-escaped friendly title.
// Books that only appear in later loaders do not get one.
foreach (array_keys ($books) as $fk_books) {
    $books[$fk_books]['friendlytitle'][] = htmlspecialchars (friendlytitle ($fk_books));
}
/* _log (" Titles");
$db->exec ("select * from titles");
if ($db->FirstRow ()) {
do {
$fk_books = $db->get ("fk_books", SQLINT);
$title = htmlspecialchars ($db->get ("title", SQLCHAR));
// $title = preg_replace ("/\s*\n/", "<br />", $title);
switch ($db->get ("title_order", SQLINT)) {
case 1:
$books[$fk_books]['240'][] = $title; break;
case 2:
case 4:
case 5:
$books[$fk_books]['246'][] = $title; break;
case 3:
$books[$fk_books]['505'][] = $title; break;
}
} while ($db->NextRow ());
} */
_log (" Attributes");

// File each attribute's escaped text under its attribute-list number in the
// book record; the output loop later reads keys such as '245' or '500'.
$db->exec ("select * from attributes");
for ($has_row = $db->FirstRow (); $has_row; $has_row = $db->NextRow ()) {
    $fk_books = $db->get ("fk_books", SQLINT);
    $fk_attriblist = (string) $db->get ("fk_attriblist", SQLINT);
    $text = htmlspecialchars ($db->get ("text", SQLCHAR));

    $books[$fk_books][$fk_attriblist][] = $text;
}
_log (" Categories");

// Attach category names to their books.
$db->exec ("select * from mn_books_categories, categories where fk_categories = pk");
if ($db->FirstRow ()) {
    do {
        $fk_books = $db->get ("fk_books", SQLINT);
        // Category names are text, so escape them like the other text fields
        // before the output helpers insert them into the XML verbatim.
        $books[$fk_books]['categories'][] = htmlspecialchars ($db->get ("category", SQLCHAR));
    } while ($db->NextRow ());
} else {
    // The query returned no rows at all: fall back to the 'eBook' category.
    // The previous code wrote the default to $books[$fk_books] using whatever
    // id an earlier loop happened to leave in $fk_books, so one arbitrary
    // book was tagged. Apply the default to every known book instead.
    // NOTE(review): the default still only applies when there are no category
    // rows at all, as before -- confirm whether a per-book default was intended.
    foreach (array_keys ($books) as $fk_books) {
        $books[$fk_books]['categories'][] = 'eBook';
    }
}
_log (" Languages");

// Language codes are stored exactly as they come from the link table.
$db->exec ("select * from mn_books_langs");
for ($has_row = $db->FirstRow (); $has_row; $has_row = $db->NextRow ()) {
    $fk_books = $db->get ("fk_books", SQLINT);
    $books[$fk_books]['languages'][] = $db->get ("fk_langs", SQLCHAR);
}

_log (" Subjects");

// Subject headings are text and get XML-escaped before being stored.
$db->exec ("select * from mn_books_subjects, subjects where fk_subjects = pk");
for ($has_row = $db->FirstRow (); $has_row; $has_row = $db->NextRow ()) {
    $fk_books = $db->get ("fk_books", SQLINT);
    $subject = htmlspecialchars ($db->get ("subject", SQLCHAR));
    $books[$fk_books]['subjects'][] = $subject;
}

_log (" LoCC");

// LoCC codes are stored exactly as they come from the link table.
$db->exec ("select * from mn_books_loccs");
for ($has_row = $db->FirstRow (); $has_row; $has_row = $db->NextRow ()) {
    $fk_books = $db->get ("fk_books", SQLINT);
    $books[$fk_books]['loccs'][] = $db->get ("fk_loccs", SQLCHAR);
}

_log (" Done\n");
// Open standard output as the stream the whole RDF document is written to.
$fp = fopen ("php://stdout", "w");
// Static document preamble: XML declaration, a DOCTYPE declaring the entity
// shortcuts &pg;, &lic; and &f; used by later records, the license notice,
// the opening <rdf:RDF> element with its namespaces, and the license and
// creation-date metadata. $base_url, $file_base and $now are interpolated.
// The <rdf:RDF> element is left open here; the final write of the script
// closes it.
$s = <<< EOF
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE rdf:RDF [
<!ENTITY pg "Project Gutenberg">
<!ENTITY lic "$base_url/license">
<!ENTITY f "$file_base/">
]>
<!--
The Project Gutenberg Catalog in RDF/XML Format
Copyright (C) 2004-present by
Project Gutenberg Literary Archive Foundation
4557 Melan Drive S.
Fairbanks AK 99712
U.S.A.
LICENSE TERMS
This file ("work") is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License (GPL)
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This work is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
Please become familiar with the GNU GPL before using this file.
You can get a full copy of the GNU General Public License online at:
http://www.gnu.org/licenses/gpl.html
The GNU General Public License is explained in human language at:
http://creativecommons.org/licenses/GPL/2.0/
If you are uncertain about the terms of the license or whether your
intended use is legitimate, please email gbnewby@pglaf.org for
clarification.
Thank you for your interest in Project Gutenberg. For more
information about Project Gutenberg and its goals, visit
the Official Project Gutenberg Web site at
http://www.gutenberg.org
-->
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
xmlns:xsd="http://www.w3.org/2001/XMLSchema#"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dcterms="http://purl.org/dc/terms/"
xmlns:dcmitype="http://purl.org/dc/dcmitype/"
xmlns:cc="http://web.resource.org/cc/"
xmlns:pgterms="$base_url/rdfterms/"
xml:base="$base_url/feeds/catalog.rdf">
<cc:Work rdf:about="">
<cc:license rdf:resource="http://creativecommons.org/licenses/GPL/2.0/" />
</cc:Work>
<cc:License rdf:about="http://creativecommons.org/licenses/GPL/2.0/">
<cc:permits rdf:resource="http://web.resource.org/cc/Reproduction" />
<cc:permits rdf:resource="http://web.resource.org/cc/Distribution" />
<cc:requires rdf:resource="http://web.resource.org/cc/Notice" />
<cc:permits rdf:resource="http://web.resource.org/cc/DerivativeWorks" />
<cc:requires rdf:resource="http://web.resource.org/cc/ShareAlike" />
<cc:requires rdf:resource="http://web.resource.org/cc/SourceCode" />
</cc:License>
<rdf:Description rdf:about="">
<dc:created><dcterms:W3CDTF><rdf:value>$now</rdf:value></dcterms:W3CDTF></dc:created>
</rdf:Description>
EOF;
// Emit the preamble before any per-book or per-file record.
fputs ($fp, $s);
// debug
// $books = array_slice ($books, 0, 500);

// Write one <pgterms:etext> record per collected book.
// This loop used to be `while (list (...) = each ($books))`; each() was
// deprecated in PHP 7.2 and removed in PHP 8.0, so foreach is used instead.
// It visits the same key/value pairs in the same order.
foreach ($books as $fk_books => $book) {
    $s = "<pgterms:etext rdf:ID=\"etext$fk_books\">\n";
    $s .= " <dc:publisher>&pg;</dc:publisher>\n";

    // Literal fields; the numeric keys are attribute-list numbers filled in
    // by the attributes loader.
    $s .= lout ($book, '240', 'dc:title');
    $s .= lout ($book, '245', 'dc:title');
    $s .= lout ($book, '246', 'dc:alternative');
    $s .= lout ($book, '500', 'dc:description');
    $s .= lout ($book, '505', 'dc:tableOfContents');
    $s .= lout ($book, 'creators', 'dc:creator');
    $s .= lout ($book, 'contributors', 'dc:contributor');
    $s .= lout ($book, 'friendlytitle', 'pgterms:friendlytitle');

    // Qualified fields: each value is wrapped in the element named last.
    $s .= qout ($book, 'languages', 'dc:language', 'dcterms:ISO639-2');
    $s .= qout ($book, 'subjects', 'dc:subject', 'dcterms:LCSH');
    $s .= qout ($book, 'loccs', 'dc:subject', 'dcterms:LCC');
    $s .= qout ($book, 'release_date', 'dc:created', 'dcterms:W3CDTF');
    $s .= qout ($book, 'downloads', 'pgterms:downloads', 'xsd:nonNegativeInteger');
    $s .= qout ($book, 'categories', 'dc:type', 'pgterms:category');

    // Copyrighted works carry a textual notice; all others point at the
    // license URL through the &lic; entity declared in the DOCTYPE.
    if (isset ($book['copyrighted'])) {
        $s .= " <dc:rights>Copyrighted work. See license inside work.</dc:rights>\n";
    } else {
        $s .= " <dc:rights rdf:resource=\"&lic;\" />\n";
    }

    $s .= "</pgterms:etext>\n\n";
    fputs ($fp, $s);
}

// Drop the catalog array before the file records are processed.
$books = null;
// files
// Write one <pgterms:file> record per file that belongs to a book and passes
// the obsoleted = 0 and diskstatus = 0 filters, then close the document.
$db->exec ("select fk_books, mediatype, filetype, fk_filetypes, fk_compressions, fk_encodings, " .
    "edition, filename, filesize, filemtime " .
    "from files " .
    "left join filetypes on files.fk_filetypes = filetypes.pk " .
    "where fk_books is not null and obsoleted = 0 and diskstatus = 0 " .
    "order by fk_books, filename;");
if ($db->FirstRow ()) {
    do {
        // Text columns are XML-escaped as they are read: they end up inside
        // an attribute value or element text below, and were previously
        // inserted raw, so an '&', '<' or '"' in the data produced malformed
        // XML. Values without those characters are unchanged.
        $fk_books = $db->get ("fk_books", SQLINT);
        $filename = htmlspecialchars ((string) $db->get ("filename", SQLCHAR));
        $filesize = $db->get ("filesize", SQLINT);
        $filetype = htmlspecialchars ((string) $db->get ("filetype", SQLCHAR));
        $fk_filetypes = htmlspecialchars ((string) $db->get ("fk_filetypes", SQLCHAR));
        $filemtime = $db->get ("filemtime", SQLDATE);
        $mediatype = htmlspecialchars ((string) $db->get ("mediatype", SQLCHAR));
        $fk_encodings = htmlspecialchars ((string) $db->get ("fk_encodings", SQLCHAR));
        $fk_compressions = $db->get ("fk_compressions", SQLCHAR);

        // Names under cache/ hang directly off the file root; everything else
        // lives under dirs/. &f; is the entity declared in the DOCTYPE.
        if (!strncmp ($filename, "cache/", 6)) {
            $filename = "&f;$filename";
        } else {
            $filename = "&f;dirs/$filename";
        }

        $mtime = date ("Y-m-d", $filemtime);

        // Text media types carry their character set as a parameter.
        if (!empty ($fk_encodings) && !strncmp ($mediatype, "text/", 5)) {
            $mediatype .= "; charset=\"$fk_encodings\"";
        }

        // Unknown media type: fall back to a generic one and, when a file
        // type is known, describe it in a type parameter.
        if (empty ($mediatype)) {
            $mediatype = "application/octet-stream";
            if (!empty ($fk_filetypes)) {
                $mediatype .= "; type=\"$filetype ($fk_filetypes)\"";
            }
        }

        // Zipped files get a second dc:format element for the container.
        $compression = "";
        if ($fk_compressions == "zip") {
            $compression = "\n <dc:format><dcterms:IMT><rdf:value>application/zip</rdf:value></dcterms:IMT></dc:format>";
        }

        $s = "\n" .
            "<pgterms:file rdf:about=\"$filename\">\n" .
            "<dc:format><dcterms:IMT><rdf:value>$mediatype</rdf:value></dcterms:IMT></dc:format>$compression\n" .
            "<dcterms:extent>$filesize</dcterms:extent>\n" .
            "<dcterms:modified><dcterms:W3CDTF><rdf:value>$mtime</rdf:value></dcterms:W3CDTF></dcterms:modified>\n" .
            "<dcterms:isFormatOf rdf:resource=\"#etext$fk_books\" />\n" .
            "</pgterms:file>\n";
        fputs ($fp, $s);
    } while ($db->NextRow ());
}

// Close the root element opened by the preamble and release the stream.
fputs ($fp, "\n</rdf:RDF>\n");
fclose ($fp);
_log (" Done!\n");
?>