Merge branch 'main' into documentation
commit
c092e5efa2
|
@ -7,3 +7,4 @@ POSTGRES_DB_NAME=postgres
|
|||
POSTGRES_USERNAME=postgres
|
||||
POSTGRES_PASSWORD=postgrespw
|
||||
POSTGRES_SSLMODE=require
|
||||
CA_CERT=/usr/local/share/ca-certificates/ca-certificate.crt
|
||||
|
|
|
@ -5,7 +5,7 @@ jobs:
|
|||
name: Run tests
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@master
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/setup-node@v1
|
||||
with:
|
||||
node-version: '14.x'
|
||||
|
|
|
@ -4,16 +4,25 @@ on: push
|
|||
|
||||
jobs:
|
||||
docker:
|
||||
timeout-minutes: 4
|
||||
timeout-minutes: 10
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v1
|
||||
- run: cp .env.template .env
|
||||
uses: actions/checkout@v3
|
||||
- name: Create .env file
|
||||
run: |
|
||||
cp .env.template .env
|
||||
sed -i 's/POSTGRES_SSLMODE=require/POSTGRES_SSLMODE=allow/' .env
|
||||
- name: Create dummy certificate
|
||||
run: |
|
||||
mkdir api/certificates
|
||||
touch api/certificates/dummy-cert.crt
|
||||
- name: Create PostgreSQL container
|
||||
run: docker run -d --name postgres -p 5432:5432 -e POSTGRES_PASSWORD=postgrespw postgres
|
||||
- name: Start containers
|
||||
run: docker-compose -f "docker-compose.yml" up -d --build
|
||||
|
||||
- name: Unit tests for oapen-engine
|
||||
run: docker-compose run --entrypoint "./scripts/tests.sh" oapen-engine
|
||||
- name: Stop containers
|
||||
if: always()
|
||||
run: docker-compose -f "docker-compose.yml" down
|
|
@ -3,3 +3,5 @@ oapen-engine/lib/
|
|||
.python-version
|
||||
private/
|
||||
.env
|
||||
|
||||
api/certificates/
|
152
DEPLOYING.md
152
DEPLOYING.md
|
@ -1,152 +0,0 @@
|
|||
# Deployment Guide
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Before we begin, you should already have the following:
|
||||
|
||||
- A DigitalOcean account.
|
||||
- `ssh` installed locally on your computer, comes pre-installed with most operating systems.
|
||||
|
||||
## Step 1: Create SSH/find keys
|
||||
|
||||
1. **Check for existing SSH keypair:**
|
||||
To check if an SSH keypair already exists on your machine, open a terminal and type the following command:
|
||||
|
||||
```
|
||||
ls ~/.ssh
|
||||
```
|
||||
|
||||
If there is an existing keypair, you will see output containing something like this:
|
||||
|
||||
```
|
||||
/home/username/.ssh/id_rsa /home/username/.ssh/id_rsa.pub
|
||||
```
|
||||
|
||||
If there is no output, then you do not have an existing keypair.
|
||||
|
||||
2. **Generate a new SSH keypair:**
|
||||
If you do not have an existing keypair, you can generate a new one using the following command:
|
||||
|
||||
```
|
||||
ssh-keygen -t rsa -b 4096
|
||||
```
|
||||
|
||||
This command generates a new RSA keypair with a key length of 4096 bits.
|
||||
|
||||
You can add an optional passphrase, which adds an extra layer of security, as anyone who gains access to your key will also need to know the passphrase to use it. If you want to add a passphrase, you will be prompted to enter it twice.
|
||||
|
||||
3. **Copy the public key to the clipboard:**
|
||||
Copy and paste the contents of the .pub file, typically id_rsa.pub, by using the cat command to output the key to the terminal:
|
||||
|
||||
```
|
||||
cat ~/.ssh/id_rsa.pub
|
||||
```
|
||||
|
||||
|
||||
## Step 2: Create a new DigitalOcean droplet
|
||||
|
||||
1. Log in to your DigitalOcean account.
|
||||
2. Click the "Create" button and select "Droplets".
|
||||
3. Choose a region & datacenter closest to your majority audience.
|
||||
4. Under "Choose an image" select "Marketplace" and search for "Docker". There should be an image option named "Docker 20.10.21 on Ubuntu 22.04", select that image:
|
||||
![Image](https://i.imgur.com/jibKdDq.png)
|
||||
5. Choose any options under "Choose Size", but the cheapest option as pictured should more than suffice:
|
||||
![Sizing options](https://i.imgur.com/ZBzsHUV.png)
|
||||
6. Under "Choose Authentication Method" choose "SSH Key" and click "New SSH Key", and in the popup window paste the public key you copied to your clipboard earlier.
|
||||
7. Give the Droplet a name, and click "Create Droplet".
|
||||
8. Once the Droplet is done spinning up, copy the IPv4 address.
|
||||
|
||||
## Step 3: Create a non-root user with sudo privileges
|
||||
|
||||
1. Log in to your droplet as the root user over SSH:
|
||||
|
||||
```
|
||||
ssh root@<your-droplet-ip>
|
||||
```
|
||||
|
||||
2. Create a new user and set a password, then add them to the `sudo` and `docker` groups:
|
||||
|
||||
```
|
||||
adduser --create-home <username>
|
||||
passwd <username>
|
||||
usermod -aG sudo,docker <username>
|
||||
```
|
||||
|
||||
4. Edit the SSH configuration file to disallow root login:
|
||||
|
||||
```
|
||||
nano /etc/ssh/sshd_config
|
||||
```
|
||||
|
||||
5. Find the line that reads `PermitRootLogin yes` and change it to `PermitRootLogin no`. Save and exit the file.
|
||||
7. Allow SSH login with non-root user with the same SSH keys you uploaded:
|
||||
|
||||
```
|
||||
su - <username>
|
||||
mkdir -p ~/.ssh
|
||||
sudo cp /root/.ssh/authorized_keys ~/.ssh/
|
||||
sudo chown -R $USER:$USER ~/.ssh
|
||||
sudo chmod 700 ~/.ssh
|
||||
sudo chmod 600 ~/.ssh/authorized_keys
|
||||
```
|
||||
|
||||
8. Restart the SSH service to enforce the changes you made:
|
||||
|
||||
```
|
||||
sudo systemctl restart ssh
|
||||
```
|
||||
|
||||
## Step 4: Setup a new DigitalOcean managed database
|
||||
|
||||
1. From the DigitalOcean dashboard, click on the "Databases" tab.
|
||||
2. Click "Create Database".
|
||||
3. For the region, it is ideal to select the same region & datacenter as the Droplet you just created, so they can be part of the same VPC network.
|
||||
4. Choose "PostgreSQL" and select "v15" for the version.
|
||||
5. Under "Choose a database configuration", you may select any plan, though the smallest size as pictured should more than suffice: ![DB Sizing options](https://i.ibb.co/0GxMzvX/Screenshot-2023-04-10-at-8-23-26-AM.png)
|
||||
6. Give the database a name, and click "Create Database Cluster".
|
||||
7. Once the database is done creating (this can take a few minutes), find the "Connection details" section on the Database Cluster's page: ![Connection details](https://i.ibb.co/cg8LtPG/Screenshot-2023-04-10-at-9-01-22-AM.png)
|
||||
You will need these for the next step.
|
||||
|
||||
## Step 5: Clone & Configure the Repository
|
||||
|
||||
1. Login to the droplet (not the database) with the non-root user you created:
|
||||
```
|
||||
ssh <username>@<droplet-ip>
|
||||
```
|
||||
2. Install the Docker Compose plugin:
|
||||
```bash
|
||||
sudo apt-get update
|
||||
sudo apt-get install docker-compose-plugin
|
||||
2. Clone the repository and go into the directory it creates:
|
||||
```
|
||||
git clone https://github.com/EbookFoundation/oapen-suggestion-service.git
|
||||
cd oapen-suggestion-service
|
||||
```
|
||||
4. Now, edit the `.env` file using the editor of your choice, `nano` or `vim`, you will need to configure all of the options for the application to work properly:
|
||||
```properties
|
||||
API_PORT=<Port to serve API on>
|
||||
POSTGRES_HOST=<Hostname of postgres server>
|
||||
POSTGRES_PORT=<Port postgres is running on>
|
||||
POSTGRES_DB_NAME=<Name of the postgres database>
|
||||
POSTGRES_USERNAME=<Username of the postgres user>
|
||||
POSTGRES_PASSWORD=<Password of the postgres user>
|
||||
```
|
||||
5. **NOTE:** Open the `docker-compose.yml` file and find the line:
|
||||
```dockerfile
|
||||
- RUN_CLEAN=1
|
||||
```
|
||||
This is set to `1` by default, which causes the database to be ***COMPLETELY*** deleted and the types recreated each time the server restarts. It is important to have this set to `1` only on the _first run of the application_, or after making changes that affect the structure of the database. As soon as you run the application with the following command, you should change the line to:
|
||||
```dockerfile
|
||||
- RUN_CLEAN=0
|
||||
```
|
||||
To prevent this behavior.
|
||||
|
||||
## Start & Stop the Service
|
||||
You can start the services from the server by running:
|
||||
```bash
|
||||
docker compose up -d
|
||||
```
|
||||
And you can stop the services with:
|
||||
```bash
|
||||
docker compose down
|
||||
```
|
372
README.md
372
README.md
|
@ -1,20 +1,33 @@
|
|||
# OAPEN Suggestion Engine
|
||||
# OAPEN Suggestion Service
|
||||
|
||||
## Description
|
||||
The OAPEN Suggestion Engine will suggest e-books based on other books with similar content. It achieves this using a trigram semantic inferencing algorithm. The proof-of-concept and paper that this service is built on are the work of Ronald Snijder of the OAPEN Foundation; you can read his original paper [here](https://liberquarterly.eu/article/view/10938).
|
||||
|
||||
The OAPEN Suggestion Service uses natural-language processing to suggest books based on their content similarities. To protect user privacy, we utilize text analysis rather than usage data to provide recommendations. This service is built on the proof-of-concept and paper by Ronald Snijder from the OAPEN Foundation, and you can [read the paper here](https://liberquarterly.eu/article/view/10938).
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [Setup](#setup)
|
||||
- [Configuration](#configuration)
|
||||
- [Endpoints](#dependencies)
|
||||
- [Installation (Server)](#installation-server)
|
||||
* [DigitalOcean Droplet](#digitalocean-droplet)
|
||||
* [DigitalOcean Managed Database](#digitalocean-managed-database)
|
||||
* [Setup Users & Install Requirements](#setup-users--install-requirements)
|
||||
* [Clone & Configure the Project](#clone--configure-the-project)
|
||||
* [SSL Certificate](#ssl-certificate)
|
||||
- [Running](#running)
|
||||
- [Logging](#logging)
|
||||
- [Endpoints](#endpoints)
|
||||
* [/api](#get-api)
|
||||
* [/api/ngrams](#get-apingrams)
|
||||
* [/api/{handle}](#get-apihandle)
|
||||
* [/api/{handle}/ngrams](#get-apihandlengrams)
|
||||
- [Service Components](#service-components)
|
||||
- [License](/LICENSE.md)
|
||||
- [Deploying](/DEPLOYING.md)
|
||||
* [Suggestion Engine](#suggestion-engine)
|
||||
* [API](#api)
|
||||
* [Embed Script](#embed-script)
|
||||
* [Web Demo](#web-demo)
|
||||
- [Updates](#updates)
|
||||
- [Local Installation (No Server)](#local-installation-no-server)
|
||||
|
||||
## Installation
|
||||
|
||||
### 0. Configure Server
|
||||
## Installation (Server)
|
||||
|
||||
#### Digital Ocean:
|
||||
|
||||
|
@ -47,81 +60,242 @@ Then add the user to it:
|
|||
|
||||
Restart the machine for the changes to take effect or you can run `
|
||||
|
||||
### 1. Install Docker
|
||||
### DigitalOcean Droplet
|
||||
|
||||
This project uses Docker. To run the project, you will need to have Docker installed. You can find instructions for installing Docker [here](https://docs.docker.com/get-docker/). Note that on Linux, if you do not install Docker with Docker Desktop, you will have to install Docker Compose separately, instructions for which can be found [here](https://docs.docker.com/compose/install/#scenario-two-install-the-compose-plugin).
|
||||
|
||||
### 2. Install PostgreSQL
|
||||
1. Log in to your DigitalOcean account.
|
||||
2. Create a new Droplet.
|
||||
3. Under "Choose an image" select "Marketplace" and search for "Docker". Select "Docker 20.10.21 on Ubuntu 22.04".
|
||||
4. Choose any size, but the cheapest option will work fine.
|
||||
5. If you do not have an ssh key, generate one with:
|
||||
```bash
|
||||
ssh-keygen -t rsa -b 4096
|
||||
```
|
||||
And copy the public key to your clipboard. If you have a key on your computer already, you can use that.
|
||||
6. Under "Choose Authentication Method" choose "SSH Key" and click "New SSH Key", and in the popup window paste the public key you copied to your clipboard. Make sure it is selected.
|
||||
7. Give the Droplet a name and click "Create".
|
||||
8. Open the firewall ports
|
||||
- https://cloud.digitalocean.com/networking/firewalls
|
||||
|
||||
The project uses PostgreSQL as a database. You can find instructions for installing PostgreSQL [here](https://www.postgresql.org/download/).
|
||||
Make sure it is running, and a database is created. Take note of the credentials and name of the database you create, you will need them for the next step.
|
||||
### DigitalOcean Managed Database
|
||||
|
||||
> If you would like to run the project for local testing, you can create a PostgreSQL server with Docker using this command:
|
||||
```bash
|
||||
docker run -d --name postgres -p 5432:5432 -e POSTGRES_PASSWORD=postgrespw postgres
|
||||
```
|
||||
> Note that the username and database name will both be `postgres` and the password will be `postgrespw`. You can connect via the hostname `host.docker.internal` over port `5432`. As such, it is not recommended to use this in a production environment.
|
||||
1. From the DigitalOcean dashboard, click "Databases" > "Create Database".
|
||||
2. Ideally, select the same region & datacenter as the Droplet you just created, so they can be part of the same VPC network.
|
||||
3. Choose "PostgreSQL v15".
|
||||
4. Select any sizing plan, but the cheapest one will suffice.
|
||||
5. Give the database a name, and click "Create Database Cluster".
|
||||
6. Once the database is done creating (this can take a few minutes), find the "Connection details" section on the new database's page, you will need them later.
|
||||
|
||||
### 3. Clone the repository
|
||||
### Setup Users & Install Requirements
|
||||
|
||||
Clone the repository:
|
||||
1. Log in to the droplet over SSH:
|
||||
```bash
|
||||
ssh root@<your-droplet-ip>
|
||||
```
|
||||
2. Create a new user `oapen` and set a password, adding them to the `sudo` and `docker` groups, then login as the new user:
|
||||
|
||||
```bash
|
||||
useradd -m -G sudo,docker oapen
|
||||
passwd oapen
|
||||
su -l -s /bin/bash oapen
|
||||
```
|
||||
|
||||
3. Install the `docker compose` command:
|
||||
|
||||
```bash
|
||||
sudo apt-get update
|
||||
sudo apt-get install docker-compose-plugin
|
||||
```
|
||||
|
||||
4. Change the SSH configuration file to disallow root login:
|
||||
|
||||
```bash
|
||||
sudo sed -i 's/PermitRootLogin yes/PermitRootLogin no/' /etc/ssh/sshd_config
|
||||
```
|
||||
|
||||
5. Allow SSH login with non-root user with the same SSH keys you uploaded to DigitalOcean:
|
||||
|
||||
```bash
|
||||
mkdir -p ~/.ssh
|
||||
sudo cp /root/.ssh/authorized_keys ~/.ssh/
|
||||
sudo chown -R oapen:oapen ~/.ssh
|
||||
sudo chmod 700 ~/.ssh
|
||||
sudo chmod 600 ~/.ssh/authorized_keys
|
||||
sudo systemctl restart ssh
|
||||
```
|
||||
|
||||
6. Create a swapfile to avoid issues with high memory usage:
|
||||
|
||||
```bash
|
||||
sudo fallocate -l 1G /swapfile
|
||||
sudo chmod 600 /swapfile
|
||||
sudo mkswap /swapfile
|
||||
sudo swapon /swapfile
|
||||
echo '/swapfile none swap sw 0 0' | sudo tee -a /etc/fstab
|
||||
```
|
||||
|
||||
> Feel free to replace `1G` in the first command with `4G`. Although the service should never use this much memory, extra swap never hurts if you have the disk space to spare. More on swap [here](https://www.digitalocean.com/community/tutorials/how-to-add-swap-space-on-ubuntu-20-04).
|
||||
|
||||
7. Restart the droplet to persist all of the changes. From now on, login to the droplet with:
|
||||
|
||||
```bash
|
||||
ssh oapen@<your-droplet-ip>
|
||||
```
|
||||
|
||||
### Clone & Configure the Project
|
||||
|
||||
1. Clone the repository and cd into the directory it creates:
|
||||
```bash
|
||||
git clone https://github.com/EbookFoundation/oapen-suggestion-service.git
|
||||
cd oapen-suggestion-service
|
||||
```
|
||||
> You can clone this anywhere but in the home directory is easiest.
|
||||
2. Copy the `.env.template` file to `.env`:
|
||||
|
||||
```bash
|
||||
cp .env.template .env
|
||||
```
|
||||
|
||||
3. Using a text editor like `vim` or `nano` configure all of the options in `.env`:
|
||||
|
||||
```properties
|
||||
API_PORT=<Port to serve API on>
|
||||
POSTGRES_HOST=<Hostname of postgres server>
|
||||
POSTGRES_PORT=<Port postgres is running on>
|
||||
POSTGRES_DB_NAME=<Name of the postgres database>
|
||||
POSTGRES_USERNAME=<Username of the postgres user>
|
||||
POSTGRES_PASSWORD=<Password of the postgres user>
|
||||
POSTGRES_SSLMODE=<'require' when using a managed database>
|
||||
```
|
||||
|
||||
> Postgres credentials can be found in the "Connection details" section of the managed database
|
||||
|
||||
### SSL Certificate
|
||||
|
||||
> Add information on how to retrieve certificate from DigitalOcean managed DB.
|
||||
|
||||
Create a directory in `api` called `certificates`. Once you have acquired a certificate for your managed database, copy it into `/api/certificates`. **Make sure that this file is named `ca-certificate.crt`, or ensure that the name of your certificate matches the `CA_CERT` variable in your `.env`.**
|
||||
|
||||
## Running
|
||||
|
||||
You can start the services by running the following command in the directory where you cloned the repo:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/EbookFoundation/oapen-suggestion-service.git
|
||||
docker compose up -d --build
|
||||
```
|
||||
|
||||
And go into the project directory:
|
||||
The API will be running on `https://<your-ip>:<API_PORT>`.
|
||||
|
||||
> _NOTE_: The `-d` flag runs the services in the background, so you can safely exit the session and the services will continue to run. The `--build` flag ensures any changes to the code are reflected in the containers.
|
||||
|
||||
You can stop the services with:
|
||||
|
||||
```bash
|
||||
cd oapen-suggestion-service
|
||||
docker compose down
|
||||
```
|
||||
|
||||
### 4. Configure the environment
|
||||
## Logging
|
||||
|
||||
And create a file `.env` with the following, replacing `<>` with the described values:
|
||||
Log files are automatically generated by Docker for each container. The log files can be found in `/var/lib/docker/containers/<container-id>/*-json.log`.
|
||||
|
||||
```properties
|
||||
API_PORT=<Port to serve API on>
|
||||
WEB_DEMO_PORT=<Port to serve web demo on>
|
||||
EMBED_SCRIPT_PORT=<Port to serve embed script on>
|
||||
POSTGRES_HOST=<Hostname of postgres server, will be "localhost" on local installation>
|
||||
POSTGRES_PORT=<Port postgres is running on, default of 5432 in most cases>
|
||||
POSTGRES_DB_NAME=<Name of the postgres database, "postgres" works fine here>
|
||||
POSTGRES_USERNAME=<Username of the postgres user>
|
||||
POSTGRES_PASSWORD=<Password of the postgres user>
|
||||
POSTGRES_SSLMODE=require # for Digital ocean managed db
|
||||
```
|
||||
To find a container's id, run `docker ps -a`.
|
||||
|
||||
> The service **will not run** if this is improperly configured.
|
||||
To view log files, run `tail -f /var/lib/docker/containers/<container-id>/*-json.log`.
|
||||
|
||||
### 5. Run the service
|
||||
After some time, log files may take up too much disk space. To clear all logs on the host machine, run `truncate -s 0 /var/lib/docker/containers/*/*-json.log`
|
||||
|
||||
Now you can simply start the service with:
|
||||
|
||||
```bash
|
||||
docker compose up
|
||||
```
|
||||
|
||||
and connect to the API at `http://localhost:<API_PORT>`.
|
||||
|
||||
To view logs:
|
||||
```
|
||||
docker logs oapen-suggestion-service-oapen-engine-1
|
||||
```
|
||||
## Configuration
|
||||
|
||||
> *More configuration options should go here*
|
||||
To clear logs for a specific container, run `truncate -s 0 /var/lib/docker/containers/<container-id>/*-json.log`
|
||||
|
||||
## Endpoints
|
||||
|
||||
The API provides access to the following endpoints:
|
||||
|
||||
- `http://localhost:3001/api/{handle}`
|
||||
- e.g. http://localhost:3001/api/20.400.12657/47581
|
||||
- `http://localhost:3001/api/{handle}/?threshold={integer}`
|
||||
- e.g. http://localhost:3001/api/20.400.12657/47581/?threshold=5
|
||||
- `http://localhost:3001/api/{handle}/ngrams`
|
||||
- e.g. http://localhost:3001/api/20.400.12657/47581/ngrams
|
||||
### GET /api
|
||||
|
||||
Returns an array of suggestions for each book as an array.
|
||||
|
||||
The array of books is ordered by the date they were added (most recent first).
|
||||
|
||||
#### Query Parameters
|
||||
|
||||
- `limit` (optional): limits the number of results returned. Default is 25, maximum is 100.
|
||||
- `offset` (optional): offset the list of results. Default is 0.
|
||||
- `threshold` (optional): sets the minimum similarity score to receive suggestions for. Default is 0, returning all suggestions.
|
||||
|
||||
#### Examples
|
||||
|
||||
Any combination of the query parameters, in any order, is valid.
|
||||
|
||||
- `/api?threshold=3`
|
||||
|
||||
Returns suggestions with a similarity score of 3 or more for the 25 most recently added books.
|
||||
- `/api?threshold=5&limit=100`
|
||||
|
||||
Returns suggestions with a similarity score of 5 or more for the 100 most recently added books.
|
||||
- `/api?limit=50&offset=1000`
|
||||
|
||||
Returns 50 books and all of their suggestions, skipping the 1000 most recent.
|
||||
|
||||
### GET /api/ngrams
|
||||
|
||||
Returns an array of ngrams and their occurrences for each book as an array.
|
||||
|
||||
The array of books is ordered by the date they were added (most recent first).
|
||||
|
||||
#### Query Parameters
|
||||
|
||||
- `limit` (optional): limits the number of results returned. Default is 25, maximum is 100.
|
||||
- `offset` (optional): offset the list of results. Default is 0.
|
||||
|
||||
#### Examples
|
||||
|
||||
Any combination of the query parameters, in any order, is valid.
|
||||
|
||||
- `/api/ngrams?limit=100`
|
||||
|
||||
Returns ngrams for the 100 most recent books.
|
||||
- `/api/ngrams?offset=1000`
|
||||
|
||||
Returns ngrams for 25 books, skipping the 1000 most recent.
|
||||
|
||||
|
||||
### GET /api/{handle}
|
||||
|
||||
Returns suggestions for the book with the specified handle.
|
||||
|
||||
#### Path Parameters
|
||||
|
||||
`{handle}` (required): the handle of the book to retrieve.
|
||||
|
||||
#### Query Parameters
|
||||
`threshold` (optional): sets the minimum similarity score to receive suggestions for. Default is 0, returning all suggestions.
|
||||
|
||||
#### Examples
|
||||
|
||||
> **NOTE**: You won't need to worry about the forward slash in handles causing problems, this is handled server-side.
|
||||
|
||||
- `/api/20.400.12657/47581`
|
||||
|
||||
Returns suggestions for [the book](https://library.oapen.org/handle/20.500.12657/37041) with the handle `20.400.12657/47581`.
|
||||
|
||||
- `/api/20.400.12657/47581?threshold=3`
|
||||
|
||||
Returns suggestions with a similarity score of 3 or more for [the book](https://library.oapen.org/handle/20.500.12657/37041) with the handle `20.400.12657/47581`.
|
||||
|
||||
|
||||
### GET /api/{handle}/ngrams
|
||||
|
||||
Returns the ngrams and their occurrences for the book with the specified handle.
|
||||
|
||||
#### Path Parameters
|
||||
|
||||
`{handle}` (required): the handle of the book to retrieve.
|
||||
|
||||
#### Example
|
||||
|
||||
`/api/20.400.12657/47581/ngrams`
|
||||
|
||||
Returns ngrams and their occurrences for [the book](https://library.oapen.org/handle/20.500.12657/37041) with the handle `20.400.12657/47581`.
|
||||
|
||||
## Service Components
|
||||
|
||||
|
@ -161,19 +335,69 @@ You can find the code for the web demo in `web/`.
|
|||
Configuration info for the web demo is in [`web/README.md`](web/README.md).
|
||||
|
||||
**Base dependencies**:
|
||||
* NodeJS 14.x+
|
||||
* NPM package manager
|
||||
|
||||
- NodeJS 14.x+
|
||||
- NPM package manager
|
||||
|
||||
**Automatically-installed dependencies**:
|
||||
* `next` -- Framework for production-driven web apps
|
||||
* Maintained by [Vercel](https://vercel.com) and the open source community
|
||||
* `react` -- Frontend design framework
|
||||
* Maintained by [Meta](https://reactjs.org).
|
||||
* Largest frontend web UI library.
|
||||
* (Alternative considered: Angular -- however, was recently deprecated by Google)
|
||||
* `pg` -- basic PostgreSQL driver
|
||||
* Maintained [on npm](https://www.npmjs.com/package/pg)
|
||||
* `typescript` -- Types for JavaScript
|
||||
* Maintained by [Microsoft](https://www.typescriptlang.org/) and the open source community.
|
||||
|
||||
### Updates
|
||||
- `next` -- Framework for production-driven web apps
|
||||
- Maintained by [Vercel](https://vercel.com) and the open source community
|
||||
- `react` -- Frontend design framework
|
||||
- Maintained by [Meta](https://reactjs.org).
|
||||
- Largest frontend web UI library.
|
||||
- (Alternative considered: Angular -- however, was recently deprecated by Google)
|
||||
- `pg` -- basic PostgreSQL driver
|
||||
- Maintained [on npm](https://www.npmjs.com/package/pg)
|
||||
- `typescript` -- Types for JavaScript
|
||||
- Maintained by [Microsoft](https://www.typescriptlang.org/) and the open source community.
|
||||
|
||||
## Updates
|
||||
|
||||
> TODO: add documentation
|
||||
|
||||
## Local Installation (No Server)
|
||||
|
||||
1. **Install Docker**
|
||||
|
||||
This project uses Docker. Instructions for installing Docker are [here](https://docs.docker.com/get-docker/). Note that if you do not install Docker with Docker Desktop (which is recommended), you will have to install Docker Compose separately. Instructions for that are [here](https://docs.docker.com/compose/install/#scenario-two-install-the-compose-plugin).
|
||||
|
||||
2. **Install PostgreSQL**
|
||||
|
||||
You can find instructions for installing PostgreSQL on your machine [here](https://www.postgresql.org/download/).
|
||||
|
||||
Or you can create a PostgreSQL server with Docker:
|
||||
|
||||
```bash
|
||||
docker run -d --name postgres -p 5432:5432 -e POSTGRES_PASSWORD=postgrespw postgres
|
||||
```
|
||||
|
||||
> The username and database name will both be `postgres` and the password will be `postgrespw`. You can connect via the hostname `host.docker.internal` over port `5432`.
|
||||
|
||||
3. **Clone and configure the project**
|
||||
|
||||
- Clone the repo and go into its directory:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/EbookFoundation/oapen-suggestion-service.git
|
||||
cd oapen-suggestion-service
|
||||
```
|
||||
|
||||
- Copy the `.env.template` file to `.env`:
|
||||
|
||||
```bash
|
||||
cp .env.template .env
|
||||
```
|
||||
|
||||
- Using a text editor like `vim` or `nano` configure all of the options in `.env`:
|
||||
```properties
|
||||
API_PORT=<Port to serve API on>
|
||||
POSTGRES_HOST=<Hostname of postgres server>
|
||||
POSTGRES_PORT=<Port postgres is running on>
|
||||
POSTGRES_DB_NAME=<Name of the postgres database>
|
||||
POSTGRES_USERNAME=<Username of the postgres user>
|
||||
POSTGRES_PASSWORD=<Password of the postgres user>
|
||||
POSTGRES_SSLMODE=<'allow' for a local installation>
|
||||
```
|
||||
|
||||
4. See [Running](#running)
|
||||
|
|
|
@ -10,8 +10,13 @@ RUN npm install
|
|||
# RUN npm ci --only=production
|
||||
|
||||
# Bundle app source
|
||||
|
||||
COPY . .
|
||||
|
||||
COPY ./certificates/* /usr/local/share/ca-certificates/
|
||||
|
||||
RUN chmod 644 /usr/local/share/ca-certificates/*.crt && update-ca-certificates
|
||||
|
||||
EXPOSE 3001
|
||||
|
||||
CMD [ "npm", "start" ]
|
||||
CMD [ "npm", "start" ]
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
const options = {};
|
||||
const pgp = require("pg-promise")(options);
|
||||
const fs = require("fs");
|
||||
|
||||
class DatabaseConnectionError extends Error {
|
||||
constructor(message) {
|
||||
|
@ -7,21 +8,25 @@ class DatabaseConnectionError extends Error {
|
|||
}
|
||||
}
|
||||
|
||||
if (
|
||||
!(
|
||||
process.env.POSTGRES_USERNAME &&
|
||||
process.env.POSTGRES_PASSWORD &&
|
||||
process.env.POSTGRES_HOST &&
|
||||
process.env.POSTGRES_PORT &&
|
||||
process.env.POSTGRES_DB_NAME &&
|
||||
process.env.POSTGRES_SSLMODE
|
||||
)
|
||||
)
|
||||
throw new DatabaseConnectionError(
|
||||
"Some Postgres environment variables weren't found. Please configure them in the .env file."
|
||||
);
|
||||
let db;
|
||||
|
||||
const connection_string = `postgresql://${process.env.POSTGRES_USERNAME}:${process.env.POSTGRES_PASSWORD}@${process.env.POSTGRES_HOST}:${process.env.POSTGRES_PORT}/${process.env.POSTGRES_DB_NAME}?sslmode=${process.env.POSTGRES_SSLMODE}`;
|
||||
const db = pgp(connection_string);
|
||||
try {
|
||||
const cn = {
|
||||
host: process.env.POSTGRES_HOST,
|
||||
port: process.env.POSTGRES_PORT,
|
||||
database: process.env.POSTGRES_DB_NAME,
|
||||
user: process.env.POSTGRES_USERNAME,
|
||||
password: process.env.POSTGRES_PASSWORD,
|
||||
ssl: {
|
||||
rejectUnauthorized: process.env.POSTGRES_SSLMODE === "require",
|
||||
ca: fs.readFileSync(process.env.CA_CERT).toString(),
|
||||
}
|
||||
};
|
||||
db = pgp(cn);
|
||||
} catch {
|
||||
throw new DatabaseConnectionError(
|
||||
"Postgres connection could not be created, please check your .env file."
|
||||
);
|
||||
}
|
||||
|
||||
module.exports = db;
|
||||
|
|
|
@ -18,16 +18,15 @@ async function querySuggestions(handle, threshold = 0) {
|
|||
return { error: { name: error.name, message: error.message } };
|
||||
});
|
||||
|
||||
if (result?.["error"])
|
||||
return result;
|
||||
if (result?.["error"]) return result;
|
||||
|
||||
console.log(result);
|
||||
|
||||
|
||||
const data = {
|
||||
"handle": handle,
|
||||
"suggestions": result
|
||||
handle: handle,
|
||||
suggestions: result,
|
||||
};
|
||||
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
|
@ -35,18 +34,83 @@ async function queryNgrams(handle) {
|
|||
await validate.checkHandle(handle);
|
||||
|
||||
const query = new PQ({
|
||||
text: "SELECT * FROM oapen_suggestions.ngrams WHERE handle = $1",
|
||||
text: `SELECT handle, "name", thumbnail, created_at, updated_at,
|
||||
array_agg(
|
||||
JSON_BUILD_OBJECT(
|
||||
'ngram', ngram.ngram,
|
||||
'count', ngram.count
|
||||
)
|
||||
) as ngrams
|
||||
FROM oapen_suggestions.ngrams, UNNEST(ngrams) as ngram
|
||||
WHERE handle = $1
|
||||
GROUP BY handle;`,
|
||||
values: [handle],
|
||||
});
|
||||
|
||||
return db.one(query).catch((error) => {
|
||||
return { error: { name: error.name, message: error.message } };
|
||||
});
|
||||
}
|
||||
|
||||
// return await db.any(query);
|
||||
/**
 * Fetch one page of suggestion rows whose score meets a minimum threshold.
 *
 * Out-of-range arguments are clamped rather than rejected:
 *   - a negative `threshold` becomes 0,
 *   - `limit` is forced into [1, validate.MAX_ITEM_LIMIT],
 *   - a negative `offset` becomes 0.
 *
 * @param {number} [threshold=0] Minimum score a row must have to be returned.
 * @param {number} [limit=validate.DEFAULT_ITEM_LIMIT] Maximum number of rows.
 * @param {number} [offset=0] Number of rows to skip.
 * @returns {Promise<object>} The result of `db.query`, or
 *   `{ error: { name, message } }` if the query rejects.
 */
async function queryManySuggestions(
  threshold = 0,
  limit = validate.DEFAULT_ITEM_LIMIT,
  offset = 0
) {
  // Clamp with plain comparisons so the values pass through untouched when
  // they are already in range.
  const minScore = threshold < 0 ? 0 : threshold;
  const pageSize =
    limit > validate.MAX_ITEM_LIMIT
      ? validate.MAX_ITEM_LIMIT
      : limit < 1
      ? 1
      : limit;
  const skip = offset < 0 ? 0 : offset;

  // The SQL is assembled line by line; the joined text is the statement
  // exactly as it appears in the source.
  const sql = [
    "SELECT suggestion AS handle, score",
    "FROM oapen_suggestions.suggestions",
    "WHERE score >= $1",
    "ORDER BY created_at DESC",
    "LIMIT $2 OFFSET $3;",
  ].join("\n");

  const pagedQuery = new PQ({
    text: sql,
    values: [minScore, pageSize, skip],
  });

  // Database failures are reported as a value instead of a rejection.
  const asErrorPayload = (error) => ({
    error: { name: error.name, message: error.message },
  });

  return db.query(pagedQuery).catch(asErrorPayload);
}
|
||||
|
||||
/**
 * Fetch one page of books together with their ngrams, newest first.
 *
 * Each returned row carries the book columns selected below plus an `ngrams`
 * array of `{ ngram, count }` objects built from the unnested `ngrams` column.
 *
 * Out-of-range arguments are clamped rather than rejected: `limit` is forced
 * into [1, validate.MAX_ITEM_LIMIT] and a negative `offset` becomes 0.
 *
 * @param {number} [limit=validate.DEFAULT_ITEM_LIMIT] Maximum number of rows.
 * @param {number} [offset=0] Number of rows to skip.
 * @returns {Promise<object>} The result of `db.query`, or
 *   `{ error: { name, message } }` if the query rejects.
 */
async function queryManyNgrams(limit = validate.DEFAULT_ITEM_LIMIT, offset = 0) {
  if (limit > validate.MAX_ITEM_LIMIT) {
    limit = validate.MAX_ITEM_LIMIT;
  } else if (limit < 1) {
    limit = 1;
  }
  if (offset < 0) offset = 0;

  const query = new PQ({
    // ORDER BY ... DESC: the endpoint is documented as returning the most
    // recently added books first, matching queryManySuggestions. Without DESC
    // PostgreSQL sorts ascending (oldest first).
    text: `SELECT handle, "name", thumbnail, created_at, updated_at,
        array_agg(
          JSON_BUILD_OBJECT(
            'ngram', ngram.ngram,
            'count', ngram.count
          )
        ) as ngrams
      FROM oapen_suggestions.ngrams, UNNEST(ngrams) as ngram
      GROUP BY handle
      ORDER BY created_at DESC
      LIMIT $1 OFFSET $2;
    `,
    values: [limit, offset],
  });

  // Database failures are reported as a value instead of a rejection so the
  // route handler can map them to an HTTP status.
  return db.query(query).catch((error) => {
    return { error: { name: error.name, message: error.message } };
  });
}
|
||||
|
||||
module.exports = {
|
||||
querySuggestions,
|
||||
queryNgrams,
|
||||
queryManySuggestions,
|
||||
queryManyNgrams,
|
||||
};
|
||||
|
|
|
@ -59,4 +59,67 @@ router.get("/:handle([0-9]+.[0-9]+.[0-9]+/[0-9]+)/ngrams", async (req, res) => {
|
|||
}
|
||||
});
|
||||
|
||||
router.get("/", async (req, res) => {
|
||||
try {
|
||||
let threshold = parseInt(req.query.threshold) || 0;
|
||||
if (threshold < 0) threshold = 0;
|
||||
let limit = parseInt(req.query.limit) || validate.DEFAULT_ITEM_LIMIT;
|
||||
let offset = parseInt(req.query.offset) || 0;
|
||||
if (limit > validate.MAX_ITEM_LIMIT) {
|
||||
limit = validate.MAX_ITEM_LIMIT;
|
||||
} else if (limit < 1) {
|
||||
limit = 1;
|
||||
}
|
||||
if (offset < 0) offset = 0;
|
||||
|
||||
|
||||
let responseData = await data.queryManySuggestions(threshold, limit, offset);
|
||||
|
||||
if (
|
||||
responseData.error &&
|
||||
responseData.error.name === pgp.errors.QueryResultError.name
|
||||
) {
|
||||
return res.status(404).json({ error: responseData.error.message });
|
||||
} else if (responseData.error) {
|
||||
return res.status(500).json(responseData);
|
||||
}
|
||||
|
||||
res.header("Access-Control-Allow-Origin", "*");
|
||||
|
||||
return res.status(200).json(responseData);
|
||||
} catch (e) {
|
||||
return res.status(500).json({ error: "Internal server error" });
|
||||
}
|
||||
})
|
||||
|
||||
router.get("/ngrams", async (req, res) => {
|
||||
try {
|
||||
let limit = parseInt(req.query.limit) || validate.DEFAULT_ITEM_LIMIT;
|
||||
let offset = parseInt(req.query.offset) || 0;
|
||||
if (limit > validate.MAX_ITEM_LIMIT) {
|
||||
limit = validate.MAX_ITEM_LIMIT;
|
||||
} else if (limit < 1) {
|
||||
limit = 1;
|
||||
}
|
||||
if (offset < 0) offset = 0;
|
||||
|
||||
let responseData = await data.queryManyNgrams(limit, offset);
|
||||
|
||||
if (
|
||||
responseData.error &&
|
||||
responseData.error.name === pgp.errors.QueryResultError.name
|
||||
) {
|
||||
return res.status(404).json({ error: responseData.error.message });
|
||||
} else if (responseData.error) {
|
||||
return res.status(500).json(responseData);
|
||||
}
|
||||
|
||||
res.header("Access-Control-Allow-Origin", "*");
|
||||
|
||||
return res.status(200).json(responseData);
|
||||
} catch (e) {
|
||||
return res.status(500).json({ error: "Internal server error" });
|
||||
}
|
||||
})
|
||||
|
||||
module.exports = router;
|
||||
|
|
|
@ -7,6 +7,9 @@ class UserError extends Error {
|
|||
}
|
||||
}
|
||||
|
||||
const MAX_ITEM_LIMIT = 100;
|
||||
const DEFAULT_ITEM_LIMIT = 25;
|
||||
|
||||
// RegEx to match formatting of handle
|
||||
const handleRegExpression = new RegExp("([0-9]+.[0-9]+.[0-9]+/[0-9]+)");
|
||||
|
||||
|
@ -24,4 +27,6 @@ let checkHandle = async (handle) => {
|
|||
|
||||
module.exports = {
|
||||
checkHandle,
|
||||
MAX_ITEM_LIMIT,
|
||||
DEFAULT_ITEM_LIMIT
|
||||
};
|
||||
|
|
|
@ -2,33 +2,30 @@ version: "3.8"
|
|||
services:
|
||||
oapen-engine :
|
||||
build: ./oapen-engine/
|
||||
restart: always
|
||||
env_file:
|
||||
- .env
|
||||
environment:
|
||||
- RUN_CLEAN=0
|
||||
- COLLECTION_IMPORT_LIMIT=0 # Set to 0 for full harvest
|
||||
- REFRESH_PERIOD=86400 # daily
|
||||
- HARVEST_PERIOD=604800 # weekly
|
||||
api:
|
||||
build: ./api/
|
||||
restart: always
|
||||
env_file:
|
||||
- .env
|
||||
expose:
|
||||
- ${API_PORT}
|
||||
ports:
|
||||
- "0.0.0.0:${API_PORT}:${API_PORT}"
|
||||
web:
|
||||
build: ./web/
|
||||
expose:
|
||||
- ${WEB_DEMO_PORT}
|
||||
restart: always
|
||||
ports:
|
||||
- "0.0.0.0:${WEB_DEMO_PORT}:${WEB_DEMO_PORT}"
|
||||
- "0.0.0.0:${WEB_DEMO_PORT}:3000"
|
||||
embed-script-test:
|
||||
build: ./embed-script/
|
||||
expose:
|
||||
- ${EMBED_SCRIPT_PORT}
|
||||
restart: always
|
||||
ports:
|
||||
- "0.0.0.0:${EMBED_SCRIPT_PORT}:${EMBED_SCRIPT_PORT}"
|
||||
- "0.0.0.0:${EMBED_SCRIPT_PORT}:3002"
|
||||
volumes:
|
||||
db:
|
||||
driver: local
|
||||
|
|
|
@ -6,7 +6,6 @@ ENV LC_ALL C.UTF-8
|
|||
ENV PYTHONDONTWRITEBYTECODE 1
|
||||
ENV PYTHONFAULTHANDLER 1
|
||||
|
||||
|
||||
FROM base AS python-deps
|
||||
|
||||
# Install pipenv and compilation dependencies
|
||||
|
@ -17,8 +16,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends libpq-dev build
|
|||
COPY Pipfile .
|
||||
RUN PIPENV_VENV_IN_PROJECT=1 pipenv install --deploy --skip-lock --verbose
|
||||
|
||||
|
||||
|
||||
FROM base AS runtime
|
||||
|
||||
# Copy virtual env from python-deps stage
|
||||
|
@ -26,7 +23,6 @@ COPY --from=python-deps /.venv /.venv
|
|||
ENV PATH="/.venv/bin:$PATH"
|
||||
ENV PATH="$PATH:/usr/local/bin"
|
||||
|
||||
|
||||
RUN apt-get update && apt-get install libpq5 -y
|
||||
|
||||
# Create and switch to a new user
|
||||
|
@ -48,4 +44,4 @@ RUN chmod -R +x scripts
|
|||
USER appuser
|
||||
|
||||
# Run the application
|
||||
ENTRYPOINT ["./scripts/test-and-run.sh"]
|
||||
ENTRYPOINT ["./scripts/run.sh"]
|
|
@ -1,21 +1,41 @@
|
|||
# OAPEN Suggestion Service
|
||||
## Getting Started
|
||||
### Running the application
|
||||
Ensure that you have followed the setup instructions in the top level README, then run:
|
||||
# Suggestion Engine
|
||||
## Updating/migrating the database
|
||||
When you make database changes, or add new stopwords, you'll want to completely re-run the harvesting and suggestion creation for the database. Though this happens weekly by default, you have some more immediate options:
|
||||
|
||||
To erase & recreate the database _NOW_, you can run:
|
||||
```bash
|
||||
docker compose run oapen-engine clean now
|
||||
```
|
||||
docker-compose up --build
|
||||
> *WARNING*: You will lose ALL database data! Reruns are resource-intensive and lengthy, so be sure before running this. This _could_ cause unexpected errors if the service is currently running, in which case you will need to restart the container.
|
||||
|
||||
To erase & recreate the database _on the next run_, you can run:
|
||||
```bash
|
||||
docker compose run oapen-engine clean true
|
||||
```
|
||||
### Cleaning the database manually
|
||||
> *WARNING*: You will lose ALL database data! Reruns are resource-intensive and lengthy, so be sure before running this. This is safer than the previous command and should not cause any breakage, even if the service is actively using the database.
|
||||
|
||||
To cancel the operation above, so the database is _not_ erased on the next run, you can run:
|
||||
```bash
|
||||
docker compose run oapen-engine clean false
|
||||
```
|
||||
./scripts/clean.sh
|
||||
|
||||
### How it works
|
||||
Those last two operations work by creating or deleting a table called `migrate` in the `oapen_suggestions` schema in the database. When starting up, the daemon checks whether this table exists; if it does, the daemon drops and recreates the schema, tables, and types, and then deletes the `migrate` table. If the table does not exist, the database is left as-is. You can also manually create the `migrate` table via an SQL query in any database admin tool, and the database will be re-created on the next run.
|
||||
|
||||
## Running the engine alone
|
||||
Ensure that you have followed the [setup instructions](../README.md#installation-server), then run:
|
||||
```
|
||||
### Refreshing items + suggestions manually
|
||||
docker-compose up -d --build
|
||||
```
|
||||
|
||||
## Refreshing items + suggestions manually
|
||||
```
|
||||
./scripts/refresh.sh
|
||||
```
|
||||
|
||||
## How to remove/filter out bad ngrams
|
||||
Members of EbookFoundation can create a pull request to edit the stopwords used to filter out bad trigrams:
|
||||
People with access to the repository can create a pull request to edit the stopwords used to filter out bad trigrams:
|
||||
```
|
||||
oapen-engine/src/model/stopwords_*.txt
|
||||
```
|
||||
This also can be done to remove a malformed trigram already in the database (during the next run)
|
||||
Changes to the stopwords will not be reflected until the next harvest, which occurs weekly by default.
|
|
@ -1 +0,0 @@
|
|||
python src/tasks/clean.py
|
|
@ -1,3 +1,3 @@
|
|||
#!/bin/sh
|
||||
#!/bin/bash
|
||||
|
||||
python src/tasks/refresh_items.py
|
|
@ -0,0 +1,23 @@
|
|||
#!/bin/bash
|
||||
if [ $# -eq 0 ]; then
|
||||
echo "Running tests..."
|
||||
python src/test/data/run_tests.py
|
||||
echo "Running app..."
|
||||
python src/tasks/daemon.py
|
||||
elif [ "$1" == "clean" ]; then
|
||||
if [ "$2" == "now" ] || [ "$2" == "true" ] || [ "$2" == "false" ]; then
|
||||
python src/tasks/clean.py $2
|
||||
else
|
||||
echo "Invalid arguments for clean. Valid options are 'now', 'true', or 'false'."
|
||||
echo "
|
||||
Usage: docker compose run oapen-engine clean [now/true/false]
|
||||
Options:
|
||||
now Clean the database now. Drops ALL DATA in the database!
|
||||
true Clean the database on the next run of the service. Drops ALL DATA in the database!
|
||||
false Do not clean the database on the next run. Leaves the database as it is."
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
echo "Invalid command. Valid commands are 'clean', or you can leave it blank to run the daemon."
|
||||
exit 1
|
||||
fi
|
|
@ -1,9 +0,0 @@
|
|||
#!/bin/sh
|
||||
|
||||
# exit when any command fails
|
||||
set -e
|
||||
|
||||
echo "Running tests..." && \
|
||||
python src/test/data/run_tests.py && \
|
||||
echo "Running app" && \
|
||||
python src/tasks/daemon.py
|
|
@ -0,0 +1,3 @@
|
|||
#!/bin/bash
|
||||
|
||||
python src/test/data/run_tests.py
|
|
@ -68,7 +68,7 @@ class OapenDB:
|
|||
cursor.close()
|
||||
return args
|
||||
|
||||
def table_exists(self, table):
|
||||
def table_exists(self, table_name):
|
||||
cursor = self.connection.cursor()
|
||||
query = """
|
||||
SELECT EXISTS (
|
||||
|
@ -77,10 +77,29 @@ class OapenDB:
|
|||
"""
|
||||
|
||||
try:
|
||||
cursor.execute(query, (table,))
|
||||
cursor.execute(query, (table_name,))
|
||||
res = cursor.fetchone()[0]
|
||||
|
||||
return res is not None
|
||||
return bool(res)
|
||||
except (Exception, psycopg2.Error) as error:
|
||||
logger.error(error)
|
||||
return False
|
||||
finally:
|
||||
cursor.close()
|
||||
|
||||
def type_exists(self, type_name):
|
||||
cursor = self.connection.cursor()
|
||||
query = """
|
||||
SELECT EXISTS (
|
||||
SELECT 1 FROM pg_type WHERE typname = %s
|
||||
)
|
||||
"""
|
||||
|
||||
try:
|
||||
cursor.execute(query, (type_name,))
|
||||
res = cursor.fetchone()[0]
|
||||
|
||||
return bool(res)
|
||||
except (Exception, psycopg2.Error) as error:
|
||||
logger.error(error)
|
||||
return False
|
||||
|
|
|
@ -66,7 +66,7 @@ def create_schema(connection) -> None:
|
|||
|
||||
|
||||
def drop_schema(connection) -> None:
|
||||
logger.warn("WARNING: DROPPING DATABASE!")
|
||||
logger.warning("WARNING: DROPPING DATABASE!")
|
||||
cursor = connection.cursor()
|
||||
cursor.execute(
|
||||
"""
|
||||
|
@ -74,6 +74,7 @@ def drop_schema(connection) -> None:
|
|||
DROP TABLE IF EXISTS oapen_suggestions.suggestions CASCADE;
|
||||
DROP TABLE IF EXISTS oapen_suggestions.ngrams CASCADE;
|
||||
DROP TABLE IF EXISTS oapen_suggestions.endpoints CASCADE;
|
||||
DROP TABLE IF EXISTS oapen_suggestions.migrate;
|
||||
DROP TYPE IF EXISTS oapen_suggestions.ngram CASCADE;
|
||||
"""
|
||||
)
|
||||
|
@ -123,12 +124,49 @@ def seed_endpoints(connection):
|
|||
endpoints = get_endpoints()
|
||||
db.add_urls(endpoints)
|
||||
|
||||
def mark_for_cleaning(connection):
|
||||
cursor = connection.cursor()
|
||||
cursor.execute(
|
||||
"""
|
||||
CREATE SCHEMA IF NOT EXISTS oapen_suggestions;
|
||||
CREATE TABLE IF NOT EXISTS oapen_suggestions.migrate (migrate boolean);
|
||||
"""
|
||||
)
|
||||
cursor.close()
|
||||
|
||||
def mark_no_clean(connection):
|
||||
cursor = connection.cursor()
|
||||
cursor.execute(
|
||||
"""
|
||||
CREATE SCHEMA IF NOT EXISTS oapen_suggestions;
|
||||
DROP TABLE IF EXISTS oapen_suggestions.migrate;
|
||||
"""
|
||||
)
|
||||
cursor.close()
|
||||
|
||||
def run():
|
||||
connection = get_connection()
|
||||
|
||||
drop_schema(connection)
|
||||
create_schema(connection)
|
||||
mark_no_clean(connection)
|
||||
seed_endpoints(connection)
|
||||
|
||||
connection.close()
|
||||
|
||||
if __name__ == "__main__":
|
||||
if len(sys.argv) == 2 and sys.argv[1] in ["now", "true", "false"]:
|
||||
if sys.argv[1] == "now":
|
||||
run()
|
||||
elif sys.argv[1] == "true":
|
||||
connection = get_connection()
|
||||
mark_for_cleaning(connection)
|
||||
logger.warning("WARNING: The database will be ERASED on the next run.")
|
||||
connection.close()
|
||||
elif sys.argv[1] == "false":
|
||||
connection = get_connection()
|
||||
mark_no_clean(connection)
|
||||
logger.info("The database will not be cleaned on the next run.")
|
||||
connection.close()
|
||||
else:
|
||||
logger.error("Invalid argument supplied to clean.py. Valid options are 'now', 'true', or 'false'.")
|
|
@ -45,10 +45,11 @@ def main():
|
|||
|
||||
logger.info("Daemon up")
|
||||
|
||||
if int(os.environ["RUN_CLEAN"]) == 1 or (
|
||||
not db.table_exists("suggestions")
|
||||
or not db.table_exists("ngrams")
|
||||
or not db.table_exists("endpoints")
|
||||
if db.table_exists("migrate") or not (
|
||||
db.table_exists("suggestions")
|
||||
and db.table_exists("ngrams")
|
||||
and db.table_exists("endpoints")
|
||||
and db.type_exists("ngram")
|
||||
):
|
||||
run_clean()
|
||||
|
||||
|
|
Loading…
Reference in New Issue