Basic testing (#45)

* finished changes to stopwords and languages * final changes to stopwords * basic testing * add tests * Remove formatter for now * fix merge * cd * touch __init__ * Relative path issue \? * run tests before app * Move tests to inside docker * exit when any command fails --------- Co-authored-by: Max Zaremba <max.zaremba@gmail.com>
2023-03-22 14:52:38 -04:00 · 2023-03-22 14:52:38 -04:00 · 376545450d
parent 884872cf60
commit 376545450d
21 changed files with 224 additions and 59 deletions
--- a/.github/workflows/engine.yml
+++ b/.github/workflows/engine.yml
@ -1,27 +0,0 @@
 # Lint workflow for the Python engine: on every push, installs the pipenv
 # environment and checks import order, formatting and flake8 rules.
 name: OAPEN Engine
 on: [push]
 jobs:
  build:
    runs-on: ubuntu-latest
    env:
      working-directory: ./oapen-engine
    steps:
      - uses: actions/checkout@v3
      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.10'
      - name: Install dependencies with pipenv
        working-directory: ${{env.working-directory}}
        run: |
          pip install pipenv
          pipenv install --deploy --dev
          # --check-only makes isort fail the job on unsorted imports instead of rewriting the checkout
          pipenv run isort --check-only --profile black src/
          pipenv run black --check src/ --exclude="lib/*"
          # paths go in --extend-exclude; --ignore only accepts error codes
          pipenv run flake8 src/ --extend-exclude="lib" --ignore="W, E203, E266, E501, W503, F403, F401"
--- a/.github/workflows/lint-web.yml
+++ b/.github/workflows/lint-web.yml
@ -1,4 +1,4 @@
-name: Web lint checker
+name: Build and test web
 on: push
 jobs:
  test:
--- a/.github/workflows/test-containers.yml
+++ b/.github/workflows/test-containers.yml
@ -1,4 +1,4 @@
-name: Test Containers
+name: Build and test containers
 on: push
--- a/oapen-engine/Dockerfile
+++ b/oapen-engine/Dockerfile
@ -48,4 +48,4 @@ RUN chmod -R +x scripts
 USER appuser
 # Run the application
-ENTRYPOINT ["./scripts/run.sh"]
+ENTRYPOINT ["./scripts/test-and-run.sh"]
--- a/oapen-engine/Makefile
+++ b/oapen-engine/Makefile
@ -0,0 +1,42 @@
 # Interpreter to use; override with `make PYTHONEX=python3 <target>`.
 PYTHONEX ?= python
 # Values are stored unquoted and quoted once at the point of use, so a
 # checkout path containing spaces does not break the shell command line.
 PYTHONPATH := $(CURDIR)/src
 PYTHON := PYTHONPATH="$(PYTHONPATH)" $(PYTHONEX)
 # Every target below is a command, not a file.
 .PHONY: setup-env seed_db clean_db clean_and_seed generate_suggestions run \
         run-tests refresh-items run-daemon run-unit-tests
 # Upgrade pip, install pipenv and the project dependencies, then open a pipenv shell.
 setup-env:
 ifeq ($(OS),Windows_NT)
 	py -m pip install --upgrade pip
 else
 	$(PYTHON) -m pip install --upgrade pip
 endif
 	$(PYTHON) -m pip install pipenv
 	$(PYTHON) -m pipenv install --skip-lock
 	$(PYTHON) -m pipenv shell
 # Run tasks/seed.py from src/.
 seed_db:
 	cd src && $(PYTHON) -m pipenv run python tasks/seed.py
 # Run tasks/clean.py from src/.
 clean_db:
 	cd src && $(PYTHON) -m pipenv run python tasks/clean.py
 # Clean first, then seed; sub-makes keep the two steps strictly ordered.
 clean_and_seed:
 	$(MAKE) clean_db
 	$(MAKE) seed_db
 # Run tasks/generate_suggestions.py from src/.
 generate_suggestions:
 	cd src && $(PYTHON) -m pipenv run python tasks/generate_suggestions.py
 # Full pipeline: clean, seed, then generate suggestions.
 run:
 	$(MAKE) clean_and_seed
 	$(MAKE) generate_suggestions
 # Run the pytest suite from src/.
 run-tests:
 	cd src && $(PYTHON) -m pipenv run pytest
 # Run tasks/refresh_items.py from src/.
 refresh-items:
 	cd src && $(PYTHON) -m pipenv run python tasks/refresh_items.py
 # Run tasks/daemon.py from src/.
 run-daemon:
 	cd src && $(PYTHON) -m pipenv run python tasks/daemon.py
 # Run the standalone test runner script (test/data/run_tests.py) from src/.
 run-unit-tests:
 	cd src && $(PYTHON) -m pipenv run python test/data/run_tests.py
--- a/oapen-engine/Pipfile
+++ b/oapen-engine/Pipfile
@ -10,6 +10,9 @@ psycopg2 = "2.9.3"
 pandas = "*"
 scikit-learn = "*"
 lxml = "*"
 charset_normalizer = "*"
 idna = "*"
 certifi = "*"
 [dev-packages]
 pytest = "*"
--- a/oapen-engine/scripts/run.sh
+++ b/oapen-engine/scripts/run.sh
@ -1,3 +0,0 @@
 #!/bin/sh
 python src/tasks/daemon.py
--- a/oapen-engine/scripts/test-and-run.sh
+++ b/oapen-engine/scripts/test-and-run.sh
@ -0,0 +1,9 @@
 #!/bin/sh
 # Container entrypoint: run the test runner script, then start the daemon.
 # exit when any command fails
 set -e
 echo "Running tests..."
 python src/test/data/run_tests.py
 echo "Running app"
 # exec replaces this shell with the daemon process, so the daemon receives
 # signals sent to the entrypoint directly instead of through a parent shell
 exec python src/tasks/daemon.py
--- a/oapen-engine/src/data/init.py
+++ b/oapen-engine/src/data/init.py
--- a/oapen-engine/src/model/init.py
+++ b/oapen-engine/src/model/init.py
--- a/oapen-engine/src/model/ngrams.py
+++ b/oapen-engine/src/model/ngrams.py
@ -1,37 +1,19 @@
 import string
 from typing import List
-import pandas as pd  # pylint: disable=import-error
+import nltk 
-from nltk import word_tokenize  # pylint: disable=import-error
+from nltk import word_tokenize 
-from nltk.corpus import stopwords  # pylint: disable=import-error
+from .stopwords_processor import STOPWORDS
 import pandas as pd  
-from .oapen_types import (  # pylint: disable=relative-beyond-top-level
+nltk.download('punkt')
 from .oapen_types import (
    NgramDict,
    NgramRowWithoutDate,
    OapenItem,
 )
 # Custom stopword files, one entry per line. The paths are relative, so they
 # resolve against the process working directory.
 stopword_paths = [
    "src/model/stopwords_broken.txt",
    "src/model/stopwords_dutch.txt",
    "src/model/stopwords_filter.txt",
    "src/model/stopwords_publisher.txt",
 ]
 # Read every custom file, stripping trailing whitespace (including the newline) from each line.
 stopwords_list = []
 for p in stopword_paths:
    with open(p, "r") as f:
        stopwords_list += [line.rstrip() for line in f]
 # Final stopword list: NLTK English, German and Dutch stopwords followed by the custom entries.
 STOPWORDS = (
    stopwords.words("english")
    + stopwords.words("german")
    + stopwords.words("dutch")
    + stopwords_list
 )
 def process_text(text):
    l_text = text.lower()
    p_text = "".join([c for c in l_text if c not in string.punctuation])
--- a/oapen-engine/src/model/stopwords/broken.txt
+++ b/oapen-engine/src/model/stopwords/broken.txt
--- a/oapen-engine/src/model/stopwords/dutch.txt
+++ b/oapen-engine/src/model/stopwords/dutch.txt
--- a/oapen-engine/src/model/stopwords/filter.txt
+++ b/oapen-engine/src/model/stopwords/filter.txt
--- a/oapen-engine/src/model/stopwords/publisher.txt
+++ b/oapen-engine/src/model/stopwords/publisher.txt
--- a/oapen-engine/src/model/stopwords/stopwords_full_list.py
+++ b/oapen-engine/src/model/stopwords/stopwords_full_list.py
--- a/oapen-engine/src/model/stopwords_processor.py
+++ b/oapen-engine/src/model/stopwords_processor.py
@ -0,0 +1,44 @@
 import nltk
 from nltk.corpus import stopwords
 from functools import reduce
 import os
 # This is run as a precaution in case of the error "NLTK stop words not found",
 # which makes sure to download the stop words after installing nltk
 nltk.download("stopwords")
 # Custom stopword lists are <name>.txt files in the stopwords/ folder next to this
 # module; add new files there and register their names in custom_stopwords_in_use.
 # print working directory
 print("Working directory: " + os.getcwd())
 current_dir = os.path.realpath(os.path.dirname(__file__))
 print("Local script directory: " + current_dir)
 custom_lists_folder = current_dir + "/stopwords/"
 custom_stopwords_in_use = [
    "broken",
    "dutch",
    "filter",
    "publisher",
 ]
 # Language names are passed to nltk.corpus.stopwords.words(), so each one must be
 # a language available in the NLTK stopwords corpus.
 enabled_languages = [
    "english",
    "german",
    "dutch"
 ]
 # the combined stopwords of all enabled languages
 nltk_stopwords = []
 for language in enabled_languages:
    nltk_stopwords += stopwords.words(language)
 # get the custom lists
 custom_stopwords = []
 for custom_list in custom_stopwords_in_use:
    # os.path.join tolerates the trailing separator already present in custom_lists_folder
    list_path = os.path.join(custom_lists_folder, custom_list + ".txt")
    # explicit encoding so the result does not depend on the platform default;
    # NOTE(review): assumes the list files are UTF-8 — confirm
    with open(list_path, "r", encoding="utf-8") as file:
        custom_stopwords += [line.rstrip() for line in file]
 # add languages and custom stopwords for final stopwords var
 STOPWORDS = (nltk_stopwords + custom_stopwords)
--- a/oapen-engine/src/test/data/run_tests.py
+++ b/oapen-engine/src/test/data/run_tests.py
@ -0,0 +1,41 @@
 import test_oapen
 import test_stopwords
 import test_ngrams
 def run_test(run_msg, func):
    """Print ``run_msg``, call ``func`` with no arguments, then print "OK".

    Any exception raised by ``func`` propagates to the caller, so "OK" is
    printed only when the call returns normally.
    """
    # flush so the label is visible before func() runs, even when stdout is buffered
    print(run_msg, end=" ", flush=True)
    func()
    print("OK")
 def main():
    """Run every check in the OAPEN, stopwords and ngrams groups.

    Each check goes through run_test. A check that raises is reported and
    counted, and the remaining checks still run.

    Returns:
        int: the number of checks that raised an exception.
    """
    failures = 0
    def attempt(run_msg, func):
        # Run one check; report and count a failure instead of aborting the run.
        nonlocal failures
        try:
            run_test(run_msg, func)
        except Exception as e:
            failures += 1
            print("\nFailed:")
            # a bare assert has an empty message, so include the exception type
            print(f"{type(e).__name__}: {e}")
    print("Testing connection to OAPEN.")
    attempt("Attempting to get item [Embodying Contagion]:", test_oapen.test_get_item)
    attempt("Attempting to get null item:", test_oapen.test_get_item_404)
    attempt("Attempting to get collection limit by label [Knowledge Unlatched (KU)]:",
        test_oapen.test_get_collection_limit)
    attempt("Attempting to get null collection:", test_oapen.test_get_collection_404)
    print("\nTesting stopwords generation.")
    attempt("Testing stopwords correctly generated:",
        test_stopwords.test_stopwords_contains_all)
    print("\nTesting ngrams functionality.")
    attempt("Testing process_text:", test_ngrams.test_process_text)
    attempt("Testing ngram generation:", test_ngrams.test_generate_ngram)
    attempt("Testing similarity score:", test_ngrams.test_similarity_score)
    return failures
 if __name__ == "__main__":
    import sys
    # a non-zero exit status lets a calling script that uses `set -e` stop when a check fails
    sys.exit(1 if main() else 0)
--- a/oapen-engine/src/test/data/test_ngrams.py
+++ b/oapen-engine/src/test/data/test_ngrams.py
@ -0,0 +1,51 @@
 import model.ngrams as ngrams
 # Raw sample sentences fed to ngrams.process_text by the tests in this file.
 test_text1 = "Foxes are cunning animals. There was a quick, red fox known to avoid crossing roads during the day, doing so only at night."
 test_text2 = "The quick red fox jumped over the lazy brown dog. It had a fantastic time doing so, as it felt finally free. The fox had been in the zoo for far too long, held in captivity."
 # Token lists that process_text is expected to return for the two sample sentences.
 processed_text1 = ['foxes', 'cunning', 'animals', 'quick', 'red', 'fox', 'known', 'avoid', 'crossing', 'roads', 'day', 'night']
 processed_text2 = ['quick', 'red', 'fox', 'jumped', 'lazy', 'brown', 'dog', 'fantastic', 'time', 'felt', 'finally', 'free', 'fox', 'zoo', 'far', 'long', 'held', 'captivity']
 # Expected generate_ngram output for processed_text1: three-word phrases mapped to their counts.
 ngrams1 = {
    'foxes cunning animals': 1, 
    'cunning animals quick': 1, 
    'animals quick red': 1, 
    'quick red fox': 1, 
    'red fox known': 1, 
    'fox known avoid': 1, 
    'known avoid crossing': 1, 
    'avoid crossing roads': 1, 
    'crossing roads day': 1, 
    'roads day night': 1
 }
 # Expected generate_ngram output for processed_text2; 'quick red fox' is the only key shared with ngrams1.
 ngrams2 = {
    'quick red fox': 1, 
    'red fox jumped': 1, 
    'fox jumped lazy': 1, 
    'jumped lazy brown': 1, 
    'lazy brown dog': 1, 
    'brown dog fantastic': 1, 
    'dog fantastic time': 1, 
    'fantastic time felt': 1, 
    'time felt finally': 1, 
    'felt finally free': 1, 
    'finally free fox': 1, 
    'free fox zoo': 1, 
    'fox zoo far': 1, 
    'zoo far long': 1, 
    'far long held': 1, 
    'long held captivity': 1
 }
 def test_process_text():
    """process_text must map each sample sentence to its expected token list."""
    for raw, expected in ((test_text1, processed_text1), (test_text2, processed_text2)):
        assert ngrams.process_text(raw) == expected
 def test_generate_ngram():
    """generate_ngram must map each token list to its expected ngram counts."""
    for tokens, expected in ((processed_text1, ngrams1), (processed_text2, ngrams2)):
        assert ngrams.generate_ngram(tokens) == expected
 def test_similarity_score():
    """get_similarity_score with n=5 must give 1 as a raw score and 0.2 as a percentage."""
    raw_score = ngrams.get_similarity_score(ngrams1, ngrams2, n=5, as_percent=False)
    assert raw_score == 1
    percent_score = ngrams.get_similarity_score(ngrams1, ngrams2, n=5, as_percent=True)
    assert percent_score == 0.2
--- a/oapen-engine/src/test/data/test_oapen.py
+++ b/oapen-engine/src/test/data/test_oapen.py
@ -1,6 +1,6 @@
 from typing import List
-import src.data.oapen as OapenAPI
+import data.oapen as OapenAPI
 from model.oapen_types import OapenItem
--- a/oapen-engine/src/test/data/test_stopwords.py
+++ b/oapen-engine/src/test/data/test_stopwords.py
@ -0,0 +1,23 @@
 from model.stopwords_processor import STOPWORDS
 import model.stopwords.stopwords_full_list as stopwords_full_list
 # currently contains stopwords_filter, stopwords_publisher, stopwords_broken, stopwords_dutch_extra
 # tests all at once
 def test_stopwords_contains_all():
    """Every word of each of the four reference lists must be present in STOPWORDS."""
    for list_name in (
        "stopwords_filter",
        "stopwords_publisher",
        "stopwords_broken",
        "stopwords_dutch_extra",
    ):
        reference = getattr(stopwords_full_list, list_name)
        assert all(word in STOPWORDS for word in reference)
 # individual tests provided if needed
 def test_stopwords_contains_stopwords_filter():
    """All words of stopwords_filter are in STOPWORDS."""
    missing = [word for word in stopwords_full_list.stopwords_filter if word not in STOPWORDS]
    assert not missing
 def test_stopwords_contains_stopwords_publisher():
    """All words of stopwords_publisher are in STOPWORDS."""
    missing = [word for word in stopwords_full_list.stopwords_publisher if word not in STOPWORDS]
    assert not missing
 def test_stopwords_contains_stopwords_broken():
    """All words of stopwords_broken are in STOPWORDS."""
    missing = [word for word in stopwords_full_list.stopwords_broken if word not in STOPWORDS]
    assert not missing
 def test_stopwords_contains_stopwords_dutch_extra():
    """All words of stopwords_dutch_extra are in STOPWORDS."""
    missing = [word for word in stopwords_full_list.stopwords_dutch_extra if word not in STOPWORDS]
    assert not missing
`@ -1,4 +1,4 @@`
	`name: Test Containers`	`name: Build and test containers`

	`on: push`	`on: push`