Added indexer tests

This commit is contained in:
Tris Forster 2026-05-23 11:38:40 +10:00
parent c1f0e48f80
commit 947626c2af
5 changed files with 276 additions and 216 deletions

View File

@ -1,7 +1,7 @@
PYTHON=env/bin/python
DROPZONE=5.7.0
test: check
test:
poetry run coverage run --include "app/*" --omit "*/migrations/*" app/manage.py test app
poetry run coverage html
poetry run coverage report
@ -10,6 +10,8 @@ check:
poetry run ruff check app
poetry run ruff format --check app || true
pre-commit: check test
build:
poetry build

View File

@ -3,7 +3,7 @@ from whoosh.analysis import StemmingAnalyzer, CharsetFilter
from whoosh.support.charset import accent_map
from whoosh.fields import Schema, TEXT, KEYWORD, NUMERIC
from whoosh.qparser import QueryParser
from whoosh.query import Term, NullQuery, Prefix
from whoosh.query import Term, NullQuery, FuzzyTerm
from typing import Any
@ -69,7 +69,6 @@ def search(
limit = NullQuery
for c in collections:
limit |= Term("collection", c)
print(limit)
hits = []
ix = get_index()
@ -78,7 +77,7 @@ def search(
# if no results, retry with a looser term class (Prefix before this commit, FuzzyTerm after)
if results.results.is_empty():
qp.termclass = Prefix
qp.termclass = FuzzyTerm
q = qp.parse(query)
results = searcher.search_page(q & limit, page, pagesize)

View File

@ -1,212 +0,0 @@
from interface.tests import AccessTestCase
from byostorage.user import UserStorage
from library import models
import tempfile
import json
class LibraryTestCase(AccessTestCase):
    """Access-control and behaviour tests for library collections and works.

    ``COLLECTIONS`` and ``WORKS`` are consumed by ``setUpTestData`` below.
    ``USERS``, ``ENSEMBLES``, ``PROJECTS`` and ``PROTECTED_URLS`` are not read
    in this class; presumably ``AccessTestCase`` consumes them -- confirm
    against that base class.
    """

    USERS = (
        {
            "username": "admin",
            "password": "secret",
            "is_superuser": True,
            "is_staff": True,
        },
        {"username": "homer", "password": "maggie"},
    )

    ENSEMBLES = (
        {"name": "The Be Sharps", "slug": "be-sharps", "admins": ["homer"]},
        {"name": "Lisa & the Bleeding Gums", "slug": "bleeding-gums"},
        {"name": "Party Posse"},
    )

    # NOTE(review): the unit and reference point of "when" are defined by the
    # base class, not here -- confirm before relying on them.
    PROJECTS = (
        {"name": "Baker St", "ensemble": "bleeding-gums", "when": -12},
        {"name": "Navy Recruitment Day", "ensemble": "party-posse", "when": 6},
        {"name": "Barbershop Contest", "ensemble": "be-sharps", "when": 28},
        {"name": "Open Mic Night", "ensemble": "bleeding-gums", "when": 1},
    )

    # Optional "admins" lists usernames that are added as collection
    # administrators in setUpTestData.
    COLLECTIONS = (
        {"name": "Springfield Elementary Library", "prefix": "sel"},
        {"name": "Neds Library", "prefix": "ned", "admins": ["homer"]},
    )

    # "collection" is the prefix of an entry in COLLECTIONS; optional "docs"
    # entries are passed as keyword arguments to ``work.docs.create``.
    WORKS = (
        {
            "name": "Baby on Board",
            "collection": "ned",
            "docs": [{"upload": "local:baby_on_board.pdf"}],
        },
        {"name": "Star Spangled Banner", "collection": "sel"},
    )

    PROTECTED_URLS = (
        "/collections/1",
        "/collections/1/add",
        "/collections/2/works/1",
        "/collections/2/works/1/edit",
        "/collections/2/works/1/partset",
        "/collections/2/works/1/add_to_project",
        "/collections/2/works/1/upload",
        "/collections/2/docs/1/annotate",
        # Need to add storage before we can test these
        "/api/collections/2",
        "/api/collections/2/works/1",
        "/admin/library/collection/",
        "/admin/library/document/",
        "/admin/library/ensembleaccess/",
        "/admin/library/orchestration/",
        "/admin/library/projectitem/",
        "/admin/library/work/",
    )

    @classmethod
    def setUpTestData(cls):
        """Create the storage, collections and works described by the fixtures.

        Populates ``cls.collections`` (keyed by prefix) and ``cls.works``
        (keyed by name). ``cls.users`` is expected to have been filled in by
        the parent ``setUpTestData``.
        """
        super().setUpTestData()
        cls.temp_dir = tempfile.TemporaryDirectory()
        cls.storage = UserStorage.objects.create(
            name="local",
            storage="django.core.files.storage.FileSystemStorage",
            settings_data=json.dumps(
                {
                    "location": cls.temp_dir.name,
                    "base_url": "file://" + cls.temp_dir.name,
                }
            ),
        )

        cls.collections = {}
        for fixture in cls.COLLECTIONS:
            # Work on a copy: popping from the class-level dict would strip the
            # key permanently and break any later run of this setup.
            details = dict(fixture)
            admins = details.pop("admins", [])
            obj = models.Collection.objects.create(storage=cls.storage, **details)
            for admin in admins:
                obj.administrators.add(cls.users[admin])
            cls.collections[details["prefix"]] = obj

        cls.works = {}
        for fixture in cls.WORKS:
            details = dict(fixture)  # copy for the same reason as above
            collection = cls.collections[details.pop("collection")]
            docs = details.pop("docs", [])
            # Works are created through the ORM directly, not via a serializer.
            obj = models.Work.objects.create(collection=collection, **details)
            for doc in docs:
                obj.docs.create(**doc)
            cls.works[details["name"]] = obj

    def setUp(self):
        # Overrides the inherited setUp without calling super().
        # NOTE(review): confirm that skipping the parent's setUp is intended.
        pass

    @classmethod
    def tearDownClass(cls):
        """Run the inherited class teardown, then delete the temp storage dir."""
        try:
            # Django's TestCase does its class-level cleanup here; skipping it
            # leaves that cleanup undone.
            super().tearDownClass()
        finally:
            cls.temp_dir.cleanup()

    def test_integration(self):
        # Placeholder: no integration checks implemented yet.
        pass

    def test_superuser_access(self):
        """A superuser can open the collection list, a collection and a work."""
        self.login("admin", "secret")
        self.assertAccess(
            {
                "/collections": True,
                "/collections/1": True,
                "/collections/2/works/1": True,
            }
        )

    def test_administrator_access(self):
        """A collection administrator only reaches the collection they administer."""
        self.login("homer", "maggie")
        self.assertAccess(
            {
                "/collections": True,
                "/collections/1": False,
                "/collections/2": True,
                "/collections/2/works/1": True,
            }
        )

    def test_link_access(self):
        """Access to collection 2 is denied until ``authorize`` is called for it."""
        self.assertAccess(
            {
                "/collections": True,
                "/collections/1": False,
                "/collections/2": False,
                "/collections/2/works/1": False,
            }
        )
        # NOTE(review): authorize() comes from the base class; presumably it
        # grants this client access to the given object -- confirm.
        self.authorize(models.Collection, pk=2)
        self.assertAccess(
            {
                "/collections": True,
                "/collections/1": False,
                "/collections/2": True,
                "/collections/2/works/1": True,
            }
        )

    def test_anon_access(self):
        """Without logging in, only the collection list is accessible."""
        self.assertAccess(
            {
                "/collections": True,
                "/collections/1": False,
                "/collections/2": False,
                "/collections/2/works/1": False,
            }
        )

    def test_export_and_import(self):
        """A work fetched as JSON from one collection can be posted to another's import endpoint."""
        self.login("admin", "secret")
        data = self.client.get(
            "/api/collections/1/works/2", HTTP_ACCEPT="application/json"
        ).json()
        response = self.client.post(
            "/api/collections/2/import", data, "application/json"
        )
        self.assertEqual(response.status_code, 201)

    def test_movement_from_large_work(self):
        """
        It will be common to store a work that has several movements when the project is only going to play one.
        This should also let us store an anthology as one Work and have a Project reference 'no:23'.
        """
        work = self.collections["sel"].works.create(
            name="Some Quartet", composer="Beethoven"
        )
        # One document per part, each tagged with three movement page ranges
        # plus a range-less tag naming the part itself.
        for g in ("vl-1", "vl-2", "vla", "vc"):
            doc = work.docs.create(
                upload=f"sel/beethoven/some_quartet/some_quartet_{g}.pdf"
            )
            doc.sections.create(tag="mvmt-1", start=1, end=3)
            doc.sections.create(tag="mvmt-2", start=4, end=8)
            doc.sections.create(tag="mvmt-3", start=9, end=12)
            doc.sections.create(tag=g)

        # no tags - get nothing (should it be everything?)
        self.assertEqual(work.list_sections(), [])

        # single tag - should get just that range
        self.assertEqual(
            work.list_sections("vl-1"),
            [("sel/beethoven/some_quartet/some_quartet_vl-1.pdf", None, None)],
        )

        # single tag - returns all documents with that range
        result = work.list_sections("mvmt-2")
        self.assertEqual(len(result), 4)

        # multiple tags - returns the overlapping portion of all documents that have all tags
        self.assertEqual(
            work.list_sections("vl-1", "mvmt-2"),
            [("sel/beethoven/some_quartet/some_quartet_vl-1.pdf", 4, 8)],
        )
        self.assertEqual(work.list_sections("vl-1", "vl-2"), [])

View File

@ -0,0 +1,59 @@
from django.test import TestCase
from tempfile import TemporaryDirectory
from library.models import Collection, Work
from library.indexer import index_works, model_search
from library.indexer import whoosh
# Fixture rows are [work name, composer]; each row is unpacked into the
# ``name`` and ``composer`` keyword arguments when the works are created.
JAZZ_STANDARDS = (
    ["But Not For Me", "Gershwin, George & Ira"],
    ["Autumn Leaves", "Kosma, Joseph"],
    ["Best of Gershwin", "Compilation"],
)

# Same [work name, composer] layout, for the classical collection.
CLASSICAL_WORKS = (
    ["Symphony No.5", "Beethoven, L"],
    ["March from Aieda", "Verdi"],
)
class WhooshIndexTestCase(TestCase):
    """Check that indexed works can be found again through ``model_search``."""

    @classmethod
    def setUpTestData(cls):
        """Create one jazz and one classical collection from the module fixtures.

        The collections are kept on the class so tests can filter by their
        real primary keys instead of assuming they are 1 and 2.
        """
        cls.jazz = Collection.objects.create(name="Jazz Standards", prefix="jazz")
        cls.classical = Collection.objects.create(
            name="Classical Music", prefix="classical"
        )
        for name, composer in JAZZ_STANDARDS:
            cls.jazz.works.create(name=name, composer=composer)
        for name, composer in CLASSICAL_WORKS:
            cls.classical.works.create(name=name, composer=composer)

    def test_setup(self):
        """Sanity-check that the fixtures produced 2 collections and 5 works."""
        self.assertEqual(Collection.objects.count(), 2)
        self.assertEqual(Work.objects.count(), 5)

    def test_indexer(self):
        """Index every work into a throwaway directory and run a table of queries.

        Each row is ``(query, collection filter, expected name prefixes)``;
        results are compared on the first five characters of the work name.
        """
        jazz = self.jazz.pk
        classical = self.classical.pk
        expected = [
            ("beethoven", [], ["Symph"]),
            ("Ira", [], ["But N"]),
            # misspelt on purpose: should still find the Beethoven symphony
            ("bethoven", [], ["Symph"]),
            ("George", [jazz], ["But N"]),
            ("George", [classical], []),
            ("George", [jazz, classical], ["But N"]),
            ("But not", [], ["But N"]),
            ("Gershwin", [], ["Best ", "But N"]),
            ("composer:Gershwin", [], ["But N"]),
        ]
        # Remember the module-level setting so it does not stay pointed at a
        # directory that no longer exists once this test finishes.
        previous_index_path = whoosh.index_path
        with TemporaryDirectory() as d:
            whoosh.index_path = d
            try:
                index_works(Work.objects.all())
                for query, collections, result in expected:
                    # subTest reports every failing row, not just the first.
                    with self.subTest(query=query, collections=collections):
                        self.assertListEqual(
                            [x.name[:5] for x in model_search(query, collections)],
                            result,
                        )
            finally:
                whoosh.index_path = previous_index_path

View File

@ -0,0 +1,212 @@
from interface.tests import AccessTestCase
from byostorage.user import UserStorage
from library import models
import tempfile
import json
class LibraryTestCase(AccessTestCase):
    """Access-control and behaviour tests for library collections and works.

    ``COLLECTIONS`` and ``WORKS`` are consumed by ``setUpTestData`` below.
    ``USERS``, ``ENSEMBLES``, ``PROJECTS`` and ``PROTECTED_URLS`` are not read
    in this class; presumably ``AccessTestCase`` consumes them -- confirm
    against that base class.
    """

    USERS = (
        {
            "username": "admin",
            "password": "secret",
            "is_superuser": True,
            "is_staff": True,
        },
        {"username": "homer", "password": "maggie"},
    )

    ENSEMBLES = (
        {"name": "The Be Sharps", "slug": "be-sharps", "admins": ["homer"]},
        {"name": "Lisa & the Bleeding Gums", "slug": "bleeding-gums"},
        {"name": "Party Posse"},
    )

    # NOTE(review): the unit and reference point of "when" are defined by the
    # base class, not here -- confirm before relying on them.
    PROJECTS = (
        {"name": "Baker St", "ensemble": "bleeding-gums", "when": -12},
        {"name": "Navy Recruitment Day", "ensemble": "party-posse", "when": 6},
        {"name": "Barbershop Contest", "ensemble": "be-sharps", "when": 28},
        {"name": "Open Mic Night", "ensemble": "bleeding-gums", "when": 1},
    )

    # Optional "admins" lists usernames that are added as collection
    # administrators in setUpTestData.
    COLLECTIONS = (
        {"name": "Springfield Elementary Library", "prefix": "sel"},
        {"name": "Neds Library", "prefix": "ned", "admins": ["homer"]},
    )

    # "collection" is the prefix of an entry in COLLECTIONS; optional "docs"
    # entries are passed as keyword arguments to ``work.docs.create``.
    WORKS = (
        {
            "name": "Baby on Board",
            "collection": "ned",
            "docs": [{"upload": "local:baby_on_board.pdf"}],
        },
        {"name": "Star Spangled Banner", "collection": "sel"},
    )

    PROTECTED_URLS = (
        "/collections/1",
        "/collections/1/add",
        "/collections/2/works/1",
        "/collections/2/works/1/edit",
        "/collections/2/works/1/partset",
        "/collections/2/works/1/add_to_project",
        "/collections/2/works/1/upload",
        "/collections/2/docs/1/annotate",
        # Need to add storage before we can test these
        "/api/collections/2",
        "/api/collections/2/works/1",
        "/admin/library/collection/",
        "/admin/library/document/",
        "/admin/library/ensembleaccess/",
        "/admin/library/orchestration/",
        "/admin/library/projectitem/",
        "/admin/library/work/",
    )

    @classmethod
    def setUpTestData(cls):
        """Create the storage, collections and works described by the fixtures.

        Populates ``cls.collections`` (keyed by prefix) and ``cls.works``
        (keyed by name). ``cls.users`` is expected to have been filled in by
        the parent ``setUpTestData``.
        """
        super().setUpTestData()
        cls.temp_dir = tempfile.TemporaryDirectory()
        cls.storage = UserStorage.objects.create(
            name="local",
            storage="django.core.files.storage.FileSystemStorage",
            settings_data=json.dumps(
                {
                    "location": cls.temp_dir.name,
                    "base_url": "file://" + cls.temp_dir.name,
                }
            ),
        )

        cls.collections = {}
        for fixture in cls.COLLECTIONS:
            # Work on a copy: popping from the class-level dict would strip the
            # key permanently and break any later run of this setup.
            details = dict(fixture)
            admins = details.pop("admins", [])
            obj = models.Collection.objects.create(storage=cls.storage, **details)
            for admin in admins:
                obj.administrators.add(cls.users[admin])
            cls.collections[details["prefix"]] = obj

        cls.works = {}
        for fixture in cls.WORKS:
            details = dict(fixture)  # copy for the same reason as above
            collection = cls.collections[details.pop("collection")]
            docs = details.pop("docs", [])
            # Works are created through the ORM directly, not via a serializer.
            obj = models.Work.objects.create(collection=collection, **details)
            for doc in docs:
                obj.docs.create(**doc)
            cls.works[details["name"]] = obj

    def setUp(self):
        # Overrides the inherited setUp without calling super().
        # NOTE(review): confirm that skipping the parent's setUp is intended.
        pass

    @classmethod
    def tearDownClass(cls):
        """Run the inherited class teardown, then delete the temp storage dir."""
        try:
            # Django's TestCase does its class-level cleanup here; skipping it
            # leaves that cleanup undone.
            super().tearDownClass()
        finally:
            cls.temp_dir.cleanup()

    def test_integration(self):
        # Placeholder: no integration checks implemented yet.
        pass

    def test_superuser_access(self):
        """A superuser can open the collection list, a collection and a work."""
        self.login("admin", "secret")
        self.assertAccess(
            {
                "/collections": True,
                "/collections/1": True,
                "/collections/2/works/1": True,
            }
        )

    def test_administrator_access(self):
        """A collection administrator only reaches the collection they administer."""
        self.login("homer", "maggie")
        self.assertAccess(
            {
                "/collections": True,
                "/collections/1": False,
                "/collections/2": True,
                "/collections/2/works/1": True,
            }
        )

    def test_link_access(self):
        """Access to collection 2 is denied until ``authorize`` is called for it."""
        self.assertAccess(
            {
                "/collections": True,
                "/collections/1": False,
                "/collections/2": False,
                "/collections/2/works/1": False,
            }
        )
        # NOTE(review): authorize() comes from the base class; presumably it
        # grants this client access to the given object -- confirm.
        self.authorize(models.Collection, pk=2)
        self.assertAccess(
            {
                "/collections": True,
                "/collections/1": False,
                "/collections/2": True,
                "/collections/2/works/1": True,
            }
        )

    def test_anon_access(self):
        """Without logging in, only the collection list is accessible."""
        self.assertAccess(
            {
                "/collections": True,
                "/collections/1": False,
                "/collections/2": False,
                "/collections/2/works/1": False,
            }
        )

    def test_export_and_import(self):
        """A work fetched as JSON from one collection can be posted to another's import endpoint."""
        self.login("admin", "secret")
        data = self.client.get(
            "/api/collections/1/works/2", HTTP_ACCEPT="application/json"
        ).json()
        response = self.client.post(
            "/api/collections/2/import", data, "application/json"
        )
        self.assertEqual(response.status_code, 201)

    def test_movement_from_large_work(self):
        """
        It will be common to store a work that has several movements when the project is only going to play one.
        This should also let us store an anthology as one Work and have a Project reference 'no:23'.
        """
        work = self.collections["sel"].works.create(
            name="Some Quartet", composer="Beethoven"
        )
        # One document per part, each tagged with three movement page ranges
        # plus a range-less tag naming the part itself.
        for g in ("vl-1", "vl-2", "vla", "vc"):
            doc = work.docs.create(
                upload=f"sel/beethoven/some_quartet/some_quartet_{g}.pdf"
            )
            doc.sections.create(tag="mvmt-1", start=1, end=3)
            doc.sections.create(tag="mvmt-2", start=4, end=8)
            doc.sections.create(tag="mvmt-3", start=9, end=12)
            doc.sections.create(tag=g)

        # no tags - get nothing (should it be everything?)
        self.assertEqual(work.list_sections(), [])

        # single tag - should get just that range
        self.assertEqual(
            work.list_sections("vl-1"),
            [("sel/beethoven/some_quartet/some_quartet_vl-1.pdf", None, None)],
        )

        # single tag - returns all documents with that range
        result = work.list_sections("mvmt-2")
        self.assertEqual(len(result), 4)

        # multiple tags - returns the overlapping portion of all documents that have all tags
        self.assertEqual(
            work.list_sections("vl-1", "mvmt-2"),
            [("sel/beethoven/some_quartet/some_quartet_vl-1.pdf", 4, 8)],
        )
        self.assertEqual(work.list_sections("vl-1", "vl-2"), [])