104 lines
2.6 KiB
Python
104 lines
2.6 KiB
Python
from typing import Protocol, Any, Iterable, Generator
|
|
from django.conf import settings
|
|
from django.utils.module_loading import import_module
|
|
from django.db.models import QuerySet
|
|
|
|
from library.models import Work, Collection
|
|
|
|
# Dotted path of the module used as the search backend: read from the
# INDEXER setting when it is defined, otherwise "library.indexer.whoosh".
instance = getattr(settings, "INDEXER", "library.indexer.whoosh")
|
|
|
|
|
|
class Indexer(Protocol):
    """Structural interface expected of a search backend.

    The module-level ``indexer`` object in this file is annotated with this
    protocol.
    """

    def create_index(self) -> None:
        """Create the index (exact semantics are backend-defined)."""

    def get_index(self) -> None:
        """Look up the index (exact semantics are backend-defined)."""

    def reset_index(self) -> None:
        """Reset the index (exact semantics are backend-defined)."""

    def search(
        self,
        query: str,
        collections: list[int],
        page: int = 1,
        pagesize: int = 20,
    ) -> tuple[list[dict], dict[str, Any]]:
        """Run ``query`` and return one page of results.

        Returns a ``(hits, meta)`` pair: a list of dicts, one per hit, and a
        dict of extra information about the search.
        """

    def index_docs(self, docs: Iterable[dict]):
        """Feed the given document dicts to the index."""
|
|
|
|
|
|
class PartialResultSet:
    """
    Implements enough of QuerySet to fool the Paginator!

    Wraps a single, already-fetched page of results. ``meta`` must contain
    the keys ``"page"``, ``"pagesize"`` and ``"total"``: the first two give
    the offset at which the held page starts, and ``count()`` reports
    ``"total"``.
    """

    def __init__(self, works, meta):
        self.works = works
        self.meta = meta
        # Offset of this page's first item within the full result list.
        self.start = (meta["page"] - 1) * meta["pagesize"]

    def __getitem__(self, key):
        """Return the held page for a slice, or one item for any other key.

        A slice must begin at this page's offset; the whole held page is
        then returned and the slice's stop and step are not consulted. An
        omitted slice start (``rs[:n]``) counts as offset 0. Any other key
        indexes directly into the held page.

        Raises:
            KeyError: if a slice starts anywhere other than this page's
                offset.
        """
        if isinstance(key, slice):
            # ``rs[:n]`` carries start=None, which means "from offset 0".
            requested = 0 if key.start is None else key.start
            if requested != self.start:
                raise KeyError(f"Expected {self.start}, got {key.start}")
            return self.works
        return self.works[key]

    def count(self):
        """Return ``meta["total"]`` (presumably the hit count over all pages)."""
        return self.meta["total"]
|
|
|
|
|
|
# Import the module named by `instance` and expose it as `indexer`. It is
# annotated as Indexer (type checking is suppressed on this line), so the
# module is expected to provide that protocol's functions at its top level.
indexer: Indexer = import_module(instance) # type: ignore
|
|
|
|
|
|
def work_to_doc(work: Work) -> dict[str, str]:
    """Flatten a work into the dict that is handed to the indexer.

    ``tag`` holds the lower-cased, comma-joined values of the work's "tag"
    meta entries. ``text`` is a blob containing the name, composer, edition,
    notes and all meta values, one item per line.
    """
    entries = work.meta_info  # type: ignore
    tag_values = entries.filter(name="tag").values_list("value", flat=True)
    tags = ",".join(tag_values)
    meta = ", ".join(entries.values_list("value", flat=True))

    description = (
        f"{work.name}\n"
        f"{work.composer}\n"
        f"{work.edition}\n"
        f"{work.notes}\n"
        f"{meta}\n"
    )

    return {
        "work": str(work.pk),
        "collection": str(work.collection_id),  # type: ignore
        "name": work.name,
        "composer": work.composer,
        "edition": work.edition,
        "tag": tags.lower(),
        "text": description,
    }
|
|
|
|
|
|
def doc_set(works: QuerySet) -> Generator:
    """Lazily yield the index document (see ``work_to_doc``) for each work."""
    yield from (work_to_doc(item) for item in works)
|
|
|
|
|
|
def index_works(works: QuerySet):
    """Convert ``works`` to index documents and pass them to the indexer."""
    documents = doc_set(works)
    indexer.index_docs(documents)
|
|
|
|
|
|
def model_search(
    query: str, collections: list[int], page: int = 1, pagesize: int = 20
) -> PartialResultSet:
    """Search the index and wrap the requested page for pagination.

    The query is lower-cased before it is passed to the indexer. Each hit is
    used as keyword arguments to build a ``Work``, and every work gets a
    ``Collection`` object (pk and name only) attached. The names come from a
    single ``values_list`` query over all collections.

    The ``meta`` dict returned by the indexer is extended in place with the
    ``pagesize`` and ``page`` that were requested.
    """
    hits, meta = indexer.search(query.lower(), collections, page, pagesize)
    meta.update(pagesize=pagesize, page=page)

    found = [Work(**fields) for fields in hits]

    # pk -> name lookup, built once so the loop below does no further queries.
    names_by_pk = dict(Collection.objects.values_list("pk", "name"))
    for item in found:
        item.collection = Collection(  # type: ignore
            pk=item.collection_id, name=names_by_pk[item.collection_id]
        )

    return PartialResultSet(found, meta)
|