104 lines
2.6 KiB
Python

from typing import Protocol, Any, Iterable, Generator
from django.conf import settings
from django.utils.module_loading import import_module
from django.db.models import QuerySet
from library.models import Work, Collection
# Dotted import path of the module to use as the indexer backend: taken from
# the INDEXER setting, falling back to "library.indexer.whoosh" when unset.
instance = getattr(settings, "INDEXER", "library.indexer.whoosh")
class Indexer(Protocol):
    """Structural interface that the configured indexer backend must offer.

    Only the call signatures are fixed here; what each operation actually
    does is up to the backend that implements it.
    """

    def create_index(self) -> None:
        """Create the backend's index."""

    def get_index(self) -> None:
        """Obtain the backend's index (annotated as returning nothing)."""

    def reset_index(self) -> None:
        """Reset the backend's index."""

    def search(
        self,
        query: str,
        collections: list[int],
        page: int = 1,
        pagesize: int = 20,
    ) -> tuple[list[dict], dict[str, Any]]:
        """Search for *query* and return one page of results.

        :param query: the search string.
        :param collections: integers identifying collections; presumably
            used to restrict the search -- confirm against the backend.
        :param page: page number to return (defaults to 1).
        :param pagesize: number of hits per page (defaults to 20).
        :return: a pair of (list of hit dicts, dict of result metadata).
        """

    def index_docs(self, docs: Iterable[dict]):
        """Index every document dict in *docs*."""
class PartialResultSet:
    """
    Implements enough of QuerySet to fool the Paginator!

    Wraps a single, already-fetched page of results together with the
    metadata describing it.  ``meta`` must provide ``"page"`` (1-based page
    number), ``"pagesize"`` and ``"total"``.
    """

    def __init__(self, works, meta):
        self.works = works
        self.meta = meta
        # Offset of the held page's first item within the full result list.
        self.start = (meta["page"] - 1) * meta["pagesize"]

    def __getitem__(self, key):
        """Return the held page for a matching slice, or one item by index.

        A slice is only checked against the page offset: its stop and step
        are ignored and the whole held page is returned.  An integer key
        indexes into the held page directly.

        :raises KeyError: if a slice starts anywhere other than the offset
            of the held page.
        """
        if isinstance(key, slice):
            # An omitted start (``rs[:n]``) means offset 0, so it must be
            # accepted for the first page just like ``rs[0:n]``.
            requested = 0 if key.start is None else key.start
            if requested != self.start:
                raise KeyError(f"Expected {self.start}, got {key.start}")
            return self.works
        return self.works[key]

    def count(self):
        """Return the total number of results, not just those in this page."""
        return self.meta["total"]
# Import the module named by `instance` and expose the module object itself
# as `indexer`. The module is used as if it were an `Indexer`, which a type
# checker cannot verify, hence the suppressed type check.
indexer: Indexer = import_module(instance) # type: ignore
def work_to_doc(work: Work) -> dict[str, str]:
    """Flatten *work* into a dict of strings for indexing.

    The result carries the work's primary key, its collection id, name,
    composer and edition, a lower-cased comma-joined list of the values of
    its ``meta_info`` rows named ``"tag"``, and a ``text`` blob made of the
    name, composer, edition, notes and all ``meta_info`` values.
    """
    meta_info = work.meta_info  # type: ignore
    tag_values = meta_info.filter(name="tag").values_list("value", flat=True)
    all_values = meta_info.values_list("value", flat=True)
    tags = ",".join(tag_values)
    meta = ", ".join(all_values)
    # The continuation lines stay flush-left so the literal text is unchanged.
    text = f"""{work.name}
{work.composer}
{work.edition}
{work.notes}
{meta}
"""
    return {
        "work": str(work.pk),
        "collection": str(work.collection_id),  # type: ignore
        "name": work.name,
        "composer": work.composer,
        "edition": work.edition,
        "tag": tags.lower(),
        "text": text,
    }
def doc_set(works: QuerySet) -> Generator:
    """Lazily yield the ``work_to_doc`` dict for each work in *works*."""
    yield from map(work_to_doc, works)
def index_works(works: QuerySet):
    """Turn *works* into index documents and pass them to the indexer."""
    docs = doc_set(works)
    indexer.index_docs(docs)
def model_search(
    query: str, collections: list[int], page: int = 1, pagesize: int = 20
) -> PartialResultSet:
    """Search the index and return one page of results as model instances.

    The lower-cased *query* is passed to the indexer along with
    *collections*, *page* and *pagesize*.  Each hit dict is expanded into
    the keyword arguments of a ``Work``; these instances are built in memory
    rather than loaded from the database.  Every work then gets a
    ``Collection`` object constructed from its ``collection_id`` and the
    matching collection name.

    :return: a ``PartialResultSet`` holding the page's works and the
        indexer's metadata, with ``"pagesize"`` and ``"page"`` added to it.
    :raises KeyError: if a hit refers to a collection id that is not among
        the stored collections.
    """
    hits, meta = indexer.search(query.lower(), collections, page, pagesize)
    # Record the paging parameters on the metadata dict; PartialResultSet
    # reads them back to work out the page offset.
    meta["pagesize"] = pagesize
    meta["page"] = page

    found = [Work(**fields) for fields in hits]

    # One query for all collection names instead of one lookup per work.
    names_by_pk = {
        pk: name for pk, name in Collection.objects.values_list("pk", "name")
    }
    for item in found:
        item.collection = Collection(  # type: ignore
            pk=item.collection_id, name=names_by_pk[item.collection_id]
        )
    return PartialResultSet(found, meta)