2026-05-12 11:10:21 +10:00

92 lines
2.2 KiB
Python

from whoosh.index import create_in, open_dir, EmptyIndexError, Index
from whoosh.analysis import StemmingAnalyzer, CharsetFilter
from whoosh.support.charset import accent_map
from whoosh.fields import Schema, TEXT, KEYWORD, NUMERIC
from whoosh.qparser import QueryParser
from whoosh.query import Term, NullQuery
from typing import Any
from django.conf import settings
import os.path
import shutil
stemming_analyzer = StemmingAnalyzer() | CharsetFilter(accent_map)
schema = Schema(
work=NUMERIC(stored=True, unique=True),
collection=NUMERIC(stored=True),
name=TEXT(stored=True),
composer=TEXT(stored=True),
edition=TEXT(stored=True),
tag=KEYWORD(commas=True),
text=TEXT(analyzer=stemming_analyzer),
)
index_path = os.path.join(os.path.dirname(settings.BASE_DIR), "index")
def create_index() -> Index:
if not os.path.exists(index_path):
os.mkdir(index_path)
ix = create_in(index_path, schema)
return ix
def get_index() -> Index:
try:
return open_dir(index_path)
except EmptyIndexError:
return create_index()
def reset_index() -> Index:
shutil.rmtree(index_path)
return create_index()
def index_docs(works: list[dict]):
ix = get_index()
with ix.writer() as writer:
for work in works:
writer.update_document(**work)
def search(
query: str,
collections: list[int] = [],
page: int = 1,
pagesize: int = 20,
) -> tuple[list[dict], dict[str, Any]]:
meta = {}
qp = QueryParser("text", schema=schema)
q = qp.parse(query.lower())
meta["query"] = str(q)
terms = NullQuery
for c in collections:
terms = terms | Term("collection", c)
q = q & terms
hits = []
ix = get_index()
with ix.searcher() as searcher:
results = searcher.search_page(q, page, pagesize)
for result in results:
hits.append(
dict(
pk=result["work"],
name=result["name"],
composer=result["composer"],
edition=result["edition"],
collection_id=int(result["collection"]),
)
)
meta["total"] = len(results)
return hits, meta