94 lines
2.3 KiB
Python

import os.path
import shutil
from typing import Any, Optional

from django.conf import settings
from django.db.models import QuerySet
from whoosh.analysis import StemmingAnalyzer, CharsetFilter
from whoosh.fields import Schema, ID, TEXT, KEYWORD, STORED, NUMERIC
from whoosh.index import create_in, open_dir, EmptyIndexError, Index
from whoosh.qparser import QueryParser
from whoosh.query import Term, NullQuery
from whoosh.support.charset import accent_map

from library.models import Work
# Analyzer for the free-text field: Whoosh's stemming analyzer piped through
# a charset filter built from ``accent_map``.
stemming_analyzer = StemmingAnalyzer() | CharsetFilter(accent_map)

# Layout of one indexed document.
schema = Schema(
    # Numeric identifier of the work; stored, and declared unique.
    work=NUMERIC(stored=True, unique=True),
    # Numeric identifier of the collection; stored so hits can return it.
    collection=NUMERIC(stored=True),
    # Stored text fields.
    name=TEXT(stored=True),
    composer=TEXT(stored=True),
    edition=TEXT(stored=True),
    # Comma-separated keywords; indexed but not stored.
    tag=KEYWORD(commas=True),
    # Free text run through ``stemming_analyzer``; indexed but not stored.
    text=TEXT(analyzer=stemming_analyzer),
)

# The index lives in a directory named "index" inside the parent directory
# of ``settings.BASE_DIR``.
index_path = os.path.join(os.path.dirname(settings.BASE_DIR), "index")
def create_index() -> Index:
    """Create a new Whoosh index at ``index_path`` using ``schema``.

    The index directory is created first when it is missing, together with
    any missing parent directories.

    Returns:
        The index object returned by ``create_in``.
    """
    # makedirs(exist_ok=True) replaces the exists()/mkdir() pair: there is no
    # gap between the check and the creation in which another process could
    # create the directory, and a missing parent directory no longer fails.
    os.makedirs(index_path, exist_ok=True)
    return create_in(index_path, schema)
def get_index() -> Index:
    """Return the index stored at ``index_path``, creating it when absent.

    ``open_dir`` is tried first; if it raises ``EmptyIndexError`` a new index
    is built with ``create_index`` and returned instead.
    """
    try:
        existing = open_dir(index_path)
    except EmptyIndexError:
        # Nothing usable on disk yet: build a new index.
        return create_index()
    return existing
def reset_index() -> Index:
    """Delete the index directory and recreate an empty index.

    Returns:
        The index object returned by ``create_index``.
    """
    try:
        shutil.rmtree(index_path)
    except FileNotFoundError:
        # Nothing to remove (e.g. the index was never built); resetting
        # should still succeed and produce an empty index.
        pass
    return create_index()
def index_docs(works: list[dict]):
    """Add or refresh documents in the index.

    Args:
        works: One dict per document. Each dict is expanded into keyword
            arguments for ``update_document`` (presumably keyed by the field
            names declared in ``schema`` -- verify against callers).
    """
    index = get_index()
    # A single writer handles the whole batch; it is used as a context
    # manager so it is finalized when the block exits.
    with index.writer() as batch:
        for fields in works:
            batch.update_document(**fields)
def search(
    query: str,
    collections: Optional[list[int]] = None,
    page: int = 1,
    pagesize: int = 20,
) -> tuple[list[dict], dict[str, Any]]:
    """Run a paged full-text search against the index.

    Args:
        query: User query string. It is lower-cased and parsed with a
            ``QueryParser`` whose default field is ``text``.
        collections: Collection ids to match; one ``Term`` on the
            ``collection`` field is OR-ed in per id and the result is AND-ed
            with the parsed query. ``None`` or an empty list adds no terms.
        page: Page number passed to ``search_page`` (defaults to 1).
        pagesize: Page length passed to ``search_page`` (defaults to 20).

    Returns:
        A ``(hits, meta)`` tuple. ``hits`` holds one dict per result on the
        requested page with the keys ``pk``, ``name``, ``composer``,
        ``edition`` and ``collection_id``. ``meta`` holds ``query`` (the
        string form of the parsed query, before the collection terms are
        attached) and ``total`` (``len()`` of the results page, which
        Whoosh documents as the total match count rather than the page size).
    """
    meta: dict[str, Any] = {}

    parser = QueryParser("text", schema=schema)
    q = parser.parse(query.lower())
    # Captured before the collection terms are added, so it reflects only
    # what the user typed.
    meta["query"] = str(q)

    # ``collections`` defaults to None rather than a shared mutable list.
    # NullQuery is the neutral starting point for the OR chain.
    # NOTE(review): this relies on Whoosh dropping NullQuery when combining
    # queries, so an empty selection leaves ``q`` unrestricted -- confirm
    # against the installed Whoosh version.
    scope = NullQuery
    for collection_id in collections or ():
        scope = scope | Term("collection", collection_id)
    q = q & scope

    ix = get_index()
    with ix.searcher() as searcher:
        results = searcher.search_page(q, page, pagesize)
        # Copy the stored fields out while the searcher is still open.
        hits = [
            {
                "pk": hit["work"],
                "name": hit["name"],
                "composer": hit["composer"],
                "edition": hit["edition"],
                "collection_id": int(hit["collection"]),
            }
            for hit in results
        ]
        meta["total"] = len(results)
    return hits, meta