"""Whoosh full-text index helpers: schema, index lifecycle, and search."""

import logging
import os.path
import shutil
from typing import Any, Optional

from django.conf import settings
from whoosh.analysis import StemmingAnalyzer, CharsetFilter
from whoosh.fields import Schema, TEXT, KEYWORD, NUMERIC
from whoosh.index import create_in, open_dir, EmptyIndexError, Index
from whoosh.qparser import QueryParser
from whoosh.query import Term, NullQuery, Prefix
from whoosh.support.charset import accent_map

logger = logging.getLogger(__name__)

# Stemming analyzer piped through the accent-map charset filter.
stemming_analyzer = StemmingAnalyzer() | CharsetFilter(accent_map)

# One document per work; ``work`` is the unique key used by update_document.
schema = Schema(
    work=NUMERIC(stored=True, unique=True),
    collection=NUMERIC(stored=True),
    name=TEXT(stored=True),
    composer=TEXT(stored=True),
    edition=TEXT(stored=True),
    tag=KEYWORD(commas=True),
    text=TEXT(analyzer=stemming_analyzer),
)

# Directory holding the on-disk index, taken from the Django settings.
index_path = settings.WHOOSH_INDEX


def create_index() -> Index:
    """Create a new index for ``schema`` in ``index_path`` and return it.

    The directory (including any missing parents) is created first if it
    does not exist yet.
    """
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(index_path, exist_ok=True)
    return create_in(index_path, schema)


def get_index() -> Index:
    """Open the index in ``index_path``, creating it on ``EmptyIndexError``."""
    try:
        return open_dir(index_path)
    except EmptyIndexError:
        return create_index()


def reset_index() -> Index:
    """Delete the index directory and return a freshly created index."""
    try:
        shutil.rmtree(index_path)
    except FileNotFoundError:
        # Nothing on disk yet: resetting a never-created index is not an error.
        pass
    return create_index()


def index_docs(works: list[dict]) -> None:
    """Add or replace one document per entry of ``works``.

    Each dict is passed as keyword arguments to ``writer.update_document``,
    so its keys must be field names of ``schema``.
    """
    ix = get_index()
    with ix.writer() as writer:
        for work in works:
            writer.update_document(**work)


def search(
    query: str,
    collections: Optional[list[int]] = None,
    page: int = 1,
    pagesize: int = 20,
) -> tuple[list[dict], dict[str, Any]]:
    """Search the ``text`` field and return one page of hits plus metadata.

    The query is lower-cased and parsed with a ``QueryParser`` on the
    ``text`` field. If the first search finds nothing, the query is parsed
    again with ``Prefix`` as the term class and the search is retried.

    Args:
        query: User query string.
        collections: Collection ids to restrict the search to. ``None`` or
            an empty list applies no ``Term("collection", ...)`` filter.
        page: Page number passed to ``searcher.search_page``.
        pagesize: Page length passed to ``searcher.search_page``.

    Returns:
        ``(hits, meta)``. ``hits`` is a list of dicts with the keys ``pk``,
        ``name``, ``composer``, ``edition`` and ``collection_id``. ``meta``
        holds ``"query"`` (string form of the executed query) and
        ``"total"`` (``len()`` of the results page).
    """
    query = query.lower()
    parser = QueryParser("text", schema=schema)
    parsed = parser.parse(query)

    # OR together one Term per requested collection, starting from NullQuery.
    limit = NullQuery
    for collection in collections or ():
        limit |= Term("collection", collection)
    logger.debug("collection filter: %s", limit)

    ix = get_index()
    with ix.searcher() as searcher:
        combined = parsed & limit
        results = searcher.search_page(combined, page, pagesize)

        # If no results, do a prefix search.
        if results.results.is_empty():
            parser.termclass = Prefix
            parsed = parser.parse(query)
            combined = parsed & limit
            results = searcher.search_page(combined, page, pagesize)

        # Read the stored fields while the searcher is still open.
        hits = [
            dict(
                pk=result["work"],
                name=result["name"],
                composer=result["composer"],
                edition=result["edition"],
                collection_id=int(result["collection"]),
            )
            for result in results
        ]
        meta: dict[str, Any] = {
            "query": str(combined),
            "total": len(results),
        }

    return hits, meta