98 lines
2.4 KiB
Python
98 lines
2.4 KiB
Python
from whoosh.index import create_in, open_dir, EmptyIndexError, Index
|
|
from whoosh.analysis import StemmingAnalyzer, CharsetFilter
|
|
from whoosh.support.charset import accent_map
|
|
from whoosh.fields import Schema, TEXT, KEYWORD, NUMERIC
|
|
from whoosh.qparser import QueryParser
|
|
from whoosh.query import Term, NullQuery, FuzzyTerm
|
|
|
|
|
|
from typing import Any
|
|
from django.conf import settings
|
|
import os.path
|
|
import shutil
|
|
|
|
# Analyzer for the full-text field: StemmingAnalyzer output is piped through
# a CharsetFilter built from whoosh's accent_map.
stemming_analyzer = StemmingAnalyzer() | CharsetFilter(accent_map)

# Layout of one indexed document.
schema = Schema(
    # Numeric id of the work; declared unique.
    work=NUMERIC(stored=True, unique=True),
    # Numeric id of the collection the work belongs to.
    collection=NUMERIC(stored=True),
    # Stored text fields (returned with each search hit).
    name=TEXT(stored=True),
    composer=TEXT(stored=True),
    edition=TEXT(stored=True),
    # Comma-separated keywords; not stored.
    tag=KEYWORD(commas=True),
    # Full text, analyzed with stemming_analyzer; not stored.
    text=TEXT(analyzer=stemming_analyzer),
)

# Directory that holds the index, taken from the Django settings.
index_path = settings.WHOOSH_INDEX
|
|
|
|
|
|
def create_index() -> Index:
    """Create a new index for ``schema`` in ``index_path`` and return it.

    The index directory is created first if it does not exist yet,
    including any missing parent directories.

    Returns:
        The index object returned by ``create_in``.
    """
    # makedirs(exist_ok=True) replaces the exists()/mkdir() pair: it has no
    # check-then-create race and also copes with a nested path whose
    # parents are missing.
    os.makedirs(index_path, exist_ok=True)
    return create_in(index_path, schema)
|
|
|
|
|
|
def get_index() -> Index:
    """Return the index stored in ``index_path``.

    If opening the directory raises ``EmptyIndexError``, a new index is
    built with ``create_index()`` and returned instead.
    """
    try:
        existing = open_dir(index_path)
    except EmptyIndexError:
        return create_index()
    return existing
|
|
|
|
|
|
def reset_index() -> Index:
    """Delete the index directory and return a freshly created index.

    A missing index directory is not an error: there is simply nothing to
    delete, and a new index is created as usual.

    Returns:
        The index object returned by ``create_index()``.
    """
    try:
        shutil.rmtree(index_path)
    except FileNotFoundError:
        # Index was never created (or already removed); nothing to delete.
        pass
    return create_index()
|
|
|
|
|
|
def index_docs(works: list[dict]):
    """Write every entry of *works* to the index.

    Each dict is expanded into keyword arguments for the writer's
    ``update_document``. All documents are written through a single writer
    obtained from ``get_index()`` and used as a context manager.
    """
    index = get_index()
    with index.writer() as batch:
        for fields in works:
            batch.update_document(**fields)
|
|
|
|
|
|
def search(
    query: str,
    collections: "list[int] | None" = None,
    page: int = 1,
    pagesize: int = 20,
) -> tuple[list[dict], dict[str, Any]]:
    """Search the ``text`` field of the index and return one page of hits.

    The query string is lower-cased and parsed against ``schema``. If the
    parsed query yields no results, it is parsed again with ``FuzzyTerm``
    as the parser's term class and the search is repeated once.

    Args:
        query: Query string entered by the user.
        collections: Collection ids to restrict the search to. ``None`` or
            an empty list applies no restriction.
        page: Page number passed to ``search_page``.
        pagesize: Number of hits per page passed to ``search_page``.

    Returns:
        A ``(hits, meta)`` tuple. ``hits`` is a list of dicts with the keys
        ``pk``, ``name``, ``composer``, ``edition`` and ``collection_id``.
        ``meta`` holds ``"query"`` (string form of the query that was run)
        and ``"total"`` (``len()`` of the result page).
    """
    meta: dict[str, Any] = {}
    query = query.lower()

    qp = QueryParser("text", schema=schema)
    q = qp.parse(query)

    # OR together one Term per requested collection. Starting from NullQuery
    # means that with no collections the combined query below is just `q`.
    limit = NullQuery
    for c in collections or ():
        limit |= Term("collection", c)

    ix = get_index()
    with ix.searcher() as searcher:
        results = searcher.search_page(q & limit, page, pagesize)

        # If nothing matched, retry once with fuzzy term matching.
        if results.results.is_empty():
            qp.termclass = FuzzyTerm
            q = qp.parse(query)
            results = searcher.search_page(q & limit, page, pagesize)

        # Stored fields are read while the searcher is still open.
        hits = [
            dict(
                pk=result["work"],
                name=result["name"],
                composer=result["composer"],
                edition=result["edition"],
                collection_id=int(result["collection"]),
            )
            for result in results
        ]
        meta["query"] = str(q & limit)
        meta["total"] = len(results)

    return hits, meta
|