Added fuzzy matching

This commit is contained in:
Tris Forster 2026-05-14 11:02:55 +10:00
parent 4e102c07ac
commit 27d1b03c3c

View File

@@ -3,7 +3,7 @@ from whoosh.analysis import StemmingAnalyzer, CharsetFilter
from whoosh.support.charset import accent_map from whoosh.support.charset import accent_map
from whoosh.fields import Schema, TEXT, KEYWORD, NUMERIC from whoosh.fields import Schema, TEXT, KEYWORD, NUMERIC
from whoosh.qparser import QueryParser from whoosh.qparser import QueryParser
from whoosh.query import Term, NullQuery from whoosh.query import Term, NullQuery, Prefix, FuzzyTerm
from typing import Any from typing import Any
@@ -61,20 +61,27 @@ def search(
pagesize: int = 20, pagesize: int = 20,
) -> tuple[list[dict], dict[str, Any]]: ) -> tuple[list[dict], dict[str, Any]]:
meta = {} meta = {}
query = query.lower()
qp = QueryParser("text", schema=schema) qp = QueryParser("text", schema=schema)
q = qp.parse(query.lower()) q = qp.parse(query)
meta["query"] = str(q)
terms = NullQuery limit = NullQuery
for c in collections: for c in collections:
terms = terms | Term("collection", c) limit |= Term("collection", c)
q = q & terms print(limit)
hits = [] hits = []
ix = get_index() ix = get_index()
with ix.searcher() as searcher: with ix.searcher() as searcher:
results = searcher.search_page(q, page, pagesize) results = searcher.search_page(q & limit, page, pagesize)
# if no results, do a prefix search
if results.results.is_empty():
qp.termclass = Prefix
q = qp.parse(query)
results = searcher.search_page(q & limit, page, pagesize)
for result in results: for result in results:
hits.append( hits.append(
dict( dict(
@@ -85,7 +92,7 @@ def search(
collection_id=int(result["collection"]), collection_id=int(result["collection"]),
) )
) )
meta["query"] = str(q & limit)
meta["total"] = len(results) meta["total"] = len(results)
return hits, meta return hits, meta