when search includes composite spans + other terms, increase slack instead of switching to word split
This commit is contained in:
parent
90e378333e
commit
9b5de1a4ac
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.18 2007-09-20 08:43:12 dockes Exp $ (C) 2006 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.19 2007-10-04 12:26:04 dockes Exp $ (C) 2006 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -367,16 +367,19 @@ bool StringToXapianQ::processUserString(const string &iq,
|
|||||||
LOGDEB(("strToXapianQ: phrase or word: [%s]\n", it->c_str()));
|
LOGDEB(("strToXapianQ: phrase or word: [%s]\n", it->c_str()));
|
||||||
|
|
||||||
// If there are multiple spans in this element, including
|
// If there are multiple spans in this element, including
|
||||||
// at least one composite, we need to use a word split,
|
// at least one composite, we have to do something
|
||||||
// else a phrase query including a span would fail.
|
// else a phrase query including a span would fail.
|
||||||
// (other possible solution: adjust slack to account for the
|
// Ex: "term0@term1 term2" is onlyspans-split as:
|
||||||
// additional position increase?)
|
// 0 term0@term1 0 12
|
||||||
// Ex: "term0@term01 term1" is onlyspans-split as:
|
// 2 term2 13 18
|
||||||
// 0 term0@term01 0 12
|
// The position of term2 is 2, not 1, so a phrase search
|
||||||
// 2 term1 13 18
|
// would fail.
|
||||||
// The position of term1 is 2, not 1, so the phrase search would
|
// We used to do word split, searching for
|
||||||
// fail. We search for "term0 term01 term1" instead, which may
|
// "term0 term1 term2" instead, which may have worse
|
||||||
// have worse performance, but will succeed.
|
// performance, but will succeed.
|
||||||
|
// We now adjust the phrase/near slack by the term count
|
||||||
|
// difference (this is mainly better for cjk where this is a very
|
||||||
|
// common occurrence because of the ngrams thing).
|
||||||
wsQData splitDataS(stops), splitDataW(stops);
|
wsQData splitDataS(stops), splitDataW(stops);
|
||||||
TextSplit splitterS(&splitDataS,
|
TextSplit splitterS(&splitDataS,
|
||||||
TextSplit::Flags(TextSplit::TXTS_ONLYSPANS |
|
TextSplit::Flags(TextSplit::TXTS_ONLYSPANS |
|
||||||
@ -388,8 +391,10 @@ bool StringToXapianQ::processUserString(const string &iq,
|
|||||||
splitterW.text_to_words(*it);
|
splitterW.text_to_words(*it);
|
||||||
wsQData *splitData = &splitDataS;
|
wsQData *splitData = &splitDataS;
|
||||||
if (splitDataS.terms.size() > 1 &&
|
if (splitDataS.terms.size() > 1 &&
|
||||||
splitDataS.terms.size() != splitDataW.terms.size())
|
splitDataS.terms.size() != splitDataW.terms.size()) {
|
||||||
splitData = &splitDataW;
|
slack += splitDataW.terms.size() - splitDataS.terms.size();
|
||||||
|
// used to: splitData = &splitDataW;
|
||||||
|
}
|
||||||
|
|
||||||
LOGDEB(("strToXapianQ: splitter term count: %d\n",
|
LOGDEB(("strToXapianQ: splitter term count: %d\n",
|
||||||
splitData->terms.size()));
|
splitData->terms.size()));
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user