Merge branch 'RECOLL_1_23_MAINT'

This commit is contained in:
Jean-Francois Dockes 2018-01-08 14:30:30 +01:00
commit 3d22f2c436
6 changed files with 56 additions and 27 deletions

View File

@ -4089,8 +4089,7 @@ alink="#0000FF">
set.</p> set.</p>
</li> </li>
<li class="listitem"> <li class="listitem">
<p><b>%t.&nbsp;</b>Title or Filename if not <p><b>%t.&nbsp;</b>Title.</p>
set.</p>
</li> </li>
<li class="listitem"> <li class="listitem">
<p><b>%U.&nbsp;</b>Url</p> <p><b>%U.&nbsp;</b>Url</p>

View File

@ -3109,8 +3109,8 @@
<listitem><formalpara><title>%T</title><para>Title or Filename if <listitem><formalpara><title>%T</title><para>Title or Filename if
not set.</para></formalpara> not set.</para></formalpara>
</listitem> </listitem>
<listitem><formalpara><title>%t</title><para>Title or Filename if <listitem><formalpara><title>%t</title><para>Title or empty.
not set.</para></formalpara> </para></formalpara>
</listitem> </listitem>
<listitem><formalpara><title>%U</title><para>Url</para></formalpara> <listitem><formalpara><title>%U</title><para>Url</para></formalpara>
</listitem> </listitem>

View File

@ -377,7 +377,7 @@ class Db {
* in the TermMatchResult header * in the TermMatchResult header
*/ */
enum MatchType {ET_NONE=0, ET_WILD=1, ET_REGEXP=2, ET_STEM=3, enum MatchType {ET_NONE=0, ET_WILD=1, ET_REGEXP=2, ET_STEM=3,
ET_DIACSENS=8, ET_CASESENS=16, ET_SYNEXP=32}; ET_DIACSENS=8, ET_CASESENS=16, ET_SYNEXP=32, ET_PATHELT=64};
int matchTypeTp(int tp) int matchTypeTp(int tp)
{ {
return tp & 7; return tp & 7;

View File

@ -164,8 +164,17 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
bool diac_sensitive = (typ_sens & ET_DIACSENS) != 0; bool diac_sensitive = (typ_sens & ET_DIACSENS) != 0;
bool case_sensitive = (typ_sens & ET_CASESENS) != 0; bool case_sensitive = (typ_sens & ET_CASESENS) != 0;
// Path elements (used for dir: filtering) are special because
LOGDEB0("Db::TermMatch: typ " << (tmtptostr(matchtyp)) << " diacsens " << (diac_sensitive) << " casesens " << (case_sensitive) << " lang [" << (lang) << "] term [" << (_term) << "] max " << (max) << " field [" << (field) << "] stripped " << (o_index_stripchars) << " init res.size " << (res.entries.size()) << "\n" ); // they are not unaccented or lowercased even if the index is
// otherwise stripped.
bool pathelt = (typ_sens & ET_PATHELT) != 0;
LOGDEB0("Db::TermMatch: typ " << tmtptostr(matchtyp) << " diacsens " <<
diac_sensitive << " casesens " << case_sensitive << " pathelt " <<
pathelt << " lang [" <<
lang << "] term [" << _term << "] max " << max << " field [" <<
field << "] stripped " << o_index_stripchars << " init res.size "
<< res.entries.size() << "\n");
// If index is stripped, no case or diac expansion can be needed: // If index is stripped, no case or diac expansion can be needed:
// for the processing inside this routine, everything looks like // for the processing inside this routine, everything looks like
@ -174,8 +183,8 @@ bool Db::termMatch(int typ_sens, const string &lang, const string &_term,
string term = _term; string term = _term;
if (o_index_stripchars) { if (o_index_stripchars) {
diac_sensitive = case_sensitive = true; diac_sensitive = case_sensitive = true;
if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) { if (!pathelt && !unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) {
LOGERR("Db::termMatch: unac failed for [" << (_term) << "]\n" ); LOGERR("Db::termMatch: unac failed for [" << _term << "]\n");
return false; return false;
} }
} }

View File

@ -232,6 +232,9 @@ public:
SDCM_ANCHOREND=0x4, SDCM_CASESENS=0x8, SDCM_DIACSENS=0x10, SDCM_ANCHOREND=0x4, SDCM_CASESENS=0x8, SDCM_DIACSENS=0x10,
SDCM_NOTERMS=0x20, // Don't include terms for highlighting SDCM_NOTERMS=0x20, // Don't include terms for highlighting
SDCM_NOSYNS = 0x40, // Don't perform synonym expansion SDCM_NOSYNS = 0x40, // Don't perform synonym expansion
// Special case: path elements are case- and diacritics-sensitive
// even in a stripped index
SDCM_PATHELT = 0x80,
}; };
enum Relation {REL_CONTAINS, REL_EQUALS, REL_LT, REL_LTE, REL_GT, REL_GTE}; enum Relation {REL_CONTAINS, REL_EQUALS, REL_LT, REL_LTE, REL_GT, REL_GTE};

View File

@ -374,6 +374,16 @@ private:
map<int, bool> m_nste; map<int, bool> m_nste;
}; };
// Printable names for the SDCM_* clause modifier bits. Passed to
// flagsToString() when tracing the modifiers given to expandTerm().
// Entries are listed in ascending bit order.
static const vector<CharFlags> expandModStrings = {
    {SearchDataClause::SDCM_NOSTEMMING,  "nostemming"},
    {SearchDataClause::SDCM_ANCHORSTART, "anchorstart"},
    {SearchDataClause::SDCM_ANCHOREND,   "anchorend"},
    {SearchDataClause::SDCM_CASESENS,    "casesens"},
    {SearchDataClause::SDCM_DIACSENS,    "diacsens"},
    {SearchDataClause::SDCM_NOTERMS,     "noterms"},
    {SearchDataClause::SDCM_NOSYNS,      "nosyns"},
    {SearchDataClause::SDCM_PATHELT,     "pathelt"},
};
/** Expand term into term list, using appropriate mode: stem, wildcards, /** Expand term into term list, using appropriate mode: stem, wildcards,
* diacritics... * diacritics...
@ -396,12 +406,20 @@ bool SearchDataClauseSimple::expandTerm(Rcl::Db &db,
vector<string>* multiwords vector<string>* multiwords
) )
{ {
LOGDEB0("expandTerm: mods 0x" << (mods) << " fld [" << (m_field) << "] trm [" << (term) << "] lang [" << (getStemLang()) << "]\n" ); LOGDEB0("expandTerm: mods: [" << flagsToString(expandModStrings, mods) <<
"] fld [" << m_field << "] trm [" << term << "] lang [" <<
getStemLang() << "]\n");
sterm.clear(); sterm.clear();
oexp.clear(); oexp.clear();
if (term.empty()) if (term.empty())
return true; return true;
if (mods & SDCM_PATHELT) {
// Path elements are special: the only expansion applied to them is
// wildcards, and matching is case- and diacritics-sensitive.
mods |= SDCM_NOSTEMMING|SDCM_CASESENS|SDCM_DIACSENS|SDCM_NOSYNS;
}
bool maxexpissoft = false; bool maxexpissoft = false;
int maxexpand = getSoftMaxExp(); int maxexpand = getSoftMaxExp();
if (maxexpand != -1) { if (maxexpand != -1) {
@ -420,14 +438,15 @@ bool SearchDataClauseSimple::expandTerm(Rcl::Db &db,
// No stem expansion if there are wildcards or if prevented by caller // No stem expansion if there are wildcards or if prevented by caller
bool nostemexp = (mods & SDCM_NOSTEMMING) != 0; bool nostemexp = (mods & SDCM_NOSTEMMING) != 0;
if (haswild || getStemLang().empty()) { if (haswild || getStemLang().empty()) {
LOGDEB2("expandTerm: found wildcards or stemlang empty: no exp\n" ); LOGDEB2("expandTerm: found wildcards or stemlang empty: no exp\n");
nostemexp = true; nostemexp = true;
} }
bool diac_sensitive = (mods & SDCM_DIACSENS) != 0; bool diac_sensitive = (mods & SDCM_DIACSENS) != 0;
bool case_sensitive = (mods & SDCM_CASESENS) != 0; bool case_sensitive = (mods & SDCM_CASESENS) != 0;
bool synonyms = (mods & SDCM_NOSYNS) == 0; bool synonyms = (mods & SDCM_NOSYNS) == 0;
bool pathelt = (mods & SDCM_PATHELT) != 0;
// noexpansion can be modified further down by possible case/diac expansion // noexpansion can be modified further down by possible case/diac expansion
bool noexpansion = nostemexp && !haswild && !synonyms; bool noexpansion = nostemexp && !haswild && !synonyms;
@ -442,7 +461,7 @@ bool SearchDataClauseSimple::expandTerm(Rcl::Db &db,
// performed (conversion+comparison) will automatically ignore // performed (conversion+comparison) will automatically ignore
// accented characters which are actually a separate letter // accented characters which are actually a separate letter
if (getAutoDiac() && unachasaccents(term)) { if (getAutoDiac() && unachasaccents(term)) {
LOGDEB0("expandTerm: term has accents -> diac-sensitive\n" ); LOGDEB0("expandTerm: term has accents -> diac-sensitive\n");
diac_sensitive = true; diac_sensitive = true;
} }
@ -453,13 +472,14 @@ bool SearchDataClauseSimple::expandTerm(Rcl::Db &db,
Utf8Iter it(term); Utf8Iter it(term);
it++; it++;
if (getAutoCase() && unachasuppercase(term.substr(it.getBpos()))) { if (getAutoCase() && unachasuppercase(term.substr(it.getBpos()))) {
LOGDEB0("expandTerm: term has uppercase -> case-sensitive\n" ); LOGDEB0("expandTerm: term has uppercase -> case-sensitive\n");
case_sensitive = true; case_sensitive = true;
} }
// If we are sensitive to case or diacritics turn stemming off // If we are sensitive to case or diacritics turn stemming off
if (diac_sensitive || case_sensitive) { if (diac_sensitive || case_sensitive) {
LOGDEB0("expandTerm: diac or case sens set -> stemexpand and synonyms off\n" ); LOGDEB0("expandTerm: diac or case sens set -> stemexpand and "
"synonyms off\n");
nostemexp = true; nostemexp = true;
synonyms = false; synonyms = false;
} }
@ -472,7 +492,7 @@ bool SearchDataClauseSimple::expandTerm(Rcl::Db &db,
if (noexpansion) { if (noexpansion) {
oexp.push_back(prefix + term); oexp.push_back(prefix + term);
m_hldata.terms[term] = term; m_hldata.terms[term] = term;
LOGDEB("ExpandTerm: noexpansion: final: " << (stringsToString(oexp)) << "\n" ); LOGDEB("ExpandTerm: noexpansion: final: "<<stringsToString(oexp)<< "\n");
return true; return true;
} }
@ -483,7 +503,8 @@ bool SearchDataClauseSimple::expandTerm(Rcl::Db &db,
termmatchsens |= Db::ET_DIACSENS; termmatchsens |= Db::ET_DIACSENS;
if (synonyms) if (synonyms)
termmatchsens |= Db::ET_SYNEXP; termmatchsens |= Db::ET_SYNEXP;
if (pathelt)
termmatchsens |= Db::ET_PATHELT;
Db::MatchType mtyp = haswild ? Db::ET_WILD : Db::MatchType mtyp = haswild ? Db::ET_WILD :
nostemexp ? Db::ET_NONE : Db::ET_STEM; nostemexp ? Db::ET_NONE : Db::ET_STEM;
TermMatchResult res; TermMatchResult res;
@ -498,9 +519,8 @@ bool SearchDataClauseSimple::expandTerm(Rcl::Db &db,
" Maybe use case/diacritics sensitivity or increase maxTermExpand."; " Maybe use case/diacritics sensitivity or increase maxTermExpand.";
return false; return false;
} }
for (vector<TermMatchEntry>::const_iterator it = res.entries.begin(); for (const auto& entry : res.entries) {
it != res.entries.end(); it++) { oexp.push_back(entry.term);
oexp.push_back(it->term);
} }
// If the term does not exist at all in the db, the return from // If the term does not exist at all in the db, the return from
// termMatch() is going to be empty, which is not what we want (we // termMatch() is going to be empty, which is not what we want (we
@ -509,11 +529,10 @@ bool SearchDataClauseSimple::expandTerm(Rcl::Db &db,
oexp.push_back(prefix + term); oexp.push_back(prefix + term);
// Remember the uterm-to-expansion links // Remember the uterm-to-expansion links
for (vector<string>::const_iterator it = oexp.begin(); for (const auto& entry : oexp) {
it != oexp.end(); it++) { m_hldata.terms[strip_prefix(entry)] = term;
m_hldata.terms[strip_prefix(*it)] = term;
} }
LOGDEB("ExpandTerm: final: " << (stringsToString(oexp)) << "\n" ); LOGDEB("ExpandTerm: final: " << stringsToString(oexp) << "\n");
return true; return true;
} }
@ -951,7 +970,7 @@ bool SearchDataClausePath::toNativeQuery(Rcl::Db &db, void *p)
#endif #endif
if (ltext.empty()) { if (ltext.empty()) {
LOGERR("SearchDataClausePath: empty path??\n" ); LOGERR("SearchDataClausePath: empty path??\n");
m_reason = "Empty path ?"; m_reason = "Empty path ?";
return false; return false;
} }
@ -971,8 +990,7 @@ bool SearchDataClausePath::toNativeQuery(Rcl::Db &db, void *p)
string sterm; string sterm;
vector<string> exp; vector<string> exp;
if (!expandTerm(db, m_reason, if (!expandTerm(db, m_reason, SDCM_PATHELT,
SDCM_NOSTEMMING|SDCM_CASESENS|SDCM_DIACSENS,
*pit, exp, sterm, wrap_prefix(pathelt_prefix))) { *pit, exp, sterm, wrap_prefix(pathelt_prefix))) {
return false; return false;
} }