Add flag qualifying field to be indexed exclusively with prefix

This commit is contained in:
Jean-Francois Dockes 2014-07-23 15:28:16 +02:00
parent c7a9aced05
commit 578511c3e2
4 changed files with 36 additions and 38 deletions

View File

@ -847,16 +847,19 @@ bool RclConfig::readFieldsConfig(const string& cnferrloc)
ft.wdfinc = atoi(tval.c_str());
if (attrs.get("boost", tval))
ft.boost = atof(tval.c_str());
if (attrs.get("pfxonly", tval))
ft.pfxonly = stringToBool(tval);
m_fldtotraits[stringtolower(*it)] = ft;
LOGDEB2(("readFieldsConfig: [%s] -> [%s] %d %.1f\n",
it->c_str(), ft.pfx.c_str(), ft.wdfinc, ft.boost));
}
// Add prefixes for aliases an build alias-to-canonic map while we're at it
// Having the aliases in the prefix map avoids an additional indirection
// at index time.
// Add prefixes for aliases and build alias-to-canonic map while
// we're at it. Having the aliases in the prefix map avoids an
// additional indirection at index time.
tps = m_fields->getNames("aliases");
for (vector<string>::const_iterator it = tps.begin(); it != tps.end();it++) {
for (vector<string>::const_iterator it = tps.begin();
it != tps.end(); it++){
string canonic = stringtolower(*it); // canonic name
FieldTraits ft;
map<string, FieldTraits>::const_iterator pit =

View File

@ -65,9 +65,11 @@ struct FieldTraits {
string pfx; // indexing prefix,
int wdfinc; // Index time term frequency increment (default 1)
double boost; // Query time boost (default 1.0)
FieldTraits(int i, double f) {wdfinc = i; boost = f;}
FieldTraits() : wdfinc(1), boost(1.0) {}
FieldTraits(const string& s) : pfx(s), wdfinc(1), boost(1.0) {}
bool pfxonly; // Suppress prefix-less indexing
FieldTraits()
: wdfinc(1), boost(1.0), pfxonly(false)
{}
};
class RclConfig {

View File

@ -1060,8 +1060,7 @@ class TextSplitDb : public TextSplitP {
Xapian::termpos curpos;
TextSplitDb(Xapian::Document &d, TermProc *prc)
: TextSplitP(prc),
doc(d), basepos(1), curpos(0), wdfinc(1)
: TextSplitP(prc), doc(d), basepos(1), curpos(0)
{}
// Reimplement text_to_words to insert the begin and end anchor terms.
@ -1072,7 +1071,7 @@ class TextSplitDb : public TextSplitP {
try {
// Index the possibly prefixed start term.
doc.add_posting(prefix + start_of_field_term, basepos, wdfinc);
doc.add_posting(ft.pfx + start_of_field_term, basepos, ft.wdfinc);
++basepos;
} XCATCHERROR(ermsg);
if (!ermsg.empty()) {
@ -1087,8 +1086,8 @@ class TextSplitDb : public TextSplitP {
try {
// Index the possibly prefixed end term.
doc.add_posting(prefix + end_of_field_term, basepos + curpos + 1,
wdfinc);
doc.add_posting(ft.pfx + end_of_field_term, basepos + curpos + 1,
ft.wdfinc);
++basepos;
} XCATCHERROR(ermsg);
if (!ermsg.empty()) {
@ -1103,27 +1102,15 @@ class TextSplitDb : public TextSplitP {
return true;
}
void setprefix(const string& pref)
void setTraits(const FieldTraits& ftp)
{
if (pref.empty())
prefix.clear();
else
prefix = wrap_prefix(pref);
}
void setwdfinc(int i)
{
wdfinc = i;
ft = ftp;
}
friend class TermProcIdx;
private:
// If prefix is set, we also add a posting for the prefixed terms
// (ie: for titles, add postings for both "term" and "Sterm")
string prefix;
// Some fields have more weight
int wdfinc;
FieldTraits ft;
};
class TermProcIdx : public TermProc {
@ -1145,15 +1132,18 @@ public:
try {
// Index without prefix, using the field-specific weighting, unless
// the field is flagged as prefix-only (pfxonly).
LOGDEB1(("Emitting term at %d : [%s]\n", pos, term.c_str()));
m_ts->doc.add_posting(term, pos, m_ts->wdfinc);
if (!m_ts->ft.pfxonly)
m_ts->doc.add_posting(term, pos, m_ts->ft.wdfinc);
#ifdef TESTING_XAPIAN_SPELL
if (Db::isSpellingCandidate(term)) {
m_ts->db.add_spelling(term);
}
#endif
// Index the prefixed term.
if (!m_ts->prefix.empty()) {
m_ts->doc.add_posting(m_ts->prefix + term, pos, m_ts->wdfinc);
if (!m_ts->ft.pfx.empty()) {
m_ts->doc.add_posting(m_ts->ft.pfx + term, pos,
m_ts->ft.wdfinc);
}
return true;
} XCATCHERROR(ermsg);
@ -1168,7 +1158,7 @@ public:
return;
}
m_ts->doc.add_posting(m_ts->prefix + page_break_term, pos);
m_ts->doc.add_posting(m_ts->ft.pfx + page_break_term, pos);
if (pos == m_lastpagepos) {
m_pageincr++;
LOGDEB2(("newpage: same pos, pageincr %d lastpagepos %d\n",
@ -1351,15 +1341,15 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
LOGDEB0(("Db::add: field [%s] pfx [%s] inc %d: [%s]\n",
meta_it->first.c_str(), ftp->pfx.c_str(), ftp->wdfinc,
meta_it->second.c_str()));
splitter.setprefix(ftp->pfx);
splitter.setwdfinc(ftp->wdfinc);
splitter.setTraits(*ftp);
if (!splitter.text_to_words(meta_it->second))
LOGDEB(("Db::addOrUpdate: split failed for %s\n",
meta_it->first.c_str()));
}
}
splitter.setprefix(string());
splitter.setwdfinc(1);
// Reset to no prefix and default params
splitter.setTraits(FieldTraits());
if (splitter.curpos < baseTextPosition)
splitter.basepos = baseTextPosition;
@ -1634,8 +1624,7 @@ bool Db::Native::docToXdocXattrOnly(TextSplitDb *splitter, const string &udi,
LOGDEB0(("Db::xattrOnly: field [%s] pfx [%s] inc %d: [%s]\n",
meta_it->first.c_str(), ftp->pfx.c_str(), ftp->wdfinc,
meta_it->second.c_str()));
splitter->setprefix(ftp->pfx);
splitter->setwdfinc(ftp->wdfinc);
splitter->setTraits(*ftp);
if (!splitter->text_to_words(meta_it->second))
LOGDEB(("Db::xattrOnly: split failed for %s\n",
meta_it->first.c_str()));

View File

@ -29,6 +29,10 @@
# (NOT CURRENTLY IMPLEMENTED) would automatically boost the weight of a
# caption-based field query (ie: caption:mytitle or title:mytitle) at query
# time.
#
# The pfxonly attribute can also be set on entries to express that terms
# from the field should be indexed only with a prefix (in general, field
# terms are indexed both with and without a prefix).
# The following entries are probably hard-coded in the C code and can't be
# changed at all.
@ -46,6 +50,7 @@ abstract = XS
filename = XSFN
rclUnsplitFN = XSFS
xapyear = Y
recipient = XTO
# Extension examples. These are actually used by default by Recoll, you can
# add your own to search for fields produced by the filters and not handled
@ -57,7 +62,6 @@ xapyear = Y
# I hereby commit to not using XY for Recoll:
# *** USE XY for beginning your local prefixes *** ie:
# myfield = XYMYPREF
recipient = XTO
[stored]
############################