Add a flag marking a field to be indexed exclusively with its prefix
This commit is contained in:
parent
c7a9aced05
commit
578511c3e2
@ -847,16 +847,19 @@ bool RclConfig::readFieldsConfig(const string& cnferrloc)
|
||||
ft.wdfinc = atoi(tval.c_str());
|
||||
if (attrs.get("boost", tval))
|
||||
ft.boost = atof(tval.c_str());
|
||||
if (attrs.get("pfxonly", tval))
|
||||
ft.pfxonly = stringToBool(tval);
|
||||
m_fldtotraits[stringtolower(*it)] = ft;
|
||||
LOGDEB2(("readFieldsConfig: [%s] -> [%s] %d %.1f\n",
|
||||
it->c_str(), ft.pfx.c_str(), ft.wdfinc, ft.boost));
|
||||
}
|
||||
|
||||
// Add prefixes for aliases an build alias-to-canonic map while we're at it
|
||||
// Having the aliases in the prefix map avoids an additional indirection
|
||||
// at index time.
|
||||
// Add prefixes for aliases and build alias-to-canonic map while
|
||||
// we're at it. Having the aliases in the prefix map avoids an
|
||||
// additional indirection at index time.
|
||||
tps = m_fields->getNames("aliases");
|
||||
for (vector<string>::const_iterator it = tps.begin(); it != tps.end();it++) {
|
||||
for (vector<string>::const_iterator it = tps.begin();
|
||||
it != tps.end(); it++){
|
||||
string canonic = stringtolower(*it); // canonic name
|
||||
FieldTraits ft;
|
||||
map<string, FieldTraits>::const_iterator pit =
|
||||
|
||||
@ -65,9 +65,11 @@ struct FieldTraits {
|
||||
string pfx; // indexing prefix,
|
||||
int wdfinc; // Index time term frequency increment (default 1)
|
||||
double boost; // Query time boost (default 1.0)
|
||||
FieldTraits(int i, double f) {wdfinc = i; boost = f;}
|
||||
FieldTraits() : wdfinc(1), boost(1.0) {}
|
||||
FieldTraits(const string& s) : pfx(s), wdfinc(1), boost(1.0) {}
|
||||
bool pfxonly; // Suppress prefix-less indexing
|
||||
|
||||
FieldTraits()
|
||||
: wdfinc(1), boost(1.0), pfxonly(false)
|
||||
{}
|
||||
};
|
||||
|
||||
class RclConfig {
|
||||
|
||||
@ -1060,8 +1060,7 @@ class TextSplitDb : public TextSplitP {
|
||||
Xapian::termpos curpos;
|
||||
|
||||
TextSplitDb(Xapian::Document &d, TermProc *prc)
|
||||
: TextSplitP(prc),
|
||||
doc(d), basepos(1), curpos(0), wdfinc(1)
|
||||
: TextSplitP(prc), doc(d), basepos(1), curpos(0)
|
||||
{}
|
||||
|
||||
// Reimplement text_to_words to insert the begin and end anchor terms.
|
||||
@ -1072,7 +1071,7 @@ class TextSplitDb : public TextSplitP {
|
||||
|
||||
try {
|
||||
// Index the possibly prefixed start term.
|
||||
doc.add_posting(prefix + start_of_field_term, basepos, wdfinc);
|
||||
doc.add_posting(ft.pfx + start_of_field_term, basepos, ft.wdfinc);
|
||||
++basepos;
|
||||
} XCATCHERROR(ermsg);
|
||||
if (!ermsg.empty()) {
|
||||
@ -1087,8 +1086,8 @@ class TextSplitDb : public TextSplitP {
|
||||
|
||||
try {
|
||||
// Index the possibly prefixed end term.
|
||||
doc.add_posting(prefix + end_of_field_term, basepos + curpos + 1,
|
||||
wdfinc);
|
||||
doc.add_posting(ft.pfx + end_of_field_term, basepos + curpos + 1,
|
||||
ft.wdfinc);
|
||||
++basepos;
|
||||
} XCATCHERROR(ermsg);
|
||||
if (!ermsg.empty()) {
|
||||
@ -1103,27 +1102,15 @@ class TextSplitDb : public TextSplitP {
|
||||
return true;
|
||||
}
|
||||
|
||||
void setprefix(const string& pref)
|
||||
void setTraits(const FieldTraits& ftp)
|
||||
{
|
||||
if (pref.empty())
|
||||
prefix.clear();
|
||||
else
|
||||
prefix = wrap_prefix(pref);
|
||||
}
|
||||
|
||||
void setwdfinc(int i)
|
||||
{
|
||||
wdfinc = i;
|
||||
ft = ftp;
|
||||
}
|
||||
|
||||
friend class TermProcIdx;
|
||||
|
||||
private:
|
||||
// If prefix is set, we also add a posting for the prefixed terms
|
||||
// (ie: for titles, add postings for both "term" and "Sterm")
|
||||
string prefix;
|
||||
// Some fields have more weight
|
||||
int wdfinc;
|
||||
FieldTraits ft;
|
||||
};
|
||||
|
||||
class TermProcIdx : public TermProc {
|
||||
@ -1145,15 +1132,18 @@ public:
|
||||
try {
|
||||
// Index without prefix, using the field-specific weighting
|
||||
LOGDEB1(("Emitting term at %d : [%s]\n", pos, term.c_str()));
|
||||
m_ts->doc.add_posting(term, pos, m_ts->wdfinc);
|
||||
if (!m_ts->ft.pfxonly)
|
||||
m_ts->doc.add_posting(term, pos, m_ts->ft.wdfinc);
|
||||
|
||||
#ifdef TESTING_XAPIAN_SPELL
|
||||
if (Db::isSpellingCandidate(term)) {
|
||||
m_ts->db.add_spelling(term);
|
||||
}
|
||||
#endif
|
||||
// Index the prefixed term.
|
||||
if (!m_ts->prefix.empty()) {
|
||||
m_ts->doc.add_posting(m_ts->prefix + term, pos, m_ts->wdfinc);
|
||||
if (!m_ts->ft.pfx.empty()) {
|
||||
m_ts->doc.add_posting(m_ts->ft.pfx + term, pos,
|
||||
m_ts->ft.wdfinc);
|
||||
}
|
||||
return true;
|
||||
} XCATCHERROR(ermsg);
|
||||
@ -1168,7 +1158,7 @@ public:
|
||||
return;
|
||||
}
|
||||
|
||||
m_ts->doc.add_posting(m_ts->prefix + page_break_term, pos);
|
||||
m_ts->doc.add_posting(m_ts->ft.pfx + page_break_term, pos);
|
||||
if (pos == m_lastpagepos) {
|
||||
m_pageincr++;
|
||||
LOGDEB2(("newpage: same pos, pageincr %d lastpagepos %d\n",
|
||||
@ -1351,15 +1341,15 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
|
||||
LOGDEB0(("Db::add: field [%s] pfx [%s] inc %d: [%s]\n",
|
||||
meta_it->first.c_str(), ftp->pfx.c_str(), ftp->wdfinc,
|
||||
meta_it->second.c_str()));
|
||||
splitter.setprefix(ftp->pfx);
|
||||
splitter.setwdfinc(ftp->wdfinc);
|
||||
splitter.setTraits(*ftp);
|
||||
if (!splitter.text_to_words(meta_it->second))
|
||||
LOGDEB(("Db::addOrUpdate: split failed for %s\n",
|
||||
meta_it->first.c_str()));
|
||||
}
|
||||
}
|
||||
splitter.setprefix(string());
|
||||
splitter.setwdfinc(1);
|
||||
|
||||
// Reset to no prefix and default params
|
||||
splitter.setTraits(FieldTraits());
|
||||
|
||||
if (splitter.curpos < baseTextPosition)
|
||||
splitter.basepos = baseTextPosition;
|
||||
@ -1634,8 +1624,7 @@ bool Db::Native::docToXdocXattrOnly(TextSplitDb *splitter, const string &udi,
|
||||
LOGDEB0(("Db::xattrOnly: field [%s] pfx [%s] inc %d: [%s]\n",
|
||||
meta_it->first.c_str(), ftp->pfx.c_str(), ftp->wdfinc,
|
||||
meta_it->second.c_str()));
|
||||
splitter->setprefix(ftp->pfx);
|
||||
splitter->setwdfinc(ftp->wdfinc);
|
||||
splitter->setTraits(*ftp);
|
||||
if (!splitter->text_to_words(meta_it->second))
|
||||
LOGDEB(("Db::xattrOnly: split failed for %s\n",
|
||||
meta_it->first.c_str()));
|
||||
|
||||
@ -29,6 +29,10 @@
|
||||
# (NOT CURRENTLY IMPLEMENTED) would automatically boost the weight of a
|
||||
# caption-based field query (ie: caption:mytitle or title:mytitle) at query
|
||||
# time.
|
||||
#
|
||||
# The pfxonly attribute can also be set on entries to express that terms
|
||||
# from the field should be indexed only with a prefix (in general, field
|
||||
# terms are indexed both with and without a prefix).
|
||||
|
||||
# The following ones are probably hard-coded in the c code, can't change at
|
||||
# all.
|
||||
@ -46,6 +50,7 @@ abstract = XS
|
||||
filename = XSFN
|
||||
rclUnsplitFN = XSFS
|
||||
xapyear = Y
|
||||
recipient = XTO
|
||||
|
||||
# Extension examples. These are actually used by default by Recoll, you can
|
||||
# add your own to search for fields produced by the filters and not handled
|
||||
@ -57,7 +62,6 @@ xapyear = Y
|
||||
# I hereby commit to not using XY for Recoll:
|
||||
# *** USE XY for beginning your local prefixes *** ie:
|
||||
# myfield = XYMYPREF
|
||||
recipient = XTO
|
||||
|
||||
[stored]
|
||||
############################
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user