Add a flag marking a field to be indexed exclusively with its prefix
This commit is contained in:
parent
c7a9aced05
commit
578511c3e2
@ -847,16 +847,19 @@ bool RclConfig::readFieldsConfig(const string& cnferrloc)
|
|||||||
ft.wdfinc = atoi(tval.c_str());
|
ft.wdfinc = atoi(tval.c_str());
|
||||||
if (attrs.get("boost", tval))
|
if (attrs.get("boost", tval))
|
||||||
ft.boost = atof(tval.c_str());
|
ft.boost = atof(tval.c_str());
|
||||||
|
if (attrs.get("pfxonly", tval))
|
||||||
|
ft.pfxonly = stringToBool(tval);
|
||||||
m_fldtotraits[stringtolower(*it)] = ft;
|
m_fldtotraits[stringtolower(*it)] = ft;
|
||||||
LOGDEB2(("readFieldsConfig: [%s] -> [%s] %d %.1f\n",
|
LOGDEB2(("readFieldsConfig: [%s] -> [%s] %d %.1f\n",
|
||||||
it->c_str(), ft.pfx.c_str(), ft.wdfinc, ft.boost));
|
it->c_str(), ft.pfx.c_str(), ft.wdfinc, ft.boost));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add prefixes for aliases an build alias-to-canonic map while we're at it
|
// Add prefixes for aliases and build alias-to-canonic map while
|
||||||
// Having the aliases in the prefix map avoids an additional indirection
|
// we're at it. Having the aliases in the prefix map avoids an
|
||||||
// at index time.
|
// additional indirection at index time.
|
||||||
tps = m_fields->getNames("aliases");
|
tps = m_fields->getNames("aliases");
|
||||||
for (vector<string>::const_iterator it = tps.begin(); it != tps.end();it++) {
|
for (vector<string>::const_iterator it = tps.begin();
|
||||||
|
it != tps.end(); it++){
|
||||||
string canonic = stringtolower(*it); // canonic name
|
string canonic = stringtolower(*it); // canonic name
|
||||||
FieldTraits ft;
|
FieldTraits ft;
|
||||||
map<string, FieldTraits>::const_iterator pit =
|
map<string, FieldTraits>::const_iterator pit =
|
||||||
|
|||||||
@ -65,9 +65,11 @@ struct FieldTraits {
|
|||||||
string pfx; // indexing prefix,
|
string pfx; // indexing prefix,
|
||||||
int wdfinc; // Index time term frequency increment (default 1)
|
int wdfinc; // Index time term frequency increment (default 1)
|
||||||
double boost; // Query time boost (default 1.0)
|
double boost; // Query time boost (default 1.0)
|
||||||
FieldTraits(int i, double f) {wdfinc = i; boost = f;}
|
bool pfxonly; // Suppress prefix-less indexing
|
||||||
FieldTraits() : wdfinc(1), boost(1.0) {}
|
|
||||||
FieldTraits(const string& s) : pfx(s), wdfinc(1), boost(1.0) {}
|
FieldTraits()
|
||||||
|
: wdfinc(1), boost(1.0), pfxonly(false)
|
||||||
|
{}
|
||||||
};
|
};
|
||||||
|
|
||||||
class RclConfig {
|
class RclConfig {
|
||||||
|
|||||||
@ -1060,8 +1060,7 @@ class TextSplitDb : public TextSplitP {
|
|||||||
Xapian::termpos curpos;
|
Xapian::termpos curpos;
|
||||||
|
|
||||||
TextSplitDb(Xapian::Document &d, TermProc *prc)
|
TextSplitDb(Xapian::Document &d, TermProc *prc)
|
||||||
: TextSplitP(prc),
|
: TextSplitP(prc), doc(d), basepos(1), curpos(0)
|
||||||
doc(d), basepos(1), curpos(0), wdfinc(1)
|
|
||||||
{}
|
{}
|
||||||
|
|
||||||
// Reimplement text_to_words to insert the begin and end anchor terms.
|
// Reimplement text_to_words to insert the begin and end anchor terms.
|
||||||
@ -1072,7 +1071,7 @@ class TextSplitDb : public TextSplitP {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
// Index the possibly prefixed start term.
|
// Index the possibly prefixed start term.
|
||||||
doc.add_posting(prefix + start_of_field_term, basepos, wdfinc);
|
doc.add_posting(ft.pfx + start_of_field_term, basepos, ft.wdfinc);
|
||||||
++basepos;
|
++basepos;
|
||||||
} XCATCHERROR(ermsg);
|
} XCATCHERROR(ermsg);
|
||||||
if (!ermsg.empty()) {
|
if (!ermsg.empty()) {
|
||||||
@ -1087,8 +1086,8 @@ class TextSplitDb : public TextSplitP {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
// Index the possibly prefixed end term.
|
// Index the possibly prefixed end term.
|
||||||
doc.add_posting(prefix + end_of_field_term, basepos + curpos + 1,
|
doc.add_posting(ft.pfx + end_of_field_term, basepos + curpos + 1,
|
||||||
wdfinc);
|
ft.wdfinc);
|
||||||
++basepos;
|
++basepos;
|
||||||
} XCATCHERROR(ermsg);
|
} XCATCHERROR(ermsg);
|
||||||
if (!ermsg.empty()) {
|
if (!ermsg.empty()) {
|
||||||
@ -1103,27 +1102,15 @@ class TextSplitDb : public TextSplitP {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void setprefix(const string& pref)
|
void setTraits(const FieldTraits& ftp)
|
||||||
{
|
{
|
||||||
if (pref.empty())
|
ft = ftp;
|
||||||
prefix.clear();
|
|
||||||
else
|
|
||||||
prefix = wrap_prefix(pref);
|
|
||||||
}
|
|
||||||
|
|
||||||
void setwdfinc(int i)
|
|
||||||
{
|
|
||||||
wdfinc = i;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
friend class TermProcIdx;
|
friend class TermProcIdx;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// If prefix is set, we also add a posting for the prefixed terms
|
FieldTraits ft;
|
||||||
// (ie: for titles, add postings for both "term" and "Sterm")
|
|
||||||
string prefix;
|
|
||||||
// Some fields have more weight
|
|
||||||
int wdfinc;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
class TermProcIdx : public TermProc {
|
class TermProcIdx : public TermProc {
|
||||||
@ -1145,15 +1132,18 @@ public:
|
|||||||
try {
|
try {
|
||||||
// Index without prefix, using the field-specific weighting
|
// Index without prefix, using the field-specific weighting (skipped when the field is flagged pfxonly)
|
||||||
LOGDEB1(("Emitting term at %d : [%s]\n", pos, term.c_str()));
|
LOGDEB1(("Emitting term at %d : [%s]\n", pos, term.c_str()));
|
||||||
m_ts->doc.add_posting(term, pos, m_ts->wdfinc);
|
if (!m_ts->ft.pfxonly)
|
||||||
|
m_ts->doc.add_posting(term, pos, m_ts->ft.wdfinc);
|
||||||
|
|
||||||
#ifdef TESTING_XAPIAN_SPELL
|
#ifdef TESTING_XAPIAN_SPELL
|
||||||
if (Db::isSpellingCandidate(term)) {
|
if (Db::isSpellingCandidate(term)) {
|
||||||
m_ts->db.add_spelling(term);
|
m_ts->db.add_spelling(term);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
// Index the prefixed term.
|
// Index the prefixed term.
|
||||||
if (!m_ts->prefix.empty()) {
|
if (!m_ts->ft.pfx.empty()) {
|
||||||
m_ts->doc.add_posting(m_ts->prefix + term, pos, m_ts->wdfinc);
|
m_ts->doc.add_posting(m_ts->ft.pfx + term, pos,
|
||||||
|
m_ts->ft.wdfinc);
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
} XCATCHERROR(ermsg);
|
} XCATCHERROR(ermsg);
|
||||||
@ -1168,7 +1158,7 @@ public:
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
m_ts->doc.add_posting(m_ts->prefix + page_break_term, pos);
|
m_ts->doc.add_posting(m_ts->ft.pfx + page_break_term, pos);
|
||||||
if (pos == m_lastpagepos) {
|
if (pos == m_lastpagepos) {
|
||||||
m_pageincr++;
|
m_pageincr++;
|
||||||
LOGDEB2(("newpage: same pos, pageincr %d lastpagepos %d\n",
|
LOGDEB2(("newpage: same pos, pageincr %d lastpagepos %d\n",
|
||||||
@ -1351,15 +1341,15 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
|
|||||||
LOGDEB0(("Db::add: field [%s] pfx [%s] inc %d: [%s]\n",
|
LOGDEB0(("Db::add: field [%s] pfx [%s] inc %d: [%s]\n",
|
||||||
meta_it->first.c_str(), ftp->pfx.c_str(), ftp->wdfinc,
|
meta_it->first.c_str(), ftp->pfx.c_str(), ftp->wdfinc,
|
||||||
meta_it->second.c_str()));
|
meta_it->second.c_str()));
|
||||||
splitter.setprefix(ftp->pfx);
|
splitter.setTraits(*ftp);
|
||||||
splitter.setwdfinc(ftp->wdfinc);
|
|
||||||
if (!splitter.text_to_words(meta_it->second))
|
if (!splitter.text_to_words(meta_it->second))
|
||||||
LOGDEB(("Db::addOrUpdate: split failed for %s\n",
|
LOGDEB(("Db::addOrUpdate: split failed for %s\n",
|
||||||
meta_it->first.c_str()));
|
meta_it->first.c_str()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
splitter.setprefix(string());
|
|
||||||
splitter.setwdfinc(1);
|
// Reset to no prefix and default params
|
||||||
|
splitter.setTraits(FieldTraits());
|
||||||
|
|
||||||
if (splitter.curpos < baseTextPosition)
|
if (splitter.curpos < baseTextPosition)
|
||||||
splitter.basepos = baseTextPosition;
|
splitter.basepos = baseTextPosition;
|
||||||
@ -1634,8 +1624,7 @@ bool Db::Native::docToXdocXattrOnly(TextSplitDb *splitter, const string &udi,
|
|||||||
LOGDEB0(("Db::xattrOnly: field [%s] pfx [%s] inc %d: [%s]\n",
|
LOGDEB0(("Db::xattrOnly: field [%s] pfx [%s] inc %d: [%s]\n",
|
||||||
meta_it->first.c_str(), ftp->pfx.c_str(), ftp->wdfinc,
|
meta_it->first.c_str(), ftp->pfx.c_str(), ftp->wdfinc,
|
||||||
meta_it->second.c_str()));
|
meta_it->second.c_str()));
|
||||||
splitter->setprefix(ftp->pfx);
|
splitter->setTraits(*ftp);
|
||||||
splitter->setwdfinc(ftp->wdfinc);
|
|
||||||
if (!splitter->text_to_words(meta_it->second))
|
if (!splitter->text_to_words(meta_it->second))
|
||||||
LOGDEB(("Db::xattrOnly: split failed for %s\n",
|
LOGDEB(("Db::xattrOnly: split failed for %s\n",
|
||||||
meta_it->first.c_str()));
|
meta_it->first.c_str()));
|
||||||
|
|||||||
@ -29,6 +29,10 @@
|
|||||||
# (NOT CURRENTLY IMPLEMENTED) would automatically boost the weight of a
|
# (NOT CURRENTLY IMPLEMENTED) would automatically boost the weight of a
|
||||||
# caption-based field query (ie: caption:mytitle or title:mytitle) at query
|
# caption-based field query (ie: caption:mytitle or title:mytitle) at query
|
||||||
# time.
|
# time.
|
||||||
|
#
|
||||||
|
# The pfxonly attribute can also be set on entries to express that terms
|
||||||
|
# from the field should be indexed only with a prefix (in general, field
|
||||||
|
# terms are indexed both with and without a prefix).
|
||||||
|
|
||||||
# The following ones are probably hard-coded in the c code, can't change at
|
# The following ones are probably hard-coded in the c code, can't change at
|
||||||
# all.
|
# all.
|
||||||
@ -46,6 +50,7 @@ abstract = XS
|
|||||||
filename = XSFN
|
filename = XSFN
|
||||||
rclUnsplitFN = XSFS
|
rclUnsplitFN = XSFS
|
||||||
xapyear = Y
|
xapyear = Y
|
||||||
|
recipient = XTO
|
||||||
|
|
||||||
# Extension examples. These are actually used by default by Recoll, you can
|
# Extension examples. These are actually used by default by Recoll, you can
|
||||||
# add your own to search for fields produced by the filters and not handled
|
# add your own to search for fields produced by the filters and not handled
|
||||||
@ -57,7 +62,6 @@ xapyear = Y
|
|||||||
# I hereby commit to not using XY for Recoll:
|
# I hereby commit to not using XY for Recoll:
|
||||||
# *** USE XY for beginning your local prefixes *** ie:
|
# *** USE XY for beginning your local prefixes *** ie:
|
||||||
# myfield = XYMYPREF
|
# myfield = XYMYPREF
|
||||||
recipient = XTO
|
|
||||||
|
|
||||||
[stored]
|
[stored]
|
||||||
############################
|
############################
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user