Add a "pfxonly" field attribute: index a field's terms only with their prefix.

Summary of the changes carried by this patch:

  src/common/rclconfig.h
      FieldTraits gains a bool pfxonly member (default false). Only the
      default constructor remains; the (int, double) and (const string&)
      constructors are removed.
  src/common/rclconfig.cpp
      readFieldsConfig() sets FieldTraits::pfxonly from the "pfxonly"
      attribute of an entry, converted with stringToBool().
  src/rcldb/rcldb.cpp
      TextSplitDb keeps a FieldTraits member instead of separate prefix
      and wdfinc members; setprefix()/setwdfinc() are replaced by
      setTraits(). TermProcIdx skips the unprefixed posting when pfxonly
      is set. The two metadata indexing loops call setTraits().
  src/sampleconf/fields
      Documents pfxonly and moves "recipient = XTO" up, next to
      "xapyear = Y".

Review notes:

  - setTraits() wraps a non-empty prefix with wrap_prefix(), exactly as the
    removed setprefix() did. Without this the anchor, term and page-break
    postings would be built from the raw configured prefix. The new-side
    line counts and offsets of the rcldb.cpp hunks account for the extra
    lines; the rcldb.cpp "index" line still names the original post-image
    blob.
  - The line structure of this patch was restored from a copy that had lost
    its line breaks. Indentation is reconstructed, as are the template
    arguments of the two iterator declarations in rclconfig.cpp: apply with
    whitespace tolerance (patch -l, or git apply --ignore-whitespace) and
    check those two lines against the tree.

diff --git a/src/common/rclconfig.cpp b/src/common/rclconfig.cpp
index f75931f3..0f6479d3 100644
--- a/src/common/rclconfig.cpp
+++ b/src/common/rclconfig.cpp
@@ -847,16 +847,19 @@ bool RclConfig::readFieldsConfig(const string& cnferrloc)
             ft.wdfinc = atoi(tval.c_str());
         if (attrs.get("boost", tval))
             ft.boost = atof(tval.c_str());
+        if (attrs.get("pfxonly", tval))
+            ft.pfxonly = stringToBool(tval);
         m_fldtotraits[stringtolower(*it)] = ft;
         LOGDEB2(("readFieldsConfig: [%s] -> [%s] %d %.1f\n",
                  it->c_str(), ft.pfx.c_str(), ft.wdfinc, ft.boost));
     }
 
-    // Add prefixes for aliases an build alias-to-canonic map while we're at it
-    // Having the aliases in the prefix map avoids an additional indirection
-    // at index time.
+    // Add prefixes for aliases and build alias-to-canonic map while
+    // we're at it. Having the aliases in the prefix map avoids an
+    // additional indirection at index time.
     tps = m_fields->getNames("aliases");
-    for (vector<string>::const_iterator it = tps.begin(); it != tps.end();it++) {
+    for (vector<string>::const_iterator it = tps.begin();
+         it != tps.end(); it++){
         string canonic = stringtolower(*it); // canonic name
         FieldTraits ft;
         map<string, FieldTraits>::const_iterator pit =
diff --git a/src/common/rclconfig.h b/src/common/rclconfig.h
index 6868721f..a28b59d9 100644
--- a/src/common/rclconfig.h
+++ b/src/common/rclconfig.h
@@ -65,9 +65,11 @@ struct FieldTraits {
     string pfx; // indexing prefix,
     int wdfinc; // Index time term frequency increment (default 1)
     double boost; // Query time boost (default 1.0)
-    FieldTraits(int i, double f) {wdfinc = i; boost = f;}
-    FieldTraits() : wdfinc(1), boost(1.0) {}
-    FieldTraits(const string& s) : pfx(s), wdfinc(1), boost(1.0) {}
+    bool pfxonly; // Suppress prefix-less indexing
+
+    FieldTraits()
+        : wdfinc(1), boost(1.0), pfxonly(false)
+    {}
 };
 
 class RclConfig {
diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp
index d8ae9c80..a8bbc799 100644
--- a/src/rcldb/rcldb.cpp
+++ b/src/rcldb/rcldb.cpp
@@ -1060,8 +1060,7 @@ class TextSplitDb : public TextSplitP {
     Xapian::termpos curpos;
 
     TextSplitDb(Xapian::Document &d, TermProc *prc)
-        : TextSplitP(prc),
-          doc(d), basepos(1), curpos(0), wdfinc(1)
+        : TextSplitP(prc), doc(d), basepos(1), curpos(0)
     {}
 
     // Reimplement text_to_words to insert the begin and end anchor terms.
@@ -1072,7 +1071,7 @@ class TextSplitDb : public TextSplitP {
 
         try {
             // Index the possibly prefixed start term.
-            doc.add_posting(prefix + start_of_field_term, basepos, wdfinc);
+            doc.add_posting(ft.pfx + start_of_field_term, basepos, ft.wdfinc);
             ++basepos;
         } XCATCHERROR(ermsg);
         if (!ermsg.empty()) {
@@ -1087,8 +1086,8 @@ class TextSplitDb : public TextSplitP {
 
         try {
             // Index the possibly prefixed end term.
-            doc.add_posting(prefix + end_of_field_term, basepos + curpos + 1,
-                            wdfinc);
+            doc.add_posting(ft.pfx + end_of_field_term, basepos + curpos + 1,
+                            ft.wdfinc);
             ++basepos;
         } XCATCHERROR(ermsg);
         if (!ermsg.empty()) {
@@ -1103,27 +1102,22 @@ class TextSplitDb : public TextSplitP {
         return true;
     }
 
-    void setprefix(const string& pref)
+    // Install the traits (prefix, wdf increment, prefix-only flag) used
+    // for the terms of the field about to be split. The prefix is kept
+    // in wrapped form, as setprefix() did, because it is prepended
+    // verbatim to every term posted for the field.
+    void setTraits(const FieldTraits& ftp)
     {
-        if (pref.empty())
-            prefix.clear();
-        else
-            prefix = wrap_prefix(pref);
-    }
-
-    void setwdfinc(int i)
-    {
-        wdfinc = i;
+        ft = ftp;
+        if (!ft.pfx.empty())
+            ft.pfx = wrap_prefix(ft.pfx);
     }
 
     friend class TermProcIdx;
 
 private:
-    // If prefix is set, we also add a posting for the prefixed terms
-    // (ie: for titles, add postings for both "term" and "Sterm")
-    string prefix;
-    // Some fields have more weight
-    int wdfinc;
+    // Traits of the field being split; pfx holds the wrapped prefix
+    FieldTraits ft;
 };
 
 class TermProcIdx : public TermProc {
@@ -1145,15 +1139,18 @@ public:
         try {
             // Index without prefix, using the field-specific weighting
             LOGDEB1(("Emitting term at %d : [%s]\n", pos, term.c_str()));
-            m_ts->doc.add_posting(term, pos, m_ts->wdfinc);
+            if (!m_ts->ft.pfxonly)
+                m_ts->doc.add_posting(term, pos, m_ts->ft.wdfinc);
+
 #ifdef TESTING_XAPIAN_SPELL
             if (Db::isSpellingCandidate(term)) {
                 m_ts->db.add_spelling(term);
             }
 #endif
             // Index the prefixed term.
-            if (!m_ts->prefix.empty()) {
-                m_ts->doc.add_posting(m_ts->prefix + term, pos, m_ts->wdfinc);
+            if (!m_ts->ft.pfx.empty()) {
+                m_ts->doc.add_posting(m_ts->ft.pfx + term, pos,
+                                      m_ts->ft.wdfinc);
             }
             return true;
         } XCATCHERROR(ermsg);
@@ -1168,7 +1165,7 @@ public:
             return;
         }
 
-        m_ts->doc.add_posting(m_ts->prefix + page_break_term, pos);
+        m_ts->doc.add_posting(m_ts->ft.pfx + page_break_term, pos);
         if (pos == m_lastpagepos) {
             m_pageincr++;
             LOGDEB2(("newpage: same pos, pageincr %d lastpagepos %d\n",
@@ -1351,15 +1348,15 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
                 LOGDEB0(("Db::add: field [%s] pfx [%s] inc %d: [%s]\n",
                          meta_it->first.c_str(), ftp->pfx.c_str(),
                          ftp->wdfinc, meta_it->second.c_str()));
-                splitter.setprefix(ftp->pfx);
-                splitter.setwdfinc(ftp->wdfinc);
+                splitter.setTraits(*ftp);
                 if (!splitter.text_to_words(meta_it->second))
                     LOGDEB(("Db::addOrUpdate: split failed for %s\n",
                             meta_it->first.c_str()));
             }
         }
-        splitter.setprefix(string());
-        splitter.setwdfinc(1);
+
+        // Reset to no prefix and default params
+        splitter.setTraits(FieldTraits());
 
         if (splitter.curpos < baseTextPosition)
             splitter.basepos = baseTextPosition;
@@ -1634,8 +1631,7 @@ bool Db::Native::docToXdocXattrOnly(TextSplitDb *splitter, const string &udi,
             LOGDEB0(("Db::xattrOnly: field [%s] pfx [%s] inc %d: [%s]\n",
                      meta_it->first.c_str(), ftp->pfx.c_str(),
                      ftp->wdfinc, meta_it->second.c_str()));
-            splitter->setprefix(ftp->pfx);
-            splitter->setwdfinc(ftp->wdfinc);
+            splitter->setTraits(*ftp);
             if (!splitter->text_to_words(meta_it->second))
                 LOGDEB(("Db::xattrOnly: split failed for %s\n",
                         meta_it->first.c_str()));
diff --git a/src/sampleconf/fields b/src/sampleconf/fields
index 09931099..47ea0d01 100644
--- a/src/sampleconf/fields
+++ b/src/sampleconf/fields
@@ -29,6 +29,10 @@
 # (NOT CURRENTLY IMPLEMENTED) would automatically boost the weight of a
 # caption-based field query (ie: caption:mytitle or title:mytitle) at query
 # time.
+#
+# The pfxonly attribute can also be set on entries to express that terms
+# from the field should be indexed only with a prefix (in general, field
+# terms are indexed both with and without a prefix).
 
 # The following ones are probably hard-coded in the c code, can't change at
 # all.
@@ -46,6 +50,7 @@ abstract = XS
 filename = XSFN
 rclUnsplitFN = XSFS
 xapyear = Y
+recipient = XTO
 
 # Extension examples. These are actually used by default by Recoll, you can
 # add your own to search for fields produced by the filters and not handled
@@ -57,7 +62,6 @@ xapyear = Y
 # I hereby commit to not using XY for Recoll:
 # *** USE XY for beginning your local prefixes *** ie:
 # myfield = XYMYPREF
-recipient = XTO
 
 [stored]
 ############################