Add flag qualifying field to be indexed exclusively with prefix

2014-07-23 15:28:16 +02:00 · 2014-07-23 15:28:16 +02:00 · 578511c3e2
commit 578511c3e2
parent c7a9aced05
4 changed files with 36 additions and 38 deletions
--- a/src/common/rclconfig.cpp
+++ b/src/common/rclconfig.cpp
@ -847,16 +847,19 @@ bool RclConfig::readFieldsConfig(const string& cnferrloc)
 	    ft.wdfinc = atoi(tval.c_str());
 	if (attrs.get("boost", tval))
 	    ft.boost = atof(tval.c_str());
+	if (attrs.get("pfxonly", tval))
+	    ft.pfxonly = stringToBool(tval);
 	m_fldtotraits[stringtolower(*it)] = ft;
 	LOGDEB2(("readFieldsConfig: [%s] -> [%s] %d %.1f\n", 
 		it->c_str(), ft.pfx.c_str(), ft.wdfinc, ft.boost));
    }

-    // Add prefixes for aliases  an build alias-to-canonic map while we're at it
-    // Having the aliases in the prefix map avoids an additional indirection
-    // at index time.
+    // Add prefixes for aliases and build alias-to-canonic map while
+    // we're at it. Having the aliases in the prefix map avoids an
+    // additional indirection at index time.
    tps = m_fields->getNames("aliases");
-    for (vector<string>::const_iterator it = tps.begin(); it != tps.end();it++) {
+    for (vector<string>::const_iterator it = tps.begin(); 
+         it != tps.end(); it++){
 	string canonic = stringtolower(*it); // canonic name
 	FieldTraits ft;
 	map<string, FieldTraits>::const_iterator pit = 
--- a/src/common/rclconfig.h
+++ b/src/common/rclconfig.h
@ -65,9 +65,11 @@ struct FieldTraits {
    string pfx; // indexing prefix, 
    int    wdfinc; // Index time term frequency increment (default 1)
    double boost; // Query time boost (default 1.0)
-    FieldTraits(int i, double f) {wdfinc = i; boost = f;}
-    FieldTraits() : wdfinc(1), boost(1.0) {}
-    FieldTraits(const string& s) : pfx(s), wdfinc(1), boost(1.0) {}
+    bool   pfxonly; // Suppress prefix-less indexing
+
+    FieldTraits() 
+        : wdfinc(1), boost(1.0), pfxonly(false)
+        {}
 };

 class RclConfig {
--- a/src/rcldb/rcldb.cpp
+++ b/src/rcldb/rcldb.cpp
@ -1060,8 +1060,7 @@ class TextSplitDb : public TextSplitP {
    Xapian::termpos curpos;

    TextSplitDb(Xapian::Document &d, TermProc *prc)
-	: TextSplitP(prc), 
-	  doc(d), basepos(1), curpos(0), wdfinc(1)
+	: TextSplitP(prc), doc(d), basepos(1), curpos(0)
    {}

    // Reimplement text_to_words to insert the begin and end anchor terms.
@ -1072,7 +1071,7 @@ class TextSplitDb : public TextSplitP {

 	try {
 	    // Index the possibly prefixed start term.
-	    doc.add_posting(prefix + start_of_field_term, basepos, wdfinc);
+	    doc.add_posting(ft.pfx + start_of_field_term, basepos, ft.wdfinc);
 	    ++basepos;
 	} XCATCHERROR(ermsg);
 	if (!ermsg.empty()) {
@ -1087,8 +1086,8 @@ class TextSplitDb : public TextSplitP {

 	try {
 	    // Index the possibly prefixed end term.
-	    doc.add_posting(prefix + end_of_field_term, basepos + curpos + 1, 
-			    wdfinc);
+	    doc.add_posting(ft.pfx + end_of_field_term, basepos + curpos + 1,
+			    ft.wdfinc);
 	    ++basepos;
 	} XCATCHERROR(ermsg);
 	if (!ermsg.empty()) {
@ -1103,27 +1102,15 @@ class TextSplitDb : public TextSplitP {
 	return true;
    }

-    void setprefix(const string& pref) 
+    void setTraits(const FieldTraits& ftp) 
    {
-	if (pref.empty())
-	    prefix.clear();
-	else
-	    prefix = wrap_prefix(pref);
-    }
-
-    void setwdfinc(int i) 
-    {
-	wdfinc = i;
+        ft = ftp;
    }

    friend class TermProcIdx;

 private:
-    // If prefix is set, we also add a posting for the prefixed terms
-    // (ie: for titles, add postings for both "term" and "Sterm")
-    string  prefix; 
-    // Some fields have more weight
-    int wdfinc;
+    FieldTraits ft;
 };

 class TermProcIdx : public TermProc {
@ -1145,15 +1132,18 @@ public:
 	try {
 	    // Index without prefix, using the field-specific weighting
 	    LOGDEB1(("Emitting term at %d : [%s]\n", pos, term.c_str()));
-	    m_ts->doc.add_posting(term, pos, m_ts->wdfinc);
+            if (!m_ts->ft.pfxonly)
+                m_ts->doc.add_posting(term, pos, m_ts->ft.wdfinc);
+
 #ifdef TESTING_XAPIAN_SPELL
 	    if (Db::isSpellingCandidate(term)) {
 		m_ts->db.add_spelling(term);
 	    }
 #endif
 	    // Index the prefixed term.
-	    if (!m_ts->prefix.empty()) {
-		m_ts->doc.add_posting(m_ts->prefix + term, pos, m_ts->wdfinc);
+	    if (!m_ts->ft.pfx.empty()) {
+		m_ts->doc.add_posting(m_ts->ft.pfx + term, pos, 
+                                      m_ts->ft.wdfinc);
 	    }
 	    return true;
 	} XCATCHERROR(ermsg);
@ -1168,7 +1158,7 @@ public:
 	    return;
 	}

-	m_ts->doc.add_posting(m_ts->prefix + page_break_term, pos);
+	m_ts->doc.add_posting(m_ts->ft.pfx + page_break_term, pos);
 	if (pos == m_lastpagepos) {
 	    m_pageincr++;
 	    LOGDEB2(("newpage: same pos, pageincr %d lastpagepos %d\n", 
@ -1351,15 +1341,15 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
 		LOGDEB0(("Db::add: field [%s] pfx [%s] inc %d: [%s]\n", 
 			 meta_it->first.c_str(), ftp->pfx.c_str(), ftp->wdfinc,
 			 meta_it->second.c_str()));
-		splitter.setprefix(ftp->pfx);
-		splitter.setwdfinc(ftp->wdfinc);
+                splitter.setTraits(*ftp);
 		if (!splitter.text_to_words(meta_it->second))
 		    LOGDEB(("Db::addOrUpdate: split failed for %s\n", 
 			    meta_it->first.c_str()));
 	    }
 	}
-	splitter.setprefix(string());
-	splitter.setwdfinc(1);
+
+        // Reset to no prefix and default params
+        splitter.setTraits(FieldTraits());

 	if (splitter.curpos < baseTextPosition)
 	    splitter.basepos = baseTextPosition;
@ -1634,8 +1624,7 @@ bool Db::Native::docToXdocXattrOnly(TextSplitDb *splitter, const string &udi,
 	LOGDEB0(("Db::xattrOnly: field [%s] pfx [%s] inc %d: [%s]\n", 
 		 meta_it->first.c_str(), ftp->pfx.c_str(), ftp->wdfinc,
 		 meta_it->second.c_str()));
-	splitter->setprefix(ftp->pfx);
-	splitter->setwdfinc(ftp->wdfinc);
+	splitter->setTraits(*ftp);
 	if (!splitter->text_to_words(meta_it->second))
 	    LOGDEB(("Db::xattrOnly: split failed for %s\n", 
 		    meta_it->first.c_str()));
--- a/src/sampleconf/fields
+++ b/src/sampleconf/fields
@ -29,6 +29,10 @@
 # (NOT CURRENTLY IMPLEMENTED) would automatically boost the weight of a
 # caption-based field query (ie: caption:mytitle or title:mytitle) at query
 # time.
+#
+# The pfxonly attribute can also be set on an entry to specify that terms
+# from the field should be indexed only with the prefix (by default, field
+# terms are indexed both with and without the prefix).

 # The following ones are probably hard-coded in the c code, can't change at
 # all.
@ -46,6 +50,7 @@ abstract = XS
 filename = XSFN
 rclUnsplitFN = XSFS
 xapyear = Y
+recipient = XTO

 # Extension examples. These are actually used by default by Recoll, you can
 # add your own to search for fields produced by the filters and not handled
@ -57,7 +62,6 @@ xapyear = Y
 # I hereby commit to not using XY for Recoll:
 # *** USE XY for beginning your local prefixes *** ie:
 # myfield = XYMYPREF
-recipient = XTO

 [stored]
 ############################