Changed new param name storerawtext->storedoctext. + comments

2018-01-02 19:23:12 +01:00 · 2018-01-02 19:23:12 +01:00 · bb810f9ceb
commit bb810f9ceb
parent b4493ed9e1
7 changed files with 22 additions and 17 deletions
--- a/src/common/rclconfig.cpp
+++ b/src/common/rclconfig.cpp
@ -64,11 +64,10 @@ bool o_index_stripchars = true;
 // instead of building them from index position data. Has become
 // necessary for versions of Xapian 1.6, which have dropped support
 // for the chert index format, and adopted a setup which renders our
-// use of positions list unacceptably slow in cases. 'raw' text here
-// means that the text is not stripped of upper-case, diacritics, or
-// punctuation signs. It is still translated from its original format
-// to UTF-8 plain text.
-bool o_index_storerawtext = false;
+// use of positions list unacceptably slow in cases. The text is just
+// translated from its original format to UTF-8 plain text, and is not
+// stripped of upper-case, diacritics, or punctuation signs.
+bool o_index_storedoctext = false;

 bool o_uptodate_test_use_mtime = false;

@ -401,7 +400,7 @@ bool RclConfig::updateMainConfig()
    static int m_index_stripchars_init = 0;
    if (!m_index_stripchars_init) {
 	getConfParam("indexStripChars", &o_index_stripchars);
-        getConfParam("indexStoreRawText", &o_index_storerawtext);
+        getConfParam("indexStoreDocText", &o_index_storedoctext);
        getConfParam("testmodifusemtime", &o_uptodate_test_use_mtime);
 	m_index_stripchars_init = 1;
    }
--- a/src/common/rclconfig.h
+++ b/src/common/rclconfig.h
@ -446,7 +446,7 @@ extern bool o_index_stripchars;
 // means that the text is not stripped of upper-case, diacritics, or
 // punctuation signs. It is still translated from its original format
 // to UTF-8 plain text.
-extern bool o_index_storerawtext;
+extern bool o_index_storedoctext;

 // This global variable defines if we use mtime instead of ctime for
 // up-to-date tests. This is mostly incompatible with xattr indexing,
--- a/src/rcldb/rclabsfromtext.cpp
+++ b/src/rcldb/rclabsfromtext.cpp
@ -242,9 +242,9 @@ int Query::Native::abstractFromText(
        return ABSRES_ERROR;
    }

-    // tryout the xapian internal method.
 #if 0 && ! (XAPIAN_MAJOR_VERSION <= 1 && XAPIAN_MINOR_VERSION <= 2)  && \
    (defined(RAWTEXT_IN_DATA) || defined(RAWTEXT_IN_VALUE))
+    // Try out the Xapian internal method.
    string snippet = xmset.snippet(rawtext);
    LOGDEB("SNIPPET: [" << snippet << "] END SNIPPET\n");
 #endif
--- a/src/rcldb/rclabstract.cpp
+++ b/src/rcldb/rclabstract.cpp
@ -649,7 +649,7 @@ int Query::Native::makeAbstract(Xapian::docid docid,
    LOGABS("makeAbstract:" << chron.millis() << "mS: mxttloccs " <<
           maxtotaloccs << " ctxwords " << ctxwords << "\n");

-    if (o_index_storerawtext) {
+    if (o_index_storedoctext) {
        return abstractFromText(ndb, docid, matchedTerms, byQ,
                                totalweight, ctxwords, maxtotaloccs, vabs,
                                chron);
--- a/src/rcldb/rcldb.cpp
+++ b/src/rcldb/rcldb.cpp
@ -794,8 +794,10 @@ bool Db::open(OpenMode mode, OpenError *error)
 	    {
 		int action = (mode == DbUpd) ? Xapian::DB_CREATE_OR_OPEN :
 		    Xapian::DB_CREATE_OR_OVERWRITE;
-                if (::access(dir.c_str(), 0) != 0) {
-                    // New index. use a stub to force using Chert
+                if (!o_index_storedoctext && ::access(dir.c_str(), 0) != 0) {
+                    // New index. Use a stub to force using Chert. No
+                    // sense in doing this if we are storing the text
+                    // anyway.
                    string stub = path_cat(m_config->getConfDir(),
                                           "xapian.stub");
                    FILE *fp = fopen(stub.c_str(), "w");
@ -1463,7 +1465,7 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
 	    LOGDEB("Db::addOrUpdate: split failed for main text\n");
        } else {
 #ifdef RAWTEXT_IN_VALUE
-            if (o_index_storerawtext) {
+            if (o_index_storedoctext) {
                ZLibUtBuf buf;
                deflateToBuf(doc.text.c_str(), doc.text.size(), buf);
                string tt;
@ -1683,7 +1685,7 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
 	}

 #ifdef RAWTEXT_IN_DATA
-        if (o_index_storerawtext) {
+        if (o_index_storedoctext) {
            RECORD_APPEND(record, string("RAWTEXT"),
                          neutchars(doc.text, cstr_nc));
        }
--- a/src/sampleconf/recoll.conf
+++ b/src/sampleconf/recoll.conf
@ -231,7 +231,7 @@ membermaxkbs = 50000
 # implies an index reset.</descr></var>
 indexStripChars = 1

-# <var name="indexStoreRawText" type="bool"><brief>Decide if we store the
+# <var name="indexStoreDocText" type="bool"><brief>Decide if we store the
 # documents' text content in the index.</brief><descr>Storing the text
 # allows extracting snippets from it at query time, 
 # instead of building them from index position data. This Has become
@ -244,7 +244,7 @@ indexStripChars = 1
 # but also allows for nicer snippets, so it may be worth enabling it even
 # if not strictly needed for performance if you can afford the space.
 # </desc></var>
-indexStoreRawText = 0
+indexStoreDocText = 0

 # <var name="nonumbers" type="bool"><brief>Decides if terms will be
 # generated for numbers.</brief><descr>For example "123", "1.5e6",
--- a/src/utils/hldata.h
+++ b/src/utils/hldata.h
@ -33,7 +33,9 @@ struct HighlightData {
    std::vector<std::vector<std::string> > ugroups;

    /** Processed/expanded terms and groups. Used for looking for
-     * regions to highlight. Terms are just groups with 1 entry. All
+     * regions to highlight. A group can be a PHRASE or NEAR entry (we
+     * process everything as NEAR to keep things reasonably
+     * simple). Terms are just groups with 1 entry. All
     * terms are transformed to be compatible with index content
     * (unaccented and lowercased as needed depending on
     * configuration), and the list may include values
@ -45,7 +47,9 @@ struct HighlightData {

    /** Index into ugroups for each group. Parallel to groups. As a
     * user term or group may generate many processed/expanded terms
-     * or groups, this is how we relate an expansion to its source.
+     * or groups, this is how we relate an expansion to its source
+     * (used, e.g. for generating anchors for walking search matches
+     * in the preview window).
     */
    std::vector<size_t> grpsugidx;