From 60d3ba11acef2f00bfb97f6146a0a1ead2eccd19 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Wed, 26 Mar 2014 18:43:49 +0100 Subject: [PATCH] add parameter to set max stored length of metadata fields. Previously fixed at 150. Fixes issue #178 --- src/VERSION | 2 +- src/doc/user/usermanual.sgml | 10 +++++++ src/rcldb/rcldb.cpp | 51 ++++++++++++++++++++--------------- src/rcldb/rcldb.h | 2 ++ src/sampleconf/recoll.conf.in | 4 +++ 5 files changed, 47 insertions(+), 22 deletions(-) diff --git a/src/VERSION b/src/VERSION index 34269ede..e54f3135 100644 --- a/src/VERSION +++ b/src/VERSION @@ -1 +1 @@ -1.19.11p1 +1.19.12 diff --git a/src/doc/user/usermanual.sgml b/src/doc/user/usermanual.sgml index f05f6e22..284c0e0b 100644 --- a/src/doc/user/usermanual.sgml +++ b/src/doc/user/usermanual.sgml @@ -5594,6 +5594,16 @@ mondelaypatterns = *.log:20 "this one has spaces*:10" + idxmetastoredlen + Maximum stored length for metadata + fields. This does not affect indexing (the whole field is + processed anyway), just the amount of data stored in the + index for the purpose of displaying fields inside result + lists or previews. The default value is 150 bytes which + may be too low if you have custom fields. + + + aspellLanguage Language definitions to use when creating the aspell dictionary. The value must match a set of diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index 2e6957a3..cdfb9aa8 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -694,7 +694,7 @@ bool Db::o_inPlaceReset; Db::Db(const RclConfig *cfp) : m_ndb(0), m_mode(Db::DbRO), m_curtxtsz(0), m_flushtxtsz(0), - m_occtxtsz(0), m_occFirstCheck(1), + m_occtxtsz(0), m_occFirstCheck(1), m_idxMetaStoredLen(150), m_idxAbsTruncLen(250), m_synthAbsLen(250), m_synthAbsWordCtxLen(4), m_flushMb(-1), m_maxFsOccupPc(0) { @@ -713,6 +713,7 @@ Db::Db(const RclConfig *cfp) if (m_config) { m_config->getConfParam("maxfsoccuppc", &m_maxFsOccupPc); m_config->getConfParam("idxflushmb", &m_flushMb); + m_config->getConfParam("idxmetastoredlen", &m_idxMetaStoredLen); } } @@ -1469,16 +1470,14 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc) if (!doc.ipath.empty()) RECORD_APPEND(record, Doc::keyipt, doc.ipath); - doc.meta[Doc::keytt] = - neutchars(truncate_to_word(doc.meta[Doc::keytt], 150), cstr_nc); - if (!doc.meta[Doc::keytt].empty()) - RECORD_APPEND(record, cstr_caption, doc.meta[Doc::keytt]); - - trimstring(doc.meta[Doc::keykw], " \t\r\n"); - doc.meta[Doc::keykw] = - neutchars(truncate_to_word(doc.meta[Doc::keykw], 300), cstr_nc); - // No need to explicitly append the keywords, this will be done by - // the "stored" loop + // Fields from the Meta array. Handle title specially because it has a + // different name inside the data record (history...) + string& ttref = doc.meta[Doc::keytt]; + ttref = neutchars(truncate_to_word(ttref, m_idxMetaStoredLen), cstr_nc); + if (!ttref.empty()) { + RECORD_APPEND(record, cstr_caption, ttref); + ttref.clear(); + } // If abstract is empty, we make up one with the beginning of the // document. This is then not indexed, but part of the doc data so @@ -1487,25 +1486,34 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc) bool syntabs = false; // Note that the map accesses by operator[] create empty entries if they // don't exist yet. - trimstring(doc.meta[Doc::keyabs], " \t\r\n"); - if (doc.meta[Doc::keyabs].empty()) { + string& absref = doc.meta[Doc::keyabs]; + trimstring(absref, " \t\r\n"); + if (absref.empty()) { syntabs = true; if (!doc.text.empty()) - doc.meta[Doc::keyabs] = cstr_syntAbs + - neutchars(truncate_to_word(doc.text, m_idxAbsTruncLen), cstr_nc); + absref = cstr_syntAbs + + neutchars(truncate_to_word(doc.text, m_idxAbsTruncLen), + cstr_nc); } else { - doc.meta[Doc::keyabs] = - neutchars(truncate_to_word(doc.meta[Doc::keyabs], m_idxAbsTruncLen), - cstr_nc); + absref = neutchars(truncate_to_word(absref, m_idxAbsTruncLen), + cstr_nc); } + // Do the append here to avoid the different truncation done + // in the regular "stored" loop + if (!absref.empty()) { + RECORD_APPEND(record, Doc::keyabs, absref); + absref.clear(); + } + // Append all regular "stored" meta fields const set& stored = m_config->getStoredFields(); for (set::const_iterator it = stored.begin(); it != stored.end(); it++) { string nm = m_config->fieldCanon(*it); if (!doc.meta[nm].empty()) { string value = - neutchars(truncate_to_word(doc.meta[nm], 150), cstr_nc); + neutchars(truncate_to_word(doc.meta[nm], + m_idxMetaStoredLen), cstr_nc); RECORD_APPEND(record, nm, value); } } @@ -1611,8 +1619,9 @@ bool Db::Native::docToXdocXattrOnly(TextSplitDb *splitter, const string &udi, it != stored.end(); it++) { string nm = m_rcldb->m_config->fieldCanon(*it); if (doc.getmeta(nm, 0)) { - string value = - neutchars(truncate_to_word(doc.meta[nm], 150), cstr_nc); + string value = neutchars( + truncate_to_word(doc.meta[nm], m_rcldb->m_idxMetaStoredLen), + cstr_nc); datadic.set(nm, value, ""); } } diff --git a/src/rcldb/rcldb.h b/src/rcldb/rcldb.h index aad2bc36..bc9b5350 100644 --- a/src/rcldb/rcldb.h +++ b/src/rcldb/rcldb.h @@ -458,6 +458,8 @@ private: * after init */ // Stop terms: those don't get indexed. StopList m_stops; + // Truncation length for stored meta fields + int m_idxMetaStoredLen; // This is how long an abstract we keep or build from beginning of // text when indexing. It only has an influence on the size of the // db as we are free to shorten it again when displaying diff --git a/src/sampleconf/recoll.conf.in b/src/sampleconf/recoll.conf.in index 7355deca..dcbec289 100644 --- a/src/sampleconf/recoll.conf.in +++ b/src/sampleconf/recoll.conf.in @@ -224,6 +224,10 @@ filtermaxseconds = 1200 # bigger db # idxabsmlen = 250 +# Truncation length of stored metadata fields. This does not affect +# indexing, just what can be displayed inside results. +# idxmetastoredlen = 150 + # Language definitions to use when creating the aspell dictionary. # The value must match a set of aspell language definition files. # You can type "aspell config" to see where these are installed.