From 21adaca229abfd8e8e5abcaa085b986c7b6b6cc1 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Sun, 8 Apr 2018 10:54:09 +0200 Subject: [PATCH] Add parameter to truncate all document text to a specified length --- src/rcldb/rcldb.cpp | 5 +++++ src/rcldb/rcldb.h | 2 ++ 2 files changed, 7 insertions(+) diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index eb521131..68bc1301 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -876,6 +876,7 @@ Db::Db(const RclConfig *cfp) m_config->getConfParam("maxfsoccuppc", &m_maxFsOccupPc); m_config->getConfParam("idxflushmb", &m_flushMb); m_config->getConfParam("idxmetastoredlen", &m_idxMetaStoredLen); + m_config->getConfParam("idxtexttruncatelen", &m_idxTextTruncateLen); } } @@ -1480,6 +1481,10 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc) } } else { + if (m_idxTextTruncateLen > 0) { + doc.text = truncate_to_word(doc.text, m_idxTextTruncateLen); + } + // If the ipath is like a path, index the last element. This is // for compound documents like zip and chm for which the filter // uses the file path as ipath. diff --git a/src/rcldb/rcldb.h b/src/rcldb/rcldb.h index 3e249fc8..d27e5da9 100644 --- a/src/rcldb/rcldb.h +++ b/src/rcldb/rcldb.h @@ -509,6 +509,8 @@ private: // text when indexing. It only has an influence on the size of the // db as we are free to shorten it again when displaying int m_idxAbsTruncLen; + // Document text truncation length (no truncation if <= 0) + int m_idxTextTruncateLen{0}; // This is the size of the abstract that we synthetize out of query // term contexts at *query time* int m_synthAbsLen;