add parameter to set max stored length of metadata fields. Previously fixed at 150. Fixes issue #178
parent 4e8aa756cb
commit 60d3ba11ac
@@ -1 +1 @@
-1.19.11p1
+1.19.12
@@ -5594,6 +5594,16 @@ mondelaypatterns = *.log:20 "this one has spaces*:10"
         </listitem>
       </varlistentry>
 
+      <varlistentry><term><varname>idxmetastoredlen</varname></term>
+        <listitem><para>Maximum stored length for metadata
+          fields. This does not affect indexing (the whole field is
+          processed anyway), just the amount of data stored in the
+          index for the purpose of displaying fields inside result
+          lists or previews. The default value is 150 bytes which
+          may be too low if you have custom fields.</para>
+        </listitem>
+      </varlistentry>
+
       <varlistentry><term><varname>aspellLanguage</varname></term>
         <listitem><para>Language definitions to use when creating
           the aspell dictionary. The value must match a set of
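For illustration: with the new parameter, a user indexing long custom fields could raise the limit in the personal configuration file (typically ~/.recoll/recoll.conf). The value below is an arbitrary example chosen for this sketch, not a recommendation from the change itself:

    # keep up to 500 bytes of each metadata field for display in result lists
    idxmetastoredlen = 500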
@@ -694,7 +694,7 @@ bool Db::o_inPlaceReset;
 
 Db::Db(const RclConfig *cfp)
     : m_ndb(0), m_mode(Db::DbRO), m_curtxtsz(0), m_flushtxtsz(0),
-      m_occtxtsz(0), m_occFirstCheck(1),
+      m_occtxtsz(0), m_occFirstCheck(1), m_idxMetaStoredLen(150),
       m_idxAbsTruncLen(250), m_synthAbsLen(250), m_synthAbsWordCtxLen(4),
       m_flushMb(-1), m_maxFsOccupPc(0)
 {
@@ -713,6 +713,7 @@ Db::Db(const RclConfig *cfp)
     if (m_config) {
         m_config->getConfParam("maxfsoccuppc", &m_maxFsOccupPc);
         m_config->getConfParam("idxflushmb", &m_flushMb);
+        m_config->getConfParam("idxmetastoredlen", &m_idxMetaStoredLen);
     }
 }
 
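The constructor hunks above follow a default-then-override pattern: m_idxMetaStoredLen starts at 150 and getConfParam presumably only overwrites it when idxmetastoredlen is present in the configuration. A minimal standalone sketch of that pattern, using a plain std::map as a hypothetical stand-in for RclConfig (not Recoll's actual API):

    #include <iostream>
    #include <map>
    #include <string>

    // Toy stand-in for the config lookup: writes *out only when the key
    // exists, so the compiled-in default survives when the parameter is unset.
    static bool getConfParam(const std::map<std::string, std::string>& conf,
                             const std::string& key, int *out)
    {
        auto it = conf.find(key);
        if (it == conf.end())
            return false;
        *out = std::stoi(it->second);
        return true;
    }

    int main()
    {
        std::map<std::string, std::string> conf{{"idxmetastoredlen", "500"}};
        int idxMetaStoredLen = 150;                 // default from the constructor
        getConfParam(conf, "idxmetastoredlen", &idxMetaStoredLen);
        std::cout << idxMetaStoredLen << "\n";      // 500 here, 150 if unset
        return 0;
    }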
@@ -1469,16 +1470,14 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
     if (!doc.ipath.empty())
         RECORD_APPEND(record, Doc::keyipt, doc.ipath);
 
-    doc.meta[Doc::keytt] =
-        neutchars(truncate_to_word(doc.meta[Doc::keytt], 150), cstr_nc);
-    if (!doc.meta[Doc::keytt].empty())
-        RECORD_APPEND(record, cstr_caption, doc.meta[Doc::keytt]);
-
-    trimstring(doc.meta[Doc::keykw], " \t\r\n");
-    doc.meta[Doc::keykw] =
-        neutchars(truncate_to_word(doc.meta[Doc::keykw], 300), cstr_nc);
-    // No need to explicitly append the keywords, this will be done by
-    // the "stored" loop
+    // Fields from the Meta array. Handle title specially because it has a
+    // different name inside the data record (history...)
+    string& ttref = doc.meta[Doc::keytt];
+    ttref = neutchars(truncate_to_word(ttref, m_idxMetaStoredLen), cstr_nc);
+    if (!ttref.empty()) {
+        RECORD_APPEND(record, cstr_caption, ttref);
+        ttref.clear();
+    }
 
     // If abstract is empty, we make up one with the beginning of the
     // document. This is then not indexed, but part of the doc data so
@@ -1487,25 +1486,34 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
     bool syntabs = false;
     // Note that the map accesses by operator[] create empty entries if they
    // don't exist yet.
-    trimstring(doc.meta[Doc::keyabs], " \t\r\n");
-    if (doc.meta[Doc::keyabs].empty()) {
+    string& absref = doc.meta[Doc::keyabs];
+    trimstring(absref, " \t\r\n");
+    if (absref.empty()) {
         syntabs = true;
         if (!doc.text.empty())
-            doc.meta[Doc::keyabs] = cstr_syntAbs +
-                neutchars(truncate_to_word(doc.text, m_idxAbsTruncLen), cstr_nc);
+            absref = cstr_syntAbs +
+                neutchars(truncate_to_word(doc.text, m_idxAbsTruncLen),
+                          cstr_nc);
     } else {
-        doc.meta[Doc::keyabs] =
-            neutchars(truncate_to_word(doc.meta[Doc::keyabs], m_idxAbsTruncLen),
-                      cstr_nc);
+        absref = neutchars(truncate_to_word(absref, m_idxAbsTruncLen),
+                           cstr_nc);
     }
+    // Do the append here to avoid the different truncation done
+    // in the regular "stored" loop
+    if (!absref.empty()) {
+        RECORD_APPEND(record, Doc::keyabs, absref);
+        absref.clear();
+    }
+
     // Append all regular "stored" meta fields
     const set<string>& stored = m_config->getStoredFields();
     for (set<string>::const_iterator it = stored.begin();
          it != stored.end(); it++) {
         string nm = m_config->fieldCanon(*it);
         if (!doc.meta[nm].empty()) {
             string value =
-                neutchars(truncate_to_word(doc.meta[nm], 150), cstr_nc);
+                neutchars(truncate_to_word(doc.meta[nm],
+                                           m_idxMetaStoredLen), cstr_nc);
             RECORD_APPEND(record, nm, value);
         }
     }
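The stored values above are built with neutchars(truncate_to_word(..., m_idxMetaStoredLen), cstr_nc). Both helpers are Recoll internals not shown in this diff; the standalone sketch below only illustrates the general idea of byte-limited, word-boundary truncation and is an assumption about the behaviour, not the project's code:

    #include <iostream>
    #include <string>

    // Cut s down to at most maxlen bytes, then back up to the last whitespace
    // so no word is chopped in the middle (simplified: ignores multibyte chars).
    static std::string truncateToWord(const std::string& s,
                                      std::string::size_type maxlen)
    {
        if (s.size() <= maxlen)
            return s;
        std::string out = s.substr(0, maxlen);
        std::string::size_type sp = out.find_last_of(" \t\n");
        if (sp != std::string::npos)
            out.erase(sp);
        return out;
    }

    int main()
    {
        // Prints "maximum stored" rather than cutting inside a word.
        std::cout << truncateToWord("maximum stored length for metadata fields", 20)
                  << "\n";
        return 0;
    }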
@@ -1611,8 +1619,9 @@ bool Db::Native::docToXdocXattrOnly(TextSplitDb *splitter, const string &udi,
          it != stored.end(); it++) {
         string nm = m_rcldb->m_config->fieldCanon(*it);
         if (doc.getmeta(nm, 0)) {
-            string value =
-                neutchars(truncate_to_word(doc.meta[nm], 150), cstr_nc);
+            string value = neutchars(
+                truncate_to_word(doc.meta[nm], m_rcldb->m_idxMetaStoredLen),
+                cstr_nc);
             datadic.set(nm, value, "");
         }
     }
@@ -458,6 +458,8 @@ private:
      * after init */
     // Stop terms: those don't get indexed.
     StopList m_stops;
+    // Truncation length for stored meta fields
+    int m_idxMetaStoredLen;
     // This is how long an abstract we keep or build from beginning of
     // text when indexing. It only has an influence on the size of the
     // db as we are free to shorten it again when displaying
@@ -224,6 +224,10 @@ filtermaxseconds = 1200
 # bigger db
 # idxabsmlen = 250
 
+# Truncation length of stored metadata fields. This does not affect
+# indexing, just what can be displayed inside results.
+# idxmetastoredlen = 150
+
 # Language definitions to use when creating the aspell dictionary.
 # The value must match a set of aspell language definition files.
 # You can type "aspell config" to see where these are installed.