Changed new param name storerawtext->storedoctext. + comments

This commit is contained in:
Jean-Francois Dockes 2018-01-02 19:23:12 +01:00
parent b4493ed9e1
commit bb810f9ceb
7 changed files with 22 additions and 17 deletions

View File

@ -64,11 +64,10 @@ bool o_index_stripchars = true;
// instead of building them from index position data. Has become
// necessary for versions of Xapian 1.6, which have dropped support
// for the chert index format, and adopted a setup which renders our
// use of positions list unacceptably slow in cases. 'raw' text here
// means that the text is not stripped of upper-case, diacritics, or
// punctuation signs. It is still translated from its original format
// to UTF-8 plain text.
bool o_index_storerawtext = false;
// use of positions list unacceptably slow in some cases. The text is just
// translated from its original format to UTF-8 plain text, and is not
// stripped of upper-case, diacritics, or punctuation signs.
bool o_index_storedoctext = false;
bool o_uptodate_test_use_mtime = false;
@ -401,7 +400,7 @@ bool RclConfig::updateMainConfig()
static int m_index_stripchars_init = 0;
if (!m_index_stripchars_init) {
getConfParam("indexStripChars", &o_index_stripchars);
getConfParam("indexStoreRawText", &o_index_storerawtext);
getConfParam("indexStoreDocText", &o_index_storedoctext);
getConfParam("testmodifusemtime", &o_uptodate_test_use_mtime);
m_index_stripchars_init = 1;
}

View File

@ -446,7 +446,7 @@ extern bool o_index_stripchars;
// means that the text is not stripped of upper-case, diacritics, or
// punctuation signs. It is still translated from its original format
// to UTF-8 plain text.
extern bool o_index_storerawtext;
extern bool o_index_storedoctext;
// This global variable defines if we use mtime instead of ctime for
// up-to-date tests. This is mostly incompatible with xattr indexing,

View File

@ -242,9 +242,9 @@ int Query::Native::abstractFromText(
return ABSRES_ERROR;
}
// tryout the xapian internal method.
#if 0 && ! (XAPIAN_MAJOR_VERSION <= 1 && XAPIAN_MINOR_VERSION <= 2) && \
(defined(RAWTEXT_IN_DATA) || defined(RAWTEXT_IN_VALUE))
// Try out the Xapian internal method.
string snippet = xmset.snippet(rawtext);
LOGDEB("SNIPPET: [" << snippet << "] END SNIPPET\n");
#endif

View File

@ -649,7 +649,7 @@ int Query::Native::makeAbstract(Xapian::docid docid,
LOGABS("makeAbstract:" << chron.millis() << "mS: mxttloccs " <<
maxtotaloccs << " ctxwords " << ctxwords << "\n");
if (o_index_storerawtext) {
if (o_index_storedoctext) {
return abstractFromText(ndb, docid, matchedTerms, byQ,
totalweight, ctxwords, maxtotaloccs, vabs,
chron);

View File

@ -794,8 +794,10 @@ bool Db::open(OpenMode mode, OpenError *error)
{
int action = (mode == DbUpd) ? Xapian::DB_CREATE_OR_OPEN :
Xapian::DB_CREATE_OR_OVERWRITE;
if (::access(dir.c_str(), 0) != 0) {
// New index. use a stub to force using Chert
if (!o_index_storedoctext && ::access(dir.c_str(), 0) != 0) {
// New index. Use a stub to force using Chert. No
// sense in doing this if we are storing the text
// anyway.
string stub = path_cat(m_config->getConfDir(),
"xapian.stub");
FILE *fp = fopen(stub.c_str(), "w");
@ -1463,7 +1465,7 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
LOGDEB("Db::addOrUpdate: split failed for main text\n");
} else {
#ifdef RAWTEXT_IN_VALUE
if (o_index_storerawtext) {
if (o_index_storedoctext) {
ZLibUtBuf buf;
deflateToBuf(doc.text.c_str(), doc.text.size(), buf);
string tt;
@ -1683,7 +1685,7 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
}
#ifdef RAWTEXT_IN_DATA
if (o_index_storerawtext) {
if (o_index_storedoctext) {
RECORD_APPEND(record, string("RAWTEXT"),
neutchars(doc.text, cstr_nc));
}

View File

@ -231,7 +231,7 @@ membermaxkbs = 50000
# implies an index reset.</descr></var>
indexStripChars = 1
# <var name="indexStoreRawText" type="bool"><brief>Decide if we store the
# <var name="indexStoreDocText" type="bool"><brief>Decide if we store the
# documents' text content in the index.</brief><descr>Storing the text
# allows extracting snippets from it at query time,
# instead of building them from index position data. This has become
@ -244,7 +244,7 @@ indexStripChars = 1
# but also allows for nicer snippets, so it may be worth enabling it even
# if not strictly needed for performance if you can afford the space.
# </descr></var>
indexStoreRawText = 0
indexStoreDocText = 0
# <var name="nonumbers" type="bool"><brief>Decides if terms will be
# generated for numbers.</brief><descr>For example "123", "1.5e6",

View File

@ -33,7 +33,9 @@ struct HighlightData {
std::vector<std::vector<std::string> > ugroups;
/** Processed/expanded terms and groups. Used for looking for
* regions to highlight. Terms are just groups with 1 entry. All
* regions to highlight. A group can be a PHRASE or NEAR entry (we
* process everything as NEAR to keep things reasonably
* simple). Terms are just groups with 1 entry. All
* terms are transformed to be compatible with index content
* (unaccented and lowercased as needed depending on
* configuration), and the list may include values
@ -45,7 +47,9 @@ struct HighlightData {
/** Index into ugroups for each group. Parallel to groups. As a
* user term or group may generate many processed/expanded terms
* or groups, this is how we relate an expansion to its source.
* or groups, this is how we relate an expansion to its source
* (used, e.g. for generating anchors for walking search matches
* in the preview window).
*/
std::vector<size_t> grpsugidx;