Changed new param name storerawtext->storedoctext. + comments

This commit is contained in:
Jean-Francois Dockes 2018-01-02 19:23:12 +01:00
parent b4493ed9e1
commit bb810f9ceb
7 changed files with 22 additions and 17 deletions

View File

@ -64,11 +64,10 @@ bool o_index_stripchars = true;
// instead of building them from index position data. Has become // instead of building them from index position data. Has become
// necessary for versions of Xapian 1.6, which have dropped support // necessary for versions of Xapian 1.6, which have dropped support
// for the chert index format, and adopted a setup which renders our // for the chert index format, and adopted a setup which renders our
// use of positions list unacceptably slow in cases. 'raw' text here // use of positions list unacceptably slow in cases. The text is just
// means that the text is not stripped of upper-case, diacritics, or // translated from its original format to UTF-8 plain text, and is not
// punctuation signs. It is still translated from its original format // stripped of upper-case, diacritics, or punctuation signs.
// to UTF-8 plain text. bool o_index_storedoctext = false;
bool o_index_storerawtext = false;
bool o_uptodate_test_use_mtime = false; bool o_uptodate_test_use_mtime = false;
@ -401,7 +400,7 @@ bool RclConfig::updateMainConfig()
static int m_index_stripchars_init = 0; static int m_index_stripchars_init = 0;
if (!m_index_stripchars_init) { if (!m_index_stripchars_init) {
getConfParam("indexStripChars", &o_index_stripchars); getConfParam("indexStripChars", &o_index_stripchars);
getConfParam("indexStoreRawText", &o_index_storerawtext); getConfParam("indexStoreDocText", &o_index_storedoctext);
getConfParam("testmodifusemtime", &o_uptodate_test_use_mtime); getConfParam("testmodifusemtime", &o_uptodate_test_use_mtime);
m_index_stripchars_init = 1; m_index_stripchars_init = 1;
} }

View File

@ -446,7 +446,7 @@ extern bool o_index_stripchars;
// means that the text is not stripped of upper-case, diacritics, or // means that the text is not stripped of upper-case, diacritics, or
// punctuation signs. It is still translated from its original format // punctuation signs. It is still translated from its original format
// to UTF-8 plain text. // to UTF-8 plain text.
extern bool o_index_storerawtext; extern bool o_index_storedoctext;
// This global variable defines if we use mtime instead of ctime for // This global variable defines if we use mtime instead of ctime for
// up-to-date tests. This is mostly incompatible with xattr indexing, // up-to-date tests. This is mostly incompatible with xattr indexing,

View File

@ -242,9 +242,9 @@ int Query::Native::abstractFromText(
return ABSRES_ERROR; return ABSRES_ERROR;
} }
// tryout the xapian internal method.
#if 0 && ! (XAPIAN_MAJOR_VERSION <= 1 && XAPIAN_MINOR_VERSION <= 2) && \ #if 0 && ! (XAPIAN_MAJOR_VERSION <= 1 && XAPIAN_MINOR_VERSION <= 2) && \
(defined(RAWTEXT_IN_DATA) || defined(RAWTEXT_IN_VALUE)) (defined(RAWTEXT_IN_DATA) || defined(RAWTEXT_IN_VALUE))
// Try out the Xapian internal method.
string snippet = xmset.snippet(rawtext); string snippet = xmset.snippet(rawtext);
LOGDEB("SNIPPET: [" << snippet << "] END SNIPPET\n"); LOGDEB("SNIPPET: [" << snippet << "] END SNIPPET\n");
#endif #endif

View File

@ -649,7 +649,7 @@ int Query::Native::makeAbstract(Xapian::docid docid,
LOGABS("makeAbstract:" << chron.millis() << "mS: mxttloccs " << LOGABS("makeAbstract:" << chron.millis() << "mS: mxttloccs " <<
maxtotaloccs << " ctxwords " << ctxwords << "\n"); maxtotaloccs << " ctxwords " << ctxwords << "\n");
if (o_index_storerawtext) { if (o_index_storedoctext) {
return abstractFromText(ndb, docid, matchedTerms, byQ, return abstractFromText(ndb, docid, matchedTerms, byQ,
totalweight, ctxwords, maxtotaloccs, vabs, totalweight, ctxwords, maxtotaloccs, vabs,
chron); chron);

View File

@ -794,8 +794,10 @@ bool Db::open(OpenMode mode, OpenError *error)
{ {
int action = (mode == DbUpd) ? Xapian::DB_CREATE_OR_OPEN : int action = (mode == DbUpd) ? Xapian::DB_CREATE_OR_OPEN :
Xapian::DB_CREATE_OR_OVERWRITE; Xapian::DB_CREATE_OR_OVERWRITE;
if (::access(dir.c_str(), 0) != 0) { if (!o_index_storedoctext && ::access(dir.c_str(), 0) != 0) {
// New index. use a stub to force using Chert // New index. use a stub to force using Chert. No
// sense in doing this if we are storing the text
// anyway.
string stub = path_cat(m_config->getConfDir(), string stub = path_cat(m_config->getConfDir(),
"xapian.stub"); "xapian.stub");
FILE *fp = fopen(stub.c_str(), "w"); FILE *fp = fopen(stub.c_str(), "w");
@ -1463,7 +1465,7 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
LOGDEB("Db::addOrUpdate: split failed for main text\n"); LOGDEB("Db::addOrUpdate: split failed for main text\n");
} else { } else {
#ifdef RAWTEXT_IN_VALUE #ifdef RAWTEXT_IN_VALUE
if (o_index_storerawtext) { if (o_index_storedoctext) {
ZLibUtBuf buf; ZLibUtBuf buf;
deflateToBuf(doc.text.c_str(), doc.text.size(), buf); deflateToBuf(doc.text.c_str(), doc.text.size(), buf);
string tt; string tt;
@ -1683,7 +1685,7 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
} }
#ifdef RAWTEXT_IN_DATA #ifdef RAWTEXT_IN_DATA
if (o_index_storerawtext) { if (o_index_storedoctext) {
RECORD_APPEND(record, string("RAWTEXT"), RECORD_APPEND(record, string("RAWTEXT"),
neutchars(doc.text, cstr_nc)); neutchars(doc.text, cstr_nc));
} }

View File

@ -231,7 +231,7 @@ membermaxkbs = 50000
# implies an index reset.</descr></var> # implies an index reset.</descr></var>
indexStripChars = 1 indexStripChars = 1
# <var name="indexStoreRawText" type="bool"><brief>Decide if we store the # <var name="indexStoreDocText" type="bool"><brief>Decide if we store the
# documents' text content in the index.</brief><descr>Storing the text # documents' text content in the index.</brief><descr>Storing the text
# allows extracting snippets from it at query time, # allows extracting snippets from it at query time,
# instead of building them from index position data. This has become # instead of building them from index position data. This has become
@ -244,7 +244,7 @@ indexStripChars = 1
# but also allows for nicer snippets, so it may be worth enabling it even # but also allows for nicer snippets, so it may be worth enabling it even
# if not strictly needed for performance if you can afford the space. # if not strictly needed for performance if you can afford the space.
# </descr></var> # </descr></var>
indexStoreRawText = 0 indexStoreDocText = 0
# <var name="nonumbers" type="bool"><brief>Decides if terms will be # <var name="nonumbers" type="bool"><brief>Decides if terms will be
# generated for numbers.</brief><descr>For example "123", "1.5e6", # generated for numbers.</brief><descr>For example "123", "1.5e6",

View File

@ -33,7 +33,9 @@ struct HighlightData {
std::vector<std::vector<std::string> > ugroups; std::vector<std::vector<std::string> > ugroups;
/** Processed/expanded terms and groups. Used for looking for /** Processed/expanded terms and groups. Used for looking for
* regions to highlight. Terms are just groups with 1 entry. All * regions to highlight. A group can be a PHRASE or NEAR entry (we
* process everything as NEAR to keep things reasonably
* simple). Terms are just groups with 1 entry. All
* terms are transformed to be compatible with index content * terms are transformed to be compatible with index content
* (unaccented and lowercased as needed depending on * (unaccented and lowercased as needed depending on
* configuration), and the list may include values * configuration), and the list may include values
@ -45,7 +47,9 @@ struct HighlightData {
/** Index into ugroups for each group. Parallel to groups. As a /** Index into ugroups for each group. Parallel to groups. As a
* user term or group may generate many processed/expanded terms * user term or group may generate many processed/expanded terms
* or groups, this is how we relate an expansion to its source. * or groups, this is how we relate an expansion to its source
* (used, e.g. for generating anchors for walking search matches
* in the preview window).
*/ */
std::vector<size_t> grpsugidx; std::vector<size_t> grpsugidx;