Changed new param name storerawtext->storedoctext. + comments
This commit is contained in:
parent
b4493ed9e1
commit
bb810f9ceb
@ -64,11 +64,10 @@ bool o_index_stripchars = true;
|
|||||||
// instead of building them from index position data. Has become
|
// instead of building them from index position data. Has become
|
||||||
// necessary for versions of Xapian 1.6, which have dropped support
|
// necessary for versions of Xapian 1.6, which have dropped support
|
||||||
// for the chert index format, and adopted a setup which renders our
|
// for the chert index format, and adopted a setup which renders our
|
||||||
// use of positions list unacceptably slow in cases. 'raw' text here
|
// use of positions list unacceptably slow in some cases. The text is just
|
||||||
// means that the text is not stripped of upper-case, diacritics, or
|
// translated from its original format to UTF-8 plain text, and is not
|
||||||
// punctuation signs. It is still translated from its original format
|
// stripped of upper-case, diacritics, or punctuation signs.
|
||||||
// to UTF-8 plain text.
|
bool o_index_storedoctext = false;
|
||||||
bool o_index_storerawtext = false;
|
|
||||||
|
|
||||||
bool o_uptodate_test_use_mtime = false;
|
bool o_uptodate_test_use_mtime = false;
|
||||||
|
|
||||||
@ -401,7 +400,7 @@ bool RclConfig::updateMainConfig()
|
|||||||
static int m_index_stripchars_init = 0;
|
static int m_index_stripchars_init = 0;
|
||||||
if (!m_index_stripchars_init) {
|
if (!m_index_stripchars_init) {
|
||||||
getConfParam("indexStripChars", &o_index_stripchars);
|
getConfParam("indexStripChars", &o_index_stripchars);
|
||||||
getConfParam("indexStoreRawText", &o_index_storerawtext);
|
getConfParam("indexStoreDocText", &o_index_storedoctext);
|
||||||
getConfParam("testmodifusemtime", &o_uptodate_test_use_mtime);
|
getConfParam("testmodifusemtime", &o_uptodate_test_use_mtime);
|
||||||
m_index_stripchars_init = 1;
|
m_index_stripchars_init = 1;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -446,7 +446,7 @@ extern bool o_index_stripchars;
|
|||||||
// means that the text is not stripped of upper-case, diacritics, or
|
// means that the text is not stripped of upper-case, diacritics, or
|
||||||
// punctuation signs. It is still translated from its original format
|
// punctuation signs. It is still translated from its original format
|
||||||
// to UTF-8 plain text.
|
// to UTF-8 plain text.
|
||||||
extern bool o_index_storerawtext;
|
extern bool o_index_storedoctext;
|
||||||
|
|
||||||
// This global variable defines if we use mtime instead of ctime for
|
// This global variable defines if we use mtime instead of ctime for
|
||||||
// up-to-date tests. This is mostly incompatible with xattr indexing,
|
// up-to-date tests. This is mostly incompatible with xattr indexing,
|
||||||
|
|||||||
@ -242,9 +242,9 @@ int Query::Native::abstractFromText(
|
|||||||
return ABSRES_ERROR;
|
return ABSRES_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
// tryout the xapian internal method.
|
|
||||||
#if 0 && ! (XAPIAN_MAJOR_VERSION <= 1 && XAPIAN_MINOR_VERSION <= 2) && \
|
#if 0 && ! (XAPIAN_MAJOR_VERSION <= 1 && XAPIAN_MINOR_VERSION <= 2) && \
|
||||||
(defined(RAWTEXT_IN_DATA) || defined(RAWTEXT_IN_VALUE))
|
(defined(RAWTEXT_IN_DATA) || defined(RAWTEXT_IN_VALUE))
|
||||||
|
// Tryout the Xapian internal method.
|
||||||
string snippet = xmset.snippet(rawtext);
|
string snippet = xmset.snippet(rawtext);
|
||||||
LOGDEB("SNIPPET: [" << snippet << "] END SNIPPET\n");
|
LOGDEB("SNIPPET: [" << snippet << "] END SNIPPET\n");
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -649,7 +649,7 @@ int Query::Native::makeAbstract(Xapian::docid docid,
|
|||||||
LOGABS("makeAbstract:" << chron.millis() << "mS: mxttloccs " <<
|
LOGABS("makeAbstract:" << chron.millis() << "mS: mxttloccs " <<
|
||||||
maxtotaloccs << " ctxwords " << ctxwords << "\n");
|
maxtotaloccs << " ctxwords " << ctxwords << "\n");
|
||||||
|
|
||||||
if (o_index_storerawtext) {
|
if (o_index_storedoctext) {
|
||||||
return abstractFromText(ndb, docid, matchedTerms, byQ,
|
return abstractFromText(ndb, docid, matchedTerms, byQ,
|
||||||
totalweight, ctxwords, maxtotaloccs, vabs,
|
totalweight, ctxwords, maxtotaloccs, vabs,
|
||||||
chron);
|
chron);
|
||||||
|
|||||||
@ -794,8 +794,10 @@ bool Db::open(OpenMode mode, OpenError *error)
|
|||||||
{
|
{
|
||||||
int action = (mode == DbUpd) ? Xapian::DB_CREATE_OR_OPEN :
|
int action = (mode == DbUpd) ? Xapian::DB_CREATE_OR_OPEN :
|
||||||
Xapian::DB_CREATE_OR_OVERWRITE;
|
Xapian::DB_CREATE_OR_OVERWRITE;
|
||||||
if (::access(dir.c_str(), 0) != 0) {
|
if (!o_index_storedoctext && ::access(dir.c_str(), 0) != 0) {
|
||||||
// New index. use a stub to force using Chert
|
// New index. Use a stub to force using Chert. No
|
||||||
|
// sense in doing this if we are storing the text
|
||||||
|
// anyway.
|
||||||
string stub = path_cat(m_config->getConfDir(),
|
string stub = path_cat(m_config->getConfDir(),
|
||||||
"xapian.stub");
|
"xapian.stub");
|
||||||
FILE *fp = fopen(stub.c_str(), "w");
|
FILE *fp = fopen(stub.c_str(), "w");
|
||||||
@ -1463,7 +1465,7 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
|
|||||||
LOGDEB("Db::addOrUpdate: split failed for main text\n");
|
LOGDEB("Db::addOrUpdate: split failed for main text\n");
|
||||||
} else {
|
} else {
|
||||||
#ifdef RAWTEXT_IN_VALUE
|
#ifdef RAWTEXT_IN_VALUE
|
||||||
if (o_index_storerawtext) {
|
if (o_index_storedoctext) {
|
||||||
ZLibUtBuf buf;
|
ZLibUtBuf buf;
|
||||||
deflateToBuf(doc.text.c_str(), doc.text.size(), buf);
|
deflateToBuf(doc.text.c_str(), doc.text.size(), buf);
|
||||||
string tt;
|
string tt;
|
||||||
@ -1683,7 +1685,7 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef RAWTEXT_IN_DATA
|
#ifdef RAWTEXT_IN_DATA
|
||||||
if (o_index_storerawtext) {
|
if (o_index_storedoctext) {
|
||||||
RECORD_APPEND(record, string("RAWTEXT"),
|
RECORD_APPEND(record, string("RAWTEXT"),
|
||||||
neutchars(doc.text, cstr_nc));
|
neutchars(doc.text, cstr_nc));
|
||||||
}
|
}
|
||||||
|
|||||||
@ -231,7 +231,7 @@ membermaxkbs = 50000
|
|||||||
# implies an index reset.</descr></var>
|
# implies an index reset.</descr></var>
|
||||||
indexStripChars = 1
|
indexStripChars = 1
|
||||||
|
|
||||||
# <var name="indexStoreRawText" type="bool"><brief>Decide if we store the
|
# <var name="indexStoreDocText" type="bool"><brief>Decide if we store the
|
||||||
# documents' text content in the index.</brief><descr>Storing the text
|
# documents' text content in the index.</brief><descr>Storing the text
|
||||||
# allows extracting snippets from it at query time,
|
# allows extracting snippets from it at query time,
|
||||||
# instead of building them from index position data. This has become
|
# instead of building them from index position data. This has become
|
||||||
@ -244,7 +244,7 @@ indexStripChars = 1
|
|||||||
# but also allows for nicer snippets, so it may be worth enabling it even
|
# but also allows for nicer snippets, so it may be worth enabling it even
|
||||||
# if not strictly needed for performance if you can afford the space.
|
# if not strictly needed for performance if you can afford the space.
|
||||||
# </descr></var>
|
# </descr></var>
|
||||||
indexStoreRawText = 0
|
indexStoreDocText = 0
|
||||||
|
|
||||||
# <var name="nonumbers" type="bool"><brief>Decides if terms will be
|
# <var name="nonumbers" type="bool"><brief>Decides if terms will be
|
||||||
# generated for numbers.</brief><descr>For example "123", "1.5e6",
|
# generated for numbers.</brief><descr>For example "123", "1.5e6",
|
||||||
|
|||||||
@ -33,7 +33,9 @@ struct HighlightData {
|
|||||||
std::vector<std::vector<std::string> > ugroups;
|
std::vector<std::vector<std::string> > ugroups;
|
||||||
|
|
||||||
/** Processed/expanded terms and groups. Used for looking for
|
/** Processed/expanded terms and groups. Used for looking for
|
||||||
* regions to highlight. Terms are just groups with 1 entry. All
|
* regions to highlight. A group can be a PHRASE or NEAR entry (we
|
||||||
|
* process everything as NEAR to keep things reasonably
|
||||||
|
* simple). Terms are just groups with 1 entry. All
|
||||||
* terms are transformed to be compatible with index content
|
* terms are transformed to be compatible with index content
|
||||||
* (unaccented and lowercased as needed depending on
|
* (unaccented and lowercased as needed depending on
|
||||||
* configuration), and the list may include values
|
* configuration), and the list may include values
|
||||||
@ -45,7 +47,9 @@ struct HighlightData {
|
|||||||
|
|
||||||
/** Index into ugroups for each group. Parallel to groups. As a
|
/** Index into ugroups for each group. Parallel to groups. As a
|
||||||
* user term or group may generate many processed/expanded terms
|
* user term or group may generate many processed/expanded terms
|
||||||
* or groups, this is how we relate an expansion to its source.
|
* or groups, this is how we relate an expansion to its source
|
||||||
|
* (used, e.g. for generating anchors for walking search matches
|
||||||
|
* in the preview window).
|
||||||
*/
|
*/
|
||||||
std::vector<size_t> grpsugidx;
|
std::vector<size_t> grpsugidx;
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user