make constant lengths for abstracts config params
This commit is contained in:
parent
d76382ce2e
commit
b536c9c46c
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.34 2006-04-30 07:39:09 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.35 2006-09-13 13:53:35 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -81,6 +81,9 @@ bool DbIndexer::indexDb(bool resetbefore, list<string> *topdirs)
|
||||
// Set the current directory in config so that subsequent
|
||||
// getConfParams() will get local values
|
||||
m_config->setKeyDir(*it);
|
||||
int abslen;
|
||||
if (m_config->getConfParam("idxabsmlen", &abslen))
|
||||
m_db.setAbstractParams(abslen, -1, -1);
|
||||
|
||||
// Set up skipped patterns for this subtree. This probably should be
|
||||
// done in the directory change code in processone() instead.
|
||||
@ -179,6 +182,9 @@ bool DbIndexer::indexFiles(const list<string> &filenames)
|
||||
list<string>::const_iterator it;
|
||||
for (it = filenames.begin(); it != filenames.end();it++) {
|
||||
m_config->setKeyDir(path_getfather(*it));
|
||||
int abslen;
|
||||
if (m_config->getConfParam("idxabsmlen", &abslen))
|
||||
m_db.setAbstractParams(abslen, -1, -1);
|
||||
struct stat stb;
|
||||
if (stat(it->c_str(), &stb) != 0) {
|
||||
LOGERR(("DbIndexer::indexFiles: stat(%s): %s", it->c_str(),
|
||||
@ -228,6 +234,9 @@ DbIndexer::processone(const std::string &fn, const struct stat *stp,
|
||||
if (flg == FsTreeWalker::FtwDirEnter ||
|
||||
flg == FsTreeWalker::FtwDirReturn) {
|
||||
m_config->setKeyDir(fn);
|
||||
int abslen;
|
||||
if (m_config->getConfParam("idxabsmlen", &abslen))
|
||||
m_db.setAbstractParams(abslen, -1, -1);
|
||||
return FsTreeWalker::FtwOk;
|
||||
}
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: guiutils.cpp,v 1.16 2006-09-13 08:13:36 dockes Exp $ (C) 2005 Jean-Francois Dockes";
|
||||
static char rcsid[] = "@(#$Id: guiutils.cpp,v 1.17 2006-09-13 13:53:35 dockes Exp $ (C) 2005 Jean-Francois Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -151,6 +151,10 @@ void rwSettings(bool writing)
|
||||
"/Recoll/prefs/query/buildAbstract", Bool, true);
|
||||
SETTING_RW(prefs.queryReplaceAbstract,
|
||||
"/Recoll/prefs/query/replaceAbstract", Bool, false);
|
||||
SETTING_RW(prefs.syntAbsLen, "/Recoll/prefs/query/syntAbsLen",
|
||||
Num, 250);
|
||||
SETTING_RW(prefs.syntAbsCtx, "/Recoll/prefs/query/syntAbsCtx",
|
||||
Num, 4);
|
||||
|
||||
// Ssearch combobox history list
|
||||
if (writing) {
|
||||
|
||||
@ -17,7 +17,7 @@
|
||||
#ifndef _GUIUTILS_H_INCLUDED_
|
||||
#define _GUIUTILS_H_INCLUDED_
|
||||
/*
|
||||
* @(#$Id: guiutils.h,v 1.8 2006-09-13 08:13:36 dockes Exp $ (C) 2005 Jean-Francois Dockes
|
||||
* @(#$Id: guiutils.h,v 1.9 2006-09-13 13:53:35 dockes Exp $ (C) 2005 Jean-Francois Dockes
|
||||
* jean-francois.dockes@wanadoo.fr
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -78,6 +78,9 @@ class PrefsPack {
|
||||
// Ignored file types in adv search (startup default)
|
||||
QStringList asearchIgnFilTyps;
|
||||
|
||||
int syntAbsLen;
|
||||
int syntAbsCtx;
|
||||
|
||||
PrefsPack() :
|
||||
showicons(true),
|
||||
respagesize(8),
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: main.cpp,v 1.48 2006-09-13 08:13:36 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: main.cpp,v 1.49 2006-09-13 13:53:35 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -103,6 +103,7 @@ bool maybeOpenDb(string &reason, bool force)
|
||||
dbdir + " wait for indexing to complete?";
|
||||
return false;
|
||||
}
|
||||
rcldb->setAbstractParams(-1, prefs.syntAbsLen, prefs.syntAbsCtx);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: rclmain.cpp,v 1.31 2006-09-13 08:13:36 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: rclmain.cpp,v 1.32 2006-09-13 13:53:35 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -131,12 +131,6 @@ void RclMain::init()
|
||||
|
||||
nextPageAction->setIconSet(createIconSet("nextpage.png"));
|
||||
prevPageAction->setIconSet(createIconSet("prevpage.png"));
|
||||
|
||||
|
||||
if (prefs.startWithAdvSearchOpen)
|
||||
showAdvSearchDialog();
|
||||
if (prefs.startWithSortToolOpen)
|
||||
showSortDialog();
|
||||
}
|
||||
|
||||
// We also want to get rid of the advanced search form and previews
|
||||
@ -667,7 +661,6 @@ void RclMain::docExpand(int docnum)
|
||||
// We need to insert item here, it's not auto-done like when the user types
|
||||
// CR
|
||||
sSearch->queryText->setEditText(text);
|
||||
sSearch->queryText->insertItem(text, 0);
|
||||
sSearch->setAnyTermMode();
|
||||
sSearch->startSimpleSearch();
|
||||
}
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: ssearch_w.cpp,v 1.4 2006-09-12 10:11:36 dockes Exp $ (C) 2006 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: ssearch_w.cpp,v 1.5 2006-09-13 13:53:35 dockes Exp $ (C) 2006 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -89,12 +89,20 @@ void SSearch::startSimpleSearch()
|
||||
// the listbox list, The qt listbox doesn't do lru correctly (if
|
||||
// already in the list the new entry would remain at its place,
|
||||
// not jump at the top as it should
|
||||
LOGDEB3(("Querytext list count %d\n", queryText->count()));
|
||||
// Have to save current text, this will change while we clean up the list
|
||||
QString txt = queryText->currentText();
|
||||
bool changed;
|
||||
do {
|
||||
changed = false;
|
||||
for (int index = 0; index < queryText->count(); index++) {
|
||||
LOGDEB3(("Querytext[%d] = [%s]\n", index,
|
||||
(const char *)(queryText->text(index).utf8())));
|
||||
if (queryText->text(index).length() == 0 ||
|
||||
queryText->text(index) == queryText->currentText()) {
|
||||
QString::compare(queryText->text(index), txt) == 0) {
|
||||
LOGDEB3(("Querytext removing at %d [%s] [%s]\n", index,
|
||||
(const char *)(queryText->text(index).utf8()),
|
||||
(const char *)(txt.utf8())));
|
||||
queryText->removeItem(index);
|
||||
changed = true;
|
||||
break;
|
||||
@ -102,13 +110,14 @@ void SSearch::startSimpleSearch()
|
||||
}
|
||||
} while (changed);
|
||||
// The combobox is set for no insertion, insert here:
|
||||
queryText->insertItem(queryText->currentText(), 0);
|
||||
queryText->insertItem(txt, 0);
|
||||
queryText->setCurrentItem(0);
|
||||
|
||||
// Save the current state of the listbox list to file
|
||||
prefs.ssearchHistory.clear();
|
||||
for (int index = 0; index < queryText->count(); index++)
|
||||
for (int index = 0; index < queryText->count(); index++) {
|
||||
prefs.ssearchHistory.push_back(queryText->text(index).utf8());
|
||||
|
||||
}
|
||||
emit startSearch(sdata);
|
||||
}
|
||||
|
||||
|
||||
@ -47,7 +47,7 @@
|
||||
</property>
|
||||
<widget class="QLayoutWidget">
|
||||
<property name="name">
|
||||
<cstring>layout5</cstring>
|
||||
<cstring>layout1</cstring>
|
||||
</property>
|
||||
<hbox>
|
||||
<property name="name">
|
||||
@ -92,7 +92,7 @@
|
||||
</property>
|
||||
<widget class="QLabel">
|
||||
<property name="name">
|
||||
<cstring>textLabel3</cstring>
|
||||
<cstring>textLabel4</cstring>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>Result list font</string>
|
||||
@ -181,7 +181,6 @@
|
||||
<bool>false</bool>
|
||||
</property>
|
||||
</widget>
|
||||
|
||||
<widget class="QCheckBox">
|
||||
<property name="name">
|
||||
<cstring>initStartAdvCB</cstring>
|
||||
@ -204,9 +203,6 @@
|
||||
<bool>false</bool>
|
||||
</property>
|
||||
</widget>
|
||||
|
||||
|
||||
|
||||
</vbox>
|
||||
</widget>
|
||||
</vbox>
|
||||
@ -276,6 +272,97 @@ May be slow for big documents.</string>
|
||||
<string>Do we synthetize an abstract even if the document seemed to have one?</string>
|
||||
</property>
|
||||
</widget>
|
||||
<widget class="QLayoutWidget">
|
||||
<property name="name">
|
||||
<cstring>layout16</cstring>
|
||||
</property>
|
||||
<hbox>
|
||||
<property name="name">
|
||||
<cstring>unnamed</cstring>
|
||||
</property>
|
||||
<widget class="QLabel">
|
||||
<property name="name">
|
||||
<cstring>textLabel2</cstring>
|
||||
</property>
|
||||
<property name="sizePolicy">
|
||||
<sizepolicy>
|
||||
<hsizetype>5</hsizetype>
|
||||
<vsizetype>5</vsizetype>
|
||||
<horstretch>2</horstretch>
|
||||
<verstretch>0</verstretch>
|
||||
</sizepolicy>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>Synthetic abstract size (characters)</string>
|
||||
</property>
|
||||
</widget>
|
||||
<widget class="QSpinBox">
|
||||
<property name="name">
|
||||
<cstring>syntlenSB</cstring>
|
||||
</property>
|
||||
<property name="sizePolicy">
|
||||
<sizepolicy>
|
||||
<hsizetype>7</hsizetype>
|
||||
<vsizetype>0</vsizetype>
|
||||
<horstretch>1</horstretch>
|
||||
<verstretch>0</verstretch>
|
||||
</sizepolicy>
|
||||
</property>
|
||||
<property name="lineStep">
|
||||
<number>10</number>
|
||||
</property>
|
||||
<property name="minValue">
|
||||
<number>80</number>
|
||||
</property>
|
||||
<property name="maxValue">
|
||||
<number>999</number>
|
||||
</property>
|
||||
<property name="value">
|
||||
<number>250</number>
|
||||
</property>
|
||||
</widget>
|
||||
</hbox>
|
||||
</widget>
|
||||
<widget class="QLayoutWidget">
|
||||
<property name="name">
|
||||
<cstring>layout17</cstring>
|
||||
</property>
|
||||
<hbox>
|
||||
<property name="name">
|
||||
<cstring>unnamed</cstring>
|
||||
</property>
|
||||
<widget class="QLabel">
|
||||
<property name="name">
|
||||
<cstring>textLabel3</cstring>
|
||||
</property>
|
||||
<property name="sizePolicy">
|
||||
<sizepolicy>
|
||||
<hsizetype>5</hsizetype>
|
||||
<vsizetype>5</vsizetype>
|
||||
<horstretch>1</horstretch>
|
||||
<verstretch>0</verstretch>
|
||||
</sizepolicy>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>Synthetic abstract context words</string>
|
||||
</property>
|
||||
</widget>
|
||||
<widget class="QSpinBox">
|
||||
<property name="name">
|
||||
<cstring>syntctxSB</cstring>
|
||||
</property>
|
||||
<property name="maxValue">
|
||||
<number>20</number>
|
||||
</property>
|
||||
<property name="minValue">
|
||||
<number>2</number>
|
||||
</property>
|
||||
<property name="value">
|
||||
<number>4</number>
|
||||
</property>
|
||||
</widget>
|
||||
</hbox>
|
||||
</widget>
|
||||
<spacer>
|
||||
<property name="name">
|
||||
<cstring>spacer2</cstring>
|
||||
@ -310,7 +397,7 @@ May be slow for big documents.</string>
|
||||
</property>
|
||||
<widget class="QLayoutWidget">
|
||||
<property name="name">
|
||||
<cstring>layout12</cstring>
|
||||
<cstring>layout15</cstring>
|
||||
</property>
|
||||
<hbox>
|
||||
<property name="name">
|
||||
@ -479,7 +566,7 @@ May be slow for big documents.</string>
|
||||
</property>
|
||||
<widget class="QLabel">
|
||||
<property name="name">
|
||||
<cstring>textLabel3</cstring>
|
||||
<cstring>textLabel5</cstring>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>Active databases</string>
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: uiprefs_w.cpp,v 1.3 2006-09-13 08:13:36 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: uiprefs_w.cpp,v 1.4 2006-09-13 13:53:35 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -54,6 +54,8 @@ void UIPrefsDialog::init()
|
||||
// Show icons checkbox
|
||||
useIconsCB->setChecked(prefs.showicons);
|
||||
autoSearchCB->setChecked(prefs.autoSearchOnWS);
|
||||
syntlenSB->setValue(prefs.syntAbsLen);
|
||||
syntctxSB->setValue(prefs.syntAbsCtx);
|
||||
|
||||
initStartAdvCB->setChecked(prefs.startWithAdvSearchOpen);
|
||||
initStartSortCB->setChecked(prefs.startWithSortToolOpen);
|
||||
@ -156,6 +158,9 @@ void UIPrefsDialog::accept()
|
||||
prefs.startWithAdvSearchOpen = initStartAdvCB->isChecked();
|
||||
prefs.startWithSortToolOpen = initStartSortCB->isChecked();
|
||||
|
||||
prefs.syntAbsLen = syntlenSB->value();
|
||||
prefs.syntAbsCtx = syntctxSB->value();
|
||||
|
||||
prefs.activeExtraDbs.clear();
|
||||
for (unsigned int i = 0; i < actDbsLB->count(); i++) {
|
||||
QListBoxItem *item = actDbsLB->item(i);
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.75 2006-05-09 10:15:14 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.76 2006-09-13 13:53:35 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -56,17 +56,6 @@ using namespace std;
|
||||
#ifndef NO_NAMESPACES
|
||||
namespace Rcl {
|
||||
#endif
|
||||
// This is how long an abstract we keep or build from beginning of text when
|
||||
// indexing. It only has an influence on the size of the db as we are free
|
||||
// to shorten it again when displaying
|
||||
#define INDEX_ABSTRACT_SIZE 250
|
||||
|
||||
// This is the size of the abstract that we synthetize out of query
|
||||
// term contexts at query time
|
||||
#define MA_ABSTRACT_SIZE 250
|
||||
// This is how many words (context size) we keep around query terms
|
||||
// when building the abstract
|
||||
#define MA_EXTRACT_WIDTH 4
|
||||
|
||||
// Truncate longer path and uniquize with hash . The goal for this is
|
||||
// to avoid xapian max term length limitations, not to gain space (we
|
||||
@ -81,6 +70,7 @@ const static string rclSyntAbs = "?!#@";
|
||||
// ones for indexing or query as there is not much in common.
|
||||
class Native {
|
||||
public:
|
||||
Db *m_db;
|
||||
bool m_isopen;
|
||||
bool m_iswritable;
|
||||
Db::OpenMode m_mode;
|
||||
@ -106,8 +96,9 @@ class Native {
|
||||
Xapian::docid docid,
|
||||
const list<string>& terms);
|
||||
|
||||
Native()
|
||||
: m_isopen(false), m_iswritable(false), m_mode(Db::DbRO), enquire(0)
|
||||
Native(Db *db)
|
||||
: m_db(db),
|
||||
m_isopen(false), m_iswritable(false), m_mode(Db::DbRO), enquire(0)
|
||||
{ }
|
||||
~Native() {
|
||||
delete enquire;
|
||||
@ -149,9 +140,10 @@ class Native {
|
||||
};
|
||||
|
||||
Db::Db()
|
||||
: m_qOpts(QO_NONE)
|
||||
: m_qOpts(QO_NONE), m_idxAbsTruncLen(250), m_synthAbsLen(250),
|
||||
m_synthAbsWordCtxLen(4)
|
||||
{
|
||||
m_ndb = new Native;
|
||||
m_ndb = new Native(this);
|
||||
}
|
||||
|
||||
Db::~Db()
|
||||
@ -282,7 +274,7 @@ bool Db::close()
|
||||
LOGDEB(("Rcl:Db: Called xapian flush\n"));
|
||||
}
|
||||
delete m_ndb;
|
||||
m_ndb = new Native;
|
||||
m_ndb = new Native(this);
|
||||
if (m_ndb)
|
||||
return true;
|
||||
} catch (const Xapian::Error &e) {
|
||||
@ -442,6 +434,19 @@ bool dumb_string(const string &in, string &out)
|
||||
return true;
|
||||
}
|
||||
|
||||
// Let our user set the parameters for abstract processing
|
||||
void Db::setAbstractParams(int idxtrunc, int syntlen, int syntctxlen)
|
||||
{
|
||||
LOGDEB(("Db::setAbstractParams: trunc %d syntlen %d ctxlen %d\n",
|
||||
idxtrunc, syntlen, syntctxlen));
|
||||
if (idxtrunc > 0 && idxtrunc < 2000)
|
||||
m_idxAbsTruncLen = idxtrunc;
|
||||
if (syntlen > 0 && syntlen < 2000)
|
||||
m_synthAbsLen = syntlen;
|
||||
if (syntctxlen > 0 && syntctxlen < 20)
|
||||
m_synthAbsWordCtxLen = syntctxlen;
|
||||
}
|
||||
|
||||
// Add document in internal form to the database: index the terms in
|
||||
// the title abstract and body and add special terms for file name,
|
||||
// date, mime type ... , create the document data record (more
|
||||
@ -457,14 +462,16 @@ bool Db::add(const string &fn, const Doc &idoc,
|
||||
|
||||
// Truncate abstract, title and keywords to reasonable lengths. If
|
||||
// abstract is currently empty, we make up one with the beginning
|
||||
// of the document.
|
||||
// of the document. This is then not indexed, but part of the doc
|
||||
// data so that we can return it to a query without having to
|
||||
// decode the original file.
|
||||
bool syntabs = false;
|
||||
if (doc.abstract.empty()) {
|
||||
syntabs = true;
|
||||
doc.abstract = rclSyntAbs +
|
||||
truncate_to_word(doc.text, INDEX_ABSTRACT_SIZE);
|
||||
truncate_to_word(doc.text, m_idxAbsTruncLen);
|
||||
} else {
|
||||
doc.abstract = truncate_to_word(doc.abstract, INDEX_ABSTRACT_SIZE);
|
||||
doc.abstract = truncate_to_word(doc.abstract, m_idxAbsTruncLen);
|
||||
}
|
||||
doc.abstract = neutchars(doc.abstract, "\n\r");
|
||||
doc.title = truncate_to_word(doc.title, 100);
|
||||
@ -513,14 +520,20 @@ bool Db::add(const string &fn, const Doc &idoc,
|
||||
splitter.text_to_words(noacc);
|
||||
splitData.basepos += splitData.curpos + 100;
|
||||
|
||||
// Split and index abstract
|
||||
// Split and index abstract. We don't do this if it is synthetic
|
||||
// any more (this used to give a relevance boost to the beginning
|
||||
// of text, why ?)
|
||||
LOGDEB2(("Db::add: split abstract [%s]\n", doc.abstract.c_str()));
|
||||
if (!dumb_string(syntabs ? doc.abstract.substr(rclSyntAbs.length()) :
|
||||
doc.abstract, noacc)) {
|
||||
LOGERR(("Db::add: dumb_string failed\n"));
|
||||
return false;
|
||||
if (!syntabs) {
|
||||
// syntabs indicator test kept here in case we want to go back
|
||||
// to indexing synthetic abstracts one day
|
||||
if (!dumb_string(syntabs ? doc.abstract.substr(rclSyntAbs.length()) :
|
||||
doc.abstract, noacc)) {
|
||||
LOGERR(("Db::add: dumb_string failed\n"));
|
||||
return false;
|
||||
}
|
||||
splitter.text_to_words(noacc);
|
||||
}
|
||||
splitter.text_to_words(noacc);
|
||||
splitData.basepos += splitData.curpos + 100;
|
||||
|
||||
////// Special terms for metadata
|
||||
@ -1182,17 +1195,21 @@ bool Native::dbDataToRclDoc(std::string &data, Doc &doc,
|
||||
parms.get(string("caption"), doc.title);
|
||||
parms.get(string("keywords"), doc.keywords);
|
||||
parms.get(string("abstract"), doc.abstract);
|
||||
// Possibly remove synthetic abstract indicator (if it's there, we
|
||||
// used to index the beginning of the text as abstract).
|
||||
bool syntabs = false;
|
||||
if (doc.abstract.find(rclSyntAbs) == 0) {
|
||||
doc.abstract = doc.abstract.substr(rclSyntAbs.length());
|
||||
syntabs = true;
|
||||
}
|
||||
// If the option is set and the abstract is synthetic or empty , build
|
||||
// abstract from position data.
|
||||
if ((qopts & Db::QO_BUILD_ABSTRACT) && !terms.empty()) {
|
||||
LOGDEB1(("dbDataToRclDoc:: building abstract from position data\n"));
|
||||
LOGDEB(("dbDataToRclDoc:: building abstract from position data\n"));
|
||||
if (doc.abstract.empty() || syntabs ||
|
||||
(qopts & Db::QO_REPLACE_ABSTRACT))
|
||||
doc.abstract = makeAbstract(docid, terms);
|
||||
}
|
||||
}
|
||||
parms.get(string("ipath"), doc.ipath);
|
||||
parms.get(string("fbytes"), doc.fbytes);
|
||||
parms.get(string("dbytes"), doc.dbytes);
|
||||
@ -1397,6 +1414,7 @@ string Native::makeAbstract(Xapian::docid docid, const list<string>& terms)
|
||||
// remember the position and its neighbours
|
||||
vector<unsigned int> qtermposs; // The term positions
|
||||
set<unsigned int> chunkposs; // All the positions we shall populate
|
||||
int totaloccs = 0;
|
||||
for (list<string>::const_iterator qit = terms.begin(); qit != terms.end();
|
||||
qit++) {
|
||||
Xapian::PositionIterator pos;
|
||||
@ -1409,15 +1427,15 @@ string Native::makeAbstract(Xapian::docid docid, const list<string>& terms)
|
||||
unsigned int ipos = *pos;
|
||||
LOGDEB1(("Abstract: [%s] at %d\n", qit->c_str(), ipos));
|
||||
// Possibly extend the array. Do it in big chunks
|
||||
if (ipos + MA_EXTRACT_WIDTH >= buf.size()) {
|
||||
buf.resize(ipos + MA_EXTRACT_WIDTH + 1000);
|
||||
if (ipos + m_db->m_synthAbsWordCtxLen >= buf.size()) {
|
||||
buf.resize(ipos + m_db->m_synthAbsWordCtxLen + 1000);
|
||||
}
|
||||
buf[ipos] = *qit;
|
||||
// Remember the term position
|
||||
qtermposs.push_back(ipos);
|
||||
// Add adjacent slots to the set to populate at next step
|
||||
for (unsigned int ii = MAX(0, ipos-MA_EXTRACT_WIDTH);
|
||||
ii <= MIN(ipos+MA_EXTRACT_WIDTH, buf.size()-1); ii++) {
|
||||
for (unsigned int ii = MAX(0, ipos-m_db->m_synthAbsWordCtxLen);
|
||||
ii <= MIN(ipos+m_db->m_synthAbsWordCtxLen, buf.size()-1); ii++) {
|
||||
chunkposs.insert(ii);
|
||||
}
|
||||
// Limit the number of occurrences we keep for each
|
||||
@ -1427,6 +1445,9 @@ string Native::makeAbstract(Xapian::docid docid, const list<string>& terms)
|
||||
}
|
||||
} catch (...) {
|
||||
}
|
||||
// Limit total size
|
||||
if (totaloccs++ > 100)
|
||||
break;
|
||||
}
|
||||
|
||||
LOGDEB1(("Abstract:%d:chosen number of positions %d. Populating\n",
|
||||
@ -1470,21 +1491,21 @@ string Native::makeAbstract(Xapian::docid docid, const list<string>& terms)
|
||||
for (vector<unsigned int>::const_iterator it = qtermposs.begin();
|
||||
it != qtermposs.end(); it++) {
|
||||
unsigned int ipos = *it;
|
||||
unsigned int start = MAX(0, ipos-MA_EXTRACT_WIDTH);
|
||||
unsigned int end = MIN(ipos+MA_EXTRACT_WIDTH, buf.size()-1);
|
||||
unsigned int start = MAX(0, ipos-m_db->m_synthAbsWordCtxLen);
|
||||
unsigned int end = MIN(ipos+m_db->m_synthAbsWordCtxLen, buf.size()-1);
|
||||
string chunk;
|
||||
for (unsigned int ii = start; ii <= end; ii++) {
|
||||
if (!buf[ii].empty()) {
|
||||
chunk += buf[ii] + " ";
|
||||
abslen += buf[ii].length();
|
||||
}
|
||||
if (abslen > MA_ABSTRACT_SIZE)
|
||||
if (int(abslen) > m_db->m_synthAbsLen)
|
||||
break;
|
||||
}
|
||||
if (end != buf.size()-1)
|
||||
chunk += "... ";
|
||||
mabs[ipos] = chunk;
|
||||
if (abslen > MA_ABSTRACT_SIZE)
|
||||
if (int(abslen) > m_db->m_synthAbsLen)
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
*/
|
||||
#ifndef _DB_H_INCLUDED_
|
||||
#define _DB_H_INCLUDED_
|
||||
/* @(#$Id: rcldb.h,v 1.35 2006-04-27 06:12:10 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: rcldb.h,v 1.36 2006-09-13 13:53:35 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include <string>
|
||||
#include <list>
|
||||
@ -171,6 +171,8 @@ class Db {
|
||||
std::list<std::string> getStemLangs();
|
||||
|
||||
string getDbDir();
|
||||
void setAbstractParams(int idxTrunc, int synthLen, int syntCtxLen);
|
||||
|
||||
private:
|
||||
|
||||
string m_filterTopDir; // Current query filter on subtree top directory
|
||||
@ -183,6 +185,17 @@ private:
|
||||
// xapian)-specific defs to show in here
|
||||
|
||||
unsigned int m_qOpts;
|
||||
|
||||
// This is how long an abstract we keep or build from beginning of
|
||||
// text when indexing. It only has an influence on the size of the
|
||||
// db as we are free to shorten it again when displaying
|
||||
int m_idxAbsTruncLen;
|
||||
// This is the size of the abstract that we synthesize out of query
|
||||
// term contexts at *query time*
|
||||
int m_synthAbsLen;
|
||||
// This is how many words (context size) we keep around query terms
|
||||
// when building the abstract
|
||||
int m_synthAbsWordCtxLen;
|
||||
|
||||
bool reOpen(); // Close/open, same mode/opts
|
||||
/* Copyconst and assignment private and forbidden */
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
# @(#$Id: recoll.conf.in,v 1.10 2006-09-08 08:51:47 dockes Exp $ (C) 2004 J.F.Dockes
|
||||
# @(#$Id: recoll.conf.in,v 1.11 2006-09-13 13:53:35 dockes Exp $ (C) 2004 J.F.Dockes
|
||||
#
|
||||
# Recoll default configuration file. This should be copied to
|
||||
# ~/.recoll/recoll.conf
|
||||
@ -56,6 +56,9 @@ usesystemfilecommand = 1
|
||||
# know? (we can otherwise just ignore them)
|
||||
indexallfilenames = 1
|
||||
|
||||
# Length of abstracts we store while indexing. Longer will make for a
|
||||
# bigger db
|
||||
# idxabsmlen = 250
|
||||
|
||||
# You could specify different parameters for a subdirectory like this:
|
||||
#[~/hungariandocs/plain]
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user