diff --git a/src/qtgui/snippets.ui b/src/qtgui/snippets.ui
index 5e3b3892..0d7d4613 100644
--- a/src/qtgui/snippets.ui
+++ b/src/qtgui/snippets.ui
@@ -6,8 +6,8 @@
0
0
- 516
- 395
+ 640
+ 400
diff --git a/src/qtgui/snippets_w.cpp b/src/qtgui/snippets_w.cpp
index 917c2041..751c3624 100644
--- a/src/qtgui/snippets_w.cpp
+++ b/src/qtgui/snippets_w.cpp
@@ -50,6 +50,17 @@ void SnippetsW::init()
if (m_source.isNull())
return;
+ // Make title out of file name if none yet
+ string titleOrFilename;
+ string utf8fn;
+ m_doc.getmeta(Rcl::Doc::keytt, &titleOrFilename);
+ m_doc.getmeta(Rcl::Doc::keyfn, &utf8fn);
+ if (titleOrFilename.empty()) {
+ titleOrFilename = utf8fn;
+ }
+
+ setWindowTitle(QString::fromUtf8(titleOrFilename.c_str()));
+
vector > vpabs;
m_source->getAbstract(m_doc, vpabs);
diff --git a/src/query/docseq.h b/src/query/docseq.h
index 69169975..4d3ba12c 100644
--- a/src/query/docseq.h
+++ b/src/query/docseq.h
@@ -98,7 +98,6 @@ class DocSequence {
virtual bool getAbstract(Rcl::Doc& doc,
std::vector >& abs)
{
- fprintf(stderr, "DocSequence::getAbstract/pair\n");
abs.push_back(std::pair(0,
doc.meta[Rcl::Doc::keyabs]));
return true;
diff --git a/src/query/docseqdb.cpp b/src/query/docseqdb.cpp
index a1e1762d..195519a8 100644
--- a/src/query/docseqdb.cpp
+++ b/src/query/docseqdb.cpp
@@ -65,19 +65,32 @@ int DocSequenceDb::getResCnt()
return m_rescnt;
}
+// This one only gets called to fill-up the snippets window
+// We ignore most abstract/snippets preferences.
bool DocSequenceDb::getAbstract(Rcl::Doc &doc,
vector >& vpabs)
{
LOGDEB(("DocSequenceDb::getAbstract/pair\n"));
setQuery();
- if (m_q->whatDb() &&
- m_queryBuildAbstract && (doc.syntabs || m_queryReplaceAbstract)) {
- m_q->whatDb()->makeDocAbstract(doc, m_q.getptr(), vpabs);
+
+ // Have to put the limit somewhere.
+ int maxoccs = 500;
+ Rcl::abstract_result ret = Rcl::ABSRES_ERROR; // stays ABSRES_ERROR when whatDb() yields no db
+ if (m_q->whatDb()) {
+ ret = m_q->whatDb()->makeDocAbstract(doc, m_q.getptr(), vpabs,
+ maxoccs,
+ m_q->whatDb()->getAbsCtxLen()+ 2); // context width: the db's configured value plus 2
}
if (vpabs.empty())
vpabs.push_back(pair(0, doc.meta[Rcl::Doc::keyabs]));
+
+ // If the list was probably truncated, indicate it.
+ if (ret == Rcl::ABSRES_TRUNC)
+ vpabs.push_back(pair(-1, "[...]")); // marker entry appended last, with page value -1
+
return true;
}
+
bool DocSequenceDb::getAbstract(Rcl::Doc &doc, vector& vabs)
{
setQuery();
diff --git a/src/query/docseqdb.h b/src/query/docseqdb.h
index e4d1ad67..7bdef8f5 100644
--- a/src/query/docseqdb.h
+++ b/src/query/docseqdb.h
@@ -31,7 +31,11 @@ class DocSequenceDb : public DocSequence {
virtual bool getDoc(int num, Rcl::Doc &doc, string * = 0);
virtual int getResCnt();
virtual void getTerms(HighlightData& hld);
+
+ // Called to fill-up the snippets window. Ignores
+ // buildabstract/replaceabstract and syntabslen
virtual bool getAbstract(Rcl::Doc &doc, vector >&);
+
virtual bool getAbstract(Rcl::Doc &doc, vector&);
virtual int getFirstMatchPage(Rcl::Doc&);
virtual bool getEnclosing(Rcl::Doc& doc, Rcl::Doc& pdoc);
diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp
index f26ae7ae..dec9fdb5 100644
--- a/src/rcldb/rcldb.cpp
+++ b/src/rcldb/rcldb.cpp
@@ -244,7 +244,7 @@ void Db::Native::setDbWideQTermsFreqs(Query *query)
for (vector::const_iterator qit = qterms.begin();
qit != qterms.end(); qit++) {
query->m_nq->termfreqs[*qit] = xrdb.get_termfreq(*qit) / doccnt;
- LOGABS(("makeAbstract: [%s] db freq %.1e\n", qit->c_str(),
+ LOGABS(("set..QTermFreqs: [%s] db freq %.1e\n", qit->c_str(),
query->m_nq->termfreqs[*qit]));
}
}
@@ -298,6 +298,7 @@ double Db::Native::qualityTerms(Xapian::docid docid,
}
#ifdef DEBUGABSTRACT
+ LOGDEB(("Db::qualityTerms:\n"));
for (multimap::reverse_iterator qit = byQ.rbegin();
qit != byQ.rend(); qit++) {
LOGDEB(("%.1e->[%s]\n", qit->first, qit->second.c_str()));
@@ -415,12 +416,13 @@ int Db::Native::getFirstMatchPage(Xapian::docid docid, Query *query)
//
// DatabaseModified and other general exceptions are catched and
// possibly retried by our caller
-bool Db::Native::makeAbstract(Xapian::docid docid, Query *query,
- vector >& vabs)
+abstract_result Db::Native::makeAbstract(Xapian::docid docid, Query *query,
+ vector >& vabs,
+ int imaxoccs, int ictxwords)
{
Chrono chron;
- LOGDEB2(("makeAbstract:%d: maxlen %d wWidth %d\n", chron.ms(),
- m_rcldb->m_synthAbsLen, m_rcldb->m_synthAbsWordCtxLen));
+ LOGDEB2(("makeAbstract:%d: maxlen %d wWidth %d imaxoccs %d\n", chron.ms(),
+ m_rcldb->m_synthAbsLen, m_rcldb->m_synthAbsWordCtxLen, imaxoccs));
// The (unprefixed) terms matched by this document
vector matchedTerms;
@@ -430,7 +432,7 @@ bool Db::Native::makeAbstract(Xapian::docid docid, Query *query,
noPrefixList(iterms, matchedTerms);
if (matchedTerms.empty()) {
LOGDEB(("makeAbstract::Empty term list\n"));
- return false;
+ return ABSRES_ERROR;
}
}
listList("Match terms: ", matchedTerms);
@@ -453,7 +455,7 @@ bool Db::Native::makeAbstract(Xapian::docid docid, Query *query,
// This can't happen, but would crash us
if (totalweight == 0.0) {
LOGERR(("makeAbstract: totalweight == 0.0 !\n"));
- return false;
+ return ABSRES_ERROR;
}
///////////////////
@@ -474,13 +476,17 @@ bool Db::Native::makeAbstract(Xapian::docid docid, Query *query,
// abstract size parameter in characters, we basically only deal
// with words. We used to limit the character size at the end, but
// this damaged our careful selection of terms
- const unsigned int maxtotaloccs =
+ const unsigned int maxtotaloccs = imaxoccs > 0 ? imaxoccs :
m_rcldb->m_synthAbsLen /(7 * (m_rcldb->m_synthAbsWordCtxLen+1));
- LOGABS(("makeAbstract:%d: mxttloccs %d\n", chron.ms(), maxtotaloccs));
+ int ctxwords = ictxwords == -1 ? m_rcldb->m_synthAbsWordCtxLen : ictxwords;
+ LOGABS(("makeAbstract:%d: mxttloccs %d ctxwords %d\n",
+ chron.ms(), maxtotaloccs, ctxwords));
// This is used to mark positions overlapped by a multi-word match term
const string occupiedmarker("?");
+ abstract_result ret = ABSRES_OK;
+
// Let's go populate
for (multimap::reverse_iterator qit = byQ.rbegin();
qit != byQ.rend(); qit++) {
@@ -522,7 +528,7 @@ bool Db::Native::makeAbstract(Xapian::docid docid, Query *query,
// step by inserting empty strings. Special provisions
// for adding ellipsis and for positions overlapped by
// the match term.
- unsigned int sta = MAX(0, ipos-m_rcldb->m_synthAbsWordCtxLen);
+ unsigned int sta = MAX(0, ipos - ctxwords);
unsigned int sto = ipos + qtrmwrdcnt-1 +
m_rcldb->m_synthAbsWordCtxLen;
for (unsigned int ii = sta; ii <= sto; ii++) {
@@ -548,14 +554,20 @@ bool Db::Native::makeAbstract(Xapian::docid docid, Query *query,
// Limit to allocated occurences and total size
if (++occurrences >= maxoccs ||
- totaloccs >= maxtotaloccs)
+ totaloccs >= maxtotaloccs) {
+ ret = ABSRES_TRUNC;
+ LOGDEB(("Db::makeAbstract: max occurrences cutoff\n"));
break;
+ }
}
} catch (...) {
// Term does not occur. No problem.
}
- if (totaloccs >= maxtotaloccs)
+ if (totaloccs >= maxtotaloccs) {
+ ret = ABSRES_TRUNC;
+ LOGDEB(("Db::makeAbstract: max1 occurrences cutoff\n"));
break;
+ }
}
LOGABS(("makeAbstract:%d:chosen number of positions %d\n",
chron.millis(), totaloccs));
@@ -564,7 +576,7 @@ bool Db::Native::makeAbstract(Xapian::docid docid, Query *query,
// etc. but not elsewhere ?
if (totaloccs == 0) {
LOGDEB1(("makeAbstract: no occurrences\n"));
- return false;
+ return ABSRES_ERROR;
}
// Walk all document's terms position lists and populate slots
@@ -582,6 +594,7 @@ bool Db::Native::makeAbstract(Xapian::docid docid, Query *query,
if ('A' <= (*term).at(0) && (*term).at(0) <= 'Z')
continue;
if (cutoff-- < 0) {
+ ret = ABSRES_TRUNC;
LOGDEB0(("makeAbstract: max term count cutoff\n"));
break;
}
@@ -590,6 +603,7 @@ bool Db::Native::makeAbstract(Xapian::docid docid, Query *query,
for (pos = xrdb.positionlist_begin(docid, *term);
pos != xrdb.positionlist_end(docid, *term); pos++) {
if (cutoff-- < 0) {
+ ret = ABSRES_TRUNC;
LOGDEB0(("makeAbstract: max term count cutoff\n"));
break;
}
@@ -600,8 +614,8 @@ bool Db::Native::makeAbstract(Xapian::docid docid, Query *query,
// at the same position, we want to keep only the
// first one (ie: dockes and dockes@wanadoo.fr)
if (vit->second.empty()) {
- LOGABS(("makeAbstract: populating: [%s] at %d\n",
- (*term).c_str(), *pos));
+ LOGDEB2(("makeAbstract: populating: [%s] at %d\n",
+ (*term).c_str(), *pos));
sparseDoc[*pos] = *term;
}
}
@@ -665,7 +679,7 @@ bool Db::Native::makeAbstract(Xapian::docid docid, Query *query,
vabs.push_back(pair(page, chunk));
LOGDEB2(("makeAbtract: done in %d mS\n", chron.millis()));
- return true;
+ return ret;
}
/* Rcl::Db methods ///////////////////////////////// */
@@ -2119,17 +2133,22 @@ bool Db::stemDiffers(const string& lang, const string& word,
return true;
}
-bool Db::makeDocAbstract(Doc &doc, Query *query,
- vector >& abstract)
+abstract_result Db::makeDocAbstract(Doc &doc, Query *query,
+ vector >& abstract,
+ int maxoccs, int ctxwords) // both are forwarded unchanged to Native::makeAbstract
{
+ LOGDEB(("makeDocAbstract: maxoccs %d ctxwords %d\n", maxoccs, ctxwords));
if (!m_ndb || !m_ndb->m_isopen) {
LOGERR(("Db::makeDocAbstract: no db\n"));
- return false;
+ return ABSRES_ERROR;
}
- bool ret = false;
- XAPTRY(ret = m_ndb->makeAbstract(doc.xdocid, query, abstract),
+ abstract_result ret = ABSRES_ERROR; // value kept if makeAbstract never assigns ret
+ XAPTRY(ret = m_ndb->makeAbstract(doc.xdocid, query, abstract,
+ maxoccs, ctxwords),
m_ndb->xrdb, m_reason);
- return (ret && m_reason.empty()) ? true : false;
+ if (!m_reason.empty()) // NOTE(review): m_reason is presumably filled in by XAPTRY on failure - confirm
+ return ABSRES_ERROR;
+ return ret; // whatever Native::makeAbstract reported (may be ABSRES_TRUNC)
}
bool Db::makeDocAbstract(Doc &doc, Query *query, vector& abstract)
diff --git a/src/rcldb/rcldb.h b/src/rcldb/rcldb.h
index 96630b16..80ef2eb8 100644
--- a/src/rcldb/rcldb.h
+++ b/src/rcldb/rcldb.h
@@ -66,6 +66,11 @@ enum value_slot {
VALUE_SIG = 10 // Doc sig as chosen by app (ex: mtime+size
};
+enum abstract_result { // Outcome of the makeAbstract()/makeDocAbstract() pair-vector variants
+ ABSRES_ERROR = 0, // failure: no db, no matched terms or occurrences, or a db error reason was set
+ ABSRES_OK = 1, // abstract built without hitting any cutoff
+ ABSRES_TRUNC = 2 // abstract built, but an occurrence or term-count cutoff was hit
+};
class SearchData;
class TermIter;
class Query;
@@ -220,6 +225,10 @@ class Db {
/** Set parameters for synthetic abstract generation */
void setAbstractParams(int idxTrunc, int synthLen, int syntCtxLen);
+ int getAbsCtxLen() const // Read accessor for m_synthAbsWordCtxLen, the context width used when building abstracts
+ {
+ return m_synthAbsWordCtxLen;
+ }
/** Build synthetic abstract for document, extracting chunks relevant for
* the input query. This uses index data only (no access to the file) */
@@ -227,9 +236,10 @@ class Db {
bool makeDocAbstract(Doc &doc, Query *query, string& abstract);
// Returned as a snippets vector
bool makeDocAbstract(Doc &doc, Query *query, vector& abstract);
- // Returned as a vector of page,snippet page is 0 if unknown
- bool makeDocAbstract(Doc &doc, Query *query,
- vector >& abstract);
+ // Returned as a vector of (page, snippet) pairs; page is 0 if unknown
+ abstract_result makeDocAbstract(Doc &doc, Query *query,
+ vector >& abstract,
+ int maxoccs= -1, int ctxwords = -1);
/** Retrieve detected page breaks positions */
int getFirstMatchPage(Doc &doc, Query *query);
diff --git a/src/rcldb/rcldb_p.h b/src/rcldb/rcldb_p.h
index f5af8ece..efbe8f91 100644
--- a/src/rcldb/rcldb_p.h
+++ b/src/rcldb/rcldb_p.h
@@ -89,8 +89,9 @@ class Db::Native {
const vector& terms,
std::multimap& byQ);
void setDbWideQTermsFreqs(Query *query);
- bool makeAbstract(Xapian::docid id, Query *query,
- vector >&);
+ abstract_result makeAbstract(Xapian::docid id, Query *query,
+ vector >&, int maxoccs = -1,
+ int ctxwords = -1);
bool getPagePositions(Xapian::docid docid, vector& vpos);
int getFirstMatchPage(Xapian::docid docid, Query *query);
int getPageNumberForPosition(const vector& pbreaks, unsigned int pos);
diff --git a/src/sampleconf/recoll.conf.in b/src/sampleconf/recoll.conf.in
index d79eeb0d..bfe24711 100644
--- a/src/sampleconf/recoll.conf.in
+++ b/src/sampleconf/recoll.conf.in
@@ -81,6 +81,8 @@ indexstemminglanguages = english
# unac_except_trans = Ää Öö Üü ää öö üü ßss
# In French, you probably want to decompose oe and ae
# unac_except_trans = œoe Œoe æae Æae
+# Actually, this seems a reasonable default for all until someone protests.
+unac_except_trans = åå Åå ää Ää öö Öö üü Üü ßss œoe Œoe æae ÆAE ﬁfi ﬂfl
# Where to store the database (directory). This may be an absolute path,
# else it is taken as relative to the configuration directory (-c argument
diff --git a/website/index.html.en b/website/index.html.en
index 968b5f67..baf58fb4 100644
--- a/website/index.html.en
+++ b/website/index.html.en
@@ -86,6 +86,13 @@
News
+ - 2012-09-21: an
+ easy
+ way to extend the "Beagle queue"
+ Recoll web history indexing mechanism to other browsers than
+ Firefox (Elinks in this case).
+
+
- 2012-09-13: the next Recoll version will maybe acquire switchable
case and diacritics sensitivity. I am writing
a few pages about the