Add possibility to display a list of sub-documents for a given result

This commit is contained in:
Jean-Francois Dockes 2013-04-24 16:33:53 +02:00
parent 782b60dfa1
commit 2b80c77c23
20 changed files with 480 additions and 118 deletions

View File

@ -81,5 +81,6 @@ DEF_CSTR(dj_keyipath, "ipath");
DEF_CSTR(dj_keymd5, "md5");
DEF_CSTR(dj_keymt, "mimetype");
DEF_CSTR(dj_keydocsize, "docsize");
DEF_CSTR(dj_keyanc, "rclanc");
#endif /* _CSTR_H_INCLUDED_ */

View File

@ -744,6 +744,12 @@ FsIndexer::processonefile(RclConfig *config,
hadNullIpath = true;
if (m_havemdreapers)
reapmetadata(mdreapers, fn, doc);
if (hadNonNullIpath) {
// Note that only the filters can reliably compute
// this. What we do is dependent on the doc order (if
// we see the top doc first, we won't set the flag)
doc.haschildren = true;
}
} else {
hadNonNullIpath = true;
make_udi(fn, doc.ipath, udi);
@ -832,6 +838,7 @@ FsIndexer::processonefile(RclConfig *config,
Rcl::Doc fileDoc;
fileDoc.fmtime = ascdate;
fileDoc.meta[Rcl::Doc::keyfn] = utf8fn;
fileDoc.haschildren = true;
fileDoc.mimetype = interner.getMimetype();
fileDoc.url = cstr_fileu + fn;
if (m_havelocalfields)

View File

@ -573,6 +573,8 @@ bool FileInterner::dijontorcl(Rcl::Doc& doc)
}
} else if (it->first == cstr_dj_keymd) {
doc.dmtime = it->second;
} else if (it->first == cstr_dj_keyanc) {
doc.haschildren = true;
} else if (it->first == cstr_dj_keyorigcharset) {
doc.origcharset = it->second;
} else if (it->first == cstr_dj_keyfn) {

View File

@ -183,12 +183,15 @@ bool MimeHandlerMail::next_document()
if (m_idx == -1) {
m_metaData[cstr_dj_keymt] = cstr_textplain;
res = processMsg(m_bincdoc, 0);
LOGDEB1(("MimeHandlerMail::next_document: mimetype %s\n",
m_metaData[cstr_dj_keymt].c_str()));
LOGDEB1(("MimeHandlerMail::next_document: mt %s, att cnt %d\n",
m_metaData[cstr_dj_keymt].c_str(), m_attachments.size()));
const string& txt = m_metaData[cstr_dj_keycontent];
if (m_startoftext < txt.size())
m_metaData[cstr_dj_keyabstract] =
truncate_to_word(txt.substr(m_startoftext), 250);
if (m_attachments.size() > 0) {
m_metaData[cstr_dj_keyanc] = "t";
}
} else {
m_metaData[cstr_dj_keyabstract].clear();
res = processAttach();

View File

@ -70,6 +70,7 @@ using std::pair;
#include "internfile.h"
#include "docseqdb.h"
#include "docseqhist.h"
#include "docseqdocs.h"
#include "confguiindex.h"
#include "restable.h"
#include "listdialog.h"
@ -87,7 +88,6 @@ using namespace confgui;
#include "rclhelp.h"
#include "moc_rclmain_w.cpp"
extern "C" int XFlush(void *);
QString g_stringAllStem, g_stringNoStem;
static const QKeySequence quitKeySeq("Ctrl+q");
@ -274,7 +274,7 @@ void RclMain::init()
connect(toggleFullScreenAction, SIGNAL(activated()),
this, SLOT(toggleFullScreen()));
connect(actionShowQueryDetails, SIGNAL(activated()),
this, SLOT(showQueryDetails()));
reslist, SLOT(showQueryDetails()));
connect(periodictimer, SIGNAL(timeout()),
this, SLOT(periodic100()));
connect(this, SIGNAL(docSourceChanged(RefCntr<DocSequence>)),
@ -329,6 +329,8 @@ void RclMain::init()
this, SLOT(enablePrevPage(bool)));
connect(reslist, SIGNAL(docEditClicked(Rcl::Doc)),
this, SLOT(startNativeViewer(Rcl::Doc)));
connect(reslist, SIGNAL(showSubDocs(Rcl::Doc)),
this, SLOT(showSubDocs(Rcl::Doc)));
connect(reslist, SIGNAL(docSaveToFileClicked(Rcl::Doc)),
this, SLOT(saveDocToFile(Rcl::Doc)));
connect(reslist, SIGNAL(editRequested(Rcl::Doc)),
@ -337,8 +339,6 @@ void RclMain::init()
this, SLOT(startPreview(int, Rcl::Doc, int)));
connect(reslist, SIGNAL(previewRequested(Rcl::Doc)),
this, SLOT(startPreview(Rcl::Doc)));
connect(reslist, SIGNAL(headerClicked()),
this, SLOT(showQueryDetails()));
if (prefs.keepSort && prefs.sortActive) {
m_sortspec.field = (const char *)prefs.sortField.toUtf8();
@ -1541,6 +1541,34 @@ static bool lookForHtmlBrowser(string &exefile)
return false;
}
// Open a secondary, top-level result list window displaying the
// sub-documents of the input document, as returned by
// Rcl::Db::getSubDocs(). Pops up a message box and returns if the
// index can't be opened or the sub-document list can't be retrieved.
void RclMain::showSubDocs(Rcl::Doc doc)
{
    LOGDEB(("RclMain::showSubDocs\n"));
    string reason;
    if (!maybeOpenDb(reason)) {
        QMessageBox::critical(0, "Recoll", QString(reason.c_str()));
        return;
    }
    vector<Rcl::Doc> docs;
    if (!rcldb->getSubDocs(doc, docs)) {
        QMessageBox::warning(0, "Recoll", QString("Can't get subdocs"));
        return;
    }
    // The same translated string is used for the title and the description
    const string title = qs2utf8s(tr("Sub-documents and attachments"));
    DocSequenceDocs *src = new DocSequenceDocs(rcldb, docs, title);
    src->setDescription(title);
    RefCntr<DocSequence>
        source(new DocSource(theconfig, RefCntr<DocSequence>(src)));
    ResList *res = new ResList();
    // The window has no parent and we keep no pointer to it: have Qt
    // delete it when it is closed, else every invocation leaks a widget.
    res->setAttribute(Qt::WA_DeleteOnClose);
    res->setRclMain(this);
    res->setIsMainList(false);
    res->setDocSource(source);
    res->readDocSource();
    res->show();
}
void RclMain::startNativeViewer(Rcl::Doc doc, int pagenum, QString term)
{
string apptag;
@ -1935,19 +1963,6 @@ QString RclMain::getQueryDescription()
return QString::fromUtf8(m_source->getDescription().c_str());
}
/** Show detailed expansion of a query */
void RclMain::showQueryDetails()
{
if (m_source.isNull())
return;
string oq = breakIntoLines(m_source->getDescription(), 100, 50);
QString str;
QString desc = tr("Result count (est.)") + ": " +
str.setNum(m_source->getResCnt()) + "<br>";
desc += tr("Query details") + ": " + QString::fromUtf8(oq.c_str());
QMessageBox::information(this, tr("Query details"), desc);
}
// User pressed a category button: set filter params in reslist
void RclMain::catgFilter(int id)
{

View File

@ -118,6 +118,7 @@ public slots:
virtual void enableNextPage(bool);
virtual void enablePrevPage(bool);
virtual void docExpand(Rcl::Doc);
virtual void showSubDocs(Rcl::Doc);
virtual void startPreview(int docnum, Rcl::Doc doc, int keymods);
virtual void startPreview(Rcl::Doc);
virtual void startNativeViewer(Rcl::Doc, int pagenum = -1,
@ -140,7 +141,6 @@ public slots:
virtual void on_actionShowResultsAsTable_toggled(bool on);
virtual void onSortDataChanged(DocSeqSortSpec);
virtual void resultCount(int);
virtual void showQueryDetails();
virtual void applyStyleSheet();
signals:

View File

@ -72,7 +72,7 @@ static const QKeySequence closeKeySeq("Ctrl+w");
class QtGuiResListPager : public ResListPager {
public:
QtGuiResListPager(ResList *p, int ps)
: ResListPager(ps), m_parent(p)
: ResListPager(ps), m_reslist(p)
{}
virtual bool append(const string& data);
virtual bool append(const string& data, int idx, const Rcl::Doc& doc);
@ -88,7 +88,7 @@ public:
virtual string absSep() {return (const char *)(prefs.abssep.toUtf8());}
virtual string iconUrl(RclConfig *, Rcl::Doc& doc);
private:
ResList *m_parent;
ResList *m_reslist;
};
#if 0
@ -110,7 +110,7 @@ bool QtGuiResListPager::append(const string& data)
{
LOGDEB2(("QtGuiReslistPager::appendString : %s\n", data.c_str()));
logdata(data.c_str());
m_parent->append(QString::fromUtf8(data.c_str()));
m_reslist->append(QString::fromUtf8(data.c_str()));
return true;
}
@ -118,24 +118,24 @@ bool QtGuiResListPager::append(const string& data, int docnum,
const Rcl::Doc&)
{
LOGDEB2(("QtGuiReslistPager::appendDoc: blockCount %d, %s\n",
m_parent->document()->blockCount(), data.c_str()));
m_reslist->document()->blockCount(), data.c_str()));
logdata(data.c_str());
#ifdef RESLIST_TEXTBROWSER
int blkcnt0 = m_parent->document()->blockCount();
m_parent->moveCursor(QTextCursor::End, QTextCursor::MoveAnchor);
m_parent->textCursor().insertBlock();
m_parent->insertHtml(QString::fromUtf8(data.c_str()));
m_parent->moveCursor(QTextCursor::Start, QTextCursor::MoveAnchor);
m_parent->ensureCursorVisible();
int blkcnt1 = m_parent->document()->blockCount();
int blkcnt0 = m_reslist->document()->blockCount();
m_reslist->moveCursor(QTextCursor::End, QTextCursor::MoveAnchor);
m_reslist->textCursor().insertBlock();
m_reslist->insertHtml(QString::fromUtf8(data.c_str()));
m_reslist->moveCursor(QTextCursor::Start, QTextCursor::MoveAnchor);
m_reslist->ensureCursorVisible();
int blkcnt1 = m_reslist->document()->blockCount();
for (int block = blkcnt0; block < blkcnt1; block++) {
m_parent->m_pageParaToReldocnums[block] = docnum;
m_reslist->m_pageParaToReldocnums[block] = docnum;
}
#else
QString sdoc = QString("<div class=\"rclresult\" rcldocnum=\"%1\">").arg(docnum);
m_parent->append(sdoc);
m_parent->append(QString::fromUtf8(data.c_str()));
m_parent->append("</div>");
m_reslist->append(sdoc);
m_reslist->append(QString::fromUtf8(data.c_str()));
m_reslist->append("</div>");
#endif
return true;
}
@ -276,7 +276,8 @@ static PlainToRichQtReslist g_hiliter;
/////////////////////////////////////
ResList::ResList(QWidget* parent, const char* name)
: RESLIST_PARENTCLASS(parent), m_parent(0)
: RESLIST_PARENTCLASS(parent), m_curPvDoc(-1), m_lstClckMod(0),
m_listId(0), m_rclmain(0), m_ismainlist(true), m_coninit(false)
{
if (!name)
setObjectName("resList");
@ -299,6 +300,7 @@ ResList::ResList(QWidget* parent, const char* name)
page()->setLinkDelegationPolicy(QWebPage::DelegateAllLinks);
settings()->setAttribute(QWebSettings::JavascriptEnabled, true);
#endif
setFont();
languageChange();
@ -311,13 +313,11 @@ ResList::ResList(QWidget* parent, const char* name)
connect(this, SIGNAL(highlighted(const QString &)),
this, SLOT(highlighted(const QString &)));
#endif
setContextMenuPolicy(Qt::CustomContextMenu);
connect(this, SIGNAL(customContextMenuRequested(const QPoint&)),
this, SLOT(createPopupMenu(const QPoint&)));
m_curPvDoc = -1;
m_lstClckMod = 0;
m_listId = 0;
m_pager = new QtGuiResListPager(this, prefs.respagesize);
m_pager->setHighLighter(&g_hiliter);
}
@ -379,6 +379,17 @@ void ResList::setDocSource(RefCntr<DocSequence> nsource)
{
LOGDEB(("ResList::setDocSource()\n"));
m_source = RefCntr<DocSequence>(new DocSource(theconfig, nsource));
if (!m_ismainlist && !m_coninit) {
m_coninit = true;
connect(new QShortcut(closeKeySeq, this), SIGNAL (activated()),
this, SLOT (close()));
connect(new QShortcut(quitKeySeq, this), SIGNAL (activated()),
m_rclmain, SLOT (fileExit()));
connect(this, SIGNAL(previewRequested(Rcl::Doc)),
m_rclmain, SLOT(startPreview(Rcl::Doc)));
connect(this, SIGNAL(docEditClicked(Rcl::Doc)),
m_rclmain, SLOT(startNativeViewer(Rcl::Doc)));
}
}
// A query was executed, or the filtering/sorting parameters changed,
@ -715,7 +726,6 @@ void ResList::displayPage()
// Possibly color paragraph of current preview if any
previewExposed(m_curPvDoc);
}
// Color paragraph (if any) of currently visible preview
@ -804,11 +814,11 @@ void ResList::mouseDoubleClickEvent(QMouseEvent *event)
void ResList::newSnippetsW(const Rcl::Doc& doc)
{
SnippetsW *sp = new SnippetsW(doc, m_source);
if (m_parent) {
if (m_rclmain) {
connect(sp, SIGNAL(startNativeViewer(Rcl::Doc, int, QString)),
m_parent, SLOT(startNativeViewer(Rcl::Doc, int, QString)));
m_rclmain, SLOT(startNativeViewer(Rcl::Doc, int, QString)));
connect(new QShortcut(quitKeySeq, sp), SIGNAL (activated()),
m_parent, SLOT (fileExit()));
m_rclmain, SLOT (fileExit()));
}
connect(new QShortcut(closeKeySeq, sp), SIGNAL (activated()),
sp, SLOT (close()));
@ -841,6 +851,18 @@ void ResList::newDupsW(const Rcl::Doc&, const vector<Rcl::Doc>& dups)
dialog.exec();
}
void ResList::showQueryDetails()
{
if (m_source.isNull())
return;
string oq = breakIntoLines(m_source->getDescription(), 100, 50);
QString str;
QString desc = tr("Result count (est.)") + ": " +
str.setNum(m_source->getResCnt()) + "<br>";
desc += tr("Query details") + ": " + QString::fromUtf8(oq.c_str());
QMessageBox::information(this, tr("Query details"), desc);
}
void ResList::linkWasClicked(const QUrl &url)
{
string ascurl = (const char *)url.toString().toAscii();;
@ -885,7 +907,7 @@ void ResList::linkWasClicked(const QUrl &url)
// Show query details
case 'H':
{
emit headerClicked();
showQueryDetails();
break;
}
@ -899,10 +921,15 @@ void ResList::linkWasClicked(const QUrl &url)
LOGERR(("ResList::linkWasClicked: can't get doc for %d\n", i));
return;
}
if (what == 'P')
emit docPreviewClicked(i, doc, m_lstClckMod);
else
if (what == 'P') {
if (m_ismainlist) {
emit docPreviewClicked(i, doc, m_lstClckMod);
} else {
emit previewRequested(doc);
}
} else {
emit docEditClicked(doc);
}
}
break;
@ -967,7 +994,8 @@ void ResList::createPopupMenu(const QPoint& pos)
if (havedoc)
doc.getmeta(Rcl::Doc::keyapptg, &apptag);
if (havedoc && !theconfig->getMimeViewerDef(doc.mimetype, apptag, 0).empty()) {
if (havedoc &&
!theconfig->getMimeViewerDef(doc.mimetype, apptag, 0).empty()) {
popup->addAction(tr("&Open"), this, SLOT(menuEdit()));
}
popup->addAction(tr("Copy &File Name"), this, SLOT(menuCopyFN()));
@ -990,6 +1018,10 @@ void ResList::createPopupMenu(const QPoint& pos)
popup->addAction(tr("Open &Snippets window"),
this, SLOT(menuOpenSnippets()));
if (havedoc && rcldb && rcldb->hasSubDocs(doc))
popup->addAction(tr("Show subdocuments / attachments"),
this, SLOT(menuShowSubDocs()));
popup->popup(mapToGlobal(pos));
}
@ -1051,6 +1083,13 @@ void ResList::menuOpenSnippets()
newSnippetsW(doc);
}
void ResList::menuShowSubDocs()
{
Rcl::Doc doc;
if (getDoc(m_popDoc, doc))
emit showSubDocs(doc);
}
void ResList::menuEdit()
{
Rcl::Doc doc;

View File

@ -69,7 +69,11 @@ class ResList : public RESLIST_PARENTCLASS
void setFont();
void setRclMain(RclMain *m)
{
m_parent = m;
m_rclmain = m;
}
// Record whether this object is the main result list (the
// constructor default) or a secondary one. The flag is consulted when
// setting up connections in setDocSource() and when choosing which
// preview signal to emit on a link click.
void setIsMainList(bool onoff)
{
m_ismainlist = onoff;
}
public slots:
virtual void setDocSource(RefCntr<DocSequence> nsource);
@ -89,11 +93,13 @@ class ResList : public RESLIST_PARENTCLASS
virtual void menuPreviewParent();
virtual void menuOpenParent();
virtual void menuOpenSnippets();
virtual void menuShowSubDocs();
virtual void previewExposed(int);
virtual void append(const QString &text);
virtual void readDocSource();
virtual void highlighted(const QString& link);
virtual void createPopupMenu(const QPoint& pos);
virtual void showQueryDetails();
signals:
void nextPageAvailable(bool);
@ -102,8 +108,8 @@ class ResList : public RESLIST_PARENTCLASS
void docPreviewClicked(int, Rcl::Doc, int);
void docSaveToFileClicked(Rcl::Doc);
void previewRequested(Rcl::Doc);
void showSubDocs(Rcl::Doc);
void editRequested(Rcl::Doc);
void headerClicked();
void docExpand(Rcl::Doc);
void wordSelect(QString);
void wordReplace(const QString&, const QString&);
@ -137,7 +143,9 @@ class ResList : public RESLIST_PARENTCLASS
// so we store the page and display it when done.
QString m_text;
#endif
RclMain *m_parent;
RclMain *m_rclmain;
bool m_ismainlist;
bool m_coninit;
virtual void displayPage(); // Display current page
static int newListId();

View File

@ -18,6 +18,7 @@
#include "filtseq.h"
#include "sortseq.h"
#include "debuglog.h"
#include "internfile.h"
string DocSequence::o_sort_trans;
string DocSequence::o_filt_trans;
@ -35,6 +36,19 @@ int DocSequence::getSeqSlice(int offs, int cnt, vector<ResListEntry>& result)
return ret;
}
// Default implementation: compute the url/ipath/udi of the enclosing
// document with FileInterner::getEnclosing(), then fetch it from the
// index supplied by getDb().
//
// @param doc the document for which we want the parent.
// @param pdoc output: the enclosing document.
// @return false if the enclosing document can't be computed, if the
//   sequence has no associated db, or if the document is not found
//   in the index (Db::getDoc() signals the latter with pc == -1).
bool DocSequence::getEnclosing(Rcl::Doc& doc, Rcl::Doc& pdoc)
{
    // Note: no need for setQuery here, we're just passing through a
    // query-independent request
    string udi;
    if (!FileInterner::getEnclosing(doc.url, doc.ipath, pdoc.url, pdoc.ipath,
                                    udi))
        return false;

    // getDb() implementations may return a null pointer (e.g. a
    // modifier with no underlying sequence): don't dereference blindly.
    Rcl::Db *db = getDb();
    if (db == 0) {
        LOGERR(("DocSequence::getEnclosing: no db\n"));
        return false;
    }
    bool dbret = db->getDoc(udi, pdoc);
    return dbret && pdoc.pc != -1;
}
// Remove stacked modifying sources (sort, filter) until we get to a real one
void DocSource::stripStack()
{
@ -107,3 +121,4 @@ bool DocSource::setSortSpec(const DocSeqSortSpec &s)
buildStack();
return true;
}

View File

@ -114,7 +114,7 @@ class DocSequence {
return false;
}
virtual bool getEnclosing(Rcl::Doc&, Rcl::Doc&) = 0;
virtual bool getEnclosing(Rcl::Doc&, Rcl::Doc&);
/** Get estimated total count in results */
virtual int getResCnt() = 0;
@ -159,10 +159,15 @@ class DocSequence {
o_sort_trans = sort;
o_filt_trans = filt;
}
virtual Rcl::Db *getDb() = 0;
protected:
static std::string o_sort_trans;
static std::string o_filt_trans;
std::string m_reason;
private:
std::string m_title;
};
@ -228,14 +233,29 @@ public:
return string();
return m_seq->getReason();
}
virtual std::string title() {return m_seq->title();}
virtual RefCntr<DocSequence> getSourceSeq() {return m_seq;}
virtual std::string title()
{
return m_seq->title();
}
virtual RefCntr<DocSequence> getSourceSeq()
{
return m_seq;
}
// Forward to the underlying sequence, or return a null pointer if
// there is none.
virtual Rcl::Db *getDb()
{
    return m_seq.isNull() ? 0 : m_seq->getDb();
}
protected:
RefCntr<DocSequence> m_seq;
};
class RclConfig;
// A DocSource can juggle docseqs of different kinds to implement
// sorting and filtering in ways depending on the base seqs capabilities
class DocSource : public DocSeqModifier {

View File

@ -23,7 +23,6 @@ using std::list;
#include "docseqdb.h"
#include "rcldb.h"
#include "debuglog.h"
#include "internfile.h"
#include "wasatorcl.h"
DocSequenceDb::DocSequenceDb(RefCntr<Rcl::Query> q, const string &t,
@ -129,17 +128,9 @@ int DocSequenceDb::getFirstMatchPage(Rcl::Doc &doc, string& term)
return -1;
}
bool DocSequenceDb::getEnclosing(Rcl::Doc& doc, Rcl::Doc& pdoc)
Rcl::Db *DocSequenceDb::getDb()
{
// Note: no need for setQuery here, we're just passing through a
// query-independant request
string udi;
if (!FileInterner::getEnclosing(doc.url, doc.ipath, pdoc.url, pdoc.ipath,
udi))
return false;
bool dbret = m_q->whatDb()->getDoc(udi, pdoc);
return dbret && pdoc.pc != -1;
return m_q.isNotNull() ? m_q->whatDb() : 0;
}
list<string> DocSequenceDb::expand(Rcl::Doc &doc)

View File

@ -38,7 +38,7 @@ class DocSequenceDb : public DocSequence {
virtual bool getAbstract(Rcl::Doc &doc, vector<string>&);
virtual int getFirstMatchPage(Rcl::Doc&, std::string& term);
virtual bool getEnclosing(Rcl::Doc& doc, Rcl::Doc& pdoc);
virtual Rcl::Db *getDb();
virtual bool docDups(const Rcl::Doc& doc, std::vector<Rcl::Doc>& dups);
virtual string getDescription();
virtual list<string> expand(Rcl::Doc &doc);

69
src/query/docseqdocs.h Normal file
View File

@ -0,0 +1,69 @@
/* Copyright (C) 2004-2013 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifndef _DOCSEQDOCS_H_INCLUDED_
#define _DOCSEQDOCS_H_INCLUDED_
#include "docseq.h"
#include "rcldoc.h"
namespace Rcl {
class Db;
}
/** A DocSequence that's just built from a bunch of docs.
 *
 * The documents are copied at construction time and served by index.
 * The Rcl::Db pointer is only stored and handed back by getDb(); this
 * class never deletes it.
 */
class DocSequenceDocs : public DocSequence {
public:
    /**
     * @param d index handle returned by getDb().
     * @param docs the documents making up the sequence (copied).
     * @param t sequence title, passed to the DocSequence constructor.
     */
    DocSequenceDocs(Rcl::Db *d, const std::vector<Rcl::Doc>& docs,
                    const string &t)
        : DocSequence(t), m_db(d), m_docs(docs)
    {
    }
    virtual ~DocSequenceDocs()
    {
    }
    /** Copy document number num into doc.
     *
     * @param sh if not null, the pointed string is reset to empty.
     * @return false if num is out of range.
     */
    virtual bool getDoc(int num, Rcl::Doc &doc, string *sh = 0)
    {
        if (sh)
            *sh = string();
        if (num < 0 || num >= int(m_docs.size()))
            return false;
        doc = m_docs[num];
        return true;
    }
    virtual Rcl::Db *getDb()
    {
        return m_db;
    }
    /** Exact count here: this is the number of stored documents */
    virtual int getResCnt()
    {
        return int(m_docs.size());
    }
    virtual string getDescription()
    {
        return m_description;
    }
    void setDescription(const string& desc)
    {
        m_description = desc;
    }
private:
    Rcl::Db *m_db;
    string m_description;
    std::vector<Rcl::Doc> m_docs;
};
#endif /* _DOCSEQDOCS_H_INCLUDED_ */

View File

@ -25,7 +25,6 @@ using std::list;
#include "docseqhist.h"
#include "rcldb.h"
#include "fileudi.h"
#include "internfile.h"
#include "base64.h"
#include "debuglog.h"
#include "smallut.h"
@ -145,14 +144,9 @@ bool DocSequenceHistory::getDoc(int num, Rcl::Doc &doc, string *sh)
return ret;
}
bool DocSequenceHistory::getEnclosing(Rcl::Doc& doc, Rcl::Doc& pdoc)
Rcl::Db *DocSequenceHistory::getDb()
{
string udi;
if (!FileInterner::getEnclosing(doc.url, doc.ipath, pdoc.url, pdoc.ipath,
udi))
return false;
bool dbret = m_db->getDoc(udi, pdoc);
return dbret && pdoc.pc != -1;
return m_db;
}
int DocSequenceHistory::getResCnt()

View File

@ -48,7 +48,7 @@ class DocSequenceHistory : public DocSequence {
virtual ~DocSequenceHistory() {}
virtual bool getDoc(int num, Rcl::Doc &doc, string *sh = 0);
virtual bool getEnclosing(Rcl::Doc& doc, Rcl::Doc& pdoc);
virtual Rcl::Db *getDb();
virtual int getResCnt();
virtual string getDescription() {return m_description;}
void setDescription(const string& desc) {m_description = desc;}

View File

@ -80,6 +80,9 @@ string start_of_field_term;
string end_of_field_term;
const string page_break_term = "XXPG/";
// Special term to mark documents with children.
const string has_children_term("XXC/");
// Field name for the unsplit file name. Has to exist in the field file
// because of usage in termmatch()
const string unsplitFilenameFieldName = "rclUnsplitFN";
@ -235,6 +238,73 @@ bool Db::Native::subDocs(const string &udi, vector<Xapian::docid>& docids)
}
}
// Extract the unique document identifier from a Xapian document by
// looking for the udi-prefixed term in the document term list.
//
// @param xdoc the Xapian document.
// @param udi output: the term with the prefix stripped. Only modified
//   on success.
// @return false on Xapian error (message in m_rcldb->m_reason) or if
//   the document has no udi-prefixed term.
bool Db::Native::xdocToUdi(Xapian::Document& xdoc, string &udi)
{
    const string pfx = wrap_prefix(udi_prefix);
    Xapian::TermIterator xit;
    XAPTRY(xit = xdoc.termlist_begin();
           xit.skip_to(pfx),
           xrdb, m_rcldb->m_reason);
    if (!m_rcldb->m_reason.empty()) {
        LOGERR(("xdocToUdi: xapian error: %s\n", m_rcldb->m_reason.c_str()));
        return false;
    }
    if (xit == xdoc.termlist_end())
        return false;

    // skip_to() positions the iterator on the first term which is not
    // less than the prefix: this is not necessarily a udi term. Check
    // before stripping, else we'd return garbage (or substr() would
    // throw if the term is shorter than the prefix).
    const string term = *xit;
    if (term.compare(0, pfx.size(), pfx) != 0)
        return false;
    udi = term.substr(pfx.size());
    return true;
}
// Tell whether the document designated by udi is indexed by term.
// Returns false if the document can't be retrieved, on Xapian error
// (message in m_rcldb->m_reason), or if the term is absent from the
// document term list.
bool Db::Native::hasTerm(const string& udi, const string& term)
{
    LOGDEB2(("Native::hasTerm: udi [%s] term [%s]\n",udi.c_str(),term.c_str()));
    Xapian::Document xdoc;
    if (!getDoc(udi, xdoc))
        return false;

    Xapian::TermIterator xit;
    XAPTRY(xit = xdoc.termlist_begin();
           xit.skip_to(term),
           xrdb, m_rcldb->m_reason);
    if (!m_rcldb->m_reason.empty()) {
        LOGERR(("Rcl::Native::hasTerm: %s\n", m_rcldb->m_reason.c_str()));
        return false;
    }
    // skip_to() may stop on a later term: require an exact match
    return xit != xdoc.termlist_end() && term == *xit;
}
// Fetch the Xapian document for a given udi, using the posting list
// of the term built by make_uniterm().
// Returns the Xapian docid and sets xdoc on success. Returns 0 if the
// udi is not in the index, or on Xapian error (in which case the
// message is stored in m_rcldb->m_reason and logged).
Xapian::docid Db::Native::getDoc(const string& udi, Xapian::Document& xdoc)
{
    const string uniterm = make_uniterm(udi);
    // Second pass is only for retrying after a DatabaseModifiedError
    for (int attempt = 0; attempt < 2; attempt++) {
        try {
            Xapian::PostingIterator pit = xrdb.postlist_begin(uniterm);
            if (pit == xrdb.postlist_end(uniterm)) {
                // Udi not in Db.
                return 0;
            }
            xdoc = xrdb.get_document(*pit);
            return *pit;
        } catch (const Xapian::DatabaseModifiedError &e) {
            m_rcldb->m_reason = e.get_msg();
            xrdb.reopen();
            continue;
        } XCATCHERROR(m_rcldb->m_reason);
        break;
    }
    LOGERR(("Db::Native::getDoc: Xapian error: %s\n",
            m_rcldb->m_reason.c_str()));
    return 0;
}
// Turn data record from db into document fields
bool Db::Native::dbDataToRclDoc(Xapian::docid docid, std::string &data,
Doc &doc)
@ -492,6 +562,7 @@ bool Db::Native::purgeFileWrite(bool orphansOnly, const string& udi,
return false;
}
/* Rcl::Db methods ///////////////////////////////// */
bool Db::o_inPlaceReset;
@ -1210,7 +1281,9 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
leftzeropad(doc.fbytes, 12);
newdocument.add_value(VALUE_SIZE, doc.fbytes);
}
if (doc.haschildren) {
newdocument.add_boolean_term(has_children_term);
}
if (!doc.pcbytes.empty())
RECORD_APPEND(record, Doc::keypcs, doc.pcbytes);
char sizebuf[30];
@ -1697,26 +1770,116 @@ bool Db::getDoc(const string &udi, Doc &doc)
// will make partial display in case of error
doc.meta[Rcl::Doc::keyrr] = "100%";
doc.pc = 100;
Xapian::Document xdoc;
Xapian::docid docid;
if ((docid = m_ndb->getDoc(udi, xdoc))) {
string data = xdoc.get_data();
doc.meta[Rcl::Doc::keyudi] = udi;
return m_ndb->dbDataToRclDoc(docid, data, doc);
} else {
// Document found in history no longer in the
// database. We return true (because there might be
// other ok docs further) but indicate the error with
// pc = -1
doc.pc = -1;
LOGINFO(("Db:getDoc: no such doc in index: [%s]\n", udi.c_str()));
return true;
}
}
string uniterm = make_uniterm(udi);
// Tell whether the input document has sub-documents: either some
// documents list it as parent (subDocs() returns a non-empty list for
// its udi), or it was indexed with the has_children term.
// Returns false if the db is not open, if the input document has no
// udi, or if the sub-documents lookup fails.
bool Db::hasSubDocs(const Doc &idoc)
{
    if (m_ndb == 0)
        return false;
    string inudi;
    if (!idoc.getmeta(Doc::keyudi, &inudi) || inudi.empty()) {
        LOGERR(("Db::hasSubDocs: no input udi or empty\n"));
        return false;
    }
    vector<Xapian::docid> docids;
    if (!m_ndb->subDocs(inudi, docids)) {
        LOGDEB(("Db::hasSubDocs: lower level subdocs failed\n"));
        return false;
    }
    if (!docids.empty())
        return true;

    // No document names this one as parent: check if it has a
    // has_children term
    return m_ndb->hasTerm(inudi, has_children_term);
}
// Retrieve all subdocuments of a given one, which may not be a file-level
// one (in which case, we have to retrieve this first, then filter the ipaths)
bool Db::getSubDocs(const Doc &idoc, vector<Doc>& subdocs)
{
if (m_ndb == 0)
return false;
string inudi;
if (!idoc.getmeta(Doc::keyudi, &inudi) || inudi.empty()) {
LOGERR(("Db::getSubDocs: no input udi or empty\n"));
return false;
}
string rootudi;
string ipath = idoc.ipath;
if (ipath.empty()) {
// File-level doc. Use it as root
rootudi = inudi;
} else {
// See if we have a parent term
Xapian::Document xdoc;
if (!m_ndb->getDoc(inudi, xdoc)) {
LOGERR(("Db::getSubDocs: can't get Xapian document\n"));
return false;
}
Xapian::TermIterator xit;
XAPTRY(xit = xdoc.termlist_begin();
xit.skip_to(wrap_prefix(parent_prefix)),
m_ndb->xrdb, m_reason);
if (!m_reason.empty()) {
LOGERR(("Db::getSubDocs: xapian error: %s\n", m_reason.c_str()));
return false;
}
if (xit == xdoc.termlist_end()) {
LOGERR(("Db::getSubDocs: parent term not found\n"));
return false;
}
rootudi = strip_prefix(*xit);
}
LOGDEB(("Db::getSubDocs: root: [%s]\n", rootudi.c_str()));
// Retrieve all subdoc xapian ids for the root
vector<Xapian::docid> docids;
if (!m_ndb->subDocs(rootudi, docids)) {
LOGDEB(("Db:getSubDocs: lower level subdocs failed\n"));
return false;
}
// Retrieve doc, filter, and build output list
for (int tries = 0; tries < 2; tries++) {
try {
if (!m_ndb->xrdb.term_exists(uniterm)) {
// Document found in history no longer in the
// database. We return true (because their might be
// other ok docs further) but indicate the error with
// pc = -1
doc.pc = -1;
LOGINFO(("Db:getDoc: no such doc in index: [%s] (len %d)\n",
uniterm.c_str(), uniterm.length()));
return true;
}
Xapian::PostingIterator docid =
m_ndb->xrdb.postlist_begin(uniterm);
Xapian::Document xdoc = m_ndb->xrdb.get_document(*docid);
string data = xdoc.get_data();
doc.meta[Rcl::Doc::keyudi] = udi;
return m_ndb->dbDataToRclDoc(*docid, data, doc);
for (vector<Xapian::docid>::const_iterator it = docids.begin();
it != docids.end(); it++) {
Xapian::Document xdoc = m_ndb->xrdb.get_document(*it);
string data = xdoc.get_data();
string docudi;
m_ndb->xdocToUdi(xdoc, docudi);
Doc doc;
doc.meta[Doc::keyudi] = docudi;
doc.meta[Doc::keyrr] = "100%";
doc.pc = 100;
if (!m_ndb->dbDataToRclDoc(*it, data, doc)) {
LOGERR(("Db::getSubDocs: doc conversion error\n"));
return false;
}
if (ipath.empty() || doc.ipath.find(ipath) == 0)
subdocs.push_back(doc);
}
return true;
} catch (const Xapian::DatabaseModifiedError &e) {
m_reason = e.get_msg();
m_ndb->xrdb.reopen();
@ -1725,7 +1888,7 @@ bool Db::getDoc(const string &udi, Doc &doc)
break;
}
LOGERR(("Db::getDoc: %s\n", m_reason.c_str()));
LOGERR(("Db::getSubDocs: Xapian error: %s\n", m_reason.c_str()));
return false;
}

View File

@ -357,6 +357,29 @@ class Db {
*/
bool getDoc(const string &udi, Doc &doc);
/** Test if document has sub-documents.
*
* This can always be detected for file-level documents, using the
* postlist for the parent term constructed with udi.
*
* For non file-level documents (e.g.: does an email inside an
* mbox have attachments ?), detection is dependent on the filter
* having set an appropriate flag at index time. Higher level code
* can't detect it because the doc for the parent may have been
* seen before any children. The flag is stored as a boolean term
* in the index.
*/
bool hasSubDocs(const Doc &idoc);
/** Get subdocuments of given document.
*
* For file-level documents, these are all docs indexed by the
* parent term built on idoc.udi. For embedded documents, the
* parent doc is looked for, then its subdocs list is
* filtered using the idoc ipath as a prefix.
*/
bool getSubDocs(const Doc& idoc, vector<Doc>& subdocs);
/** Get duplicates (md5) of document */
bool docDups(const Doc& idoc, std::vector<Doc>& odocs);

View File

@ -101,26 +101,39 @@ class Db::Native {
bool addOrUpdateWrite(const string& udi, const string& uniterm,
Xapian::Document& doc, size_t txtlen);
/** Delete all documents which are contained in the input document,
* which must be a file-level one.
*
* @param onlyOrphans if true, only delete documents which have
* not the same signature as the input. This is used to delete docs
* which do not exist any more in the file after an update, for
* example the tail messages after a folder truncation). If false,
* delete all.
* @param udi the parent document identifier.
* @param uniterm equivalent to udi, passed just to avoid recomputing.
*/
bool purgeFileWrite(bool onlyOrphans, const string& udi,
const string& uniterm);
bool getPagePositions(Xapian::docid docid, vector<int>& vpos);
int getPageNumberForPosition(const vector<int>& pbreaks, unsigned int pos);
bool dbDataToRclDoc(Xapian::docid docid, std::string &data, Doc &doc);
// Retrieve the unique document identifier for a Xapian document from
// its term list: position on the udi-prefixed term and strip the prefix.
// Returns false if the term list is exhausted or the term is empty.
// NOTE(review): the term found by skip_to() is not checked to actually
// begin with the udi prefix, and Xapian exceptions are not caught here.
bool xdocToUdi(Xapian::Document& xdoc, string &udi)
{
Xapian::TermIterator xit = xdoc.termlist_begin();
xit.skip_to(wrap_prefix(udi_prefix));
if (xit != xdoc.termlist_end()) {
udi = *xit;
if (!udi.empty()) {
// Strip the prefix, keeping only the udi proper
udi = udi.substr(wrap_prefix(udi_prefix).size());
return true;
}
}
return false;
}
/** Retrieve Xapian::docid, given unique document identifier,
* using the posting list for the derived term.
*
* @return 0 if not found
*/
Xapian::docid getDoc(const string& udi, Xapian::Document& xdoc);
/** Retrieve unique document identifier for given Xapian document,
* using the document termlist
*/
bool xdocToUdi(Xapian::Document& xdoc, string &udi);
/** Check if doc is indexed by term */
bool hasTerm(const string& udi, const string& term);
/** Compute list of subdocuments for a given udi. We look for documents
* indexed by a parent term matching the udi, the posting list for the
@ -131,14 +144,12 @@ class Db::Native {
* Ie: in a mail folder, all messages, attachments, attachments of
* attached messages etc. must have the folder file document as
* parent.
* Parent-child relationships are defined by the indexer (rcldb user)
*
* Finer grain parent-child relationships are defined by the
* indexer (rcldb user), using the ipath.
*
* The file-system indexer currently works this way (flatly),
* subDocs() could be relatively easily changed to support full recursivity
* if needed.
*/
bool subDocs(const string &udi, vector<Xapian::docid>& docids);
};
// This is the word position offset at which we index the body text

View File

@ -20,7 +20,6 @@
namespace Rcl {
const string Doc::keyabs("abstract");
const string Doc::keyanc("rclanc");
const string Doc::keyapptg("rclaptg");
const string Doc::keyau("author");
const string Doc::keybcknd("rclbes");

View File

@ -121,6 +121,10 @@ class Doc {
// Page breaks were stored during indexing.
bool haspages;
// Has children, either as content of file-level container or
// ipath descendants.
bool haschildren;
///////////////////////////////////////////////////////////////////
void erase() {
@ -141,9 +145,10 @@ class Doc {
pc = 0;
xdocid = 0;
haspages = false;
haschildren = false;
}
Doc()
: syntabs(false), pc(0), xdocid(0), haspages(false)
: syntabs(false), pc(0), xdocid(0), haspages(false), haschildren(false)
{
}
/** Get value for named field. If value pointer is 0, just test existence */
@ -225,9 +230,6 @@ class Doc {
static const string keyudi;
static const string keyapptg; // apptag. Set from localfields (fsindexer)
static const string keybght; // beagle hit type ("beagleHitType")
// Boolean used to indicate if the doc has descendants in the ipath sense
// (different from the file/contend parent_udi thing).
static const string keyanc;
};