From 2b80c77c23f916f01e56b4efc79cf97d06f7d1ac Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Wed, 24 Apr 2013 16:33:53 +0200 Subject: [PATCH] Add possibility to display a list of sub-documents for a given result --- src/common/cstr.h | 1 + src/index/fsindexer.cpp | 7 ++ src/internfile/internfile.cpp | 2 + src/internfile/mh_mail.cpp | 7 +- src/qtgui/rclmain_w.cpp | 49 ++++++--- src/qtgui/rclmain_w.h | 2 +- src/qtgui/reslist.cpp | 95 +++++++++++----- src/qtgui/reslist.h | 14 ++- src/query/docseq.cpp | 15 +++ src/query/docseq.h | 26 ++++- src/query/docseqdb.cpp | 13 +-- src/query/docseqdb.h | 2 +- src/query/docseqdocs.h | 69 ++++++++++++ src/query/docseqhist.cpp | 10 +- src/query/docseqhist.h | 2 +- src/rcldb/rcldb.cpp | 201 ++++++++++++++++++++++++++++++---- src/rcldb/rcldb.h | 23 ++++ src/rcldb/rcldb_p.h | 49 +++++---- src/rcldb/rcldoc.cpp | 1 - src/rcldb/rcldoc.h | 10 +- 20 files changed, 480 insertions(+), 118 deletions(-) create mode 100644 src/query/docseqdocs.h diff --git a/src/common/cstr.h b/src/common/cstr.h index e13950cd..c3c55ca1 100644 --- a/src/common/cstr.h +++ b/src/common/cstr.h @@ -81,5 +81,6 @@ DEF_CSTR(dj_keyipath, "ipath"); DEF_CSTR(dj_keymd5, "md5"); DEF_CSTR(dj_keymt, "mimetype"); DEF_CSTR(dj_keydocsize, "docsize"); +DEF_CSTR(dj_keyanc, "rclanc"); #endif /* _CSTR_H_INCLUDED_ */ diff --git a/src/index/fsindexer.cpp b/src/index/fsindexer.cpp index e880e5be..55466641 100644 --- a/src/index/fsindexer.cpp +++ b/src/index/fsindexer.cpp @@ -744,6 +744,12 @@ FsIndexer::processonefile(RclConfig *config, hadNullIpath = true; if (m_havemdreapers) reapmetadata(mdreapers, fn, doc); + if (hadNonNullIpath) { + // Note that only the filters can reliably compute + // this. 
What we do is dependent on the doc order (if + // we see the top doc first, we won't set the flag) + doc.haschildren = true; + } } else { hadNonNullIpath = true; make_udi(fn, doc.ipath, udi); @@ -832,6 +838,7 @@ FsIndexer::processonefile(RclConfig *config, Rcl::Doc fileDoc; fileDoc.fmtime = ascdate; fileDoc.meta[Rcl::Doc::keyfn] = utf8fn; + fileDoc.haschildren = true; fileDoc.mimetype = interner.getMimetype(); fileDoc.url = cstr_fileu + fn; if (m_havelocalfields) diff --git a/src/internfile/internfile.cpp b/src/internfile/internfile.cpp index 1774f6c1..a48d206c 100644 --- a/src/internfile/internfile.cpp +++ b/src/internfile/internfile.cpp @@ -573,6 +573,8 @@ bool FileInterner::dijontorcl(Rcl::Doc& doc) } } else if (it->first == cstr_dj_keymd) { doc.dmtime = it->second; + } else if (it->first == cstr_dj_keyanc) { + doc.haschildren = true; } else if (it->first == cstr_dj_keyorigcharset) { doc.origcharset = it->second; } else if (it->first == cstr_dj_keyfn) { diff --git a/src/internfile/mh_mail.cpp b/src/internfile/mh_mail.cpp index 14512fff..ce327cd0 100644 --- a/src/internfile/mh_mail.cpp +++ b/src/internfile/mh_mail.cpp @@ -183,12 +183,15 @@ bool MimeHandlerMail::next_document() if (m_idx == -1) { m_metaData[cstr_dj_keymt] = cstr_textplain; res = processMsg(m_bincdoc, 0); - LOGDEB1(("MimeHandlerMail::next_document: mimetype %s\n", - m_metaData[cstr_dj_keymt].c_str())); + LOGDEB1(("MimeHandlerMail::next_document: mt %s, att cnt %d\n", + m_metaData[cstr_dj_keymt].c_str(), m_attachments.size())); const string& txt = m_metaData[cstr_dj_keycontent]; if (m_startoftext < txt.size()) m_metaData[cstr_dj_keyabstract] = truncate_to_word(txt.substr(m_startoftext), 250); + if (m_attachments.size() > 0) { + m_metaData[cstr_dj_keyanc] = "t"; + } } else { m_metaData[cstr_dj_keyabstract].clear(); res = processAttach(); diff --git a/src/qtgui/rclmain_w.cpp b/src/qtgui/rclmain_w.cpp index ecae0503..281978c8 100644 --- a/src/qtgui/rclmain_w.cpp +++ b/src/qtgui/rclmain_w.cpp @@ -70,6 
+70,7 @@ using std::pair; #include "internfile.h" #include "docseqdb.h" #include "docseqhist.h" +#include "docseqdocs.h" #include "confguiindex.h" #include "restable.h" #include "listdialog.h" @@ -87,7 +88,6 @@ using namespace confgui; #include "rclhelp.h" #include "moc_rclmain_w.cpp" -extern "C" int XFlush(void *); QString g_stringAllStem, g_stringNoStem; static const QKeySequence quitKeySeq("Ctrl+q"); @@ -274,7 +274,7 @@ void RclMain::init() connect(toggleFullScreenAction, SIGNAL(activated()), this, SLOT(toggleFullScreen())); connect(actionShowQueryDetails, SIGNAL(activated()), - this, SLOT(showQueryDetails())); + reslist, SLOT(showQueryDetails())); connect(periodictimer, SIGNAL(timeout()), this, SLOT(periodic100())); connect(this, SIGNAL(docSourceChanged(RefCntr)), @@ -329,6 +329,8 @@ void RclMain::init() this, SLOT(enablePrevPage(bool))); connect(reslist, SIGNAL(docEditClicked(Rcl::Doc)), this, SLOT(startNativeViewer(Rcl::Doc))); + connect(reslist, SIGNAL(showSubDocs(Rcl::Doc)), + this, SLOT(showSubDocs(Rcl::Doc))); connect(reslist, SIGNAL(docSaveToFileClicked(Rcl::Doc)), this, SLOT(saveDocToFile(Rcl::Doc))); connect(reslist, SIGNAL(editRequested(Rcl::Doc)), @@ -337,8 +339,6 @@ void RclMain::init() this, SLOT(startPreview(int, Rcl::Doc, int))); connect(reslist, SIGNAL(previewRequested(Rcl::Doc)), this, SLOT(startPreview(Rcl::Doc))); - connect(reslist, SIGNAL(headerClicked()), - this, SLOT(showQueryDetails())); if (prefs.keepSort && prefs.sortActive) { m_sortspec.field = (const char *)prefs.sortField.toUtf8(); @@ -1541,6 +1541,34 @@ static bool lookForHtmlBrowser(string &exefile) return false; } +void RclMain::showSubDocs(Rcl::Doc doc) +{ + LOGDEB(("RclMain::showSubDocs\n")); + string reason; + if (!maybeOpenDb(reason)) { + QMessageBox::critical(0, "Recoll", QString(reason.c_str())); + return; + } + vector docs; + if (!rcldb->getSubDocs(doc, docs)) { + QMessageBox::warning(0, "Recoll", QString("Can't get subdocs")); + return; + } + DocSequenceDocs *src = + new 
DocSequenceDocs(rcldb, docs, + qs2utf8s(tr("Sub-documents and attachments"))); + src->setDescription(qs2utf8s(tr("Sub-documents and attachments"))); + RefCntr + source(new DocSource(theconfig, RefCntr(src))); + + ResList *res = new ResList(); + res->setRclMain(this); + res->setIsMainList(0); + res->setDocSource(source); + res->readDocSource(); + res->show(); +} + void RclMain::startNativeViewer(Rcl::Doc doc, int pagenum, QString term) { string apptag; @@ -1935,19 +1963,6 @@ QString RclMain::getQueryDescription() return QString::fromUtf8(m_source->getDescription().c_str()); } -/** Show detailed expansion of a query */ -void RclMain::showQueryDetails() -{ - if (m_source.isNull()) - return; - string oq = breakIntoLines(m_source->getDescription(), 100, 50); - QString str; - QString desc = tr("Result count (est.)") + ": " + - str.setNum(m_source->getResCnt()) + "
"; - desc += tr("Query details") + ": " + QString::fromUtf8(oq.c_str()); - QMessageBox::information(this, tr("Query details"), desc); -} - // User pressed a category button: set filter params in reslist void RclMain::catgFilter(int id) { diff --git a/src/qtgui/rclmain_w.h b/src/qtgui/rclmain_w.h index b189ca9c..c30d5f73 100644 --- a/src/qtgui/rclmain_w.h +++ b/src/qtgui/rclmain_w.h @@ -118,6 +118,7 @@ public slots: virtual void enableNextPage(bool); virtual void enablePrevPage(bool); virtual void docExpand(Rcl::Doc); + virtual void showSubDocs(Rcl::Doc); virtual void startPreview(int docnum, Rcl::Doc doc, int keymods); virtual void startPreview(Rcl::Doc); virtual void startNativeViewer(Rcl::Doc, int pagenum = -1, @@ -140,7 +141,6 @@ public slots: virtual void on_actionShowResultsAsTable_toggled(bool on); virtual void onSortDataChanged(DocSeqSortSpec); virtual void resultCount(int); - virtual void showQueryDetails(); virtual void applyStyleSheet(); signals: diff --git a/src/qtgui/reslist.cpp b/src/qtgui/reslist.cpp index e639fa0f..8ac6e7d8 100644 --- a/src/qtgui/reslist.cpp +++ b/src/qtgui/reslist.cpp @@ -72,7 +72,7 @@ static const QKeySequence closeKeySeq("Ctrl+w"); class QtGuiResListPager : public ResListPager { public: QtGuiResListPager(ResList *p, int ps) - : ResListPager(ps), m_parent(p) + : ResListPager(ps), m_reslist(p) {} virtual bool append(const string& data); virtual bool append(const string& data, int idx, const Rcl::Doc& doc); @@ -88,7 +88,7 @@ public: virtual string absSep() {return (const char *)(prefs.abssep.toUtf8());} virtual string iconUrl(RclConfig *, Rcl::Doc& doc); private: - ResList *m_parent; + ResList *m_reslist; }; #if 0 @@ -110,7 +110,7 @@ bool QtGuiResListPager::append(const string& data) { LOGDEB2(("QtGuiReslistPager::appendString : %s\n", data.c_str())); logdata(data.c_str()); - m_parent->append(QString::fromUtf8(data.c_str())); + m_reslist->append(QString::fromUtf8(data.c_str())); return true; } @@ -118,24 +118,24 @@ bool 
QtGuiResListPager::append(const string& data, int docnum, const Rcl::Doc&) { LOGDEB2(("QtGuiReslistPager::appendDoc: blockCount %d, %s\n", - m_parent->document()->blockCount(), data.c_str())); + m_reslist->document()->blockCount(), data.c_str())); logdata(data.c_str()); #ifdef RESLIST_TEXTBROWSER - int blkcnt0 = m_parent->document()->blockCount(); - m_parent->moveCursor(QTextCursor::End, QTextCursor::MoveAnchor); - m_parent->textCursor().insertBlock(); - m_parent->insertHtml(QString::fromUtf8(data.c_str())); - m_parent->moveCursor(QTextCursor::Start, QTextCursor::MoveAnchor); - m_parent->ensureCursorVisible(); - int blkcnt1 = m_parent->document()->blockCount(); + int blkcnt0 = m_reslist->document()->blockCount(); + m_reslist->moveCursor(QTextCursor::End, QTextCursor::MoveAnchor); + m_reslist->textCursor().insertBlock(); + m_reslist->insertHtml(QString::fromUtf8(data.c_str())); + m_reslist->moveCursor(QTextCursor::Start, QTextCursor::MoveAnchor); + m_reslist->ensureCursorVisible(); + int blkcnt1 = m_reslist->document()->blockCount(); for (int block = blkcnt0; block < blkcnt1; block++) { - m_parent->m_pageParaToReldocnums[block] = docnum; + m_reslist->m_pageParaToReldocnums[block] = docnum; } #else QString sdoc = QString("
").arg(docnum); - m_parent->append(sdoc); - m_parent->append(QString::fromUtf8(data.c_str())); - m_parent->append("
"); + m_reslist->append(sdoc); + m_reslist->append(QString::fromUtf8(data.c_str())); + m_reslist->append(""); #endif return true; } @@ -276,7 +276,8 @@ static PlainToRichQtReslist g_hiliter; ///////////////////////////////////// ResList::ResList(QWidget* parent, const char* name) - : RESLIST_PARENTCLASS(parent), m_parent(0) + : RESLIST_PARENTCLASS(parent), m_curPvDoc(-1), m_lstClckMod(0), + m_listId(0), m_rclmain(0), m_ismainlist(true), m_coninit(false) { if (!name) setObjectName("resList"); @@ -299,6 +300,7 @@ ResList::ResList(QWidget* parent, const char* name) page()->setLinkDelegationPolicy(QWebPage::DelegateAllLinks); settings()->setAttribute(QWebSettings::JavascriptEnabled, true); #endif + setFont(); languageChange(); @@ -311,13 +313,11 @@ ResList::ResList(QWidget* parent, const char* name) connect(this, SIGNAL(highlighted(const QString &)), this, SLOT(highlighted(const QString &))); #endif + setContextMenuPolicy(Qt::CustomContextMenu); connect(this, SIGNAL(customContextMenuRequested(const QPoint&)), this, SLOT(createPopupMenu(const QPoint&))); - m_curPvDoc = -1; - m_lstClckMod = 0; - m_listId = 0; m_pager = new QtGuiResListPager(this, prefs.respagesize); m_pager->setHighLighter(&g_hiliter); } @@ -379,6 +379,17 @@ void ResList::setDocSource(RefCntr nsource) { LOGDEB(("ResList::setDocSource()\n")); m_source = RefCntr(new DocSource(theconfig, nsource)); + if (!m_ismainlist && !m_coninit) { + m_coninit = true; + connect(new QShortcut(closeKeySeq, this), SIGNAL (activated()), + this, SLOT (close())); + connect(new QShortcut(quitKeySeq, this), SIGNAL (activated()), + m_rclmain, SLOT (fileExit())); + connect(this, SIGNAL(previewRequested(Rcl::Doc)), + m_rclmain, SLOT(startPreview(Rcl::Doc))); + connect(this, SIGNAL(docEditClicked(Rcl::Doc)), + m_rclmain, SLOT(startNativeViewer(Rcl::Doc))); + } } // A query was executed, or the filtering/sorting parameters changed, @@ -715,7 +726,6 @@ void ResList::displayPage() // Possibly color paragraph of current preview if any 
previewExposed(m_curPvDoc); - } // Color paragraph (if any) of currently visible preview @@ -804,11 +814,11 @@ void ResList::mouseDoubleClickEvent(QMouseEvent *event) void ResList::newSnippetsW(const Rcl::Doc& doc) { SnippetsW *sp = new SnippetsW(doc, m_source); - if (m_parent) { + if (m_rclmain) { connect(sp, SIGNAL(startNativeViewer(Rcl::Doc, int, QString)), - m_parent, SLOT(startNativeViewer(Rcl::Doc, int, QString))); + m_rclmain, SLOT(startNativeViewer(Rcl::Doc, int, QString))); connect(new QShortcut(quitKeySeq, sp), SIGNAL (activated()), - m_parent, SLOT (fileExit())); + m_rclmain, SLOT (fileExit())); } connect(new QShortcut(closeKeySeq, sp), SIGNAL (activated()), sp, SLOT (close())); @@ -841,6 +851,18 @@ void ResList::newDupsW(const Rcl::Doc&, const vector& dups) dialog.exec(); } +void ResList::showQueryDetails() +{ + if (m_source.isNull()) + return; + string oq = breakIntoLines(m_source->getDescription(), 100, 50); + QString str; + QString desc = tr("Result count (est.)") + ": " + + str.setNum(m_source->getResCnt()) + "
"; + desc += tr("Query details") + ": " + QString::fromUtf8(oq.c_str()); + QMessageBox::information(this, tr("Query details"), desc); +} + void ResList::linkWasClicked(const QUrl &url) { string ascurl = (const char *)url.toString().toAscii();; @@ -885,7 +907,7 @@ void ResList::linkWasClicked(const QUrl &url) // Show query details case 'H': { - emit headerClicked(); + showQueryDetails(); break; } @@ -899,10 +921,15 @@ void ResList::linkWasClicked(const QUrl &url) LOGERR(("ResList::linkWasClicked: can't get doc for %d\n", i)); return; } - if (what == 'P') - emit docPreviewClicked(i, doc, m_lstClckMod); - else + if (what == 'P') { + if (m_ismainlist) { + emit docPreviewClicked(i, doc, m_lstClckMod); + } else { + emit previewRequested(doc); + } + } else { emit docEditClicked(doc); + } } break; @@ -967,7 +994,8 @@ void ResList::createPopupMenu(const QPoint& pos) if (havedoc) doc.getmeta(Rcl::Doc::keyapptg, &apptag); - if (havedoc && !theconfig->getMimeViewerDef(doc.mimetype, apptag, 0).empty()) { + if (havedoc && + !theconfig->getMimeViewerDef(doc.mimetype, apptag, 0).empty()) { popup->addAction(tr("&Open"), this, SLOT(menuEdit())); } popup->addAction(tr("Copy &File Name"), this, SLOT(menuCopyFN())); @@ -990,6 +1018,10 @@ void ResList::createPopupMenu(const QPoint& pos) popup->addAction(tr("Open &Snippets window"), this, SLOT(menuOpenSnippets())); + if (havedoc && rcldb && rcldb->hasSubDocs(doc)) + popup->addAction(tr("Show subdocuments / attachments"), + this, SLOT(menuShowSubDocs())); + popup->popup(mapToGlobal(pos)); } @@ -1051,6 +1083,13 @@ void ResList::menuOpenSnippets() newSnippetsW(doc); } +void ResList::menuShowSubDocs() +{ + Rcl::Doc doc; + if (getDoc(m_popDoc, doc)) + emit showSubDocs(doc); +} + void ResList::menuEdit() { Rcl::Doc doc; diff --git a/src/qtgui/reslist.h b/src/qtgui/reslist.h index cd6e072d..ba1cd914 100644 --- a/src/qtgui/reslist.h +++ b/src/qtgui/reslist.h @@ -69,7 +69,11 @@ class ResList : public RESLIST_PARENTCLASS void setFont(); void 
setRclMain(RclMain *m) { - m_parent = m; + m_rclmain = m; + } + void setIsMainList(bool onoff) + { + m_ismainlist = onoff; } public slots: virtual void setDocSource(RefCntr nsource); @@ -89,11 +93,13 @@ class ResList : public RESLIST_PARENTCLASS virtual void menuPreviewParent(); virtual void menuOpenParent(); virtual void menuOpenSnippets(); + virtual void menuShowSubDocs(); virtual void previewExposed(int); virtual void append(const QString &text); virtual void readDocSource(); virtual void highlighted(const QString& link); virtual void createPopupMenu(const QPoint& pos); + virtual void showQueryDetails(); signals: void nextPageAvailable(bool); @@ -102,8 +108,8 @@ class ResList : public RESLIST_PARENTCLASS void docPreviewClicked(int, Rcl::Doc, int); void docSaveToFileClicked(Rcl::Doc); void previewRequested(Rcl::Doc); + void showSubDocs(Rcl::Doc); void editRequested(Rcl::Doc); - void headerClicked(); void docExpand(Rcl::Doc); void wordSelect(QString); void wordReplace(const QString&, const QString&); @@ -137,7 +143,9 @@ class ResList : public RESLIST_PARENTCLASS // so we store the page and display it when done. 
QString m_text; #endif - RclMain *m_parent; + RclMain *m_rclmain; + bool m_ismainlist; + bool m_coninit; virtual void displayPage(); // Display current page static int newListId(); diff --git a/src/query/docseq.cpp b/src/query/docseq.cpp index b080c456..327bf0ad 100644 --- a/src/query/docseq.cpp +++ b/src/query/docseq.cpp @@ -18,6 +18,7 @@ #include "filtseq.h" #include "sortseq.h" #include "debuglog.h" +#include "internfile.h" string DocSequence::o_sort_trans; string DocSequence::o_filt_trans; @@ -35,6 +36,19 @@ int DocSequence::getSeqSlice(int offs, int cnt, vector& result) return ret; } +bool DocSequence::getEnclosing(Rcl::Doc& doc, Rcl::Doc& pdoc) +{ + // Note: no need for setQuery here, we're just passing through a + // query-independent request + + string udi; + if (!FileInterner::getEnclosing(doc.url, doc.ipath, pdoc.url, pdoc.ipath, + udi)) + return false; + bool dbret = getDb()->getDoc(udi, pdoc); + return dbret && pdoc.pc != -1; +} + // Remove stacked modifying sources (sort, filter) until we get to a real one void DocSource::stripStack() { @@ -107,3 +121,4 @@ bool DocSource::setSortSpec(const DocSeqSortSpec &s) buildStack(); return true; } + diff --git a/src/query/docseq.h b/src/query/docseq.h index 68ff3847..aee8c247 100644 --- a/src/query/docseq.h +++ b/src/query/docseq.h @@ -114,7 +114,7 @@ class DocSequence { return false; } - virtual bool getEnclosing(Rcl::Doc&, Rcl::Doc&) = 0; + virtual bool getEnclosing(Rcl::Doc&, Rcl::Doc&); /** Get estimated total count in results */ virtual int getResCnt() = 0; @@ -159,10 +159,15 @@ class DocSequence { o_sort_trans = sort; o_filt_trans = filt; } + + virtual Rcl::Db *getDb() = 0; + protected: + static std::string o_sort_trans; static std::string o_filt_trans; std::string m_reason; + private: std::string m_title; }; @@ -228,14 +233,29 @@ public: return string(); return m_seq->getReason(); } - virtual std::string title() {return m_seq->title();} - virtual RefCntr getSourceSeq() {return m_seq;} + virtual std::string 
title() + { + return m_seq->title(); + } + virtual RefCntr getSourceSeq() + { + return m_seq; + } + + virtual Rcl::Db *getDb() + { + if (m_seq.isNull()) + return 0; + return m_seq->getDb(); + } protected: + RefCntr m_seq; }; class RclConfig; + // A DocSource can juggle docseqs of different kinds to implement // sorting and filtering in ways depending on the base seqs capabilities class DocSource : public DocSeqModifier { diff --git a/src/query/docseqdb.cpp b/src/query/docseqdb.cpp index db71e505..86c0a41d 100644 --- a/src/query/docseqdb.cpp +++ b/src/query/docseqdb.cpp @@ -23,7 +23,6 @@ using std::list; #include "docseqdb.h" #include "rcldb.h" #include "debuglog.h" -#include "internfile.h" #include "wasatorcl.h" DocSequenceDb::DocSequenceDb(RefCntr q, const string &t, @@ -129,17 +128,9 @@ int DocSequenceDb::getFirstMatchPage(Rcl::Doc &doc, string& term) return -1; } -bool DocSequenceDb::getEnclosing(Rcl::Doc& doc, Rcl::Doc& pdoc) +Rcl::Db *DocSequenceDb::getDb() { - // Note: no need for setQuery here, we're just passing through a - // query-independant request - - string udi; - if (!FileInterner::getEnclosing(doc.url, doc.ipath, pdoc.url, pdoc.ipath, - udi)) - return false; - bool dbret = m_q->whatDb()->getDoc(udi, pdoc); - return dbret && pdoc.pc != -1; + return m_q.isNotNull() ? 
m_q->whatDb() : 0; } list DocSequenceDb::expand(Rcl::Doc &doc) diff --git a/src/query/docseqdb.h b/src/query/docseqdb.h index 02bda96d..a6bd5b7d 100644 --- a/src/query/docseqdb.h +++ b/src/query/docseqdb.h @@ -38,7 +38,7 @@ class DocSequenceDb : public DocSequence { virtual bool getAbstract(Rcl::Doc &doc, vector&); virtual int getFirstMatchPage(Rcl::Doc&, std::string& term); - virtual bool getEnclosing(Rcl::Doc& doc, Rcl::Doc& pdoc); + virtual Rcl::Db *getDb(); virtual bool docDups(const Rcl::Doc& doc, std::vector& dups); virtual string getDescription(); virtual list expand(Rcl::Doc &doc); diff --git a/src/query/docseqdocs.h b/src/query/docseqdocs.h new file mode 100644 index 00000000..e957cce8 --- /dev/null +++ b/src/query/docseqdocs.h @@ -0,0 +1,69 @@ +/* Copyright (C) 2004-2013 J.F.Dockes + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the + * Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ */ +#ifndef _DOCSEQDOCS_H_INCLUDED_ +#define _DOCSEQDOCS_H_INCLUDED_ + +#include "docseq.h" +#include "rcldoc.h" + +namespace Rcl { + class Db; +} + +/** A DocSequence that's just built from a bunch of docs */ +class DocSequenceDocs : public DocSequence { + public: + DocSequenceDocs(Rcl::Db *d, const std::vector docs, + const string &t) + : DocSequence(t), m_db(d), m_docs(docs) + { + } + virtual ~DocSequenceDocs() + { + } + virtual bool getDoc(int num, Rcl::Doc &doc, string *sh = 0) + { + if (sh) + *sh = string(); + if (num < 0 || num >= int(m_docs.size())) + return false; + doc = m_docs[num]; + return true; + } + virtual Rcl::Db *getDb() + { + return m_db; + } + virtual int getResCnt() + { + return m_docs.size(); + } + virtual string getDescription() + { + return m_description; + } + void setDescription(const string& desc) + { + m_description = desc; + } + private: + Rcl::Db *m_db; + string m_description; + std::vector m_docs; +}; + +#endif /* _DOCSEQDOCS_H_INCLUDED_ */ diff --git a/src/query/docseqhist.cpp b/src/query/docseqhist.cpp index 2837325a..0025360e 100644 --- a/src/query/docseqhist.cpp +++ b/src/query/docseqhist.cpp @@ -25,7 +25,6 @@ using std::list; #include "docseqhist.h" #include "rcldb.h" #include "fileudi.h" -#include "internfile.h" #include "base64.h" #include "debuglog.h" #include "smallut.h" @@ -145,14 +144,9 @@ bool DocSequenceHistory::getDoc(int num, Rcl::Doc &doc, string *sh) return ret; } -bool DocSequenceHistory::getEnclosing(Rcl::Doc& doc, Rcl::Doc& pdoc) +Rcl::Db *DocSequenceHistory::getDb() { - string udi; - if (!FileInterner::getEnclosing(doc.url, doc.ipath, pdoc.url, pdoc.ipath, - udi)) - return false; - bool dbret = m_db->getDoc(udi, pdoc); - return dbret && pdoc.pc != -1; + return m_db; } int DocSequenceHistory::getResCnt() diff --git a/src/query/docseqhist.h b/src/query/docseqhist.h index 82195402..8a2f0db2 100644 --- a/src/query/docseqhist.h +++ b/src/query/docseqhist.h @@ -48,7 +48,7 @@ class DocSequenceHistory : public DocSequence { 
virtual ~DocSequenceHistory() {} virtual bool getDoc(int num, Rcl::Doc &doc, string *sh = 0); - virtual bool getEnclosing(Rcl::Doc& doc, Rcl::Doc& pdoc); + virtual Rcl::Db *getDb(); virtual int getResCnt(); virtual string getDescription() {return m_description;} void setDescription(const string& desc) {m_description = desc;} diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index e47c6efb..3ca55fd5 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -80,6 +80,9 @@ string start_of_field_term; string end_of_field_term; const string page_break_term = "XXPG/"; +// Special term to mark documents with children. +const string has_children_term("XXC/"); + // Field name for the unsplit file name. Has to exist in the field file // because of usage in termmatch() const string unsplitFilenameFieldName = "rclUnsplitFN"; @@ -235,6 +238,73 @@ bool Db::Native::subDocs(const string &udi, vector& docids) } } +bool Db::Native::xdocToUdi(Xapian::Document& xdoc, string &udi) +{ + Xapian::TermIterator xit; + XAPTRY(xit = xdoc.termlist_begin(); + xit.skip_to(wrap_prefix(udi_prefix)), + xrdb, m_rcldb->m_reason); + if (!m_rcldb->m_reason.empty()) { + LOGERR(("xdocToUdi: xapian error: %s\n", m_rcldb->m_reason.c_str())); + return false; + } + if (xit != xdoc.termlist_end()) { + udi = *xit; + if (!udi.empty()) { + udi = udi.substr(wrap_prefix(udi_prefix).size()); + return true; + } + } + return false; +} + +// Check if doc given by udi is indexed by term +bool Db::Native::hasTerm(const string& udi, const string& term) +{ + LOGDEB2(("Native::hasTerm: udi [%s] term [%s]\n",udi.c_str(),term.c_str())); + Xapian::Document xdoc; + if (getDoc(udi, xdoc)) { + Xapian::TermIterator xit; + XAPTRY(xit = xdoc.termlist_begin(); + xit.skip_to(term);, + xrdb, m_rcldb->m_reason); + if (!m_rcldb->m_reason.empty()) { + LOGERR(("Rcl::Native::hasTerm: %s\n", m_rcldb->m_reason.c_str())); + return false; + } + if (xit != xdoc.termlist_end() && !term.compare(*xit)) { + return true; + } + } + return 
false; +} + +// Retrieve Xapian document, given udi +Xapian::docid Db::Native::getDoc(const string& udi, Xapian::Document& xdoc) +{ + string uniterm = make_uniterm(udi); + for (int tries = 0; tries < 2; tries++) { + try { + Xapian::PostingIterator docid = xrdb.postlist_begin(uniterm); + if (docid == xrdb.postlist_end(uniterm)) { + // Udi not in Db. + return 0; + } else { + xdoc = xrdb.get_document(*docid); + return *docid; + } + } catch (const Xapian::DatabaseModifiedError &e) { + m_rcldb->m_reason = e.get_msg(); + xrdb.reopen(); + continue; + } XCATCHERROR(m_rcldb->m_reason); + break; + } + LOGERR(("Db::Native::getDoc: Xapian error: %s\n", + m_rcldb->m_reason.c_str())); + return 0; +} + // Turn data record from db into document fields bool Db::Native::dbDataToRclDoc(Xapian::docid docid, std::string &data, Doc &doc) @@ -492,6 +562,7 @@ bool Db::Native::purgeFileWrite(bool orphansOnly, const string& udi, return false; } + /* Rcl::Db methods ///////////////////////////////// */ bool Db::o_inPlaceReset; @@ -1210,7 +1281,9 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc) leftzeropad(doc.fbytes, 12); newdocument.add_value(VALUE_SIZE, doc.fbytes); } - + if (doc.haschildren) { + newdocument.add_boolean_term(has_children_term); + } if (!doc.pcbytes.empty()) RECORD_APPEND(record, Doc::keypcs, doc.pcbytes); char sizebuf[30]; @@ -1697,26 +1770,116 @@ bool Db::getDoc(const string &udi, Doc &doc) // will make partial display in case of error doc.meta[Rcl::Doc::keyrr] = "100%"; doc.pc = 100; + Xapian::Document xdoc; + Xapian::docid docid; + if ((docid = m_ndb->getDoc(udi, xdoc))) { + string data = xdoc.get_data(); + doc.meta[Rcl::Doc::keyudi] = udi; + return m_ndb->dbDataToRclDoc(docid, data, doc); + } else { + // Document found in history no longer in the + // database. 
We return true (because there might be + // other ok docs further) but indicate the error with + // pc = -1 + doc.pc = -1; + LOGINFO(("Db:getDoc: no such doc in index: [%s]\n", udi.c_str())); + return true; + } +} - string uniterm = make_uniterm(udi); +bool Db::hasSubDocs(const Doc &idoc) +{ + if (m_ndb == 0) + return false; + string inudi; + if (!idoc.getmeta(Doc::keyudi, &inudi) || inudi.empty()) { + LOGERR(("Db::hasSubDocs: no input udi or empty\n")); + return false; + } + vector docids; + if (!m_ndb->subDocs(inudi, docids)) { + LOGDEB(("Db:getSubDocs: lower level subdocs failed\n")); + return false; + } + if (!docids.empty()) + return true; + + // Check if doc has a has_children term + if (m_ndb->hasTerm(inudi, has_children_term)) + return true; + return false; +} + +// Retrieve all subdocuments of a given one, which may not be a file-level +// one (in which case, we have to retrieve this first, then filter the ipaths) +bool Db::getSubDocs(const Doc &idoc, vector& subdocs) +{ + if (m_ndb == 0) + return false; + + string inudi; + if (!idoc.getmeta(Doc::keyudi, &inudi) || inudi.empty()) { + LOGERR(("Db::getSubDocs: no input udi or empty\n")); + return false; + } + + string rootudi; + string ipath = idoc.ipath; + if (ipath.empty()) { + // File-level doc. 
Use it as root + rootudi = inudi; + } else { + // See if we have a parent term + Xapian::Document xdoc; + if (!m_ndb->getDoc(inudi, xdoc)) { + LOGERR(("Db::getSubDocs: can't get Xapian document\n")); + return false; + } + Xapian::TermIterator xit; + XAPTRY(xit = xdoc.termlist_begin(); + xit.skip_to(wrap_prefix(parent_prefix)), + m_ndb->xrdb, m_reason); + if (!m_reason.empty()) { + LOGERR(("Db::getSubDocs: xapian error: %s\n", m_reason.c_str())); + return false; + } + if (xit == xdoc.termlist_end()) { + LOGERR(("Db::getSubDocs: parent term not found\n")); + return false; + } + rootudi = strip_prefix(*xit); + } + + LOGDEB(("Db::getSubDocs: root: [%s]\n", rootudi.c_str())); + + // Retrieve all subdoc xapian ids for the root + vector docids; + if (!m_ndb->subDocs(rootudi, docids)) { + LOGDEB(("Db:getSubDocs: lower level subdocs failed\n")); + return false; + } + + // Retrieve doc, filter, and build output list for (int tries = 0; tries < 2; tries++) { try { - if (!m_ndb->xrdb.term_exists(uniterm)) { - // Document found in history no longer in the - // database. 
We return true (because their might be - // other ok docs further) but indicate the error with - // pc = -1 - doc.pc = -1; - LOGINFO(("Db:getDoc: no such doc in index: [%s] (len %d)\n", - uniterm.c_str(), uniterm.length())); - return true; - } - Xapian::PostingIterator docid = - m_ndb->xrdb.postlist_begin(uniterm); - Xapian::Document xdoc = m_ndb->xrdb.get_document(*docid); - string data = xdoc.get_data(); - doc.meta[Rcl::Doc::keyudi] = udi; - return m_ndb->dbDataToRclDoc(*docid, data, doc); + for (vector::const_iterator it = docids.begin(); + it != docids.end(); it++) { + Xapian::Document xdoc = m_ndb->xrdb.get_document(*it); + string data = xdoc.get_data(); + string docudi; + m_ndb->xdocToUdi(xdoc, docudi); + Doc doc; + doc.meta[Doc::keyudi] = docudi; + doc.meta[Doc::keyrr] = "100%"; + doc.pc = 100; + if (!m_ndb->dbDataToRclDoc(*it, data, doc)) { + LOGERR(("Db::getSubDocs: doc conversion error\n")); + return false; + } + if (ipath.empty() || doc.ipath.find(ipath) == 0) + subdocs.push_back(doc); + } + return true; } catch (const Xapian::DatabaseModifiedError &e) { m_reason = e.get_msg(); m_ndb->xrdb.reopen(); @@ -1725,7 +1888,7 @@ bool Db::getDoc(const string &udi, Doc &doc) break; } - LOGERR(("Db::getDoc: %s\n", m_reason.c_str())); + LOGERR(("Db::getSubDocs: Xapian error: %s\n", m_reason.c_str())); return false; } diff --git a/src/rcldb/rcldb.h b/src/rcldb/rcldb.h index 2d105f7c..61f350e9 100644 --- a/src/rcldb/rcldb.h +++ b/src/rcldb/rcldb.h @@ -357,6 +357,29 @@ class Db { */ bool getDoc(const string &udi, Doc &doc); + /** Test if a document has sub-documents. + * + * This can always be detected for file-level documents, using the + * postlist for the parent term constructed with udi. + * + * For non file-level documents (e.g.: does an email inside an + * mbox have attachments ?), detection is dependent on the filter + * having set an appropriate flag at index time. 
Higher level code + * can't detect it because the doc for the parent may have been + * seen before any children. The flag is stored as a boolean term in the + * index. + */ + bool hasSubDocs(const Doc &idoc); + + /** Get subdocuments of given document. + * + * For file-level documents, these are all docs indexed by the + * parent term built on idoc.udi. For embedded documents, the + * parent doc is looked for, then its subdocs list is + * filtered using the idoc ipath as a prefix. + */ + bool getSubDocs(const Doc& idoc, vector& subdocs); + /** Get duplicates (md5) of document */ bool docDups(const Doc& idoc, std::vector& odocs); diff --git a/src/rcldb/rcldb_p.h b/src/rcldb/rcldb_p.h index acab768a..8e32e02a 100644 --- a/src/rcldb/rcldb_p.h +++ b/src/rcldb/rcldb_p.h @@ -101,26 +101,39 @@ class Db::Native { bool addOrUpdateWrite(const string& udi, const string& uniterm, Xapian::Document& doc, size_t txtlen); + /** Delete all documents which are contained in the input document, + * which must be a file-level one. + * + * @param onlyOrphans if true, only delete documents which do not have + * the same signature as the input. This is used to delete docs + * which do not exist any more in the file after an update (for + * example the tail messages after a folder truncation). If false, + * delete all. + * @param udi the parent document identifier. + * @param uniterm equivalent to udi, passed just to avoid recomputing. 
+ */ bool purgeFileWrite(bool onlyOrphans, const string& udi, const string& uniterm); + bool getPagePositions(Xapian::docid docid, vector& vpos); int getPageNumberForPosition(const vector& pbreaks, unsigned int pos); bool dbDataToRclDoc(Xapian::docid docid, std::string &data, Doc &doc); - - bool xdocToUdi(Xapian::Document& xdoc, string &udi) - { - Xapian::TermIterator xit = xdoc.termlist_begin(); - xit.skip_to(wrap_prefix(udi_prefix)); - if (xit != xdoc.termlist_end()) { - udi = *xit; - if (!udi.empty()) { - udi = udi.substr(wrap_prefix(udi_prefix).size()); - return true; - } - } - return false; - } + + /** Retrieve Xapian::docid, given unique document identifier, + * using the posting list for the derived term. + * + * @return 0 if not found + */ + Xapian::docid getDoc(const string& udi, Xapian::Document& xdoc); + + /** Retrieve unique document identifier for given Xapian document, + * using the document termlist + */ + bool xdocToUdi(Xapian::Document& xdoc, string &udi); + + /** Check if doc is indexed by term */ + bool hasTerm(const string& udi, const string& term); /** Compute list of subdocuments for a given udi. We look for documents * indexed by a parent term matching the udi, the posting list for the @@ -131,14 +144,12 @@ class Db::Native { * Ie: in a mail folder, all messages, attachments, attachments of * attached messages etc. must have the folder file document as * parent. - * Parent-child relationships are defined by the indexer (rcldb user) + * + * Finer grain parent-child relationships are defined by the + * indexer (rcldb user), using the ipath. * - * The file-system indexer currently works this way (flatly), - * subDocs() could be relatively easily changed to support full recursivity - * if needed. 
*/ bool subDocs(const string &udi, vector& docids); - }; // This is the word position offset at which we index the body text diff --git a/src/rcldb/rcldoc.cpp b/src/rcldb/rcldoc.cpp index 92b05df1..bdcb3102 100644 --- a/src/rcldb/rcldoc.cpp +++ b/src/rcldb/rcldoc.cpp @@ -20,7 +20,6 @@ namespace Rcl { const string Doc::keyabs("abstract"); - const string Doc::keyanc("rclanc"); const string Doc::keyapptg("rclaptg"); const string Doc::keyau("author"); const string Doc::keybcknd("rclbes"); diff --git a/src/rcldb/rcldoc.h b/src/rcldb/rcldoc.h index 2e366301..a7b62479 100644 --- a/src/rcldb/rcldoc.h +++ b/src/rcldb/rcldoc.h @@ -121,6 +121,10 @@ class Doc { // Page breaks were stored during indexing. bool haspages; + // Has children, either as content of file-level container or + // ipath descendants. + bool haschildren; + /////////////////////////////////////////////////////////////////// void erase() { @@ -141,9 +145,10 @@ class Doc { pc = 0; xdocid = 0; haspages = false; + haschildren = false; } Doc() - : syntabs(false), pc(0), xdocid(0), haspages(false) + : syntabs(false), pc(0), xdocid(0), haspages(false), haschildren(false) { } /** Get value for named field. If value pointer is 0, just test existence */ @@ -225,9 +230,6 @@ class Doc { static const string keyudi; static const string keyapptg; // apptag. Set from localfields (fsindexer) static const string keybght; // beagle hit type ("beagleHitType") - // Boolean used to indicate if the doc has descendants in the ipath sense - // (different from the file/contend parent_udi thing). - static const string keyanc; };