diff --git a/src/internfile/mh_exec.cpp b/src/internfile/mh_exec.cpp index 0b5774bc..329a88fb 100644 --- a/src/internfile/mh_exec.cpp +++ b/src/internfile/mh_exec.cpp @@ -25,6 +25,7 @@ static char rcsid[] = "@(#$Id: mh_exec.cpp,v 1.14 2008-10-09 09:19:37 dockes Exp #include "cancelcheck.h" #include "smallut.h" #include "transcode.h" +#include "md5.h" #include #include @@ -130,5 +131,14 @@ bool MimeHandlerExec::next_document() // could still be overridden by the content-type meta tag. m_metaData["charset"] = charset; m_metaData["mimetype"] = mt; + + string md5, xmd5, reason; + if (MD5File(m_fn, md5, &reason)) { + m_metaData["md5"] = MD5HexPrint(md5, xmd5); + } else { + LOGERR(("MimeHandlerExec: cant compute md5 for [%s]: %s\n", + m_fn.c_str(), reason.c_str())); + } + return true; } diff --git a/src/internfile/mh_html.cpp b/src/internfile/mh_html.cpp index 2c9140cb..48cc173c 100644 --- a/src/internfile/mh_html.cpp +++ b/src/internfile/mh_html.cpp @@ -29,6 +29,7 @@ static char rcsid[] = "@(#$Id: mh_html.cpp,v 1.26 2008-10-03 06:17:46 dockes Exp #include "indextext.h" #include "mh_html.h" #include "smallut.h" +#include "md5.h" #include @@ -53,6 +54,12 @@ bool MimeHandlerHtml::set_document_string(const string& htext) { m_html = htext; m_havedoc = true; + + // We want to compute the md5 now because we may modify m_html later + string md5, xmd5; + MD5String(htext, md5); + m_metaData["md5"] = MD5HexPrint(md5, xmd5); + return true; } diff --git a/src/internfile/mh_mail.cpp b/src/internfile/mh_mail.cpp index 89dad472..789f16af 100644 --- a/src/internfile/mh_mail.cpp +++ b/src/internfile/mh_mail.cpp @@ -38,6 +38,7 @@ static char rcsid[] = "@(#$Id: mh_mail.cpp,v 1.35 2008-10-04 14:26:59 dockes Exp #include "mh_html.h" #include "rclconfig.h" #include "mimetype.h" +#include "md5.h" // binc imap mime definitions #include "mime.h" @@ -81,6 +82,18 @@ bool MimeHandlerMail::set_document_file(const string &fn) close(m_fd); m_fd = -1; } + + // Yes, we read the file twice. It would be possible in theory to add + // the md5 computation to the mime analysis, but ... + string md5, xmd5, reason; + if (MD5File(fn, md5, &reason)) { + m_metaData["md5"] = MD5HexPrint(md5, xmd5); + } else { + LOGERR(("MimeHandlerMail: cant compute md5 for [%s]: %s\n", fn.c_str(), + reason.c_str())); + } + + m_fd = open(fn.c_str(), 0); if (m_fd < 0) { LOGERR(("MimeHandlerMail::set_document_file: open(%s) errno %d\n", @@ -104,6 +117,11 @@ bool MimeHandlerMail::set_document_string(const string &msgtxt) LOGDEB1(("MimeHandlerMail::set_document_string\n")); LOGDEB2(("Message text: [%s]\n", msgtxt.c_str())); delete m_stream; + + string md5, xmd5; + MD5String(msgtxt, md5); + m_metaData["md5"] = MD5HexPrint(md5, xmd5); + m_stream = new stringstream(msgtxt); delete m_bincdoc; m_bincdoc = new Binc::MimeDocument; diff --git a/src/internfile/mh_text.cpp b/src/internfile/mh_text.cpp index 5a76981f..c41430d6 100644 --- a/src/internfile/mh_text.cpp +++ b/src/internfile/mh_text.cpp @@ -29,19 +29,26 @@ using namespace std; #include "debuglog.h" #include "readfile.h" #include "transcode.h" +#include "md5.h" // Process a plain text file bool MimeHandlerText::set_document_file(const string &fn) { string otext; - if (!file_to_string(fn, otext)) + string reason; + if (!file_to_string(fn, otext, &reason)) { + LOGERR(("MimeHandlerText: can't read file: %s\n", reason.c_str())); return false; + } return set_document_string(otext); } bool MimeHandlerText::set_document_string(const string& otext) { m_text = otext; + string md5, xmd5; + MD5String(m_text, md5); + m_metaData["md5"] = MD5HexPrint(md5, xmd5); m_havedoc = true; return true; } diff --git a/src/qtgui/guiutils.cpp b/src/qtgui/guiutils.cpp index f8112317..04ff6914 100644 --- a/src/qtgui/guiutils.cpp +++ b/src/qtgui/guiutils.cpp @@ -175,6 +175,8 @@ void rwSettings(bool writing) SETTING_RW(prefs.ssearchAutoPhrase, "/Recoll/prefs/ssearchAutoPhrase", Bool, false); SETTING_RW(prefs.respagesize, "/Recoll/prefs/reslist/pagelen", Num, 8); + SETTING_RW(prefs.collapseDuplicates, + "/Recoll/prefs/reslist/collapseDuplicates", Bool, false); SETTING_RW(prefs.maxhltextmbs, "/Recoll/prefs/preview/maxhltextmbs", Num, 3); SETTING_RW(prefs.qtermcolor, "/Recoll/prefs/qtermcolor", , "blue"); if (!writing && prefs.qtermcolor == "") diff --git a/src/qtgui/guiutils.h b/src/qtgui/guiutils.h index 93e992a9..9f213773 100644 --- a/src/qtgui/guiutils.h +++ b/src/qtgui/guiutils.h @@ -82,6 +82,7 @@ class PrefsPack { bool startWithAdvSearchOpen; bool startWithSortToolOpen; bool previewHtml; + bool collapseDuplicates; // Extra query indexes. This are encoded to base64 before storing // to the qt settings file to avoid any bin string/ charset conv issues list allExtraDbs; diff --git a/src/qtgui/rclmain_w.cpp b/src/qtgui/rclmain_w.cpp index 41df30e2..d3cea459 100644 --- a/src/qtgui/rclmain_w.cpp +++ b/src/qtgui/rclmain_w.cpp @@ -471,6 +471,7 @@ void RclMain::startSearch(RefCntr sdata) sdata->setStemlang(stemLang); Rcl::Query *query = new Rcl::Query(rcldb); + query->setCollapseDuplicates(prefs.collapseDuplicates); if (!query || !query->setQuery(sdata)) { QMessageBox::warning(0, "Recoll", tr("Can't start query: ") + diff --git a/src/qtgui/uiprefs.ui b/src/qtgui/uiprefs.ui index 334c177f..bb0af112 100644 --- a/src/qtgui/uiprefs.ui +++ b/src/qtgui/uiprefs.ui @@ -90,6 +90,20 @@ + + + collapseDupsCB + + + If checked, results with the same content under different names will only be shown once. + + + Hide duplicate results. + + + false + + qtermcolor diff --git a/src/qtgui/uiprefs_w.cpp b/src/qtgui/uiprefs_w.cpp index cffe42f6..33b2b6d2 100644 --- a/src/qtgui/uiprefs_w.cpp +++ b/src/qtgui/uiprefs_w.cpp @@ -97,6 +97,7 @@ void UIPrefsDialog::setFromPrefs() { // Entries per result page spinbox pageLenSB->setValue(prefs.respagesize); + collapseDupsCB->setChecked(prefs.collapseDuplicates); maxHLTSB->setValue(prefs.maxhltextmbs); autoSearchCB->setChecked(prefs.autoSearchOnWS); syntlenSB->setValue(prefs.syntAbsLen); @@ -173,6 +174,7 @@ void UIPrefsDialog::accept() { prefs.autoSearchOnWS = autoSearchCB->isChecked(); prefs.respagesize = pageLenSB->value(); + prefs.collapseDuplicates = collapseDupsCB->isChecked(); prefs.maxhltextmbs = maxHLTSB->value(); prefs.qtermcolor = qtermColorLE->text(); diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index 1d172d71..1e5cf2fd 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -48,7 +48,7 @@ using namespace std; #include "searchdata.h" #include "rclquery.h" #include "rclquery_p.h" - +#include "md5.h" #ifndef MAX #define MAX(A,B) (A>B?A:B) @@ -57,15 +57,6 @@ using namespace std; #define MIN(A,B) (A sdata) string d; try { m_nq->enquire = new Xapian::Enquire(m_db->m_ndb->db); - m_nq->enquire->set_query(m_nq->query); + if (m_collapseDuplicates) { + m_nq->enquire->set_collapse_key(Rcl::VALUE_MD5); + } else { + m_nq->enquire->set_collapse_key(Xapian::BAD_VALUENO); + } if (!m_sortField.empty()) { if (m_sorter) { delete (QSorter*)m_sorter; @@ -167,6 +172,7 @@ bool Query::setQuery(RefCntr sdata) m_nq->enquire->set_sort_by_key((QSorter*)m_sorter, !m_sortAscending); } + m_nq->enquire->set_query(m_nq->query); m_nq->mset = Xapian::MSet(); // Get the query description and trim the "Xapian::Query" d = m_nq->query.get_description(); diff --git a/src/rcldb/rclquery.h b/src/rcldb/rclquery.h index bae86a54..10f7a3bb 100644 --- a/src/rcldb/rclquery.h +++ b/src/rcldb/rclquery.h @@ -54,6 +54,7 @@ class Query { /** Choose sort order. Must be called before setQuery */ void setSortBy(const string& fld, bool ascending = true); + void setCollapseDuplicates(bool on) {m_collapseDuplicates = on;} const string& getSortBy() const {return m_sortField;} bool getSortAscending() const {return m_sortAscending;} @@ -90,8 +91,9 @@ private: string m_reason; // Error explanation Db *m_db; void *m_sorter; - string m_sortField; - bool m_sortAscending; + string m_sortField; + bool m_sortAscending; + bool m_collapseDuplicates; /* Copyconst and assignement private and forbidden */ Query(const Query &) {} Query & operator=(const Query &) {return *this;};