From 02556e7d088ce630cde956759d0768c0d42fba71 Mon Sep 17 00:00:00 2001
From: Jean-Francois Dockes
recoll -c ~/.indexes-email+
+ recoll -c ~/.indexes-email
Then Recoll would
use configuration files stored in ~/.indexes-email/ and, (unless
@@ -3874,8 +3874,8 @@ fs.inotify.max_user_watches=32768
that every user does not have to do it. The variable
should define a colon-separated list of index
directories, i.e.:
export RECOLL_EXTRA_DBS=/some/place/xapiandb:/some/other/db+
+ export RECOLL_EXTRA_DBS=/some/place/xapiandb:/some/other/db
Another environment variable, RECOLL_ACTIVE_EXTRA_DBS allows adding to
the active list of indexes. This variable was suggested
@@ -4677,8 +4677,8 @@ fs.inotify.max_user_watches=32768
parent folder expansion, usually creating a file
manager window on the folder where the container file
resides. E.g.:
<a href="F%N">%P</a>+
+ <a href="F%N">%P</a>
A link target defined as R%N|
@@ -4820,8 +4820,8 @@ fs.inotify.max_user_watches=32768
JavaScript program to
the documents, like the following example, which would
initiate a search by double-clicking any term:scriptname
<script language="JavaScript">
+
+ <script language="JavaScript">
function recollsearch() {
var t = document.getSelection();
window.location.href = 'recoll://search/query?qtp=a&p=0&q=' +
@@ -5115,7 +5115,17 @@ text/html [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/r
ext specifies the
file name extension (Ex: ext:html)
rclmd5 the MD5
+ checksum for the document. This is used for
+ displaying the duplicates of a search result (when
+ querying with the option to collapse duplicate
+ results). Incidentally, this could be used to find
+ the duplicates of any given file by computing its MD5
+ checksum and executing a query with just the
+ rclmd5 value.
[~/.kde/share/apps/okular/docdata] ++ [~/.kde/share/apps/okular/docdata] .xml = application/x-okular-notesThe
recoll_noindexmimemapvariable has been diff --git a/src/doc/user/usermanual.xml b/src/doc/user/usermanual.xml index 66ccd5cb..fefe4b3f 100644 --- a/src/doc/user/usermanual.xml +++ b/src/doc/user/usermanual.xml @@ -3896,27 +3896,36 @@ text/html [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/r name for an email attachment.+ set for all documents, both top-level and contained + sub-documents, and is always the name of the filesystem directory + entry which contains the data. The terms from this field can + only be matched by an explicit field specification (as opposed + to terms from containerfilename . This is - set for all documents, both top-level and contained - sub-documents, and is always the name of the filesystem directory - entry which contains the data. The terms from this field can - only be matched by an explicit field specification (as opposed - to terms fromfilename which are also indexed - as general document content). This avoids getting matches for - all the sub-documents when searching for the container file - name.filename which are also indexed + as general document content). This avoids getting matches for + all the sub-documents when searching for the container file + name.+ + + name extension + (Ex: ext specifies the file - name extension (Ex:ext:html )ext:html ).rclmd5 the MD5 checksum for the + document. This is used for displaying the duplicates of a + search result (when querying with the option to collapse + duplicate results). Incidentally, this could be used to find + the duplicates of any given file by computing its MD5 checksum + and executing a query with just therclmd5 + value.&RCL; 1.20 and later have a way to specify aliases for the - field names, which will save typing, for example by aliasing - filename tofn or -containerfilename to -cfn . See the - section about thefields file. + field names, which will save typing, for example by aliasing +filename tofn or +containerfilename to +cfn . 
See the + section about thefields file.The document input handlers used while indexing have the diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index 6cedda3c..5a94c2f2 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -1866,10 +1866,11 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc) RECORD_APPEND(record, string(cstr_mbreaks), multibreaks.str()); } - // If the file's md5 was computed, add value and term. - // The value is optionally used for query result duplicate elimination, - // and the term to find the duplicates. - // We don't do this for empty docs. + // If the file's md5 was computed, add value and term. The + // value is optionally used for query result duplicate + // elimination, and the term to find the duplicates (XM is the + // prefix for rclmd5 in fields) We don't do this for empty + // docs. const string *md5; if (doc.peekmeta(Doc::keymd5, &md5) && !md5->empty() && md5->compare(cstr_md5empty)) { diff --git a/src/rcldb/rcldups.cpp b/src/rcldb/rcldups.cpp index 1a8e3780..06f5266b 100644 --- a/src/rcldb/rcldups.cpp +++ b/src/rcldb/rcldups.cpp @@ -1,4 +1,4 @@ -/* Copyright (C) 2013 J.F.Dockes +/* Copyright (C) 2013-2020 J.F.Dockes * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -35,36 +35,36 @@ using namespace std; namespace Rcl { /** Retrieve the dups of a given document. The input has to be a query result - * because we use the xdocid. We get the md5 from this, then the dups */ + * because we use the xdocid. 
We get the md5 from this, then the dups */ bool Db::docDups(const Doc& idoc, vector & odocs) { if (m_ndb == 0) { - LOGERR("Db::docDups: no db\n" ); - return false; + LOGERR("Db::docDups: no db\n"); + return false; } if (idoc.xdocid == 0) { - LOGERR("Db::docDups: null xdocid in input doc\n" ); - return false; + LOGERR("Db::docDups: null xdocid in input doc\n"); + return false; } // Get the xapian doc Xapian::Document xdoc; XAPTRY(xdoc = m_ndb->xrdb.get_document(Xapian::docid(idoc.xdocid)), - m_ndb->xrdb, m_reason); + m_ndb->xrdb, m_reason); if (!m_reason.empty()) { - LOGERR("Db::docDups: xapian error: " << (m_reason) << "\n" ); - return false; + LOGERR("Db::docDups: xapian error: " << m_reason << "\n"); + return false; } // Get the md5 string digest; XAPTRY(digest = xdoc.get_value(VALUE_MD5), m_ndb->xrdb, m_reason); if (!m_reason.empty()) { - LOGERR("Db::docDups: xapian error: " << (m_reason) << "\n" ); - return false; + LOGERR("Db::docDups: xapian error: " << m_reason << "\n"); + return false; } if (digest.empty()) { - LOGDEB("Db::docDups: doc has no md5\n" ); - return false; + LOGDEB("Db::docDups: doc has no md5\n"); + return false; } string md5; MD5HexPrint(digest, md5); @@ -72,45 +72,27 @@ bool Db::docDups(const Doc& idoc, vector & odocs) SearchData *sdp = new SearchData(); std::shared_ptr sd(sdp); SearchDataClauseSimple *sdc = - new SearchDataClauseSimple(SCLT_AND, md5, "rclmd5"); + new SearchDataClauseSimple(SCLT_AND, md5, "rclmd5"); sdc->addModifier(SearchDataClause::SDCM_CASESENS); sdc->addModifier(SearchDataClause::SDCM_DIACSENS); sd->addClause(sdc); Query query(this); query.setCollapseDuplicates(0); if (!query.setQuery(sd)) { - LOGERR("Db::docDups: setQuery failed\n" ); - return false; + LOGERR("Db::docDups: setQuery failed\n"); + return false; } int cnt = query.getResCnt(); for (int i = 0; i < cnt; i++) { - Doc doc; - if (!query.getDoc(i, doc)) { - LOGERR("Db::docDups: getDoc failed at " << (i) << " (cnt " << (cnt) << ")\n" ); - return false; - } - 
odocs.push_back(doc); + Doc doc; + if (!query.getDoc(i, doc)) { + LOGERR("Db::docDups: getDoc failed at " << i << " (cnt " << cnt << + ")\n"); + return false; + } + odocs.push_back(doc); } return true; } -#if 0 - { - vector dups; - bool ret; - LOGDEB("DOCDUPS\n" ); - ret = m_db->docDups(doc, dups); - if (!ret) { - LOGDEB("docDups failed\n" ); - } else if (dups.size() == 1) { - LOGDEB("No dups\n" ); - } else { - for (unsigned int i = 0; i < dups.size(); i++) { - LOGDEB("Dup: " << (dups[i].url) << "\n" ); - } - } - } -#endif - } -