diff --git a/src/doc/user/usermanual.html b/src/doc/user/usermanual.html index 78a7b56c..29c8d24e 100644 --- a/src/doc/user/usermanual.html +++ b/src/doc/user/usermanual.html @@ -3,7 +3,7 @@ + "HTML Tidy for HTML5 for Linux version 5.2.0"> Recoll user manual @@ -1135,8 +1135,8 @@ alink="#0000FF"> different areas of the file system to different indexes. For example, if you were to issue the following command:

-
recoll -c ~/.indexes-email
+
+              recoll -c ~/.indexes-email

Then Recoll would use configuration files stored in ~/.indexes-email/ and, (unless @@ -3874,8 +3874,8 @@ fs.inotify.max_user_watches=32768 that every user does not have to do it. The variable should define a colon-separated list of index directories, ie:

-
export RECOLL_EXTRA_DBS=/some/place/xapiandb:/some/other/db
+
+          export RECOLL_EXTRA_DBS=/some/place/xapiandb:/some/other/db

Another environment variable, RECOLL_ACTIVE_EXTRA_DBS allows adding to the active list of indexes. This variable was suggested @@ -4677,8 +4677,8 @@ fs.inotify.max_user_watches=32768 parent folder expansion, usually creating a file manager window on the folder where the container file resides. E.g.:

-
<a href="F%N">%P</a>
+
+              <a href="F%N">%P</a>

A link target defined as R%N|scriptname @@ -4820,8 +4820,8 @@ fs.inotify.max_user_watches=32768 javascript program to the documents, like the following example, which would initiate a search by double-clicking any term:

-
<script language="JavaScript">
+          
+          <script language="JavaScript">
         function recollsearch() {
         var t = document.getSelection();
         window.location.href = 'recoll://search/query?qtp=a&p=0&q=' +
@@ -5115,7 +5115,17 @@ text/html       [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/r
             
  • ext specifies the file name extension (Ex: ext:html)

    + "literal">ext:html).

    +
  • +
  • +

    rclmd5 the MD5 + checksum for the document. This is used for + displaying the duplicates of a search result (when + querying with the option to collapse duplicate + results). Incidentally, this could be used to find + the duplicates of any given file by computing its MD5 + checksum and executing a query with just the + rclmd5 value.

  • @@ -10055,8 +10065,8 @@ for i in range(nres): "filename">.xml extension but should be handled specially, which is possible because they are usually all located in one place. Example:

    -
    [~/.kde/share/apps/okular/docdata]
    +          
    +          [~/.kde/share/apps/okular/docdata]
             .xml = application/x-okular-notes

    The recoll_noindex mimemap variable has been diff --git a/src/doc/user/usermanual.xml b/src/doc/user/usermanual.xml index 66ccd5cb..fefe4b3f 100644 --- a/src/doc/user/usermanual.xml +++ b/src/doc/user/usermanual.xml @@ -3896,27 +3896,36 @@ text/html [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/r name for an email attachment. containerfilename. This is - set for all documents, both top-level and contained - sub-documents, and is always the name of the filesystem directory - entry which contains the data. The terms from this field can - only be matched by an explicit field specification (as opposed - to terms from filename which are also indexed - as general document content). This avoids getting matches for - all the sub-documents when searching for the container file - name. + set for all documents, both top-level and contained + sub-documents, and is always the name of the filesystem directory + entry which contains the data. The terms from this field can + only be matched by an explicit field specification (as opposed + to terms from filename which are also indexed + as general document content). This avoids getting matches for + all the sub-documents when searching for the container file + name. ext specifies the file - name extension (Ex: ext:html) + name extension + (Ex: ext:html). + + rclmd5 the MD5 checksum for the + document. This is used for displaying the duplicates of a + search result (when querying with the option to collapse + duplicate results). Incidentally, this could be used to find + the duplicates of any given file by computing its MD5 checksum + and executing a query with just the rclmd5 + value. &RCL; 1.20 and later have a way to specify aliases for the - field names, which will save typing, for example by aliasing - filename to fn or - containerfilename to - cfn. See the - section about the fields file. + field names, which will save typing, for example by aliasing + filename to fn or + containerfilename to + cfn. See the + section about the fields file. The document input handlers used while indexing have the diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index 6cedda3c..5a94c2f2 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -1866,10 +1866,11 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc) RECORD_APPEND(record, string(cstr_mbreaks), multibreaks.str()); } - // If the file's md5 was computed, add value and term. - // The value is optionally used for query result duplicate elimination, - // and the term to find the duplicates. - // We don't do this for empty docs. + // If the file's md5 was computed, add value and term. The + // value is optionally used for query result duplicate + // elimination, and the term to find the duplicates (XM is the + // prefix for rclmd5 in fields) We don't do this for empty + // docs. const string *md5; if (doc.peekmeta(Doc::keymd5, &md5) && !md5->empty() && md5->compare(cstr_md5empty)) { diff --git a/src/rcldb/rcldups.cpp b/src/rcldb/rcldups.cpp index 1a8e3780..06f5266b 100644 --- a/src/rcldb/rcldups.cpp +++ b/src/rcldb/rcldups.cpp @@ -1,4 +1,4 @@ -/* Copyright (C) 2013 J.F.Dockes +/* Copyright (C) 2013-2020 J.F.Dockes * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -35,36 +35,36 @@ using namespace std; namespace Rcl { /** Retrieve the dups of a given document. The input has to be a query result - * because we use the xdocid. We get the md5 from this, then the dups */ + * because we use the xdocid. We get the md5 from this, then the dups */ bool Db::docDups(const Doc& idoc, vector& odocs) { if (m_ndb == 0) { - LOGERR("Db::docDups: no db\n" ); - return false; + LOGERR("Db::docDups: no db\n"); + return false; } if (idoc.xdocid == 0) { - LOGERR("Db::docDups: null xdocid in input doc\n" ); - return false; + LOGERR("Db::docDups: null xdocid in input doc\n"); + return false; } // Get the xapian doc Xapian::Document xdoc; XAPTRY(xdoc = m_ndb->xrdb.get_document(Xapian::docid(idoc.xdocid)), - m_ndb->xrdb, m_reason); + m_ndb->xrdb, m_reason); if (!m_reason.empty()) { - LOGERR("Db::docDups: xapian error: " << (m_reason) << "\n" ); - return false; + LOGERR("Db::docDups: xapian error: " << m_reason << "\n"); + return false; } // Get the md5 string digest; XAPTRY(digest = xdoc.get_value(VALUE_MD5), m_ndb->xrdb, m_reason); if (!m_reason.empty()) { - LOGERR("Db::docDups: xapian error: " << (m_reason) << "\n" ); - return false; + LOGERR("Db::docDups: xapian error: " << m_reason << "\n"); + return false; } if (digest.empty()) { - LOGDEB("Db::docDups: doc has no md5\n" ); - return false; + LOGDEB("Db::docDups: doc has no md5\n"); + return false; } string md5; MD5HexPrint(digest, md5); @@ -72,45 +72,27 @@ bool Db::docDups(const Doc& idoc, vector& odocs) SearchData *sdp = new SearchData(); std::shared_ptr sd(sdp); SearchDataClauseSimple *sdc = - new SearchDataClauseSimple(SCLT_AND, md5, "rclmd5"); + new SearchDataClauseSimple(SCLT_AND, md5, "rclmd5"); sdc->addModifier(SearchDataClause::SDCM_CASESENS); sdc->addModifier(SearchDataClause::SDCM_DIACSENS); sd->addClause(sdc); Query query(this); query.setCollapseDuplicates(0); if (!query.setQuery(sd)) { - LOGERR("Db::docDups: setQuery failed\n" ); - return false; + LOGERR("Db::docDups: setQuery failed\n"); + return false; } int cnt = query.getResCnt(); for (int i = 0; i < cnt; i++) { - Doc doc; - if (!query.getDoc(i, doc)) { - LOGERR("Db::docDups: getDoc failed at " << (i) << " (cnt " << (cnt) << ")\n" ); - return false; - } - odocs.push_back(doc); + Doc doc; + if (!query.getDoc(i, doc)) { + LOGERR("Db::docDups: getDoc failed at " << i << " (cnt " << cnt << + ")\n"); + return false; + } + odocs.push_back(doc); } return true; } -#if 0 - { - vector dups; - bool ret; - LOGDEB("DOCDUPS\n" ); - ret = m_db->docDups(doc, dups); - if (!ret) { - LOGDEB("docDups failed\n" ); - } else if (dups.size() == 1) { - LOGDEB("No dups\n" ); - } else { - for (unsigned int i = 0; i < dups.size(); i++) { - LOGDEB("Dup: " << (dups[i].url) << "\n" ); - } - } - } -#endif - } -