doc and comments

2020-06-25 16:06:45 +02:00 · 2020-06-25 16:06:45 +02:00 · 02556e7d08
commit 02556e7d08
parent 101a566dec
4 changed files with 73 additions and 71 deletions
--- a/src/doc/user/usermanual.html
+++ b/src/doc/user/usermanual.html
@ -3,7 +3,7 @@
 <html>
 <head>
  <meta name="generator" content=
-  "HTML Tidy for HTML5 for Linux version 5.6.0">
+  "HTML Tidy for HTML5 for Linux version 5.2.0">
  <meta http-equiv="Content-Type" content=
  "text/html; charset=utf-8">
  <title>Recoll user manual</title>
@ -1135,8 +1135,8 @@ alink="#0000FF">
              different areas of the file system to different
              indexes. For example, if you were to issue the
              following command:</p>
-              <pre class=
+              <pre class="programlisting">
-              "programlisting">recoll -c ~/.indexes-email</pre>
+              recoll -c ~/.indexes-email</pre>
              <p>Then <span class="application">Recoll</span> would
              use configuration files stored in <code class=
              "filename">~/.indexes-email/</code> and, (unless
@ -3874,8 +3874,8 @@ fs.inotify.max_user_watches=32768
          that every user does not have to do it. The variable
          should define a colon-separated list of index
          directories, i.e.:</p>
-          <pre class=
+          <pre class="screen">
-          "screen">export RECOLL_EXTRA_DBS=/some/place/xapiandb:/some/other/db</pre>
+          export RECOLL_EXTRA_DBS=/some/place/xapiandb:/some/other/db</pre>
          <p>Another environment variable, <code class=
          "envar">RECOLL_ACTIVE_EXTRA_DBS</code> allows adding to
          the active list of indexes. This variable was suggested
@ -4677,8 +4677,8 @@ fs.inotify.max_user_watches=32768
              parent folder expansion, usually creating a file
              manager window on the folder where the container file
              resides. E.g.:</p>
-              <pre class=
+              <pre class="programlisting">
-              "programlisting">&lt;a href="F%N"&gt;%P&lt;/a&gt;</pre>
+              &lt;a href="F%N"&gt;%P&lt;/a&gt;</pre>
              <p>A link target defined as <code class=
              "literal">R%N|<em class=
              "replaceable"><code>scriptname</code></em></code>
@ -4820,8 +4820,8 @@ fs.inotify.max_user_watches=32768
          <span class="application">javascript</span> program to
          the documents, like the following example, which would
          initiate a search by double-clicking any term:</p>
-          <pre class=
+          <pre class="programlisting">
-          "programlisting">&lt;script language="JavaScript"&gt;
+          &lt;script language="JavaScript"&gt;
        function recollsearch() {
        var t = document.getSelection();
        window.location.href = 'recoll://search/query?qtp=a&amp;p=0&amp;q=' +
@ -5115,7 +5115,17 @@ text/html       [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/r
            <li class="listitem">
              <p><code class="literal">ext</code> specifies the
              file name extension (Ex: <code class=
-              "literal">ext:html</code>)</p>
+              "literal">ext:html</code>).</p>
            </li>
            <li class="listitem">
              <p><code class="literal">rclmd5</code> is the MD5
              checksum for the document. This is used for
              displaying the duplicates of a search result (when
              querying with the option to collapse duplicate
              results). Incidentally, this could be used to find
              the duplicates of any given file by computing its MD5
              checksum and executing a query with just the
              <code class="literal">rclmd5</code> value.</p>
            </li>
          </ul>
        </div>
@ -10055,8 +10065,8 @@ for i in range(nres):
          "filename">.xml</code> extension but should be handled
          specially, which is possible because they are usually all
          located in one place. Example:</p>
-          <pre class=
+          <pre class="programlisting">
-          "programlisting">[~/.kde/share/apps/okular/docdata]
+          [~/.kde/share/apps/okular/docdata]
        .xml = application/x-okular-notes</pre>
          <p>The <code class="varname">recoll_noindex</code>
          <code class="filename">mimemap</code> variable has been
--- a/src/doc/user/usermanual.xml
+++ b/src/doc/user/usermanual.xml
@ -3896,27 +3896,36 @@ text/html       [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/r
        name for an email attachment.</para></listitem> 
        <listitem><para><literal>containerfilename</literal>. This is
-        set for all documents, both top-level and contained
+            set for all documents, both top-level and contained
-        sub-documents, and is always the name of the filesystem directory
+            sub-documents, and is always the name of the filesystem directory
-        entry which contains the data. The terms from this field can
+            entry which contains the data. The terms from this field can
-        only be matched by an explicit field specification (as opposed
+            only be matched by an explicit field specification (as opposed
-        to terms from <literal>filename</literal> which are also indexed
+            to terms from <literal>filename</literal> which are also indexed
-        as general document content). This avoids getting matches for
+            as general document content). This avoids getting matches for
-        all the sub-documents when searching for the container file
+            all the sub-documents when searching for the container file
-        name.</para></listitem> 
+            name.</para></listitem> 
        <listitem><para><literal>ext</literal> specifies the file
-        name extension (Ex: <literal>ext:html</literal>)</para>
+            name extension
            (Ex: <literal>ext:html</literal>).</para></listitem>
        <listitem><para><literal>rclmd5</literal> is the MD5 checksum for the
            document. This is used for displaying the duplicates of a
            search result (when querying with the option to collapse
            duplicate results). Incidentally, this could be used to find
            the duplicates of any given file by computing its MD5 checksum
            and executing a query with just the <literal>rclmd5</literal>
            value.</para>
        </listitem>
      </itemizedlist>
      <para>&RCL; 1.20 and later have a way to specify aliases for the
-      field names, which will save typing, for example by aliasing
+        field names, which will save typing, for example by aliasing
-      <literal>filename</literal> to <replaceable>fn</replaceable> or
+        <literal>filename</literal> to <replaceable>fn</replaceable> or
-      <literal>containerfilename</literal> to
+        <literal>containerfilename</literal> to
-      <replaceable>cfn</replaceable>. See the
+        <replaceable>cfn</replaceable>. See the
-      <link linkend="RCL.INSTALL.CONFIG.FIELDS">section about the <filename>fields</filename> file</link>.
+        <link linkend="RCL.INSTALL.CONFIG.FIELDS">section about the <filename>fields</filename> file</link>.
      </para> 
      <para>The document input handlers used while indexing have the
--- a/src/rcldb/rcldb.cpp
+++ b/src/rcldb/rcldb.cpp
@ -1866,10 +1866,11 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
            RECORD_APPEND(record, string(cstr_mbreaks), multibreaks.str());
        }
-        // If the file's md5 was computed, add value and term. 
+        // If the file's md5 was computed, add value and term.  The
-        // The value is optionally used for query result duplicate elimination, 
+        // value is optionally used for query result duplicate
-        // and the term to find the duplicates.
+        // elimination, and the term to find the duplicates (XM is the
-        // We don't do this for empty docs.
+        // prefix for rclmd5 in fields). We don't do this for empty
        // docs.
        const string *md5;
        if (doc.peekmeta(Doc::keymd5, &md5) && !md5->empty() &&
            md5->compare(cstr_md5empty)) {
--- a/src/rcldb/rcldups.cpp
+++ b/src/rcldb/rcldups.cpp
@ -1,4 +1,4 @@
-/* Copyright (C) 2013 J.F.Dockes
+/* Copyright (C) 2013-2020 J.F.Dockes
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
@ -35,36 +35,36 @@ using namespace std;
 namespace Rcl {
 /** Retrieve the dups of a given document. The input has to be a query result
-  * because we use the xdocid. We get the md5 from this, then the dups */
+ * because we use the xdocid. We get the md5 from this, then the dups */
 bool Db::docDups(const Doc& idoc, vector<Doc>& odocs)
 {
    if (m_ndb == 0) {
-    LOGERR("Db::docDups: no db\n" );
+        LOGERR("Db::docDups: no db\n");
-    return false;
+        return false;
    }
    if (idoc.xdocid == 0) {
-    LOGERR("Db::docDups: null xdocid in input doc\n" );
+        LOGERR("Db::docDups: null xdocid in input doc\n");
-    return false;
+        return false;
    }
    // Get the xapian doc
    Xapian::Document xdoc;
    XAPTRY(xdoc = m_ndb->xrdb.get_document(Xapian::docid(idoc.xdocid)), 
-       m_ndb->xrdb, m_reason);
+           m_ndb->xrdb, m_reason);
    if (!m_reason.empty()) {
-    LOGERR("Db::docDups: xapian error: "  << (m_reason) << "\n" );
+        LOGERR("Db::docDups: xapian error: " << m_reason << "\n");
-    return false;
+        return false;
    }
    // Get the md5
    string digest;
    XAPTRY(digest = xdoc.get_value(VALUE_MD5), m_ndb->xrdb, m_reason);
    if (!m_reason.empty()) {
-    LOGERR("Db::docDups: xapian error: "  << (m_reason) << "\n" );
+        LOGERR("Db::docDups: xapian error: " << m_reason << "\n");
-    return false;
+        return false;
    }
    if (digest.empty()) {
-    LOGDEB("Db::docDups: doc has no md5\n" );
+        LOGDEB("Db::docDups: doc has no md5\n");
-    return false;
+        return false;
    }
    string md5;
    MD5HexPrint(digest, md5);
@ -72,45 +72,27 @@ bool Db::docDups(const Doc& idoc, vector<Doc>& odocs)
    SearchData *sdp = new SearchData();
    std::shared_ptr<SearchData> sd(sdp);
    SearchDataClauseSimple *sdc = 
-    new SearchDataClauseSimple(SCLT_AND, md5, "rclmd5");
+        new SearchDataClauseSimple(SCLT_AND, md5, "rclmd5");
    sdc->addModifier(SearchDataClause::SDCM_CASESENS);
    sdc->addModifier(SearchDataClause::SDCM_DIACSENS);
    sd->addClause(sdc);
    Query query(this);
    query.setCollapseDuplicates(0);
    if (!query.setQuery(sd)) {
-    LOGERR("Db::docDups: setQuery failed\n" );
+        LOGERR("Db::docDups: setQuery failed\n");
-    return false;
+        return false;
    }
    int cnt = query.getResCnt();
    for (int i = 0; i < cnt; i++) {
-    Doc doc;
+        Doc doc;
-    if (!query.getDoc(i, doc)) {
+        if (!query.getDoc(i, doc)) {
-        LOGERR("Db::docDups: getDoc failed at "  << (i) << " (cnt "  << (cnt) << ")\n" );
+            LOGERR("Db::docDups: getDoc failed at " << i << " (cnt " << cnt <<
-        return false;
+                   ")\n");
-    }
+            return false;
-    odocs.push_back(doc);
+        }
        odocs.push_back(doc);
    }
    return true;
 }
 #if 0
    {
    vector<Doc> dups;
    bool ret;
    LOGDEB("DOCDUPS\n" );
    ret = m_db->docDups(doc, dups);
    if (!ret) {
        LOGDEB("docDups failed\n" );
    } else if (dups.size() == 1) {
        LOGDEB("No dups\n" );
    } else {
        for (unsigned int i = 0; i < dups.size(); i++) {
        LOGDEB("Dup: "  << (dups[i].url) << "\n" );
        }
    }
    }
 #endif
 }