doc and comments

This commit is contained in:
Jean-Francois Dockes 2020-06-25 16:06:45 +02:00
parent 101a566dec
commit 02556e7d08
4 changed files with 73 additions and 71 deletions

View File

@ -3,7 +3,7 @@
<html> <html>
<head> <head>
<meta name="generator" content= <meta name="generator" content=
"HTML Tidy for HTML5 for Linux version 5.6.0"> "HTML Tidy for HTML5 for Linux version 5.2.0">
<meta http-equiv="Content-Type" content= <meta http-equiv="Content-Type" content=
"text/html; charset=utf-8"> "text/html; charset=utf-8">
<title>Recoll user manual</title> <title>Recoll user manual</title>
@ -1135,8 +1135,8 @@ alink="#0000FF">
different areas of the file system to different different areas of the file system to different
indexes. For example, if you were to issue the indexes. For example, if you were to issue the
following command:</p> following command:</p>
<pre class= <pre class="programlisting">
"programlisting">recoll -c ~/.indexes-email</pre> recoll -c ~/.indexes-email</pre>
<p>Then <span class="application">Recoll</span> would <p>Then <span class="application">Recoll</span> would
use configuration files stored in <code class= use configuration files stored in <code class=
"filename">~/.indexes-email/</code> and, (unless "filename">~/.indexes-email/</code> and, (unless
@ -3874,8 +3874,8 @@ fs.inotify.max_user_watches=32768
that every user does not have to do it. The variable that every user does not have to do it. The variable
should define a colon-separated list of index should define a colon-separated list of index
directories, ie:</p> directories, ie:</p>
<pre class= <pre class="screen">
"screen">export RECOLL_EXTRA_DBS=/some/place/xapiandb:/some/other/db</pre> export RECOLL_EXTRA_DBS=/some/place/xapiandb:/some/other/db</pre>
<p>Another environment variable, <code class= <p>Another environment variable, <code class=
"envar">RECOLL_ACTIVE_EXTRA_DBS</code> allows adding to "envar">RECOLL_ACTIVE_EXTRA_DBS</code> allows adding to
the active list of indexes. This variable was suggested the active list of indexes. This variable was suggested
@ -4677,8 +4677,8 @@ fs.inotify.max_user_watches=32768
parent folder expansion, usually creating a file parent folder expansion, usually creating a file
manager window on the folder where the container file manager window on the folder where the container file
resides. E.g.:</p> resides. E.g.:</p>
<pre class= <pre class="programlisting">
"programlisting">&lt;a href="F%N"&gt;%P&lt;/a&gt;</pre> &lt;a href="F%N"&gt;%P&lt;/a&gt;</pre>
<p>A link target defined as <code class= <p>A link target defined as <code class=
"literal">R%N|<em class= "literal">R%N|<em class=
"replaceable"><code>scriptname</code></em></code> "replaceable"><code>scriptname</code></em></code>
@ -4820,8 +4820,8 @@ fs.inotify.max_user_watches=32768
<span class="application">javascript</span> program to <span class="application">javascript</span> program to
the documents, like the following example, which would the documents, like the following example, which would
initiate a search by double-clicking any term:</p> initiate a search by double-clicking any term:</p>
<pre class= <pre class="programlisting">
"programlisting">&lt;script language="JavaScript"&gt; &lt;script language="JavaScript"&gt;
function recollsearch() { function recollsearch() {
var t = document.getSelection(); var t = document.getSelection();
window.location.href = 'recoll://search/query?qtp=a&amp;p=0&amp;q=' + window.location.href = 'recoll://search/query?qtp=a&amp;p=0&amp;q=' +
@ -5115,7 +5115,17 @@ text/html [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/r
<li class="listitem"> <li class="listitem">
<p><code class="literal">ext</code> specifies the <p><code class="literal">ext</code> specifies the
file name extension (Ex: <code class= file name extension (Ex: <code class=
"literal">ext:html</code>)</p> "literal">ext:html</code>).</p>
</li>
<li class="listitem">
<p><code class="literal">rclmd5</code> is the MD5
checksum for the document. This is used for
displaying the duplicates of a search result (when
querying with the option to collapse duplicate
results). Incidentally, this could be used to find
the duplicates of any given file by computing its MD5
checksum and executing a query with just the
<code class="literal">rclmd5</code> value.</p>
</li> </li>
</ul> </ul>
</div> </div>
@ -10055,8 +10065,8 @@ for i in range(nres):
"filename">.xml</code> extension but should be handled "filename">.xml</code> extension but should be handled
specially, which is possible because they are usually all specially, which is possible because they are usually all
located in one place. Example:</p> located in one place. Example:</p>
<pre class= <pre class="programlisting">
"programlisting">[~/.kde/share/apps/okular/docdata] [~/.kde/share/apps/okular/docdata]
.xml = application/x-okular-notes</pre> .xml = application/x-okular-notes</pre>
<p>The <code class="varname">recoll_noindex</code> <p>The <code class="varname">recoll_noindex</code>
<code class="filename">mimemap</code> variable has been <code class="filename">mimemap</code> variable has been

View File

@ -3896,27 +3896,36 @@ text/html [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/r
name for an email attachment.</para></listitem> name for an email attachment.</para></listitem>
<listitem><para><literal>containerfilename</literal>. This is <listitem><para><literal>containerfilename</literal>. This is
set for all documents, both top-level and contained set for all documents, both top-level and contained
sub-documents, and is always the name of the filesystem directory sub-documents, and is always the name of the filesystem directory
entry which contains the data. The terms from this field can entry which contains the data. The terms from this field can
only be matched by an explicit field specification (as opposed only be matched by an explicit field specification (as opposed
to terms from <literal>filename</literal> which are also indexed to terms from <literal>filename</literal> which are also indexed
as general document content). This avoids getting matches for as general document content). This avoids getting matches for
all the sub-documents when searching for the container file all the sub-documents when searching for the container file
name.</para></listitem> name.</para></listitem>
<listitem><para><literal>ext</literal> specifies the file <listitem><para><literal>ext</literal> specifies the file
name extension (Ex: <literal>ext:html</literal>)</para> name extension
(Ex: <literal>ext:html</literal>).</para></listitem>
<listitem><para><literal>rclmd5</literal> is the MD5 checksum for the
document. This is used for displaying the duplicates of a
search result (when querying with the option to collapse
duplicate results). Incidentally, this could be used to find
the duplicates of any given file by computing its MD5 checksum
and executing a query with just the <literal>rclmd5</literal>
value.</para>
</listitem> </listitem>
</itemizedlist> </itemizedlist>
<para>&RCL; 1.20 and later have a way to specify aliases for the <para>&RCL; 1.20 and later have a way to specify aliases for the
field names, which will save typing, for example by aliasing field names, which will save typing, for example by aliasing
<literal>filename</literal> to <replaceable>fn</replaceable> or <literal>filename</literal> to <replaceable>fn</replaceable> or
<literal>containerfilename</literal> to <literal>containerfilename</literal> to
<replaceable>cfn</replaceable>. See the <replaceable>cfn</replaceable>. See the
<link linkend="RCL.INSTALL.CONFIG.FIELDS">section about the <filename>fields</filename> file</link>. <link linkend="RCL.INSTALL.CONFIG.FIELDS">section about the <filename>fields</filename> file</link>.
</para> </para>
<para>The document input handlers used while indexing have the <para>The document input handlers used while indexing have the

View File

@ -1866,10 +1866,11 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
RECORD_APPEND(record, string(cstr_mbreaks), multibreaks.str()); RECORD_APPEND(record, string(cstr_mbreaks), multibreaks.str());
} }
// If the file's md5 was computed, add value and term. // If the file's md5 was computed, add value and term. The
// The value is optionally used for query result duplicate elimination, // value is optionally used for query result duplicate
// and the term to find the duplicates. // elimination, and the term to find the duplicates (XM is the
// We don't do this for empty docs. // prefix for rclmd5 in fields). We don't do this for empty
// docs.
const string *md5; const string *md5;
if (doc.peekmeta(Doc::keymd5, &md5) && !md5->empty() && if (doc.peekmeta(Doc::keymd5, &md5) && !md5->empty() &&
md5->compare(cstr_md5empty)) { md5->compare(cstr_md5empty)) {

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2013 J.F.Dockes /* Copyright (C) 2013-2020 J.F.Dockes
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or * the Free Software Foundation; either version 2 of the License, or
@ -35,36 +35,36 @@ using namespace std;
namespace Rcl { namespace Rcl {
/** Retrieve the dups of a given document. The input has to be a query result /** Retrieve the dups of a given document. The input has to be a query result
* because we use the xdocid. We get the md5 from this, then the dups */ * because we use the xdocid. We get the md5 from this, then the dups */
bool Db::docDups(const Doc& idoc, vector<Doc>& odocs) bool Db::docDups(const Doc& idoc, vector<Doc>& odocs)
{ {
if (m_ndb == 0) { if (m_ndb == 0) {
LOGERR("Db::docDups: no db\n" ); LOGERR("Db::docDups: no db\n");
return false; return false;
} }
if (idoc.xdocid == 0) { if (idoc.xdocid == 0) {
LOGERR("Db::docDups: null xdocid in input doc\n" ); LOGERR("Db::docDups: null xdocid in input doc\n");
return false; return false;
} }
// Get the xapian doc // Get the xapian doc
Xapian::Document xdoc; Xapian::Document xdoc;
XAPTRY(xdoc = m_ndb->xrdb.get_document(Xapian::docid(idoc.xdocid)), XAPTRY(xdoc = m_ndb->xrdb.get_document(Xapian::docid(idoc.xdocid)),
m_ndb->xrdb, m_reason); m_ndb->xrdb, m_reason);
if (!m_reason.empty()) { if (!m_reason.empty()) {
LOGERR("Db::docDups: xapian error: " << (m_reason) << "\n" ); LOGERR("Db::docDups: xapian error: " << m_reason << "\n");
return false; return false;
} }
// Get the md5 // Get the md5
string digest; string digest;
XAPTRY(digest = xdoc.get_value(VALUE_MD5), m_ndb->xrdb, m_reason); XAPTRY(digest = xdoc.get_value(VALUE_MD5), m_ndb->xrdb, m_reason);
if (!m_reason.empty()) { if (!m_reason.empty()) {
LOGERR("Db::docDups: xapian error: " << (m_reason) << "\n" ); LOGERR("Db::docDups: xapian error: " << m_reason << "\n");
return false; return false;
} }
if (digest.empty()) { if (digest.empty()) {
LOGDEB("Db::docDups: doc has no md5\n" ); LOGDEB("Db::docDups: doc has no md5\n");
return false; return false;
} }
string md5; string md5;
MD5HexPrint(digest, md5); MD5HexPrint(digest, md5);
@ -72,45 +72,27 @@ bool Db::docDups(const Doc& idoc, vector<Doc>& odocs)
SearchData *sdp = new SearchData(); SearchData *sdp = new SearchData();
std::shared_ptr<SearchData> sd(sdp); std::shared_ptr<SearchData> sd(sdp);
SearchDataClauseSimple *sdc = SearchDataClauseSimple *sdc =
new SearchDataClauseSimple(SCLT_AND, md5, "rclmd5"); new SearchDataClauseSimple(SCLT_AND, md5, "rclmd5");
sdc->addModifier(SearchDataClause::SDCM_CASESENS); sdc->addModifier(SearchDataClause::SDCM_CASESENS);
sdc->addModifier(SearchDataClause::SDCM_DIACSENS); sdc->addModifier(SearchDataClause::SDCM_DIACSENS);
sd->addClause(sdc); sd->addClause(sdc);
Query query(this); Query query(this);
query.setCollapseDuplicates(0); query.setCollapseDuplicates(0);
if (!query.setQuery(sd)) { if (!query.setQuery(sd)) {
LOGERR("Db::docDups: setQuery failed\n" ); LOGERR("Db::docDups: setQuery failed\n");
return false; return false;
} }
int cnt = query.getResCnt(); int cnt = query.getResCnt();
for (int i = 0; i < cnt; i++) { for (int i = 0; i < cnt; i++) {
Doc doc; Doc doc;
if (!query.getDoc(i, doc)) { if (!query.getDoc(i, doc)) {
LOGERR("Db::docDups: getDoc failed at " << (i) << " (cnt " << (cnt) << ")\n" ); LOGERR("Db::docDups: getDoc failed at " << i << " (cnt " << cnt <<
return false; ")\n");
} return false;
odocs.push_back(doc); }
odocs.push_back(doc);
} }
return true; return true;
} }
#if 0
{
vector<Doc> dups;
bool ret;
LOGDEB("DOCDUPS\n" );
ret = m_db->docDups(doc, dups);
if (!ret) {
LOGDEB("docDups failed\n" );
} else if (dups.size() == 1) {
LOGDEB("No dups\n" );
} else {
for (unsigned int i = 0; i < dups.size(); i++) {
LOGDEB("Dup: " << (dups[i].url) << "\n" );
}
}
}
#endif
} }