doc and comments
This commit is contained in:
parent
101a566dec
commit
02556e7d08
@ -3,7 +3,7 @@
|
||||
<html>
|
||||
<head>
|
||||
<meta name="generator" content=
|
||||
"HTML Tidy for HTML5 for Linux version 5.6.0">
|
||||
"HTML Tidy for HTML5 for Linux version 5.2.0">
|
||||
<meta http-equiv="Content-Type" content=
|
||||
"text/html; charset=utf-8">
|
||||
<title>Recoll user manual</title>
|
||||
@ -1135,8 +1135,8 @@ alink="#0000FF">
|
||||
different areas of the file system to different
|
||||
indexes. For example, if you were to issue the
|
||||
following command:</p>
|
||||
<pre class=
|
||||
"programlisting">recoll -c ~/.indexes-email</pre>
|
||||
<pre class="programlisting">
|
||||
recoll -c ~/.indexes-email</pre>
|
||||
<p>Then <span class="application">Recoll</span> would
|
||||
use configuration files stored in <code class=
|
||||
"filename">~/.indexes-email/</code> and, (unless
|
||||
@ -3874,8 +3874,8 @@ fs.inotify.max_user_watches=32768
|
||||
that every user does not have to do it. The variable
|
||||
should define a colon-separated list of index
|
||||
directories, ie:</p>
|
||||
<pre class=
|
||||
"screen">export RECOLL_EXTRA_DBS=/some/place/xapiandb:/some/other/db</pre>
|
||||
<pre class="screen">
|
||||
export RECOLL_EXTRA_DBS=/some/place/xapiandb:/some/other/db</pre>
|
||||
<p>Another environment variable, <code class=
|
||||
"envar">RECOLL_ACTIVE_EXTRA_DBS</code> allows adding to
|
||||
the active list of indexes. This variable was suggested
|
||||
@ -4677,8 +4677,8 @@ fs.inotify.max_user_watches=32768
|
||||
parent folder expansion, usually creating a file
|
||||
manager window on the folder where the container file
|
||||
resides. E.g.:</p>
|
||||
<pre class=
|
||||
"programlisting"><a href="F%N">%P</a></pre>
|
||||
<pre class="programlisting">
|
||||
<a href="F%N">%P</a></pre>
|
||||
<p>A link target defined as <code class=
|
||||
"literal">R%N|<em class=
|
||||
"replaceable"><code>scriptname</code></em></code>
|
||||
@ -4820,8 +4820,8 @@ fs.inotify.max_user_watches=32768
|
||||
<span class="application">javascript</span> program to
|
||||
the documents, like the following example, which would
|
||||
initiate a search by double-clicking any term:</p>
|
||||
<pre class=
|
||||
"programlisting"><script language="JavaScript">
|
||||
<pre class="programlisting">
|
||||
<script language="JavaScript">
|
||||
function recollsearch() {
|
||||
var t = document.getSelection();
|
||||
window.location.href = 'recoll://search/query?qtp=a&p=0&q=' +
|
||||
@ -5115,7 +5115,17 @@ text/html [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/r
|
||||
<li class="listitem">
|
||||
<p><code class="literal">ext</code> specifies the
|
||||
file name extension (Ex: <code class=
|
||||
"literal">ext:html</code>)</p>
|
||||
"literal">ext:html</code>).</p>
|
||||
</li>
|
||||
<li class="listitem">
|
||||
<p><code class="literal">rclmd5</code> holds the MD5
|
||||
checksum for the document. This is used for
|
||||
displaying the duplicates of a search result (when
|
||||
querying with the option to collapse duplicate
|
||||
results). Incidentally, this could be used to find
|
||||
the duplicates of any given file by computing its MD5
|
||||
checksum and executing a query with just the
|
||||
<code class="literal">rclmd5</code> value.</p>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
@ -10055,8 +10065,8 @@ for i in range(nres):
|
||||
"filename">.xml</code> extension but should be handled
|
||||
specially, which is possible because they are usually all
|
||||
located in one place. Example:</p>
|
||||
<pre class=
|
||||
"programlisting">[~/.kde/share/apps/okular/docdata]
|
||||
<pre class="programlisting">
|
||||
[~/.kde/share/apps/okular/docdata]
|
||||
.xml = application/x-okular-notes</pre>
|
||||
<p>The <code class="varname">recoll_noindex</code>
|
||||
<code class="filename">mimemap</code> variable has been
|
||||
|
||||
@ -3896,27 +3896,36 @@ text/html [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/r
|
||||
name for an email attachment.</para></listitem>
|
||||
|
||||
<listitem><para><literal>containerfilename</literal>. This is
|
||||
set for all documents, both top-level and contained
|
||||
sub-documents, and is always the name of the filesystem directory
|
||||
entry which contains the data. The terms from this field can
|
||||
only be matched by an explicit field specification (as opposed
|
||||
to terms from <literal>filename</literal> which are also indexed
|
||||
as general document content). This avoids getting matches for
|
||||
all the sub-documents when searching for the container file
|
||||
name.</para></listitem>
|
||||
set for all documents, both top-level and contained
|
||||
sub-documents, and is always the name of the filesystem directory
|
||||
entry which contains the data. The terms from this field can
|
||||
only be matched by an explicit field specification (as opposed
|
||||
to terms from <literal>filename</literal> which are also indexed
|
||||
as general document content). This avoids getting matches for
|
||||
all the sub-documents when searching for the container file
|
||||
name.</para></listitem>
|
||||
|
||||
<listitem><para><literal>ext</literal> specifies the file
|
||||
name extension (Ex: <literal>ext:html</literal>)</para>
|
||||
name extension
|
||||
(Ex: <literal>ext:html</literal>).</para></listitem>
|
||||
|
||||
<listitem><para><literal>rclmd5</literal> holds the MD5 checksum for the
|
||||
document. This is used for displaying the duplicates of a
|
||||
search result (when querying with the option to collapse
|
||||
duplicate results). Incidentally, this could be used to find
|
||||
the duplicates of any given file by computing its MD5 checksum
|
||||
and executing a query with just the <literal>rclmd5</literal>
|
||||
value.</para>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
|
||||
<para>&RCL; 1.20 and later have a way to specify aliases for the
|
||||
field names, which will save typing, for example by aliasing
|
||||
<literal>filename</literal> to <replaceable>fn</replaceable> or
|
||||
<literal>containerfilename</literal> to
|
||||
<replaceable>cfn</replaceable>. See the
|
||||
<link linkend="RCL.INSTALL.CONFIG.FIELDS">section about the <filename>fields</filename> file</link>.
|
||||
field names, which will save typing, for example by aliasing
|
||||
<literal>filename</literal> to <replaceable>fn</replaceable> or
|
||||
<literal>containerfilename</literal> to
|
||||
<replaceable>cfn</replaceable>. See the
|
||||
<link linkend="RCL.INSTALL.CONFIG.FIELDS">section about the <filename>fields</filename> file</link>.
|
||||
</para>
|
||||
|
||||
<para>The document input handlers used while indexing have the
|
||||
|
||||
@ -1866,10 +1866,11 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
|
||||
RECORD_APPEND(record, string(cstr_mbreaks), multibreaks.str());
|
||||
}
|
||||
|
||||
// If the file's md5 was computed, add value and term.
|
||||
// The value is optionally used for query result duplicate elimination,
|
||||
// and the term to find the duplicates.
|
||||
// We don't do this for empty docs.
|
||||
// If the file's md5 was computed, add value and term. The
|
||||
// value is optionally used for query result duplicate
|
||||
// elimination, and the term to find the duplicates (XM is the
|
||||
// prefix for rclmd5 in fields). We don't do this for empty
|
||||
// docs.
|
||||
const string *md5;
|
||||
if (doc.peekmeta(Doc::keymd5, &md5) && !md5->empty() &&
|
||||
md5->compare(cstr_md5empty)) {
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2013 J.F.Dockes
|
||||
/* Copyright (C) 2013-2020 J.F.Dockes
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
@ -35,36 +35,36 @@ using namespace std;
|
||||
namespace Rcl {
|
||||
|
||||
/** Retrieve the dups of a given document. The input has to be a query result
|
||||
* because we use the xdocid. We get the md5 from this, then the dups */
|
||||
* because we use the xdocid. We get the md5 from this, then the dups */
|
||||
bool Db::docDups(const Doc& idoc, vector<Doc>& odocs)
|
||||
{
|
||||
if (m_ndb == 0) {
|
||||
LOGERR("Db::docDups: no db\n" );
|
||||
return false;
|
||||
LOGERR("Db::docDups: no db\n");
|
||||
return false;
|
||||
}
|
||||
if (idoc.xdocid == 0) {
|
||||
LOGERR("Db::docDups: null xdocid in input doc\n" );
|
||||
return false;
|
||||
LOGERR("Db::docDups: null xdocid in input doc\n");
|
||||
return false;
|
||||
}
|
||||
// Get the xapian doc
|
||||
Xapian::Document xdoc;
|
||||
XAPTRY(xdoc = m_ndb->xrdb.get_document(Xapian::docid(idoc.xdocid)),
|
||||
m_ndb->xrdb, m_reason);
|
||||
m_ndb->xrdb, m_reason);
|
||||
if (!m_reason.empty()) {
|
||||
LOGERR("Db::docDups: xapian error: " << (m_reason) << "\n" );
|
||||
return false;
|
||||
LOGERR("Db::docDups: xapian error: " << m_reason << "\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Get the md5
|
||||
string digest;
|
||||
XAPTRY(digest = xdoc.get_value(VALUE_MD5), m_ndb->xrdb, m_reason);
|
||||
if (!m_reason.empty()) {
|
||||
LOGERR("Db::docDups: xapian error: " << (m_reason) << "\n" );
|
||||
return false;
|
||||
LOGERR("Db::docDups: xapian error: " << m_reason << "\n");
|
||||
return false;
|
||||
}
|
||||
if (digest.empty()) {
|
||||
LOGDEB("Db::docDups: doc has no md5\n" );
|
||||
return false;
|
||||
LOGDEB("Db::docDups: doc has no md5\n");
|
||||
return false;
|
||||
}
|
||||
string md5;
|
||||
MD5HexPrint(digest, md5);
|
||||
@ -72,45 +72,27 @@ bool Db::docDups(const Doc& idoc, vector<Doc>& odocs)
|
||||
SearchData *sdp = new SearchData();
|
||||
std::shared_ptr<SearchData> sd(sdp);
|
||||
SearchDataClauseSimple *sdc =
|
||||
new SearchDataClauseSimple(SCLT_AND, md5, "rclmd5");
|
||||
new SearchDataClauseSimple(SCLT_AND, md5, "rclmd5");
|
||||
sdc->addModifier(SearchDataClause::SDCM_CASESENS);
|
||||
sdc->addModifier(SearchDataClause::SDCM_DIACSENS);
|
||||
sd->addClause(sdc);
|
||||
Query query(this);
|
||||
query.setCollapseDuplicates(0);
|
||||
if (!query.setQuery(sd)) {
|
||||
LOGERR("Db::docDups: setQuery failed\n" );
|
||||
return false;
|
||||
LOGERR("Db::docDups: setQuery failed\n");
|
||||
return false;
|
||||
}
|
||||
int cnt = query.getResCnt();
|
||||
for (int i = 0; i < cnt; i++) {
|
||||
Doc doc;
|
||||
if (!query.getDoc(i, doc)) {
|
||||
LOGERR("Db::docDups: getDoc failed at " << (i) << " (cnt " << (cnt) << ")\n" );
|
||||
return false;
|
||||
}
|
||||
odocs.push_back(doc);
|
||||
Doc doc;
|
||||
if (!query.getDoc(i, doc)) {
|
||||
LOGERR("Db::docDups: getDoc failed at " << i << " (cnt " << cnt <<
|
||||
")\n");
|
||||
return false;
|
||||
}
|
||||
odocs.push_back(doc);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
#if 0
|
||||
{
|
||||
vector<Doc> dups;
|
||||
bool ret;
|
||||
LOGDEB("DOCDUPS\n" );
|
||||
ret = m_db->docDups(doc, dups);
|
||||
if (!ret) {
|
||||
LOGDEB("docDups failed\n" );
|
||||
} else if (dups.size() == 1) {
|
||||
LOGDEB("No dups\n" );
|
||||
} else {
|
||||
for (unsigned int i = 0; i < dups.size(); i++) {
|
||||
LOGDEB("Dup: " << (dups[i].url) << "\n" );
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user