doc and comments
This commit is contained in:
parent
101a566dec
commit
02556e7d08
@ -3,7 +3,7 @@
|
|||||||
<html>
|
<html>
|
||||||
<head>
|
<head>
|
||||||
<meta name="generator" content=
|
<meta name="generator" content=
|
||||||
"HTML Tidy for HTML5 for Linux version 5.6.0">
|
"HTML Tidy for HTML5 for Linux version 5.2.0">
|
||||||
<meta http-equiv="Content-Type" content=
|
<meta http-equiv="Content-Type" content=
|
||||||
"text/html; charset=utf-8">
|
"text/html; charset=utf-8">
|
||||||
<title>Recoll user manual</title>
|
<title>Recoll user manual</title>
|
||||||
@ -1135,8 +1135,8 @@ alink="#0000FF">
|
|||||||
different areas of the file system to different
|
different areas of the file system to different
|
||||||
indexes. For example, if you were to issue the
|
indexes. For example, if you were to issue the
|
||||||
following command:</p>
|
following command:</p>
|
||||||
<pre class=
|
<pre class="programlisting">
|
||||||
"programlisting">recoll -c ~/.indexes-email</pre>
|
recoll -c ~/.indexes-email</pre>
|
||||||
<p>Then <span class="application">Recoll</span> would
|
<p>Then <span class="application">Recoll</span> would
|
||||||
use configuration files stored in <code class=
|
use configuration files stored in <code class=
|
||||||
"filename">~/.indexes-email/</code> and, (unless
|
"filename">~/.indexes-email/</code> and, (unless
|
||||||
@ -3874,8 +3874,8 @@ fs.inotify.max_user_watches=32768
|
|||||||
that every user does not have to do it. The variable
|
that every user does not have to do it. The variable
|
||||||
should define a colon-separated list of index
|
should define a colon-separated list of index
|
||||||
directories, ie:</p>
|
directories, ie:</p>
|
||||||
<pre class=
|
<pre class="screen">
|
||||||
"screen">export RECOLL_EXTRA_DBS=/some/place/xapiandb:/some/other/db</pre>
|
export RECOLL_EXTRA_DBS=/some/place/xapiandb:/some/other/db</pre>
|
||||||
<p>Another environment variable, <code class=
|
<p>Another environment variable, <code class=
|
||||||
"envar">RECOLL_ACTIVE_EXTRA_DBS</code> allows adding to
|
"envar">RECOLL_ACTIVE_EXTRA_DBS</code> allows adding to
|
||||||
the active list of indexes. This variable was suggested
|
the active list of indexes. This variable was suggested
|
||||||
@ -4677,8 +4677,8 @@ fs.inotify.max_user_watches=32768
|
|||||||
parent folder expansion, usually creating a file
|
parent folder expansion, usually creating a file
|
||||||
manager window on the folder where the container file
|
manager window on the folder where the container file
|
||||||
resides. E.g.:</p>
|
resides. E.g.:</p>
|
||||||
<pre class=
|
<pre class="programlisting">
|
||||||
"programlisting"><a href="F%N">%P</a></pre>
|
<a href="F%N">%P</a></pre>
|
||||||
<p>A link target defined as <code class=
|
<p>A link target defined as <code class=
|
||||||
"literal">R%N|<em class=
|
"literal">R%N|<em class=
|
||||||
"replaceable"><code>scriptname</code></em></code>
|
"replaceable"><code>scriptname</code></em></code>
|
||||||
@ -4820,8 +4820,8 @@ fs.inotify.max_user_watches=32768
|
|||||||
<span class="application">javascript</span> program to
|
<span class="application">javascript</span> program to
|
||||||
the documents, like the following example, which would
|
the documents, like the following example, which would
|
||||||
initiate a search by double-clicking any term:</p>
|
initiate a search by double-clicking any term:</p>
|
||||||
<pre class=
|
<pre class="programlisting">
|
||||||
"programlisting"><script language="JavaScript">
|
<script language="JavaScript">
|
||||||
function recollsearch() {
|
function recollsearch() {
|
||||||
var t = document.getSelection();
|
var t = document.getSelection();
|
||||||
window.location.href = 'recoll://search/query?qtp=a&p=0&q=' +
|
window.location.href = 'recoll://search/query?qtp=a&p=0&q=' +
|
||||||
@ -5115,7 +5115,17 @@ text/html [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/r
|
|||||||
<li class="listitem">
|
<li class="listitem">
|
||||||
<p><code class="literal">ext</code> specifies the
|
<p><code class="literal">ext</code> specifies the
|
||||||
file name extension (Ex: <code class=
|
file name extension (Ex: <code class=
|
||||||
"literal">ext:html</code>)</p>
|
"literal">ext:html</code>).</p>
|
||||||
|
</li>
|
||||||
|
<li class="listitem">
|
||||||
|
<p><code class="literal">rclmd5</code> is the MD5
|
||||||
|
checksum for the document. This is used for
|
||||||
|
displaying the duplicates of a search result (when
|
||||||
|
querying with the option to collapse duplicate
|
||||||
|
results). Incidentally, this could be used to find
|
||||||
|
the duplicates of any given file by computing its MD5
|
||||||
|
checksum and executing a query with just the
|
||||||
|
<code class="literal">rclmd5</code> value.</p>
|
||||||
</li>
|
</li>
|
||||||
</ul>
|
</ul>
|
||||||
</div>
|
</div>
|
||||||
@ -10055,8 +10065,8 @@ for i in range(nres):
|
|||||||
"filename">.xml</code> extension but should be handled
|
"filename">.xml</code> extension but should be handled
|
||||||
specially, which is possible because they are usually all
|
specially, which is possible because they are usually all
|
||||||
located in one place. Example:</p>
|
located in one place. Example:</p>
|
||||||
<pre class=
|
<pre class="programlisting">
|
||||||
"programlisting">[~/.kde/share/apps/okular/docdata]
|
[~/.kde/share/apps/okular/docdata]
|
||||||
.xml = application/x-okular-notes</pre>
|
.xml = application/x-okular-notes</pre>
|
||||||
<p>The <code class="varname">recoll_noindex</code>
|
<p>The <code class="varname">recoll_noindex</code>
|
||||||
<code class="filename">mimemap</code> variable has been
|
<code class="filename">mimemap</code> variable has been
|
||||||
|
|||||||
@ -3896,27 +3896,36 @@ text/html [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/r
|
|||||||
name for an email attachment.</para></listitem>
|
name for an email attachment.</para></listitem>
|
||||||
|
|
||||||
<listitem><para><literal>containerfilename</literal>. This is
|
<listitem><para><literal>containerfilename</literal>. This is
|
||||||
set for all documents, both top-level and contained
|
set for all documents, both top-level and contained
|
||||||
sub-documents, and is always the name of the filesystem directory
|
sub-documents, and is always the name of the filesystem directory
|
||||||
entry which contains the data. The terms from this field can
|
entry which contains the data. The terms from this field can
|
||||||
only be matched by an explicit field specification (as opposed
|
only be matched by an explicit field specification (as opposed
|
||||||
to terms from <literal>filename</literal> which are also indexed
|
to terms from <literal>filename</literal> which are also indexed
|
||||||
as general document content). This avoids getting matches for
|
as general document content). This avoids getting matches for
|
||||||
all the sub-documents when searching for the container file
|
all the sub-documents when searching for the container file
|
||||||
name.</para></listitem>
|
name.</para></listitem>
|
||||||
|
|
||||||
<listitem><para><literal>ext</literal> specifies the file
|
<listitem><para><literal>ext</literal> specifies the file
|
||||||
name extension (Ex: <literal>ext:html</literal>)</para>
|
name extension
|
||||||
|
(Ex: <literal>ext:html</literal>).</para></listitem>
|
||||||
|
|
||||||
|
<listitem><para><literal>rclmd5</literal> is the MD5 checksum for the
|
||||||
|
document. This is used for displaying the duplicates of a
|
||||||
|
search result (when querying with the option to collapse
|
||||||
|
duplicate results). Incidentally, this could be used to find
|
||||||
|
the duplicates of any given file by computing its MD5 checksum
|
||||||
|
and executing a query with just the <literal>rclmd5</literal>
|
||||||
|
value.</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
</itemizedlist>
|
</itemizedlist>
|
||||||
|
|
||||||
<para>&RCL; 1.20 and later have a way to specify aliases for the
|
<para>&RCL; 1.20 and later have a way to specify aliases for the
|
||||||
field names, which will save typing, for example by aliasing
|
field names, which will save typing, for example by aliasing
|
||||||
<literal>filename</literal> to <replaceable>fn</replaceable> or
|
<literal>filename</literal> to <replaceable>fn</replaceable> or
|
||||||
<literal>containerfilename</literal> to
|
<literal>containerfilename</literal> to
|
||||||
<replaceable>cfn</replaceable>. See the
|
<replaceable>cfn</replaceable>. See the
|
||||||
<link linkend="RCL.INSTALL.CONFIG.FIELDS">section about the <filename>fields</filename> file</link>.
|
<link linkend="RCL.INSTALL.CONFIG.FIELDS">section about the <filename>fields</filename> file</link>.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>The document input handlers used while indexing have the
|
<para>The document input handlers used while indexing have the
|
||||||
|
|||||||
@ -1866,10 +1866,11 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
|
|||||||
RECORD_APPEND(record, string(cstr_mbreaks), multibreaks.str());
|
RECORD_APPEND(record, string(cstr_mbreaks), multibreaks.str());
|
||||||
}
|
}
|
||||||
|
|
||||||
// If the file's md5 was computed, add value and term.
|
// If the file's md5 was computed, add value and term. The
|
||||||
// The value is optionally used for query result duplicate elimination,
|
// value is optionally used for query result duplicate
|
||||||
// and the term to find the duplicates.
|
// elimination, and the term to find the duplicates (XM is the
|
||||||
// We don't do this for empty docs.
|
// prefix for rclmd5 in fields). We don't do this for empty
|
||||||
|
// docs.
|
||||||
const string *md5;
|
const string *md5;
|
||||||
if (doc.peekmeta(Doc::keymd5, &md5) && !md5->empty() &&
|
if (doc.peekmeta(Doc::keymd5, &md5) && !md5->empty() &&
|
||||||
md5->compare(cstr_md5empty)) {
|
md5->compare(cstr_md5empty)) {
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
/* Copyright (C) 2013 J.F.Dockes
|
/* Copyright (C) 2013-2020 J.F.Dockes
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
* the Free Software Foundation; either version 2 of the License, or
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
@ -35,36 +35,36 @@ using namespace std;
|
|||||||
namespace Rcl {
|
namespace Rcl {
|
||||||
|
|
||||||
/** Retrieve the dups of a given document. The input has to be a query result
|
/** Retrieve the dups of a given document. The input has to be a query result
|
||||||
* because we use the xdocid. We get the md5 from this, then the dups */
|
* because we use the xdocid. We get the md5 from this, then the dups */
|
||||||
bool Db::docDups(const Doc& idoc, vector<Doc>& odocs)
|
bool Db::docDups(const Doc& idoc, vector<Doc>& odocs)
|
||||||
{
|
{
|
||||||
if (m_ndb == 0) {
|
if (m_ndb == 0) {
|
||||||
LOGERR("Db::docDups: no db\n" );
|
LOGERR("Db::docDups: no db\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (idoc.xdocid == 0) {
|
if (idoc.xdocid == 0) {
|
||||||
LOGERR("Db::docDups: null xdocid in input doc\n" );
|
LOGERR("Db::docDups: null xdocid in input doc\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// Get the xapian doc
|
// Get the xapian doc
|
||||||
Xapian::Document xdoc;
|
Xapian::Document xdoc;
|
||||||
XAPTRY(xdoc = m_ndb->xrdb.get_document(Xapian::docid(idoc.xdocid)),
|
XAPTRY(xdoc = m_ndb->xrdb.get_document(Xapian::docid(idoc.xdocid)),
|
||||||
m_ndb->xrdb, m_reason);
|
m_ndb->xrdb, m_reason);
|
||||||
if (!m_reason.empty()) {
|
if (!m_reason.empty()) {
|
||||||
LOGERR("Db::docDups: xapian error: " << (m_reason) << "\n" );
|
LOGERR("Db::docDups: xapian error: " << m_reason << "\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get the md5
|
// Get the md5
|
||||||
string digest;
|
string digest;
|
||||||
XAPTRY(digest = xdoc.get_value(VALUE_MD5), m_ndb->xrdb, m_reason);
|
XAPTRY(digest = xdoc.get_value(VALUE_MD5), m_ndb->xrdb, m_reason);
|
||||||
if (!m_reason.empty()) {
|
if (!m_reason.empty()) {
|
||||||
LOGERR("Db::docDups: xapian error: " << (m_reason) << "\n" );
|
LOGERR("Db::docDups: xapian error: " << m_reason << "\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (digest.empty()) {
|
if (digest.empty()) {
|
||||||
LOGDEB("Db::docDups: doc has no md5\n" );
|
LOGDEB("Db::docDups: doc has no md5\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
string md5;
|
string md5;
|
||||||
MD5HexPrint(digest, md5);
|
MD5HexPrint(digest, md5);
|
||||||
@ -72,45 +72,27 @@ bool Db::docDups(const Doc& idoc, vector<Doc>& odocs)
|
|||||||
SearchData *sdp = new SearchData();
|
SearchData *sdp = new SearchData();
|
||||||
std::shared_ptr<SearchData> sd(sdp);
|
std::shared_ptr<SearchData> sd(sdp);
|
||||||
SearchDataClauseSimple *sdc =
|
SearchDataClauseSimple *sdc =
|
||||||
new SearchDataClauseSimple(SCLT_AND, md5, "rclmd5");
|
new SearchDataClauseSimple(SCLT_AND, md5, "rclmd5");
|
||||||
sdc->addModifier(SearchDataClause::SDCM_CASESENS);
|
sdc->addModifier(SearchDataClause::SDCM_CASESENS);
|
||||||
sdc->addModifier(SearchDataClause::SDCM_DIACSENS);
|
sdc->addModifier(SearchDataClause::SDCM_DIACSENS);
|
||||||
sd->addClause(sdc);
|
sd->addClause(sdc);
|
||||||
Query query(this);
|
Query query(this);
|
||||||
query.setCollapseDuplicates(0);
|
query.setCollapseDuplicates(0);
|
||||||
if (!query.setQuery(sd)) {
|
if (!query.setQuery(sd)) {
|
||||||
LOGERR("Db::docDups: setQuery failed\n" );
|
LOGERR("Db::docDups: setQuery failed\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
int cnt = query.getResCnt();
|
int cnt = query.getResCnt();
|
||||||
for (int i = 0; i < cnt; i++) {
|
for (int i = 0; i < cnt; i++) {
|
||||||
Doc doc;
|
Doc doc;
|
||||||
if (!query.getDoc(i, doc)) {
|
if (!query.getDoc(i, doc)) {
|
||||||
LOGERR("Db::docDups: getDoc failed at " << (i) << " (cnt " << (cnt) << ")\n" );
|
LOGERR("Db::docDups: getDoc failed at " << i << " (cnt " << cnt <<
|
||||||
return false;
|
")\n");
|
||||||
}
|
return false;
|
||||||
odocs.push_back(doc);
|
}
|
||||||
|
odocs.push_back(doc);
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
|
||||||
{
|
|
||||||
vector<Doc> dups;
|
|
||||||
bool ret;
|
|
||||||
LOGDEB("DOCDUPS\n" );
|
|
||||||
ret = m_db->docDups(doc, dups);
|
|
||||||
if (!ret) {
|
|
||||||
LOGDEB("docDups failed\n" );
|
|
||||||
} else if (dups.size() == 1) {
|
|
||||||
LOGDEB("No dups\n" );
|
|
||||||
} else {
|
|
||||||
for (unsigned int i = 0; i < dups.size(); i++) {
|
|
||||||
LOGDEB("Dup: " << (dups[i].url) << "\n" );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user