Movable datasets support

This commit is contained in:
Jean-Francois Dockes 2017-12-06 11:34:04 +01:00
parent 329ab7b90d
commit 09acb5687c
5 changed files with 4611 additions and 4270 deletions

View File

@ -1318,17 +1318,85 @@ string RclConfig::getPidfile() const
return path_cat(getCacheDir(), "index.pid"); return path_cat(getCacheDir(), "index.pid");
} }
/* Strip the trailing path elements which p1 and p2 have in common and
 * hand back what remains of each one. Example:
 * /mnt1/common/part /mnt2/common/part -> /mnt1 /mnt2.
 * The results are used to compute path translations after a dataset
 * has been moved.
 *
 * @param p1 first path.
 * @param p2 second path.
 * @param r1 output: leading part of p1, each element prefixed with '/'.
 *           Always cleared on entry.
 * @param r2 output: leading part of p2, same format. Always cleared on
 *           entry.
 * @return an empty string on success, else an error message (in which
 *         case r1 and r2 are left empty).
 *
 * Splitting into vectors is not the most efficient approach, but the
 * cost does not matter here. */
static string path_diffstems(const string& p1, const string& p2,
                             string& r1, string& r2)
{
    r1.clear();
    r2.clear();

    vector<string> elts1, elts2;
    stringToTokens(p1, elts1, "/");
    stringToTokens(p2, elts2, "/");
    const unsigned int n1 = elts1.size();
    const unsigned int n2 = elts2.size();
    const unsigned int maxcommon = MIN(n1, n2);

    // Compare the element lists from the leaf end and count how many
    // trailing elements are identical.
    unsigned int ncommon = 0;
    while (ncommon < maxcommon &&
           elts1[n1 - ncommon - 1] == elts2[n2 - ncommon - 1]) {
        ncommon++;
    }
    if (ncommon == 0) {
        return string("Input paths are empty or have no common part");
    }

    // Rebuild each stem from the elements which precede the common part.
    const unsigned int stemlen1 = n1 - ncommon;
    for (unsigned int idx = 0; idx != stemlen1; idx++) {
        r1 += "/";
        r1 += elts1[idx];
    }
    const unsigned int stemlen2 = n2 - ncommon;
    for (unsigned int idx = 0; idx != stemlen2; idx++) {
        r2 += "/";
        r2 += elts2[idx];
    }
    return string();
}
void RclConfig::urlrewrite(const string& dbdir, string& url) const void RclConfig::urlrewrite(const string& dbdir, string& url) const
{ {
LOGDEB2("RclConfig::urlrewrite: dbdir [" << dbdir << "] url [" << url << LOGDEB("RclConfig::urlrewrite: dbdir [" << dbdir << "] url [" << url <<
"]\n"); "]\n");
// If orgidxconfdir is set, we assume that this index is for a
// movable dataset, with the configuration directory stored inside
// the dataset tree. This allows computing automatic path
// translations if the dataset has been moved.
string orig_confdir;
string cur_confdir;
string confstemorg, confstemrep;
if (m_conf->get("orgidxconfdir", orig_confdir, "")) {
if (!m_conf->get("curidxconfdir", cur_confdir, "")) {
cur_confdir = m_confdir;
}
LOGDEB("RclConfig::urlrewrite: orgidxconfdir: " << orig_confdir <<
" cur_confdir " << cur_confdir << endl);
string reason = path_diffstems(orig_confdir, cur_confdir,
confstemorg, confstemrep);
if (!reason.empty()) {
LOGERR("urlrewrite: path_diffstems failed: " << reason <<
" : orig_confdir [" << orig_confdir <<
"] cur_confdir [" << cur_confdir << endl);
confstemorg = confstemrep = "";
}
}
// Do path translations exist for this index ? // Do path translations exist for this index ?
bool needptrans = true;
if (m_ptrans == 0 || !m_ptrans->hasSubKey(dbdir)) { if (m_ptrans == 0 || !m_ptrans->hasSubKey(dbdir)) {
LOGDEB2("RclConfig::urlrewrite: no paths translations (m_ptrans " << LOGDEB2("RclConfig::urlrewrite: no paths translations (m_ptrans " <<
m_ptrans << ")\n"); m_ptrans << ")\n");
needptrans = false;
}
if (!needptrans && confstemorg.empty()) {
return; return;
} }
bool computeurl = false;
string path = fileurltolocalpath(url); string path = fileurltolocalpath(url);
if (path.empty()) { if (path.empty()) {
@ -1336,22 +1404,34 @@ void RclConfig::urlrewrite(const string& dbdir, string& url) const
return; return;
} }
// Do the movable volume thing.
if (!confstemorg.empty() && confstemorg.size() <= path.size() &&
!path.compare(0, confstemorg.size(), confstemorg)) {
path = path.replace(0, confstemorg.size(), confstemrep);
computeurl = true;
}
if (needptrans) {
// For each translation check if the prefix matches the input path, // For each translation check if the prefix matches the input path,
// replace and return the result if it does. // replace and return the result if it does.
vector<string> opaths = m_ptrans->getNames(dbdir); vector<string> opaths = m_ptrans->getNames(dbdir);
for (vector<string>::const_iterator it = opaths.begin(); for (const auto& opath: opaths) {
it != opaths.end(); it++) { if (opath.size() <= path.size() &&
if (it->size() <= path.size() && !path.compare(0, it->size(), *it)) { !path.compare(0, opath.size(), opath)) {
string npath; string npath;
// This call always succeeds because the key comes from getNames() // Key comes from getNames()=> call must succeed
if (m_ptrans->get(*it, npath, dbdir)) { if (m_ptrans->get(opath, npath, dbdir)) {
path = path.replace(0, it->size(), npath); path = path.replace(0, opath.size(), npath);
url = path_pathtofileurl(path); computeurl = true;
} }
break; break;
} }
} }
} }
if (computeurl) {
url = path_pathtofileurl(path);
}
}
bool RclConfig::sourceChanged() const bool RclConfig::sourceChanged() const
{ {

View File

@ -471,6 +471,25 @@ the log... values.</para></listitem></varlistentry>
<listitem><para>Override logfilename for the indexer in real time <listitem><para>Override logfilename for the indexer in real time
mode. The default is to use the idx... values if set, else mode. The default is to use the idx... values if set, else
the log... values.</para></listitem></varlistentry> the log... values.</para></listitem></varlistentry>
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.ORGIDXCONFDIR">
<term><varname>orgidxconfdir</varname></term>
<listitem><para>Original location of the configuration directory. This is used exclusively for movable datasets. Locating the
configuration directory inside the directory tree makes it possible to
provide automatic query time path translations once the data set has
moved (for example, because it has been mounted on another
location).</para></listitem></varlistentry>
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.CURIDXCONFDIR">
<term><varname>curidxconfdir</varname></term>
<listitem><para>Current location of the configuration directory. Complements orgidxconfdir for movable datasets. This should be used
if the configuration directory has been copied from the dataset to
another location, either because the dataset is readonly and an r/w copy
is desired, or for performance reasons. This records the location of the
moved configuration directory before it was copied, to allow path translation computations. For
example, if a dataset originally indexed with its configuration in '/home/me/mydata/config' has
been mounted to '/media/me/mydata', and the GUI is running from a copied
configuration, orgidxconfdir would be '/home/me/mydata/config', and
curidxconfdir (as set in the copied configuration) would be
'/media/me/mydata/config'.</para></listitem></varlistentry>
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.IDXRUNDIR"> <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.IDXRUNDIR">
<term><varname>idxrundir</varname></term> <term><varname>idxrundir</varname></term>
<listitem><para>Indexing process current directory. The input <listitem><para>Indexing process current directory. The input

View File

@ -299,52 +299,54 @@ alink="#0000FF">
</dd> </dd>
</dl> </dl>
</dd> </dd>
<dt><span class="chapter">4. <a href= <dt><span class="chapter">4. <a href="#RCL.MOVABLE">Movable
datasets</a></span></dt>
<dt><span class="chapter">5. <a href=
"#RCL.PROGRAM">Programming interface</a></span></dt> "#RCL.PROGRAM">Programming interface</a></span></dt>
<dd> <dd>
<dl> <dl>
<dt><span class="sect1">4.1. <a href= <dt><span class="sect1">5.1. <a href=
"#RCL.PROGRAM.FILTERS">Writing a document input "#RCL.PROGRAM.FILTERS">Writing a document input
handler</a></span></dt> handler</a></span></dt>
<dd> <dd>
<dl> <dl>
<dt><span class="sect2">4.1.1. <a href= <dt><span class="sect2">5.1.1. <a href=
"#RCL.PROGRAM.FILTERS.SIMPLE">Simple input "#RCL.PROGRAM.FILTERS.SIMPLE">Simple input
handlers</a></span></dt> handlers</a></span></dt>
<dt><span class="sect2">4.1.2. <a href= <dt><span class="sect2">5.1.2. <a href=
"#RCL.PROGRAM.FILTERS.MULTIPLE">"Multiple" "#RCL.PROGRAM.FILTERS.MULTIPLE">"Multiple"
handlers</a></span></dt> handlers</a></span></dt>
<dt><span class="sect2">4.1.3. <a href= <dt><span class="sect2">5.1.3. <a href=
"#RCL.PROGRAM.FILTERS.ASSOCIATION">Telling "#RCL.PROGRAM.FILTERS.ASSOCIATION">Telling
<span class="application">Recoll</span> about the <span class="application">Recoll</span> about the
handler</a></span></dt> handler</a></span></dt>
<dt><span class="sect2">4.1.4. <a href= <dt><span class="sect2">5.1.4. <a href=
"#RCL.PROGRAM.FILTERS.HTML">Input handler "#RCL.PROGRAM.FILTERS.HTML">Input handler
output</a></span></dt> output</a></span></dt>
<dt><span class="sect2">4.1.5. <a href= <dt><span class="sect2">5.1.5. <a href=
"#RCL.PROGRAM.FILTERS.PAGES">Page "#RCL.PROGRAM.FILTERS.PAGES">Page
numbers</a></span></dt> numbers</a></span></dt>
</dl> </dl>
</dd> </dd>
<dt><span class="sect1">4.2. <a href= <dt><span class="sect1">5.2. <a href=
"#RCL.PROGRAM.FIELDS">Field data "#RCL.PROGRAM.FIELDS">Field data
processing</a></span></dt> processing</a></span></dt>
<dt><span class="sect1">4.3. <a href= <dt><span class="sect1">5.3. <a href=
"#RCL.PROGRAM.PYTHONAPI">Python API</a></span></dt> "#RCL.PROGRAM.PYTHONAPI">Python API</a></span></dt>
<dd> <dd>
<dl> <dl>
<dt><span class="sect2">4.3.1. <a href= <dt><span class="sect2">5.3.1. <a href=
"#RCL.PROGRAM.PYTHONAPI.INTRO">Introduction</a></span></dt> "#RCL.PROGRAM.PYTHONAPI.INTRO">Introduction</a></span></dt>
<dt><span class="sect2">4.3.2. <a href= <dt><span class="sect2">5.3.2. <a href=
"#RCL.PROGRAM.PYTHONAPI.ELEMENTS">Interface "#RCL.PROGRAM.PYTHONAPI.ELEMENTS">Interface
elements</a></span></dt> elements</a></span></dt>
<dt><span class="sect2">4.3.3. <a href= <dt><span class="sect2">5.3.3. <a href=
"#RCL.PROGRAM.PYTHONAPI.SEARCH">Python search "#RCL.PROGRAM.PYTHONAPI.SEARCH">Python search
interface</a></span></dt> interface</a></span></dt>
<dt><span class="sect2">4.3.4. <a href= <dt><span class="sect2">5.3.4. <a href=
"#RCL.PROGRAM.PYTHONAPI.UPDATE">Creating Python "#RCL.PROGRAM.PYTHONAPI.UPDATE">Creating Python
external indexers</a></span></dt> external indexers</a></span></dt>
<dt><span class="sect2">4.3.5. <a href= <dt><span class="sect2">5.3.5. <a href=
"#RCL.PROGRAM.PYTHONAPI.COMPAT">Package "#RCL.PROGRAM.PYTHONAPI.COMPAT">Package
compatibility with the previous compatibility with the previous
version</a></span></dt> version</a></span></dt>
@ -352,57 +354,57 @@ alink="#0000FF">
</dd> </dd>
</dl> </dl>
</dd> </dd>
<dt><span class="chapter">5. <a href= <dt><span class="chapter">6. <a href=
"#RCL.INSTALL">Installation and "#RCL.INSTALL">Installation and
configuration</a></span></dt> configuration</a></span></dt>
<dd> <dd>
<dl> <dl>
<dt><span class="sect1">5.1. <a href= <dt><span class="sect1">6.1. <a href=
"#RCL.INSTALL.BINARY">Installing a binary "#RCL.INSTALL.BINARY">Installing a binary
copy</a></span></dt> copy</a></span></dt>
<dt><span class="sect1">5.2. <a href= <dt><span class="sect1">6.2. <a href=
"#RCL.INSTALL.EXTERNAL">Supporting "#RCL.INSTALL.EXTERNAL">Supporting
packages</a></span></dt> packages</a></span></dt>
<dt><span class="sect1">5.3. <a href= <dt><span class="sect1">6.3. <a href=
"#RCL.INSTALL.BUILDING">Building from "#RCL.INSTALL.BUILDING">Building from
source</a></span></dt> source</a></span></dt>
<dd> <dd>
<dl> <dl>
<dt><span class="sect2">5.3.1. <a href= <dt><span class="sect2">6.3.1. <a href=
"#RCL.INSTALL.BUILDING.PREREQS">Prerequisites</a></span></dt> "#RCL.INSTALL.BUILDING.PREREQS">Prerequisites</a></span></dt>
<dt><span class="sect2">5.3.2. <a href= <dt><span class="sect2">6.3.2. <a href=
"#RCL.INSTALL.BUILDING.BUILD">Building</a></span></dt> "#RCL.INSTALL.BUILDING.BUILD">Building</a></span></dt>
<dt><span class="sect2">5.3.3. <a href= <dt><span class="sect2">6.3.3. <a href=
"#RCL.INSTALL.BUILDING.INSTALL">Installation</a></span></dt> "#RCL.INSTALL.BUILDING.INSTALL">Installation</a></span></dt>
</dl> </dl>
</dd> </dd>
<dt><span class="sect1">5.4. <a href= <dt><span class="sect1">6.4. <a href=
"#RCL.INSTALL.CONFIG">Configuration "#RCL.INSTALL.CONFIG">Configuration
overview</a></span></dt> overview</a></span></dt>
<dd> <dd>
<dl> <dl>
<dt><span class="sect2">5.4.1. <a href= <dt><span class="sect2">6.4.1. <a href=
"#RCL.INSTALL.CONFIG.ENVIR">Environment "#RCL.INSTALL.CONFIG.ENVIR">Environment
variables</a></span></dt> variables</a></span></dt>
<dt><span class="sect2">5.4.2. <a href= <dt><span class="sect2">6.4.2. <a href=
"#RCL.INSTALL.CONFIG.RECOLLCONF">Recoll main "#RCL.INSTALL.CONFIG.RECOLLCONF">Recoll main
configuration file, recoll.conf</a></span></dt> configuration file, recoll.conf</a></span></dt>
<dt><span class="sect2">5.4.3. <a href= <dt><span class="sect2">6.4.3. <a href=
"#RCL.INSTALL.CONFIG.FIELDS">The fields "#RCL.INSTALL.CONFIG.FIELDS">The fields
file</a></span></dt> file</a></span></dt>
<dt><span class="sect2">5.4.4. <a href= <dt><span class="sect2">6.4.4. <a href=
"#RCL.INSTALL.CONFIG.MIMEMAP">The mimemap "#RCL.INSTALL.CONFIG.MIMEMAP">The mimemap
file</a></span></dt> file</a></span></dt>
<dt><span class="sect2">5.4.5. <a href= <dt><span class="sect2">6.4.5. <a href=
"#RCL.INSTALL.CONFIG.MIMECONF">The mimeconf "#RCL.INSTALL.CONFIG.MIMECONF">The mimeconf
file</a></span></dt> file</a></span></dt>
<dt><span class="sect2">5.4.6. <a href= <dt><span class="sect2">6.4.6. <a href=
"#RCL.INSTALL.CONFIG.MIMEVIEW">The mimeview "#RCL.INSTALL.CONFIG.MIMEVIEW">The mimeview
file</a></span></dt> file</a></span></dt>
<dt><span class="sect2">5.4.7. <a href= <dt><span class="sect2">6.4.7. <a href=
"#RCL.INSTALL.CONFIG.PTRANS">The <code class= "#RCL.INSTALL.CONFIG.PTRANS">The <code class=
"filename">ptrans</code> file</a></span></dt> "filename">ptrans</code> file</a></span></dt>
<dt><span class="sect2">5.4.8. <a href= <dt><span class="sect2">6.4.8. <a href=
"#RCL.INSTALL.CONFIG.EXAMPLES">Examples of "#RCL.INSTALL.CONFIG.EXAMPLES">Examples of
configuration adjustments</a></span></dt> configuration adjustments</a></span></dt>
</dl> </dl>
@ -452,7 +454,7 @@ alink="#0000FF">
<p>If you do not like reading manuals (who does?) but wish <p>If you do not like reading manuals (who does?) but wish
to give <span class="application">Recoll</span> a try, just to give <span class="application">Recoll</span> a try, just
<a class="link" href="#RCL.INSTALL.BINARY" title= <a class="link" href="#RCL.INSTALL.BINARY" title=
"5.1.&nbsp;Installing a binary copy">install</a> the "6.1.&nbsp;Installing a binary copy">install</a> the
application and start the <span class= application and start the <span class=
"command"><strong>recoll</strong></span> graphical user "command"><strong>recoll</strong></span> graphical user
interface (GUI), which will ask permission to index your interface (GUI), which will ask permission to index your
@ -473,7 +475,7 @@ alink="#0000FF">
<p>Also be aware that, on Unix/Linux, you may need to <p>Also be aware that, on Unix/Linux, you may need to
install the appropriate <a class="link" href= install the appropriate <a class="link" href=
"#RCL.INSTALL.EXTERNAL" title= "#RCL.INSTALL.EXTERNAL" title=
"5.2.&nbsp;Supporting packages">supporting applications</a> "6.2.&nbsp;Supporting packages">supporting applications</a>
for document types that need them (for example <span class= for document types that need them (for example <span class=
"application">antiword</span> for <span class= "application">antiword</span> for <span class=
"application">Microsoft Word</span> files).</p> "application">Microsoft Word</span> files).</p>
@ -693,7 +695,7 @@ alink="#0000FF">
</li> </li>
<li class="listitem"> <li class="listitem">
<p>A <a class="link" href="#RCL.PROGRAM.PYTHONAPI" <p>A <a class="link" href="#RCL.PROGRAM.PYTHONAPI"
title="4.3.&nbsp;Python API"><span class= title="5.3.&nbsp;Python API"><span class=
"application">Python</span> programming "application">Python</span> programming
interface</a></p> interface</a></p>
</li> </li>
@ -933,7 +935,7 @@ alink="#0000FF">
...) need external applications for preprocessing. The ...) need external applications for preprocessing. The
list is in the <a class="link" href= list is in the <a class="link" href=
"#RCL.INSTALL.EXTERNAL" title= "#RCL.INSTALL.EXTERNAL" title=
"5.2.&nbsp;Supporting packages">installation</a> section. "6.2.&nbsp;Supporting packages">installation</a> section.
After every indexing operation, <span class= After every indexing operation, <span class=
"application">Recoll</span> updates a list of commands "application">Recoll</span> updates a list of commands
that would be needed for indexing existing files types. that would be needed for indexing existing files types.
@ -979,7 +981,7 @@ indexedmimetypes = application/pdf
<code class="literal">indexedmimetypes</code>, can be set <code class="literal">indexedmimetypes</code>, can be set
either by editing the <a class="link" href= either by editing the <a class="link" href=
"#RCL.INSTALL.CONFIG.RECOLLCONF" title= "#RCL.INSTALL.CONFIG.RECOLLCONF" title=
"5.4.2.&nbsp;Recoll main configuration file, recoll.conf"> "6.4.2.&nbsp;Recoll main configuration file, recoll.conf">
configuration file (<code class= configuration file (<code class=
"filename">recoll.conf</code>)</a> for the index, or by "filename">recoll.conf</code>)</a> for the index, or by
using the GUI index configuration tool.</p> using the GUI index configuration tool.</p>
@ -1103,7 +1105,7 @@ indexedmimetypes = application/pdf
<p>Using multiple configuration directories and <p>Using multiple configuration directories and
<a class="link" href="#RCL.INSTALL.CONFIG.RECOLLCONF" <a class="link" href="#RCL.INSTALL.CONFIG.RECOLLCONF"
title= title=
"5.4.2.&nbsp;Recoll main configuration file, recoll.conf"> "6.4.2.&nbsp;Recoll main configuration file, recoll.conf">
configuration options</a> allows you to tailor configuration options</a> allows you to tailor
multiple configurations and indexes to handle multiple configurations and indexes to handle
whatever subset of the available data you wish to whatever subset of the available data you wish to
@ -1116,7 +1118,7 @@ indexedmimetypes = application/pdf
parameter in the configuration file (see the parameter in the configuration file (see the
<a class="link" href="#RCL.INSTALL.CONFIG.RECOLLCONF" <a class="link" href="#RCL.INSTALL.CONFIG.RECOLLCONF"
title= title=
"5.4.2.&nbsp;Recoll main configuration file, recoll.conf"> "6.4.2.&nbsp;Recoll main configuration file, recoll.conf">
configuration section</a>). This method would mainly configuration section</a>). This method would mainly
be of use if you wanted to keep the configuration be of use if you wanted to keep the configuration
directory in its default location, but desired directory in its default location, but desired
@ -1211,7 +1213,7 @@ indexedmimetypes = application/pdf
</div> </div>
<p>Variables set inside the <a class="link" href= <p>Variables set inside the <a class="link" href=
"#RCL.INSTALL.CONFIG" title= "#RCL.INSTALL.CONFIG" title=
"5.4.&nbsp;Configuration overview"><span class= "6.4.&nbsp;Configuration overview"><span class=
"application">Recoll</span> configuration files</a> control "application">Recoll</span> configuration files</a> control
which areas of the file system are indexed, and how files which areas of the file system are indexed, and how files
are processed. These variables can be set either by editing are processed. These variables can be set either by editing
@ -1232,7 +1234,7 @@ indexedmimetypes = application/pdf
can edit by hand.</p> can edit by hand.</p>
<p>The configuration is documented inside the <a class= <p>The configuration is documented inside the <a class=
"link" href="#RCL.INSTALL.CONFIG" title= "link" href="#RCL.INSTALL.CONFIG" title=
"5.4.&nbsp;Configuration overview">installation chapter</a> "6.4.&nbsp;Configuration overview">installation chapter</a>
of this document, or in the <span class= of this document, or in the <span class=
"citerefentry"><span class= "citerefentry"><span class=
"refentrytitle">recoll.conf</span>(5)</span> man page, but "refentrytitle">recoll.conf</span>(5)</span> man page, but
@ -1247,7 +1249,7 @@ indexedmimetypes = application/pdf
text, HTML or email (ie: pdf, postscript, ms-word...) are text, HTML or email (ie: pdf, postscript, ms-word...) are
described in the <a class="link" href= described in the <a class="link" href=
"#RCL.INSTALL.EXTERNAL" title= "#RCL.INSTALL.EXTERNAL" title=
"5.2.&nbsp;Supporting packages">external packages "6.2.&nbsp;Supporting packages">external packages
section.</a></p> section.</a></p>
<p>As of Recoll 1.18 there are two incompatible types of <p>As of Recoll 1.18 there are two incompatible types of
Recoll indexes, depending on the treatment of character Recoll indexes, depending on the treatment of character
@ -1332,7 +1334,7 @@ indexedmimetypes = application/pdf
other constraints. Most of the relevant parameters are other constraints. Most of the relevant parameters are
described in the <a class="link" href= described in the <a class="link" href=
"#RCL.INSTALL.CONFIG.RECOLLCONF.TERMS" title= "#RCL.INSTALL.CONFIG.RECOLLCONF.TERMS" title=
"5.4.2.2.&nbsp;Parameters affecting how we generate terms"> "6.4.2.2.&nbsp;Parameters affecting how we generate terms">
linked section</a>.</p> linked section</a>.</p>
<p>The different search interfaces (GUI, command line, <p>The different search interfaces (GUI, command line,
...) have different methods to define the set of indexes ...) have different methods to define the set of indexes
@ -1552,7 +1554,7 @@ thrQSizes = -1 -1 -1
"literal">ToolTip</code> popup on the text label. For "literal">ToolTip</code> popup on the text label. For
more detail, you will need to refer to the <a class= more detail, you will need to refer to the <a class=
"link" href="#RCL.INSTALL.CONFIG" title= "link" href="#RCL.INSTALL.CONFIG" title=
"5.4.&nbsp;Configuration overview">configuration "6.4.&nbsp;Configuration overview">configuration
section</a> of this guide.</p> section</a> of this guide.</p>
<p>The configuration tool normally respects the comments <p>The configuration tool normally respects the comments
and most of the formatting inside the configuration file, and most of the formatting inside the configuration file,
@ -1673,7 +1675,7 @@ thrQSizes = -1 -1 -1
<code class="literal">user</code> prefix is removed from <code class="literal">user</code> prefix is removed from
the name. This can be configured more precisely inside the the name. This can be configured more precisely inside the
<a class="link" href="#RCL.INSTALL.CONFIG.FIELDS" title= <a class="link" href="#RCL.INSTALL.CONFIG.FIELDS" title=
"5.4.3.&nbsp;The fields file"><code class= "6.4.3.&nbsp;The fields file"><code class=
"filename">fields</code> configuration file</a>.</p> "filename">fields</code> configuration file</a>.</p>
</div> </div>
<div class="sect1"> <div class="sect1">
@ -1726,7 +1728,7 @@ metadatacmds = ; tags = tmsu tags %f
"command"><strong>tmsu</strong></span> output will just "command"><strong>tmsu</strong></span> output will just
augment its contents. This will avoid the need to extend augment its contents. This will avoid the need to extend
the <a class="link" href="#RCL.PROGRAM.FIELDS" title= the <a class="link" href="#RCL.PROGRAM.FIELDS" title=
"4.2.&nbsp;Field data processing">field "5.2.&nbsp;Field data processing">field
configuration</a>.</p> configuration</a>.</p>
<p>Once re-indexing is performed (you'll need to force the <p>Once re-indexing is performed (you'll need to force the
file reindexing, <span class="application">Recoll</span> file reindexing, <span class="application">Recoll</span>
@ -2117,7 +2119,7 @@ fvwm
<p>When building <span class="application">Recoll</span>, <p>When building <span class="application">Recoll</span>,
the real time indexing support can be customised during the real time indexing support can be customised during
package <a class="link" href="#RCL.INSTALL.BUILDING.BUILD" package <a class="link" href="#RCL.INSTALL.BUILDING.BUILD"
title="5.3.2.&nbsp;Building">configuration</a> with the title="6.3.2.&nbsp;Building">configuration</a> with the
<code class="option">--with[out]-fam</code> or <code class= <code class="option">--with[out]-fam</code> or <code class=
"option">--with[out]-inotify</code> options. The default is "option">--with[out]-inotify</code> options. The default is
currently to include <span class= currently to include <span class=
@ -2181,7 +2183,7 @@ fs.inotify.max_user_watches=32768
"varname">mondelaypatterns</code> parameter in the "varname">mondelaypatterns</code> parameter in the
<a class="link" href= <a class="link" href=
"#RCL.INSTALL.CONFIG.RECOLLCONF.MISC" title= "#RCL.INSTALL.CONFIG.RECOLLCONF.MISC" title=
"5.4.2.5.&nbsp;Miscellaneous parameters">configuration "6.4.2.5.&nbsp;Miscellaneous parameters">configuration
section</a>.</p> section</a>.</p>
</div> </div>
</div> </div>
@ -2444,7 +2446,7 @@ fs.inotify.max_user_watches=32768
<p>You may also change the choice of applications by <p>You may also change the choice of applications by
editing the <a class="link" href= editing the <a class="link" href=
"#RCL.INSTALL.CONFIG.MIMEVIEW" title= "#RCL.INSTALL.CONFIG.MIMEVIEW" title=
"5.4.6.&nbsp;The mimeview file"><code class= "6.4.6.&nbsp;The mimeview file"><code class=
"filename">mimeview</code></a> configuration file if you "filename">mimeview</code></a> configuration file if you
find this more convenient.</p> find this more convenient.</p>
<p>Each result entry also has a right-click menu with an <p>Each result entry also has a right-click menu with an
@ -2460,10 +2462,10 @@ fs.inotify.max_user_watches=32768
only), or no configured external editor for the file only), or no configured external editor for the file
type. This can sometimes be adjusted simply by tweaking type. This can sometimes be adjusted simply by tweaking
the <a class="link" href="#RCL.INSTALL.CONFIG.MIMEMAP" the <a class="link" href="#RCL.INSTALL.CONFIG.MIMEMAP"
title="5.4.4.&nbsp;The mimemap file"><code class= title="6.4.4.&nbsp;The mimemap file"><code class=
"filename">mimemap</code></a> and <a class="link" href= "filename">mimemap</code></a> and <a class="link" href=
"#RCL.INSTALL.CONFIG.MIMEVIEW" title= "#RCL.INSTALL.CONFIG.MIMEVIEW" title=
"5.4.6.&nbsp;The mimeview file"><code class= "6.4.6.&nbsp;The mimeview file"><code class=
"filename">mimeview</code></a> configuration files (the "filename">mimeview</code></a> configuration files (the
latter can be modified with the user preferences latter can be modified with the user preferences
dialog).</p> dialog).</p>
@ -3885,7 +3887,7 @@ MimeType=*/*
stemming databases which were built during indexing stemming databases which were built during indexing
(this is set in the <a class="link" href= (this is set in the <a class="link" href=
"#RCL.INSTALL.CONFIG.RECOLLCONF" title= "#RCL.INSTALL.CONFIG.RECOLLCONF" title=
"5.4.2.&nbsp;Recoll main configuration file, recoll.conf"> "6.4.2.&nbsp;Recoll main configuration file, recoll.conf">
main configuration file</a>), or later added with main configuration file</a>), or later added with
<span class="command"><strong>recollindex <span class="command"><strong>recollindex
-s</strong></span> (See the recollindex manual). -s</strong></span> (See the recollindex manual).
@ -4050,7 +4052,7 @@ MimeType=*/*
associations are defined inside the <a class= associations are defined inside the <a class=
"link" href="#RCL.INSTALL.CONFIG.MIMECONF" "link" href="#RCL.INSTALL.CONFIG.MIMECONF"
title= title=
"5.4.5.&nbsp;The mimeconf file"><code class= "6.4.5.&nbsp;The mimeconf file"><code class=
"filename">mimeconf</code> configuration "filename">mimeconf</code> configuration
file</a>. If a thumbnail for the file is found file</a>. If a thumbnail for the file is found
at the standard Freedesktop location, this will at the standard Freedesktop location, this will
@ -4128,7 +4130,7 @@ MimeType=*/*
of indexed but not stored fields is not known at this of indexed but not stored fields is not known at this
point in the search process (see <a class="link" point in the search process (see <a class="link"
href="#RCL.PROGRAM.FIELDS" title= href="#RCL.PROGRAM.FIELDS" title=
"4.2.&nbsp;Field data processing">field "5.2.&nbsp;Field data processing">field
configuration</a>). There are currently very few configuration</a>). There are currently very few
fields stored by default, apart from the values above fields stored by default, apart from the values above
(only <code class="literal">author</code> and (only <code class="literal">author</code> and
@ -4297,7 +4299,7 @@ MimeType=*/*
<p>By writing a custom <span class= <p>By writing a custom <span class=
"application">Python</span> program, using the "application">Python</span> program, using the
<a class="link" href="#RCL.PROGRAM.PYTHONAPI" title= <a class="link" href="#RCL.PROGRAM.PYTHONAPI" title=
"4.3.&nbsp;Python API">Recoll Python API</a>.</p> "5.3.&nbsp;Python API">Recoll Python API</a>.</p>
</li> </li>
</ul> </ul>
</div> </div>
@ -4508,7 +4510,7 @@ bye goodbye "see you" \
accesses from the client.</p> accesses from the client.</p>
<p>The translations are defined in the <a class="link" <p>The translations are defined in the <a class="link"
href="#RCL.INSTALL.CONFIG.PTRANS" title= href="#RCL.INSTALL.CONFIG.PTRANS" title=
"5.4.7.&nbsp;The ptrans file"><code class= "6.4.7.&nbsp;The ptrans file"><code class=
"filename">ptrans</code></a> configuration file, which can "filename">ptrans</code></a> configuration file, which can
be edited by hand or from the GUI external indexes be edited by hand or from the GUI external indexes
configuration dialog: <span class= configuration dialog: <span class=
@ -4707,7 +4709,7 @@ bye goodbye "see you" \
"literal">containerfilename</code> to <em class= "literal">containerfilename</code> to <em class=
"replaceable"><code>cfn</code></em>. See the <a class= "replaceable"><code>cfn</code></em>. See the <a class=
"link" href="#RCL.INSTALL.CONFIG.FIELDS" title= "link" href="#RCL.INSTALL.CONFIG.FIELDS" title=
"5.4.3.&nbsp;The fields file">section about the "6.4.3.&nbsp;The fields file">section about the
<code class="filename">fields</code> file</a></p> <code class="filename">fields</code> file</a></p>
<p>The document input handlers used while indexing have the <p>The document input handlers used while indexing have the
possibility to create other fields with arbitrary names, possibility to create other fields with arbitrary names,
@ -5312,12 +5314,111 @@ dir:recoll dir:src -dir:utils -dir:common
</div> </div>
</div> </div>
</div> </div>
<div class="chapter">
<div class="titlepage">
<div>
<div>
<h1 class="title"><a name="RCL.MOVABLE" id=
"RCL.MOVABLE"></a>Chapter&nbsp;4.&nbsp;Movable
datasets</h1>
</div>
</div>
</div>
<p>As of <span class="application">Recoll</span> 1.24, it has
become easy to build self-contained datasets including a
<span class="application">Recoll</span> configuration
directory and index together with the indexed documents, and
to move such a dataset around (for example copying it to a
USB drive), without having to adjust the configuration for
querying the index.</p>
<div class="note" style=
"margin-left: 0.5in; margin-right: 0.5in;">
<h3 class="title">Note</h3>
<p>This is a query-time feature only. The index must only
be updated in its original location. If an update is
necessary in a different location, the index must be
reset.</p>
</div>
<p>The examples below will assume that you have a dataset
under <code class="filename">/home/me/mydata/</code>, with
the index configuration and data stored inside <code class=
"filename">/home/me/mydata/recoll-confdir</code>.</p>
<p>In order to be able to run queries after the dataset has
been moved, you must ensure the following:</p>
<div class="itemizedlist">
<ul class="itemizedlist" style="list-style-type: disc;">
<li class="listitem">
<p>The main configuration file must define the
<a class="link" href=
"#RCL.INSTALL.CONFIG.RECOLLCONF.ORGIDXCONFDIR">orgidxconfdir</a>
variable to be the original location of the
configuration directory (<code class=
"filename">orgidxconfdir=/home/me/mydata/recoll-confdir</code>
must be set inside <code class=
"filename">/home/me/mydata/recoll-confdir/recoll.conf</code>
in the example above).</p>
</li>
<li class="listitem">
<p>The configuration directory must exist with the
documents, somewhere under the directory which will be
moved. E.g. if you are moving <code class=
"filename">/home/me/mydata</code> around, the
configuration directory must exist somewhere below this
point, for example <code class=
"filename">/home/me/mydata/recoll-confdir</code>, or
<code class=
"filename">/home/me/mydata/sub/recoll-confdir</code>.</p>
</li>
<li class="listitem">
<p>You should keep the default locations for the index
elements (they are relative to the configuration
directory by default). Only the paths referring to the
documents themselves (e.g. <code class=
"literal">topdirs</code> values) should be absolute (in
general, they are only used when indexing anyway).</p>
</li>
</ul>
</div>
<p>Only the first point needs an explicit user action; the
<span class="application">Recoll</span> defaults are
compatible with the second one, and the third is natural.</p>
<p>If, after the move, the configuration directory needs to
be copied out of the dataset (for example because the thumb
drive is too slow), you can set the <a class="link" href=
"#RCL.INSTALL.CONFIG.RECOLLCONF.CURIDXCONFDIR">curidxconfdir</a>
variable inside the copied configuration to define the
location of the moved one. For example if <code class=
"filename">/home/me/mydata</code> is now mounted onto
<code class="filename">/media/me/somelabel</code>, but the
configuration directory and index have been copied to
<code class="filename">/tmp/tempconfig</code>, you would set
<code class="literal">curidxconfdir</code> to <code class=
"filename">/media/me/somelabel/recoll-confdir</code> inside
<code class="filename">/tmp/tempconfig/recoll.conf</code>.
<code class="literal">orgidxconfdir</code> would still be
<code class="filename">/home/me/mydata/recoll-confdir</code>
in the original and the copy.</p>
<p>If you are regularly copying the configuration out of the
dataset, it will be useful to write a script to automate the
procedure. This can't really be done inside <span class=
"application">Recoll</span> because there are probably many
possible variants. One example would be to copy the
configuration to make it writable, but keep the index data on
the medium because it is too big - in this case, the script
would also need to set <code class="literal">dbdir</code> in
the copied configuration.</p>
<p>The same set of modifications (<span class=
"application">Recoll</span> 1.24) has also made it possible
to run queries from a readonly configuration directory (with
slightly reduced function of course, such as not recording
the query history).</p>
</div>
<div class="chapter"> <div class="chapter">
<div class="titlepage"> <div class="titlepage">
<div> <div>
<div> <div>
<h1 class="title"><a name="RCL.PROGRAM" id= <h1 class="title"><a name="RCL.PROGRAM" id=
"RCL.PROGRAM"></a>Chapter&nbsp;4.&nbsp;Programming "RCL.PROGRAM"></a>Chapter&nbsp;5.&nbsp;Programming
interface</h1> interface</h1>
</div> </div>
</div> </div>
@ -5337,7 +5438,7 @@ dir:recoll dir:src -dir:utils -dir:common
<div> <div>
<h2 class="title" style="clear: both"><a name= <h2 class="title" style="clear: both"><a name=
"RCL.PROGRAM.FILTERS" id= "RCL.PROGRAM.FILTERS" id=
"RCL.PROGRAM.FILTERS"></a>4.1.&nbsp;Writing a "RCL.PROGRAM.FILTERS"></a>5.1.&nbsp;Writing a
document input handler</h2> document input handler</h2>
</div> </div>
</div> </div>
@ -5425,7 +5526,7 @@ dir:recoll dir:src -dir:utils -dir:common
"literal">meta</code> tags. These will be processed "literal">meta</code> tags. These will be processed
according to the directives found in the <a class="link" according to the directives found in the <a class="link"
href="#RCL.PROGRAM.FIELDS" title= href="#RCL.PROGRAM.FIELDS" title=
"4.2.&nbsp;Field data processing"><code class= "5.2.&nbsp;Field data processing"><code class=
"filename">fields</code> configuration file</a>.</p> "filename">fields</code> configuration file</a>.</p>
<p>The handlers that can handle multiple documents per file <p>The handlers that can handle multiple documents per file
return a single piece of data to identify each document return a single piece of data to identify each document
@ -5446,7 +5547,7 @@ dir:recoll dir:src -dir:utils -dir:common
<div> <div>
<h3 class="title"><a name= <h3 class="title"><a name=
"RCL.PROGRAM.FILTERS.SIMPLE" id= "RCL.PROGRAM.FILTERS.SIMPLE" id=
"RCL.PROGRAM.FILTERS.SIMPLE"></a>4.1.1.&nbsp;Simple "RCL.PROGRAM.FILTERS.SIMPLE"></a>5.1.1.&nbsp;Simple
input handlers</h3> input handlers</h3>
</div> </div>
</div> </div>
@ -5473,7 +5574,7 @@ dir:recoll dir:src -dir:utils -dir:common
deciding factor is metadata: <span class= deciding factor is metadata: <span class=
"application">Recoll</span> has a way to <a class="link" "application">Recoll</span> has a way to <a class="link"
href="#RCL.PROGRAM.FILTERS.HTML" title= href="#RCL.PROGRAM.FILTERS.HTML" title=
"4.1.4.&nbsp;Input handler output">extract metadata from "5.1.4.&nbsp;Input handler output">extract metadata from
the HTML header and use it for field searches.</a>.</p> the HTML header and use it for field searches.</a>.</p>
<p>The <code class= <p>The <code class=
"envar">RECOLL_FILTER_FORPREVIEW</code> environment "envar">RECOLL_FILTER_FORPREVIEW</code> environment
@ -5497,7 +5598,7 @@ dir:recoll dir:src -dir:utils -dir:common
<div> <div>
<h3 class="title"><a name= <h3 class="title"><a name=
"RCL.PROGRAM.FILTERS.MULTIPLE" id= "RCL.PROGRAM.FILTERS.MULTIPLE" id=
"RCL.PROGRAM.FILTERS.MULTIPLE"></a>4.1.2.&nbsp;"Multiple" "RCL.PROGRAM.FILTERS.MULTIPLE"></a>5.1.2.&nbsp;"Multiple"
handlers</h3> handlers</h3>
</div> </div>
</div> </div>
@ -5567,7 +5668,7 @@ dir:recoll dir:src -dir:utils -dir:common
<div> <div>
<h3 class="title"><a name= <h3 class="title"><a name=
"RCL.PROGRAM.FILTERS.ASSOCIATION" id= "RCL.PROGRAM.FILTERS.ASSOCIATION" id=
"RCL.PROGRAM.FILTERS.ASSOCIATION"></a>4.1.3.&nbsp;Telling "RCL.PROGRAM.FILTERS.ASSOCIATION"></a>5.1.3.&nbsp;Telling
<span class="application">Recoll</span> about the <span class="application">Recoll</span> about the
handler</h3> handler</h3>
</div> </div>
@ -5580,7 +5681,7 @@ dir:recoll dir:src -dir:utils -dir:common
<p>The association of files to MIME types is mostly based <p>The association of files to MIME types is mostly based
on name suffixes. The types are defined inside the on name suffixes. The types are defined inside the
<a class="link" href="#RCL.INSTALL.CONFIG.MIMEMAP" title= <a class="link" href="#RCL.INSTALL.CONFIG.MIMEMAP" title=
"5.4.4.&nbsp;The mimemap file"><code class= "6.4.4.&nbsp;The mimemap file"><code class=
"filename">mimemap</code> file</a>. Example:</p> "filename">mimemap</code> file</a>. Example:</p>
<pre class="programlisting"> <pre class="programlisting">
@ -5595,7 +5696,7 @@ dir:recoll dir:src -dir:utils -dir:common
<p>The second element is the association of MIME types to <p>The second element is the association of MIME types to
handlers in the <a class="link" href= handlers in the <a class="link" href=
"#RCL.INSTALL.CONFIG.MIMECONF" title= "#RCL.INSTALL.CONFIG.MIMECONF" title=
"5.4.5.&nbsp;The mimeconf file"><code class= "6.4.5.&nbsp;The mimeconf file"><code class=
"filename">mimeconf</code> file</a>. A sample will "filename">mimeconf</code> file</a>. A sample will
probably be better than a long explanation:</p> probably be better than a long explanation:</p>
<pre class="programlisting"> <pre class="programlisting">
@ -5657,7 +5758,7 @@ application/x-chm = execm rclchm
<div> <div>
<h3 class="title"><a name= <h3 class="title"><a name=
"RCL.PROGRAM.FILTERS.HTML" id= "RCL.PROGRAM.FILTERS.HTML" id=
"RCL.PROGRAM.FILTERS.HTML"></a>4.1.4.&nbsp;Input "RCL.PROGRAM.FILTERS.HTML"></a>5.1.4.&nbsp;Input
handler output</h3> handler output</h3>
</div> </div>
</div> </div>
@ -5714,7 +5815,7 @@ application/x-chm = execm rclchm
can be processed by the indexer in different ways, for can be processed by the indexer in different ways, for
searching or displaying inside query results. This is searching or displaying inside query results. This is
described in a <a class="link" href="#RCL.PROGRAM.FIELDS" described in a <a class="link" href="#RCL.PROGRAM.FIELDS"
title="4.2.&nbsp;Field data processing">following title="5.2.&nbsp;Field data processing">following
section.</a></p> section.</a></p>
<p>By default, the indexer will process the standard <p>By default, the indexer will process the standard
header fields if they are present: <code class= header fields if they are present: <code class=
@ -5755,7 +5856,7 @@ or
</pre> </pre>
<p>As written above, the processing of fields is <p>As written above, the processing of fields is
described in a <a class="link" href="#RCL.PROGRAM.FIELDS" described in a <a class="link" href="#RCL.PROGRAM.FIELDS"
title="4.2.&nbsp;Field data processing">further title="5.2.&nbsp;Field data processing">further
section</a>.</p> section</a>.</p>
<p>Persistent filters can use another, probably simpler, <p>Persistent filters can use another, probably simpler,
method to produce metadata, by calling the <code class= method to produce metadata, by calling the <code class=
@ -5774,7 +5875,7 @@ or
<div> <div>
<h3 class="title"><a name= <h3 class="title"><a name=
"RCL.PROGRAM.FILTERS.PAGES" id= "RCL.PROGRAM.FILTERS.PAGES" id=
"RCL.PROGRAM.FILTERS.PAGES"></a>4.1.5.&nbsp;Page "RCL.PROGRAM.FILTERS.PAGES"></a>5.1.5.&nbsp;Page
numbers</h3> numbers</h3>
</div> </div>
</div> </div>
@ -5793,7 +5894,7 @@ or
<div> <div>
<h2 class="title" style="clear: both"><a name= <h2 class="title" style="clear: both"><a name=
"RCL.PROGRAM.FIELDS" id= "RCL.PROGRAM.FIELDS" id=
"RCL.PROGRAM.FIELDS"></a>4.2.&nbsp;Field data "RCL.PROGRAM.FIELDS"></a>5.2.&nbsp;Field data
processing</h2> processing</h2>
</div> </div>
</div> </div>
@ -5888,7 +5989,7 @@ or
</div> </div>
<p>You can find more information in the <a class="link" <p>You can find more information in the <a class="link"
href="#RCL.INSTALL.CONFIG.FIELDS" title= href="#RCL.INSTALL.CONFIG.FIELDS" title=
"5.4.3.&nbsp;The fields file">section about the "6.4.3.&nbsp;The fields file">section about the
<code class="filename">fields</code> file</a>, or in <code class="filename">fields</code> file</a>, or in
comments inside the file.</p> comments inside the file.</p>
<p>You can also have a look at the <a class="ulink" href= <p>You can also have a look at the <a class="ulink" href=
@ -5904,7 +6005,7 @@ or
<div> <div>
<h2 class="title" style="clear: both"><a name= <h2 class="title" style="clear: both"><a name=
"RCL.PROGRAM.PYTHONAPI" id= "RCL.PROGRAM.PYTHONAPI" id=
"RCL.PROGRAM.PYTHONAPI"></a>4.3.&nbsp;Python API</h2> "RCL.PROGRAM.PYTHONAPI"></a>5.3.&nbsp;Python API</h2>
</div> </div>
</div> </div>
</div> </div>
@ -5914,7 +6015,7 @@ or
<div> <div>
<h3 class="title"><a name= <h3 class="title"><a name=
"RCL.PROGRAM.PYTHONAPI.INTRO" id= "RCL.PROGRAM.PYTHONAPI.INTRO" id=
"RCL.PROGRAM.PYTHONAPI.INTRO"></a>4.3.1.&nbsp;Introduction</h3> "RCL.PROGRAM.PYTHONAPI.INTRO"></a>5.3.1.&nbsp;Introduction</h3>
</div> </div>
</div> </div>
</div> </div>
@ -6015,7 +6116,7 @@ for doc in results:
<div> <div>
<h3 class="title"><a name= <h3 class="title"><a name=
"RCL.PROGRAM.PYTHONAPI.ELEMENTS" id= "RCL.PROGRAM.PYTHONAPI.ELEMENTS" id=
"RCL.PROGRAM.PYTHONAPI.ELEMENTS"></a>4.3.2.&nbsp;Interface "RCL.PROGRAM.PYTHONAPI.ELEMENTS"></a>5.3.2.&nbsp;Interface
elements</h3> elements</h3>
</div> </div>
</div> </div>
@ -6125,7 +6226,7 @@ for doc in results:
<div> <div>
<h3 class="title"><a name= <h3 class="title"><a name=
"RCL.PROGRAM.PYTHONAPI.SEARCH" id= "RCL.PROGRAM.PYTHONAPI.SEARCH" id=
"RCL.PROGRAM.PYTHONAPI.SEARCH"></a>4.3.3.&nbsp;Python "RCL.PROGRAM.PYTHONAPI.SEARCH"></a>5.3.3.&nbsp;Python
search interface</h3> search interface</h3>
</div> </div>
</div> </div>
@ -6136,7 +6237,7 @@ for doc in results:
<div> <div>
<h4 class="title"><a name= <h4 class="title"><a name=
"RCL.PROGRAM.PYTHONAPI.PACKAGE" id= "RCL.PROGRAM.PYTHONAPI.PACKAGE" id=
"RCL.PROGRAM.PYTHONAPI.PACKAGE"></a>4.3.3.1.&nbsp;Recoll "RCL.PROGRAM.PYTHONAPI.PACKAGE"></a>5.3.3.1.&nbsp;Recoll
package</h4> package</h4>
</div> </div>
</div> </div>
@ -6167,7 +6268,7 @@ for doc in results:
<div> <div>
<h4 class="title"><a name= <h4 class="title"><a name=
"RCL.PROGRAM.PYTHONAPI.RECOLL" id= "RCL.PROGRAM.PYTHONAPI.RECOLL" id=
"RCL.PROGRAM.PYTHONAPI.RECOLL"></a>4.3.3.2.&nbsp;The "RCL.PROGRAM.PYTHONAPI.RECOLL"></a>5.3.3.2.&nbsp;The
recoll module</h4> recoll module</h4>
</div> </div>
</div> </div>
@ -6556,7 +6657,7 @@ for doc in results:
<div> <div>
<h4 class="title"><a name= <h4 class="title"><a name=
"RCL.PROGRAM.PYTHONAPI.RCLEXTRACT" id= "RCL.PROGRAM.PYTHONAPI.RCLEXTRACT" id=
"RCL.PROGRAM.PYTHONAPI.RCLEXTRACT"></a>4.3.3.3.&nbsp;The "RCL.PROGRAM.PYTHONAPI.RCLEXTRACT"></a>5.3.3.3.&nbsp;The
rclextract module</h4> rclextract module</h4>
</div> </div>
</div> </div>
@ -6643,7 +6744,7 @@ filename = extractor.idoctofile(qdoc.ipath, qdoc.mimetype)</pre>
<div> <div>
<h4 class="title"><a name= <h4 class="title"><a name=
"RCL.PROGRAM.PYTHONAPI.SEARCH.EXAMPLE" id= "RCL.PROGRAM.PYTHONAPI.SEARCH.EXAMPLE" id=
"RCL.PROGRAM.PYTHONAPI.SEARCH.EXAMPLE"></a>4.3.3.4.&nbsp;Search "RCL.PROGRAM.PYTHONAPI.SEARCH.EXAMPLE"></a>5.3.3.4.&nbsp;Search
API usage example</h4> API usage example</h4>
</div> </div>
</div> </div>
@ -6688,7 +6789,7 @@ for i in range(nres):
<div> <div>
<h3 class="title"><a name= <h3 class="title"><a name=
"RCL.PROGRAM.PYTHONAPI.UPDATE" id= "RCL.PROGRAM.PYTHONAPI.UPDATE" id=
"RCL.PROGRAM.PYTHONAPI.UPDATE"></a>4.3.4.&nbsp;Creating "RCL.PROGRAM.PYTHONAPI.UPDATE"></a>5.3.4.&nbsp;Creating
Python external indexers</h3> Python external indexers</h3>
</div> </div>
</div> </div>
@ -6743,7 +6844,7 @@ for i in range(nres):
<div> <div>
<h4 class="title"><a name= <h4 class="title"><a name=
"RCL.PROGRAM.PYTHONAPI.UPDATE.UPDATE" id= "RCL.PROGRAM.PYTHONAPI.UPDATE.UPDATE" id=
"RCL.PROGRAM.PYTHONAPI.UPDATE.UPDATE"></a>4.3.4.1.&nbsp;Python "RCL.PROGRAM.PYTHONAPI.UPDATE.UPDATE"></a>5.3.4.1.&nbsp;Python
update interface</h4> update interface</h4>
</div> </div>
</div> </div>
@ -6837,7 +6938,7 @@ for i in range(nres):
<div> <div>
<h4 class="title"><a name= <h4 class="title"><a name=
"RCL.PROGRAM.PYTHONAPI.UPDATE.ACCESS" id= "RCL.PROGRAM.PYTHONAPI.UPDATE.ACCESS" id=
"RCL.PROGRAM.PYTHONAPI.UPDATE.ACCESS"></a>4.3.4.2.&nbsp;Query "RCL.PROGRAM.PYTHONAPI.UPDATE.ACCESS"></a>5.3.4.2.&nbsp;Query
data access for external indexers (1.23)</h4> data access for external indexers (1.23)</h4>
</div> </div>
</div> </div>
@ -6887,7 +6988,7 @@ makesig = path/to/recoll/src/python/samples/rclmbox.py makesig
<div> <div>
<h4 class="title"><a name= <h4 class="title"><a name=
"RCL.PROGRAM.PYTHONAPI.UPDATE.SAMPLES" id= "RCL.PROGRAM.PYTHONAPI.UPDATE.SAMPLES" id=
"RCL.PROGRAM.PYTHONAPI.UPDATE.SAMPLES"></a>4.3.4.3.&nbsp;External "RCL.PROGRAM.PYTHONAPI.UPDATE.SAMPLES"></a>5.3.4.3.&nbsp;External
indexer samples</h4> indexer samples</h4>
</div> </div>
</div> </div>
@ -6910,7 +7011,7 @@ makesig = path/to/recoll/src/python/samples/rclmbox.py makesig
<div> <div>
<h3 class="title"><a name= <h3 class="title"><a name=
"RCL.PROGRAM.PYTHONAPI.COMPAT" id= "RCL.PROGRAM.PYTHONAPI.COMPAT" id=
"RCL.PROGRAM.PYTHONAPI.COMPAT"></a>4.3.5.&nbsp;Package "RCL.PROGRAM.PYTHONAPI.COMPAT"></a>5.3.5.&nbsp;Package
compatibility with the previous version</h3> compatibility with the previous version</h3>
</div> </div>
</div> </div>
@ -6950,7 +7051,7 @@ except:
<div> <div>
<div> <div>
<h1 class="title"><a name="RCL.INSTALL" id= <h1 class="title"><a name="RCL.INSTALL" id=
"RCL.INSTALL"></a>Chapter&nbsp;5.&nbsp;Installation and "RCL.INSTALL"></a>Chapter&nbsp;6.&nbsp;Installation and
configuration</h1> configuration</h1>
</div> </div>
</div> </div>
@ -6961,7 +7062,7 @@ except:
<div> <div>
<h2 class="title" style="clear: both"><a name= <h2 class="title" style="clear: both"><a name=
"RCL.INSTALL.BINARY" id= "RCL.INSTALL.BINARY" id=
"RCL.INSTALL.BINARY"></a>5.1.&nbsp;Installing a "RCL.INSTALL.BINARY"></a>6.1.&nbsp;Installing a
binary copy</h2> binary copy</h2>
</div> </div>
</div> </div>
@ -6991,14 +7092,14 @@ except:
complains about missing dependancies).</p> complains about missing dependancies).</p>
<p>In all cases, you will have to check or install <p>In all cases, you will have to check or install
<a class="link" href="#RCL.INSTALL.EXTERNAL" title= <a class="link" href="#RCL.INSTALL.EXTERNAL" title=
"5.2.&nbsp;Supporting packages">supporting applications</a> "6.2.&nbsp;Supporting packages">supporting applications</a>
for the file types that you want to index beyond those that for the file types that you want to index beyond those that
are natively processed by <span class= are natively processed by <span class=
"application">Recoll</span> (text, HTML, email files, and a "application">Recoll</span> (text, HTML, email files, and a
few others).</p> few others).</p>
<p>You should also maybe have a look at the <a class="link" <p>You should also maybe have a look at the <a class="link"
href="#RCL.INSTALL.CONFIG" title= href="#RCL.INSTALL.CONFIG" title=
"5.4.&nbsp;Configuration overview">configuration "6.4.&nbsp;Configuration overview">configuration
section</a> (but this may not be necessary for a quick test section</a> (but this may not be necessary for a quick test
with default parameters). Most parameters can be more with default parameters). Most parameters can be more
conveniently set from the GUI interface.</p> conveniently set from the GUI interface.</p>
@ -7009,7 +7110,7 @@ except:
<div> <div>
<h2 class="title" style="clear: both"><a name= <h2 class="title" style="clear: both"><a name=
"RCL.INSTALL.EXTERNAL" id= "RCL.INSTALL.EXTERNAL" id=
"RCL.INSTALL.EXTERNAL"></a>5.2.&nbsp;Supporting "RCL.INSTALL.EXTERNAL"></a>6.2.&nbsp;Supporting
packages</h2> packages</h2>
</div> </div>
</div> </div>
@ -7227,7 +7328,7 @@ except:
<div> <div>
<h2 class="title" style="clear: both"><a name= <h2 class="title" style="clear: both"><a name=
"RCL.INSTALL.BUILDING" id= "RCL.INSTALL.BUILDING" id=
"RCL.INSTALL.BUILDING"></a>5.3.&nbsp;Building from "RCL.INSTALL.BUILDING"></a>6.3.&nbsp;Building from
source</h2> source</h2>
</div> </div>
</div> </div>
@ -7238,7 +7339,7 @@ except:
<div> <div>
<h3 class="title"><a name= <h3 class="title"><a name=
"RCL.INSTALL.BUILDING.PREREQS" id= "RCL.INSTALL.BUILDING.PREREQS" id=
"RCL.INSTALL.BUILDING.PREREQS"></a>5.3.1.&nbsp;Prerequisites</h3> "RCL.INSTALL.BUILDING.PREREQS"></a>6.3.1.&nbsp;Prerequisites</h3>
</div> </div>
</div> </div>
</div> </div>
@ -7343,7 +7444,7 @@ except:
<div> <div>
<h3 class="title"><a name= <h3 class="title"><a name=
"RCL.INSTALL.BUILDING.BUILD" id= "RCL.INSTALL.BUILDING.BUILD" id=
"RCL.INSTALL.BUILDING.BUILD"></a>5.3.2.&nbsp;Building</h3> "RCL.INSTALL.BUILDING.BUILD"></a>6.3.2.&nbsp;Building</h3>
</div> </div>
</div> </div>
</div> </div>
@ -7479,7 +7580,8 @@ except:
<pre class="screen"> <pre class="screen">
<strong class= <strong class=
"userinput"><code>cd recoll-xxx</code></strong> "userinput"><code>cd recoll-xxx</code></strong>
<strong class="userinput"><code>./configure</code></strong> <strong class=
"userinput"><code>./configure</code></strong>
<strong class="userinput"><code>make</code></strong> <strong class="userinput"><code>make</code></strong>
<strong class= <strong class=
"userinput"><code>(practices usual hardship-repelling invocations)</code></strong> "userinput"><code>(practices usual hardship-repelling invocations)</code></strong>
@ -7498,7 +7600,7 @@ except:
<div> <div>
<h4 class="title"><a name= <h4 class="title"><a name=
"RCL.INSTALL.BUILDING.BUILD.SOLARIS" id= "RCL.INSTALL.BUILDING.BUILD.SOLARIS" id=
"RCL.INSTALL.BUILDING.BUILD.SOLARIS"></a>5.3.2.1.&nbsp;Building "RCL.INSTALL.BUILDING.BUILD.SOLARIS"></a>6.3.2.1.&nbsp;Building
on Solaris</h4> on Solaris</h4>
</div> </div>
</div> </div>
@ -7522,7 +7624,7 @@ except:
<div> <div>
<h3 class="title"><a name= <h3 class="title"><a name=
"RCL.INSTALL.BUILDING.INSTALL" id= "RCL.INSTALL.BUILDING.INSTALL" id=
"RCL.INSTALL.BUILDING.INSTALL"></a>5.3.3.&nbsp;Installation</h3> "RCL.INSTALL.BUILDING.INSTALL"></a>6.3.3.&nbsp;Installation</h3>
</div> </div>
</div> </div>
</div> </div>
@ -7542,7 +7644,7 @@ except:
<div> <div>
<h2 class="title" style="clear: both"><a name= <h2 class="title" style="clear: both"><a name=
"RCL.INSTALL.CONFIG" id= "RCL.INSTALL.CONFIG" id=
"RCL.INSTALL.CONFIG"></a>5.4.&nbsp;Configuration "RCL.INSTALL.CONFIG"></a>6.4.&nbsp;Configuration
overview</h2> overview</h2>
</div> </div>
</div> </div>
@ -7704,7 +7806,7 @@ thesame = "some string with spaces"
<div> <div>
<h3 class="title"><a name= <h3 class="title"><a name=
"RCL.INSTALL.CONFIG.ENVIR" id= "RCL.INSTALL.CONFIG.ENVIR" id=
"RCL.INSTALL.CONFIG.ENVIR"></a>5.4.1.&nbsp;Environment "RCL.INSTALL.CONFIG.ENVIR"></a>6.4.1.&nbsp;Environment
variables</h3> variables</h3>
</div> </div>
</div> </div>
@ -7784,7 +7886,7 @@ thesame = "some string with spaces"
<div> <div>
<h3 class="title"><a name= <h3 class="title"><a name=
"RCL.INSTALL.CONFIG.RECOLLCONF" id= "RCL.INSTALL.CONFIG.RECOLLCONF" id=
"RCL.INSTALL.CONFIG.RECOLLCONF"></a>5.4.2.&nbsp;Recoll "RCL.INSTALL.CONFIG.RECOLLCONF"></a>6.4.2.&nbsp;Recoll
main configuration file, recoll.conf</h3> main configuration file, recoll.conf</h3>
</div> </div>
</div> </div>
@ -7795,7 +7897,7 @@ thesame = "some string with spaces"
<div> <div>
<h4 class="title"><a name= <h4 class="title"><a name=
"RCL.INSTALL.CONFIG.RECOLLCONF.WHATDOCS" id= "RCL.INSTALL.CONFIG.RECOLLCONF.WHATDOCS" id=
"RCL.INSTALL.CONFIG.RECOLLCONF.WHATDOCS"></a>5.4.2.1.&nbsp;Parameters "RCL.INSTALL.CONFIG.RECOLLCONF.WHATDOCS"></a>6.4.2.1.&nbsp;Parameters
affecting what documents we index</h4> affecting what documents we index</h4>
</div> </div>
</div> </div>
@ -8094,7 +8196,7 @@ thesame = "some string with spaces"
<div> <div>
<h4 class="title"><a name= <h4 class="title"><a name=
"RCL.INSTALL.CONFIG.RECOLLCONF.TERMS" id= "RCL.INSTALL.CONFIG.RECOLLCONF.TERMS" id=
"RCL.INSTALL.CONFIG.RECOLLCONF.TERMS"></a>5.4.2.2.&nbsp;Parameters "RCL.INSTALL.CONFIG.RECOLLCONF.TERMS"></a>6.4.2.2.&nbsp;Parameters
affecting how we generate terms</h4> affecting how we generate terms</h4>
</div> </div>
</div> </div>
@ -8305,7 +8407,7 @@ thesame = "some string with spaces"
<div> <div>
<h4 class="title"><a name= <h4 class="title"><a name=
"RCL.INSTALL.CONFIG.RECOLLCONF.STORE" id= "RCL.INSTALL.CONFIG.RECOLLCONF.STORE" id=
"RCL.INSTALL.CONFIG.RECOLLCONF.STORE"></a>5.4.2.3.&nbsp;Parameters "RCL.INSTALL.CONFIG.RECOLLCONF.STORE"></a>6.4.2.3.&nbsp;Parameters
affecting where and how we store things</h4> affecting where and how we store things</h4>
</div> </div>
</div> </div>
@ -8460,7 +8562,7 @@ thesame = "some string with spaces"
<div> <div>
<h4 class="title"><a name= <h4 class="title"><a name=
"RCL.INSTALL.CONFIG.RECOLLCONF.PERFS" id= "RCL.INSTALL.CONFIG.RECOLLCONF.PERFS" id=
"RCL.INSTALL.CONFIG.RECOLLCONF.PERFS"></a>5.4.2.4.&nbsp;Parameters "RCL.INSTALL.CONFIG.RECOLLCONF.PERFS"></a>6.4.2.4.&nbsp;Parameters
affecting indexing performance and resource affecting indexing performance and resource
usage</h4> usage</h4>
</div> </div>
@ -8561,7 +8663,7 @@ thesame = "some string with spaces"
<div> <div>
<h4 class="title"><a name= <h4 class="title"><a name=
"RCL.INSTALL.CONFIG.RECOLLCONF.MISC" id= "RCL.INSTALL.CONFIG.RECOLLCONF.MISC" id=
"RCL.INSTALL.CONFIG.RECOLLCONF.MISC"></a>5.4.2.5.&nbsp;Miscellaneous "RCL.INSTALL.CONFIG.RECOLLCONF.MISC"></a>6.4.2.5.&nbsp;Miscellaneous
parameters</h4> parameters</h4>
</div> </div>
</div> </div>
@ -8613,6 +8715,39 @@ thesame = "some string with spaces"
values if set, else the log... values.</p> values if set, else the log... values.</p>
</dd> </dd>
<dt><a name= <dt><a name=
"RCL.INSTALL.CONFIG.RECOLLCONF.ORGIDXCONFDIR" id=
"RCL.INSTALL.CONFIG.RECOLLCONF.ORGIDXCONFDIR"></a><span class="term"><code class="varname">orgidxconfdir</code></span></dt>
<dd>
<p>Original location of the configuration
directory. This is used exclusively for movable
datasets. Locating the configuration directory
inside the directory tree makes it possible to
provide automatic query time path translations
once the data set has moved (for example, because
it has been mounted on another location).</p>
</dd>
<dt><a name=
"RCL.INSTALL.CONFIG.RECOLLCONF.CURIDXCONFDIR" id=
"RCL.INSTALL.CONFIG.RECOLLCONF.CURIDXCONFDIR"></a><span class="term"><code class="varname">curidxconfdir</code></span></dt>
<dd>
<p>Current location of the configuration
directory. Complements orgidxconfdir for movable
datasets. This should be used if the
configuration directory has been copied from the
dataset to another location, either because the
dataset is readonly and an r/w copy is desired,
or for performance reasons. This records the
original moved location before copy, to allow
path translation computations. For example if a
dataset originally indexed as
'/home/me/mydata/config' has been mounted to
'/media/me/mydata', and the GUI is running from a
copied configuration, orgidxconfdir would be
'/home/me/mydata/config', and curidxconfdir (as
set in the copied configuration) would be
'/media/me/mydata/config'.</p>
</dd>
<dt><a name=
"RCL.INSTALL.CONFIG.RECOLLCONF.IDXRUNDIR" id= "RCL.INSTALL.CONFIG.RECOLLCONF.IDXRUNDIR" id=
"RCL.INSTALL.CONFIG.RECOLLCONF.IDXRUNDIR"></a><span class="term"><code class="varname">idxrundir</code></span></dt> "RCL.INSTALL.CONFIG.RECOLLCONF.IDXRUNDIR"></a><span class="term"><code class="varname">idxrundir</code></span></dt>
<dd> <dd>
@ -8794,7 +8929,7 @@ thesame = "some string with spaces"
<div> <div>
<h4 class="title"><a name= <h4 class="title"><a name=
"RCL.INSTALL.CONFIG.RECOLLCONF.QUERY" id= "RCL.INSTALL.CONFIG.RECOLLCONF.QUERY" id=
"RCL.INSTALL.CONFIG.RECOLLCONF.QUERY"></a>5.4.2.6.&nbsp;Query-time "RCL.INSTALL.CONFIG.RECOLLCONF.QUERY"></a>6.4.2.6.&nbsp;Query-time
parameters (no impact on the index)</h4> parameters (no impact on the index)</h4>
</div> </div>
</div> </div>
@ -8869,7 +9004,7 @@ thesame = "some string with spaces"
<div> <div>
<h4 class="title"><a name= <h4 class="title"><a name=
"RCL.INSTALL.CONFIG.RECOLLCONF.PDF" id= "RCL.INSTALL.CONFIG.RECOLLCONF.PDF" id=
"RCL.INSTALL.CONFIG.RECOLLCONF.PDF"></a>5.4.2.7.&nbsp;Parameters "RCL.INSTALL.CONFIG.RECOLLCONF.PDF"></a>6.4.2.7.&nbsp;Parameters
for the PDF input script</h4> for the PDF input script</h4>
</div> </div>
</div> </div>
@ -8930,7 +9065,7 @@ thesame = "some string with spaces"
<div> <div>
<h4 class="title"><a name= <h4 class="title"><a name=
"RCL.INSTALL.CONFIG.RECOLLCONF.SPECLOCATIONS" id= "RCL.INSTALL.CONFIG.RECOLLCONF.SPECLOCATIONS" id=
"RCL.INSTALL.CONFIG.RECOLLCONF.SPECLOCATIONS"></a>5.4.2.8.&nbsp;Parameters "RCL.INSTALL.CONFIG.RECOLLCONF.SPECLOCATIONS"></a>6.4.2.8.&nbsp;Parameters
set for specific locations</h4> set for specific locations</h4>
</div> </div>
</div> </div>
@ -8955,7 +9090,7 @@ thesame = "some string with spaces"
<div> <div>
<h3 class="title"><a name= <h3 class="title"><a name=
"RCL.INSTALL.CONFIG.FIELDS" id= "RCL.INSTALL.CONFIG.FIELDS" id=
"RCL.INSTALL.CONFIG.FIELDS"></a>5.4.3.&nbsp;The "RCL.INSTALL.CONFIG.FIELDS"></a>6.4.3.&nbsp;The
fields file</h3> fields file</h3>
</div> </div>
</div> </div>
@ -9052,7 +9187,7 @@ x-my-tag = mailmytag
<div> <div>
<h4 class="title"><a name= <h4 class="title"><a name=
"RCL.INSTALL.CONFIG.FIELDS.XATTR" id= "RCL.INSTALL.CONFIG.FIELDS.XATTR" id=
"RCL.INSTALL.CONFIG.FIELDS.XATTR"></a>5.4.3.1.&nbsp;Extended "RCL.INSTALL.CONFIG.FIELDS.XATTR"></a>6.4.3.1.&nbsp;Extended
attributes in the fields file</h4> attributes in the fields file</h4>
</div> </div>
</div> </div>
@ -9078,7 +9213,7 @@ x-my-tag = mailmytag
<div> <div>
<h3 class="title"><a name= <h3 class="title"><a name=
"RCL.INSTALL.CONFIG.MIMEMAP" id= "RCL.INSTALL.CONFIG.MIMEMAP" id=
"RCL.INSTALL.CONFIG.MIMEMAP"></a>5.4.4.&nbsp;The "RCL.INSTALL.CONFIG.MIMEMAP"></a>6.4.4.&nbsp;The
mimemap file</h3> mimemap file</h3>
</div> </div>
</div> </div>
@ -9127,7 +9262,7 @@ x-my-tag = mailmytag
<div> <div>
<h3 class="title"><a name= <h3 class="title"><a name=
"RCL.INSTALL.CONFIG.MIMECONF" id= "RCL.INSTALL.CONFIG.MIMECONF" id=
"RCL.INSTALL.CONFIG.MIMECONF"></a>5.4.5.&nbsp;The "RCL.INSTALL.CONFIG.MIMECONF"></a>6.4.5.&nbsp;The
mimeconf file</h3> mimeconf file</h3>
</div> </div>
</div> </div>
@ -9154,7 +9289,7 @@ x-my-tag = mailmytag
<div> <div>
<h3 class="title"><a name= <h3 class="title"><a name=
"RCL.INSTALL.CONFIG.MIMEVIEW" id= "RCL.INSTALL.CONFIG.MIMEVIEW" id=
"RCL.INSTALL.CONFIG.MIMEVIEW"></a>5.4.6.&nbsp;The "RCL.INSTALL.CONFIG.MIMEVIEW"></a>6.4.6.&nbsp;The
mimeview file</h3> mimeview file</h3>
</div> </div>
</div> </div>
@ -9274,7 +9409,7 @@ x-my-tag = mailmytag
<div> <div>
<h3 class="title"><a name= <h3 class="title"><a name=
"RCL.INSTALL.CONFIG.PTRANS" id= "RCL.INSTALL.CONFIG.PTRANS" id=
"RCL.INSTALL.CONFIG.PTRANS"></a>5.4.7.&nbsp;The "RCL.INSTALL.CONFIG.PTRANS"></a>6.4.7.&nbsp;The
<code class="filename">ptrans</code> file</h3> <code class="filename">ptrans</code> file</h3>
</div> </div>
</div> </div>
@ -9305,7 +9440,7 @@ x-my-tag = mailmytag
<div> <div>
<h3 class="title"><a name= <h3 class="title"><a name=
"RCL.INSTALL.CONFIG.EXAMPLES" id= "RCL.INSTALL.CONFIG.EXAMPLES" id=
"RCL.INSTALL.CONFIG.EXAMPLES"></a>5.4.8.&nbsp;Examples "RCL.INSTALL.CONFIG.EXAMPLES"></a>6.4.8.&nbsp;Examples
of configuration adjustments</h3> of configuration adjustments</h3>
</div> </div>
</div> </div>
@ -9316,7 +9451,7 @@ x-my-tag = mailmytag
<div> <div>
<h4 class="title"><a name= <h4 class="title"><a name=
"RCL.INSTALL.CONFIG.EXAMPLES.ADDVIEW" id= "RCL.INSTALL.CONFIG.EXAMPLES.ADDVIEW" id=
"RCL.INSTALL.CONFIG.EXAMPLES.ADDVIEW"></a>5.4.8.1.&nbsp;Adding "RCL.INSTALL.CONFIG.EXAMPLES.ADDVIEW"></a>6.4.8.1.&nbsp;Adding
an external viewer for an non-indexed type</h4> an external viewer for an non-indexed type</h4>
</div> </div>
</div> </div>
@ -9379,7 +9514,7 @@ application/x-blobapp = blobviewer %f
<div> <div>
<h4 class="title"><a name= <h4 class="title"><a name=
"RCL.INSTALL.CONFIG.EXAMPLES.ADDINDEX" id= "RCL.INSTALL.CONFIG.EXAMPLES.ADDINDEX" id=
"RCL.INSTALL.CONFIG.EXAMPLES.ADDINDEX"></a>5.4.8.2.&nbsp;Adding "RCL.INSTALL.CONFIG.EXAMPLES.ADDINDEX"></a>6.4.8.2.&nbsp;Adding
indexing support for a new file type</h4> indexing support for a new file type</h4>
</div> </div>
</div> </div>
@ -9431,7 +9566,7 @@ application/x-blobapp = exec rclblob
text or html contents on the standard output.</p> text or html contents on the standard output.</p>
<p>The <a class="link" href="#RCL.PROGRAM.FILTERS" <p>The <a class="link" href="#RCL.PROGRAM.FILTERS"
title= title=
"4.1.&nbsp;Writing a document input handler">filter "5.1.&nbsp;Writing a document input handler">filter
programming</a> section describes in more detail how to programming</a> section describes in more detail how to
write an input handler.</p> write an input handler.</p>
</div> </div>

View File

@ -4118,6 +4118,88 @@ dir:recoll dir:src -dir:utils -dir:common
</chapter> <!-- Search --> </chapter> <!-- Search -->
<chapter id="RCL.MOVABLE">
<title>Movable datasets</title>
<para>As of &RCL; 1.24, it has become easy to build self-contained
datasets including a &RCL; configuration directory and index together
with the indexed documents, and to move such a dataset around (for
example copying it to a USB drive), without having to adjust the
configuration for querying the index.</para>
<note><para>This is a query-time feature only. The index must only be
updated in its original location. If an update is necessary in a
different location, the index must be reset.</para></note>
<para>The examples below will assume that you have a dataset under
<filename>/home/me/mydata/</filename>, with the index configuration and
data stored inside
<filename>/home/me/mydata/recoll-confdir</filename>.</para>
<para>In order to be able to run queries after the dataset has been
moved, you must ensure the following:
<itemizedlist>
<listitem><para>The main configuration file must define the <link
linkend="RCL.INSTALL.CONFIG.RECOLLCONF.ORGIDXCONFDIR">orgidxconfdir</link>
variable to be the original location of the configuration directory
(<filename>orgidxconfdir=/home/me/mydata/recoll-confdir</filename>
must be set inside
<filename>/home/me/mydata/recoll-confdir/recoll.conf</filename> in
the example above).</para></listitem>
<listitem><para>The configuration directory must exist with the
documents, somewhere under the directory which will be
moved. E.g. if you are moving <filename>/home/me/mydata</filename>
around, the configuration directory must exist somewhere below this
point, for example
<filename>/home/me/mydata/recoll-confdir</filename>, or
<filename>/home/me/mydata/sub/recoll-confdir</filename>.</para></listitem>
<listitem><para>You should keep the default locations for the index
elements (they are relative to the configuration directory by
default). Only the paths referring to the documents themselves
(e.g. <literal>topdirs</literal> values) should be
absolute (in general, they are only used when indexing
anyway).</para></listitem>
</itemizedlist>
</para>
<para>Only the first point needs an explicit user action, the &RCL;
defaults are compatible with the second one, and the third is
natural.</para>
<para>If, after the move, the configuration directory needs to be
copied out of the dataset (for example because the thumb drive is too
slow), you can set the <link
linkend="RCL.INSTALL.CONFIG.RECOLLCONF.CURIDXCONFDIR">
curidxconfdir</link> variable inside the copied configuration to
define the location of the moved one. For example if
<filename>/home/me/mydata</filename> is now mounted onto
<filename>/media/me/somelabel</filename>, but the configuration
directory and index have been copied to
<filename>/tmp/tempconfig</filename>, you would set
<literal>curidxconfdir</literal> to
<filename>/media/me/somelabel/recoll-confdir</filename> inside
<filename>/tmp/tempconfig/recoll.conf</filename>.
<literal>orgidxconfdir</literal> would still be
<filename>/home/me/mydata/recoll-confdir</filename> in the original and
the copy.</para>
<para>If you are regularly copying the configuration out of the
dataset, it will be useful to write a script to automate the
procedure. This can't really be done inside &RCL; because there are
probably many possible variants. One example would be to copy the
configuration to make it writable, but keep the index data on the
medium because it is too big - in this case, the script would also need
to set <literal>dbdir</literal> in the copied configuration.</para>
<para>The same set of modifications (&RCL; 1.24) has also made it
possible to run queries from a readonly configuration directory (with
slightly reduced functionality of course, such as not recording the query
history).</para>
</chapter>
<chapter id="RCL.PROGRAM"> <chapter id="RCL.PROGRAM">
<title>Programming interface</title> <title>Programming interface</title>
@ -5719,7 +5801,8 @@ except:
very much welcome patches</ulink>.</para> very much welcome patches</ulink>.</para>
<formalpara><title>Configure options:</title> <formalpara>
<title>Configure options:</title>
<para> <para>
<itemizedlist> <itemizedlist>
@ -6489,4 +6572,3 @@ application/x-blobapp = exec rclblob
</sect1> </sect1>
</chapter> </chapter>
</book> </book>

View File

@ -571,6 +571,31 @@ logfilename = stderr
# the log... values.</descr></var> # the log... values.</descr></var>
#daemlogfilename = /dev/null #daemlogfilename = /dev/null
# <var name="orgidxconfdir" type="dfn">
#
# <brief>Original location of the configuration directory.</brief>
# <descr>This is used exclusively for movable datasets. Locating the
# configuration directory inside the directory tree makes it possible to
# provide automatic query time path translations once the data set has
# moved (for example, because it has been mounted on another
# location).</descr></var>
#orgidxconfdir =
# <var name="curidxconfdir" type="dfn">
#
# <brief>Current location of the configuration directory.</brief>
# <descr>Complement orgidxconfdir for movable datasets. This should be used
# if the configuration directory has been copied from the dataset to
# another location, either because the dataset is readonly and an r/w copy
# is desired, or for performance reasons. This records the original moved
# location before copy, to allow path translation computations. For
# example if a dataset originally indexed as '/home/me/mydata/config' has
# been mounted to '/media/me/mydata', and the GUI is running from a copied
# configuration, orgidxconfdir would be '/home/me/mydata/config', and
# curidxconfdir (as set in the copied configuration) would be
# '/media/me/mydata/config'.</descr></var>
#curidxconfdir =
# <var name="idxrundir" type="dfn"> # <var name="idxrundir" type="dfn">
# #
# <brief>Indexing process current directory.</brief> <descr>The input # <brief>Indexing process current directory.</brief> <descr>The input