Movable datasets support
This commit is contained in:
parent
329ab7b90d
commit
09acb5687c
@ -1318,17 +1318,85 @@ string RclConfig::getPidfile() const
|
||||
return path_cat(getCacheDir(), "index.pid");
|
||||
}
|
||||
|
||||
/* Eliminate the common leaf part of file paths p1 and p2. Example:
|
||||
* /mnt1/common/part /mnt2/common/part -> /mnt1 /mnt2. This is used
|
||||
* for computing translations for paths when the dataset has been
|
||||
* moved. Of course this could be done more efficiently than by splitting
|
||||
* into vectors, but we don't care.*/
|
||||
static string path_diffstems(const string& p1, const string& p2,
|
||||
string& r1, string& r2)
|
||||
{
|
||||
string reason;
|
||||
r1.clear();
|
||||
r2.clear();
|
||||
vector<string> v1, v2;
|
||||
stringToTokens(p1, v1, "/");
|
||||
stringToTokens(p2, v2, "/");
|
||||
unsigned int l1 = v1.size();
|
||||
unsigned int l2 = v2.size();
|
||||
|
||||
// Search for common leaf part
|
||||
unsigned int cl = 0;
|
||||
for (; cl < MIN(l1, l2); cl++) {
|
||||
if (v1[l1-cl-1] != v2[l2-cl-1]) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
//cerr << "Common length = " << cl << endl;
|
||||
if (cl == 0) {
|
||||
reason = "Input paths are empty or have no common part";
|
||||
return reason;
|
||||
}
|
||||
for (unsigned i = 0; i < l1 - cl; i++) {
|
||||
r1 += "/" + v1[i];
|
||||
}
|
||||
for (unsigned i = 0; i < l2 - cl; i++) {
|
||||
r2 += "/" + v2[i];
|
||||
}
|
||||
|
||||
return reason;
|
||||
}
|
||||
|
||||
void RclConfig::urlrewrite(const string& dbdir, string& url) const
|
||||
{
|
||||
LOGDEB2("RclConfig::urlrewrite: dbdir [" << dbdir << "] url [" << url <<
|
||||
LOGDEB("RclConfig::urlrewrite: dbdir [" << dbdir << "] url [" << url <<
|
||||
"]\n");
|
||||
|
||||
// If orgidxconfdir is set, we assume that this index is for a
|
||||
// movable dataset, with the configuration directory stored inside
|
||||
// the dataset tree. This allows computing automatic path
|
||||
// translations if the dataset has been moved.
|
||||
string orig_confdir;
|
||||
string cur_confdir;
|
||||
string confstemorg, confstemrep;
|
||||
if (m_conf->get("orgidxconfdir", orig_confdir, "")) {
|
||||
if (!m_conf->get("curidxconfdir", cur_confdir, "")) {
|
||||
cur_confdir = m_confdir;
|
||||
}
|
||||
LOGDEB("RclConfig::urlrewrite: orgidxconfdir: " << orig_confdir <<
|
||||
" cur_confdir " << cur_confdir << endl);
|
||||
string reason = path_diffstems(orig_confdir, cur_confdir,
|
||||
confstemorg, confstemrep);
|
||||
if (!reason.empty()) {
|
||||
LOGERR("urlrewrite: path_diffstems failed: " << reason <<
|
||||
" : orig_confdir [" << orig_confdir <<
|
||||
"] cur_confdir [" << cur_confdir << endl);
|
||||
confstemorg = confstemrep = "";
|
||||
}
|
||||
}
|
||||
|
||||
// Do path translations exist for this index ?
|
||||
bool needptrans = true;
|
||||
if (m_ptrans == 0 || !m_ptrans->hasSubKey(dbdir)) {
|
||||
LOGDEB2("RclConfig::urlrewrite: no paths translations (m_ptrans " <<
|
||||
m_ptrans << ")\n");
|
||||
needptrans = false;
|
||||
}
|
||||
|
||||
if (!needptrans && confstemorg.empty()) {
|
||||
return;
|
||||
}
|
||||
bool computeurl = false;
|
||||
|
||||
string path = fileurltolocalpath(url);
|
||||
if (path.empty()) {
|
||||
@ -1336,21 +1404,33 @@ void RclConfig::urlrewrite(const string& dbdir, string& url) const
|
||||
return;
|
||||
}
|
||||
|
||||
// Do the movable volume thing.
|
||||
if (!confstemorg.empty() && confstemorg.size() <= path.size() &&
|
||||
!path.compare(0, confstemorg.size(), confstemorg)) {
|
||||
path = path.replace(0, confstemorg.size(), confstemrep);
|
||||
computeurl = true;
|
||||
}
|
||||
|
||||
if (needptrans) {
|
||||
// For each translation check if the prefix matches the input path,
|
||||
// replace and return the result if it does.
|
||||
vector<string> opaths = m_ptrans->getNames(dbdir);
|
||||
for (vector<string>::const_iterator it = opaths.begin();
|
||||
it != opaths.end(); it++) {
|
||||
if (it->size() <= path.size() && !path.compare(0, it->size(), *it)) {
|
||||
for (const auto& opath: opaths) {
|
||||
if (opath.size() <= path.size() &&
|
||||
!path.compare(0, opath.size(), opath)) {
|
||||
string npath;
|
||||
// This call always succeeds because the key comes from getNames()
|
||||
if (m_ptrans->get(*it, npath, dbdir)) {
|
||||
path = path.replace(0, it->size(), npath);
|
||||
url = path_pathtofileurl(path);
|
||||
// Key comes from getNames()=> call must succeed
|
||||
if (m_ptrans->get(opath, npath, dbdir)) {
|
||||
path = path.replace(0, opath.size(), npath);
|
||||
computeurl = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (computeurl) {
|
||||
url = path_pathtofileurl(path);
|
||||
}
|
||||
}
|
||||
|
||||
bool RclConfig::sourceChanged() const
|
||||
|
||||
@ -471,6 +471,25 @@ the log... values.</para></listitem></varlistentry>
|
||||
<listitem><para>Override logfilename for the indexer in real time
|
||||
mode. The default is to use the idx... values if set, else
|
||||
the log... values.</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.ORGIDXCONFDIR">
|
||||
<term><varname>orgidxconfdir</varname></term>
|
||||
<listitem><para>Original location of the configuration directory. This is used exclusively for movable datasets. Locating the
|
||||
configuration directory inside the directory tree makes it possible to
|
||||
provide automatic query time path translations once the data set has
|
||||
moved (for example, because it has been mounted on another
|
||||
location).</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.CURIDXCONFDIR">
|
||||
<term><varname>curidxconfdir</varname></term>
|
||||
<listitem><para>Current location of the configuration directory. Complement orgidxconfdir for movable datasets. This should be used
|
||||
if the configuration directory has been copied from the dataset to
|
||||
another location, either because the dataset is readonly and an r/w copy
|
||||
is desired, or for performance reasons. This records the original moved
|
||||
location before copy, to allow path translation computations. For
|
||||
example if a dataset originally indexed as '/home/me/mydata' has
|
||||
been mounted to '/media/me/mydata', and the GUI is running from a copied
|
||||
configuration, orgidxconfdir would be '/home/me/mydata/config', and
|
||||
curidxconfdir (as set in the copied configuration) would be
|
||||
'/media/me/mydata/config'.</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.IDXRUNDIR">
|
||||
<term><varname>idxrundir</varname></term>
|
||||
<listitem><para>Indexing process current directory. The input
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -498,12 +498,12 @@
|
||||
indexed (no others will be indexed), by setting
|
||||
the <link linkend="RCL.INSTALL.CONFIG.RECOLLCONF.INDEXEDMIMETYPES">
|
||||
indexedmimetypes</link> configuration variable. Example:<programlisting>
|
||||
indexedmimetypes = text/html application/pdf
|
||||
indexedmimetypes = text/html application/pdf
|
||||
</programlisting>
|
||||
It is possible to redefine this parameter for
|
||||
subdirectories. Example:<programlisting>
|
||||
[/path/to/my/dir]
|
||||
indexedmimetypes = application/pdf
|
||||
[/path/to/my/dir]
|
||||
indexedmimetypes = application/pdf
|
||||
</programlisting>
|
||||
(When using sections like this, don't forget that they remain
|
||||
in effect until the end of the file or another section
|
||||
@ -920,10 +920,10 @@ indexedmimetypes = application/pdf
|
||||
processing their text, and one to update the index. This was
|
||||
tested to be the best configuration on the test system
|
||||
(quadri-processor with multiple disks).
|
||||
<programlisting>
|
||||
thrQSizes = 2 2 2
|
||||
thrTCounts = 4 2 1
|
||||
</programlisting>
|
||||
<programlisting>
|
||||
thrQSizes = 2 2 2
|
||||
thrTCounts = 4 2 1
|
||||
</programlisting>
|
||||
</para>
|
||||
|
||||
<para>The following example would use a single queue, and the
|
||||
@ -936,18 +936,18 @@ thrTCounts = 4 2 1
|
||||
would be performed purely sequentially), so the previous
|
||||
approach is preferred. YMMV... The 2 last values for
|
||||
thrTCounts are ignored.
|
||||
<programlisting>
|
||||
thrQSizes = 2 -1 -1
|
||||
thrTCounts = 6 1 1
|
||||
</programlisting>
|
||||
<programlisting>
|
||||
thrQSizes = 2 -1 -1
|
||||
thrTCounts = 6 1 1
|
||||
</programlisting>
|
||||
</para>
|
||||
|
||||
<para>The following example would disable
|
||||
multithreading. Indexing will be performed by a single
|
||||
thread.
|
||||
<programlisting>
|
||||
thrQSizes = -1 -1 -1
|
||||
</programlisting>
|
||||
<programlisting>
|
||||
thrQSizes = -1 -1 -1
|
||||
</programlisting>
|
||||
</para>
|
||||
|
||||
</sect2>
|
||||
@ -1113,7 +1113,7 @@ thrQSizes = -1 -1 -1
|
||||
configuration file:</para>
|
||||
|
||||
<programlisting>[/some/area/of/the/fs]
|
||||
metadatacmds = ; tags = tmsu tags %f
|
||||
metadatacmds = ; tags = tmsu tags %f
|
||||
</programlisting>
|
||||
|
||||
<note><para>Depending on the <application>tmsu</application> version,
|
||||
@ -1154,7 +1154,7 @@ metadatacmds = ; tags = tmsu tags %f
|
||||
couple the tag update with a <literal>recollindex -e -i
|
||||
filename.</literal></para>
|
||||
|
||||
</sect1>
|
||||
</sect1>
|
||||
|
||||
|
||||
<sect1 id="RCL.INDEXING.PDF">
|
||||
@ -1216,9 +1216,9 @@ metadatacmds = ; tags = tmsu tags %f
|
||||
the metadata fields (available for &RCL; 1.23.3 and later. 1.23.2
|
||||
has equivalent code inside the handler script). Example:</para>
|
||||
<programlisting>import sys
|
||||
import re
|
||||
import re
|
||||
|
||||
class MetaFixer(object):
|
||||
class MetaFixer(object):
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
@ -1367,13 +1367,13 @@ class MetaFixer(object):
|
||||
PATH):
|
||||
|
||||
<screen><![CDATA[
|
||||
30 3 * * * recollindex > /some/tmp/dir/recolltrace 2>&1
|
||||
]]></screen>
|
||||
30 3 * * * recollindex > /some/tmp/dir/recolltrace 2>&1
|
||||
]]></screen>
|
||||
|
||||
Or, using <command>anacron</command>:
|
||||
<screen><![CDATA[
|
||||
1 15 su mylogin -c "recollindex recollindex > /tmp/rcltraceme 2>&1"
|
||||
]]></screen>
|
||||
<screen><![CDATA[
|
||||
1 15 su mylogin -c "recollindex recollindex > /tmp/rcltraceme 2>&1"
|
||||
]]></screen>
|
||||
</para>
|
||||
|
||||
<para>As of version 1.17 the &RCL; GUI has dialogs to manage
|
||||
@ -1435,12 +1435,12 @@ class MetaFixer(object):
|
||||
at the end:</para>
|
||||
|
||||
<programlisting>recollconf=$HOME/.recoll-home
|
||||
recolldata=/usr/local/share/recoll
|
||||
RECOLL_CONFDIR=$recollconf $recolldata/examples/rclmon.sh start
|
||||
recolldata=/usr/local/share/recoll
|
||||
RECOLL_CONFDIR=$recollconf $recolldata/examples/rclmon.sh start
|
||||
|
||||
fvwm
|
||||
fvwm
|
||||
|
||||
</programlisting>
|
||||
</programlisting>
|
||||
|
||||
<para>The indexing daemon gets started, then the window manager,
|
||||
for which the session waits.</para> <para>By default the
|
||||
@ -1487,17 +1487,17 @@ fvwm
|
||||
increasing the resources available to inotify, which are
|
||||
normally defined in <filename>/etc/sysctl.conf</filename>.
|
||||
<programlisting>
|
||||
### inotify
|
||||
#
|
||||
# cat /proc/sys/fs/inotify/max_queued_events - 16384
|
||||
# cat /proc/sys/fs/inotify/max_user_instances - 128
|
||||
# cat /proc/sys/fs/inotify/max_user_watches - 16384
|
||||
#
|
||||
# -- Change to:
|
||||
#
|
||||
fs.inotify.max_queued_events=32768
|
||||
fs.inotify.max_user_instances=256
|
||||
fs.inotify.max_user_watches=32768
|
||||
### inotify
|
||||
#
|
||||
# cat /proc/sys/fs/inotify/max_queued_events - 16384
|
||||
# cat /proc/sys/fs/inotify/max_user_instances - 128
|
||||
# cat /proc/sys/fs/inotify/max_user_watches - 16384
|
||||
#
|
||||
# -- Change to:
|
||||
#
|
||||
fs.inotify.max_queued_events=32768
|
||||
fs.inotify.max_user_instances=256
|
||||
fs.inotify.max_user_watches=32768
|
||||
</programlisting>
|
||||
|
||||
</para>
|
||||
@ -1915,11 +1915,11 @@ fs.inotify.max_user_watches=32768
|
||||
<filename>~/.recoll/scripts/myscript.desktop</filename> (the exact
|
||||
file name inside the directory is irrelevant):
|
||||
<programlisting>
|
||||
[Desktop Entry]
|
||||
Type=Application
|
||||
Name=MyFirstScript
|
||||
Exec=/home/me/bin/tryscript %F
|
||||
MimeType=*/*
|
||||
[Desktop Entry]
|
||||
Type=Application
|
||||
Name=MyFirstScript
|
||||
Exec=/home/me/bin/tryscript %F
|
||||
MimeType=*/*
|
||||
</programlisting>
|
||||
The <literal>Name</literal> attribute defines the label which will
|
||||
appear inside the <guilabel>Run Script</guilabel> menu. The
|
||||
@ -2084,10 +2084,10 @@ MimeType=*/*
|
||||
history.</para>
|
||||
|
||||
<para>Here follows an example:
|
||||
<programlisting>
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<programlisting>
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
|
||||
<fragbuts version="1.0">
|
||||
<fragbuts version="1.0">
|
||||
|
||||
<radiobuttons>
|
||||
|
||||
@ -2121,8 +2121,8 @@ MimeType=*/*
|
||||
</fragbut>
|
||||
|
||||
</buttons>
|
||||
</fragbuts>
|
||||
</programlisting>
|
||||
</fragbuts>
|
||||
</programlisting>
|
||||
</para>
|
||||
|
||||
<para>Each <literal>radiobuttons</literal> or
|
||||
@ -3162,27 +3162,27 @@ MimeType=*/*
|
||||
"<span style='white-space:nowrap'><i>%M</i> %D</span> <i>%U</i> %i<br>\n"
|
||||
"%A %K</td>\n"
|
||||
"</tr></table>\n"
|
||||
]]></screen>
|
||||
]]></screen>
|
||||
|
||||
You may, for example, try the following for a more web-like
|
||||
experience:
|
||||
|
||||
<screen><![CDATA[
|
||||
<u><b><a href="P%N">%T</a></b></u><br>
|
||||
%A<font color=#008000>%U - %S</font> - %L
|
||||
]]></screen>
|
||||
<u><b><a href="P%N">%T</a></b></u><br>
|
||||
%A<font color=#008000>%U - %S</font> - %L
|
||||
]]></screen>
|
||||
|
||||
Note that the P%N link in the above paragraph makes the title a
|
||||
preview link. Or the clean looking:
|
||||
|
||||
<screen><![CDATA[
|
||||
<img src="%I" align="left">%L <font color="#900000">%R</font>
|
||||
<b>%T&</b><br>%S
|
||||
<font color="#808080"><i>%U</i></font>
|
||||
<table bgcolor="#e0e0e0">
|
||||
<tr><td><div>%A</div></td></tr>
|
||||
</table>%K
|
||||
]]></screen>
|
||||
<img src="%I" align="left">%L <font color="#900000">%R</font>
|
||||
<b>%T&</b><br>%S
|
||||
<font color="#808080"><i>%U</i></font>
|
||||
<table bgcolor="#e0e0e0">
|
||||
<tr><td><div>%A</div></td></tr>
|
||||
</table>%K
|
||||
]]></screen>
|
||||
</para>
|
||||
|
||||
<para>These samples, and some others are
|
||||
@ -3258,11 +3258,11 @@ MimeType=*/*
|
||||
window.location.href = 'recoll://search/query?qtp=a&p=0&q=' +
|
||||
encodeURIComponent(t);
|
||||
}
|
||||
</script>
|
||||
</script>
|
||||
....
|
||||
<body ondblclick="recollsearch()">
|
||||
<body ondblclick="recollsearch()">
|
||||
|
||||
</programlisting>
|
||||
</programlisting>
|
||||
</sect2>
|
||||
</sect1>
|
||||
|
||||
@ -3303,8 +3303,8 @@ MimeType=*/*
|
||||
<para><command>recollq</command> has a man page (not installed by
|
||||
default, look in the <filename>doc/man</filename> directory). The
|
||||
Usage string is as follows:</para>
|
||||
<programlisting>
|
||||
recollq: usage:
|
||||
<programlisting>
|
||||
recollq: usage:
|
||||
-P: Show the date span for all the documents present in the index
|
||||
[-o|-a|-f] [-q] <query string>
|
||||
Runs a recoll query and displays result lines.
|
||||
@ -3317,7 +3317,7 @@ recollq: usage:
|
||||
-a Emulate the GUI simple search in ALL TERMS mode
|
||||
-f Emulate the GUI simple search in filename mode
|
||||
-q is just ignored (compatibility with the recoll GUI command line)
|
||||
Common options:
|
||||
Common options:
|
||||
-c <configdir> : specify config directory, overriding $RECOLL_CONFDIR
|
||||
-d also dump file contents
|
||||
-n [first-]<cnt> define the result slice. The default value for [first]
|
||||
@ -3338,18 +3338,18 @@ Common options:
|
||||
separated by one space character. This is the recommended format
|
||||
for use by other programs. Use a normal query with option -m to
|
||||
see the field names.
|
||||
</programlisting>
|
||||
</programlisting>
|
||||
|
||||
<para>Sample execution:</para>
|
||||
<programlisting>recollq 'ilur -nautique mime:text/html'
|
||||
Recoll query: ((((ilur:(wqf=11) OR ilurs) AND_NOT (nautique:(wqf=11)
|
||||
<programlisting>recollq 'ilur -nautique mime:text/html'
|
||||
Recoll query: ((((ilur:(wqf=11) OR ilurs) AND_NOT (nautique:(wqf=11)
|
||||
OR nautiques OR nautiqu OR nautiquement)) FILTER Ttext/html))
|
||||
4 results
|
||||
text/html [file:///Users/uncrypted-dockes/projets/bateaux/ilur/comptes.html] [comptes.html] 18593 bytes
|
||||
text/html [file:///Users/uncrypted-dockes/projets/nautique/webnautique/articles/ilur1/index.html] [Constructio...
|
||||
text/html [file:///Users/uncrypted-dockes/projets/pagepers/index.html] [psxtcl/writemime/recoll]...
|
||||
text/html [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/recu-chasse-maree....
|
||||
</programlisting>
|
||||
4 results
|
||||
text/html [file:///Users/uncrypted-dockes/projets/bateaux/ilur/comptes.html] [comptes.html] 18593 bytes
|
||||
text/html [file:///Users/uncrypted-dockes/projets/nautique/webnautique/articles/ilur1/index.html] [Constructio...
|
||||
text/html [file:///Users/uncrypted-dockes/projets/pagepers/index.html] [psxtcl/writemime/recoll]...
|
||||
text/html [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/recu-chasse-maree....
|
||||
</programlisting>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="RCL.SEARCH.SYNONYMS">
|
||||
@ -3380,10 +3380,10 @@ text/html [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/r
|
||||
|
||||
<para>Example:
|
||||
<programlisting>
|
||||
hi hello "good morning"
|
||||
hi hello "good morning"
|
||||
|
||||
# not sure about "au revoir" though. Is this english ?
|
||||
bye goodbye "see you" \
|
||||
# not sure about "au revoir" though. Is this english ?
|
||||
bye goodbye "see you" \
|
||||
"au revoir"
|
||||
</programlisting>
|
||||
</para>
|
||||
@ -3680,7 +3680,7 @@ bye goodbye "see you" \
|
||||
<para>Several <literal>dir</literal> clauses can be specified,
|
||||
both positive and negative. For example the following makes sense:
|
||||
<programlisting>
|
||||
dir:recoll dir:src -dir:utils -dir:common
|
||||
dir:recoll dir:src -dir:utils -dir:common
|
||||
</programlisting> This would select results which have both
|
||||
<filename>recoll</filename> and <filename>src</filename> in the
|
||||
path (in any order), and which have not either
|
||||
@ -4118,6 +4118,88 @@ dir:recoll dir:src -dir:utils -dir:common
|
||||
|
||||
</chapter> <!-- Search -->
|
||||
|
||||
<chapter id="RCL.MOVABLE">
|
||||
<title>Movable datasets</title>
|
||||
|
||||
<para>As of &RCL; 1.24, it has become easy to build self-contained
|
||||
datasets including a &RCL; configuration directory and index together
|
||||
with the indexed documents, and to move such a dataset around (for
|
||||
example copying it to a USB drive), without having to adjust the
|
||||
configuration for querying the index.</para>
|
||||
|
||||
<note><para>This is a query-time feature only. The index must only be
|
||||
updated in its original location. If an update is necessary in a
|
||||
different location, the index must be reset.</para></note>
|
||||
|
||||
<para>The examples below will assume that you have a dataset under
|
||||
<filename>/home/me/mydata/</filename>, with the index configuration and
|
||||
data stored inside
|
||||
<filename>/home/me/mydata/recoll-confdir</filename>.</para>
|
||||
|
||||
<para>In order to be able to run queries after the dataset has been
|
||||
moved, you must ensure the following:
|
||||
<itemizedlist>
|
||||
<listitem><para>The main configuration file must define the <link
|
||||
linkend="RCL.INSTALL.CONFIG.RECOLLCONF.ORGIDXCONFDIR">orgidxconfdir</link>
|
||||
variable to be the original location of the configuration directory
|
||||
(<filename>orgidxconfdir=/home/me/mydata/recoll-confdir</filename>
|
||||
must be set inside
|
||||
<filename>/home/me/mydata/recoll-confdir/recoll.conf</filename> in
|
||||
the example above).</para></listitem>
|
||||
|
||||
<listitem><para>The configuration directory must exist with the
|
||||
documents, somewhere under the directory which will be
|
||||
moved. E.g. if you are moving <filename>/home/me/mydata</filename>
|
||||
around, the configuration directory must exist somewhere below this
|
||||
point, for example
|
||||
<filename>/home/me/mydata/recoll-confdir</filename>, or
|
||||
<filename>/home/me/mydata/sub/recoll-confdir</filename>.</para></listitem>
|
||||
|
||||
<listitem><para>You should keep the default locations for the index
|
||||
elements (they are relative to the configuration directory by
|
||||
default). Only the paths referring to the documents themselves
|
||||
(e.g. <literal>topdirs</literal> values) should be
|
||||
absolute (in general, they are only used when indexing
|
||||
anyway).</para></listitem>
|
||||
|
||||
</itemizedlist>
|
||||
</para>
|
||||
|
||||
<para>Only the first point needs an explicit user action, the &RCL;
|
||||
defaults are compatible with the third one, and the second is
|
||||
natural.</para>
|
||||
|
||||
<para>If, after the move, the configuration directory needs to be
|
||||
copied out of the dataset (for example because the thumb drive is too
|
||||
slow), you can set the <link
|
||||
linkend="RCL.INSTALL.CONFIG.RECOLLCONF.CURIDXCONFDIR">
|
||||
curidxconfdir</link> variable inside the copied configuration to
|
||||
define the location of the moved one. For example if
|
||||
<filename>/home/me/mydata</filename> is now mounted onto
|
||||
<filename>/media/me/somelabel</filename>, but the configuration
|
||||
directory and index have been copied to
|
||||
<filename>/tmp/tempconfig</filename>, you would set
|
||||
<literal>curidxconfdir</literal> to
|
||||
<filename>/media/me/somelabel/recoll-confdir</filename> inside
|
||||
<filename>/tmp/tempconfig/recoll.conf</filename>.
|
||||
<literal>orgidxconfdir</literal> would still be
|
||||
<filename>/home/me/mydata/recoll-confdir</filename> in the original and
|
||||
the copy.</para>
|
||||
|
||||
<para>If you are regularly copying the configuration out of the
|
||||
dataset, it will be useful to write a script to automate the
|
||||
procedure. This can't really be done inside &RCL; because there are
|
||||
probably many possible variants. One example would be to copy the
|
||||
configuration to make it writable, but keep the index data on the
|
||||
medium because it is too big - in this case, the script would also need
|
||||
to set <literal>dbdir</literal> in the copied configuration.</para>
|
||||
|
||||
<para>The same set of modifications (&RCL; 1.24) has also made it
|
||||
possible to run queries from a readonly configuration directory (with
|
||||
slightly reduced function of course, such as not recording the query
|
||||
history).</para>
|
||||
|
||||
</chapter>
|
||||
|
||||
<chapter id="RCL.PROGRAM">
|
||||
<title>Programming interface</title>
|
||||
@ -4329,10 +4411,10 @@ dir:recoll dir:src -dir:utils -dir:common
|
||||
name suffixes. The types are defined inside the
|
||||
<link linkend="RCL.INSTALL.CONFIG.MIMEMAP">
|
||||
<filename>mimemap</filename> file</link>. Example:
|
||||
<programlisting>
|
||||
<programlisting>
|
||||
|
||||
.doc = application/msword
|
||||
</programlisting>
|
||||
.doc = application/msword
|
||||
</programlisting>
|
||||
If no suffix association is found for the file name, &RCL; will try
|
||||
to execute a system command (typically <command>file -i</command> or
|
||||
<command>xdg-mime</command>) to determine a MIME type.</para>
|
||||
@ -4341,18 +4423,18 @@ dir:recoll dir:src -dir:utils -dir:common
|
||||
in the <link linkend="RCL.INSTALL.CONFIG.MIMECONF">
|
||||
<filename>mimeconf</filename> file</link>. A sample will probably be
|
||||
better than a long explanation:</para>
|
||||
<programlisting>
|
||||
<programlisting>
|
||||
|
||||
[index]
|
||||
application/msword = exec antiword -t -i 1 -m UTF-8;\
|
||||
[index]
|
||||
application/msword = exec antiword -t -i 1 -m UTF-8;\
|
||||
mimetype = text/plain ; charset=utf-8
|
||||
|
||||
application/ogg = exec rclogg
|
||||
application/ogg = exec rclogg
|
||||
|
||||
text/rtf = exec unrtf --nopict --html; charset=iso-8859-1; mimetype=text/html
|
||||
text/rtf = exec unrtf --nopict --html; charset=iso-8859-1; mimetype=text/html
|
||||
|
||||
application/x-chm = execm rclchm
|
||||
</programlisting>
|
||||
application/x-chm = execm rclchm
|
||||
</programlisting>
|
||||
|
||||
<para>The fragment specifies that:
|
||||
|
||||
@ -4409,14 +4491,14 @@ application/x-chm = execm rclchm
|
||||
<para>For filters producing HTML, the output could be very minimal
|
||||
like the following example:
|
||||
<programlisting>
|
||||
<html>
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">
|
||||
</head>
|
||||
<body>
|
||||
Some text content
|
||||
</body>
|
||||
</html>
|
||||
</html>
|
||||
</programlisting>
|
||||
</para>
|
||||
|
||||
@ -4460,13 +4542,13 @@ application/x-chm = execm rclchm
|
||||
date (for display and sorting), in preference to the file
|
||||
modification date. The date format should be as follows:
|
||||
<programlisting>
|
||||
<meta name="date" content="YYYY-mm-dd HH:MM:SS">
|
||||
or
|
||||
<meta name="date" content="YYYY-mm-ddTHH:MM:SS">
|
||||
<meta name="date" content="YYYY-mm-dd HH:MM:SS">
|
||||
or
|
||||
<meta name="date" content="YYYY-mm-ddTHH:MM:SS">
|
||||
</programlisting>
|
||||
Example:
|
||||
<programlisting>
|
||||
<meta name="date" content="2013-02-24 17:50:00">
|
||||
<meta name="date" content="2013-02-24 17:50:00">
|
||||
</programlisting>
|
||||
</para>
|
||||
|
||||
@ -4474,8 +4556,8 @@ or
|
||||
names. This should also be output as meta tags:</para>
|
||||
|
||||
<programlisting>
|
||||
<meta name="somefield" content="Some textual data" />
|
||||
</programlisting>
|
||||
<meta name="somefield" content="Some textual data" />
|
||||
</programlisting>
|
||||
|
||||
<para>You can embed HTML markup inside the content of custom
|
||||
fields, for improving the display inside result lists. In this
|
||||
@ -4484,8 +4566,8 @@ or
|
||||
be escaped for display.</para>
|
||||
|
||||
<programlisting>
|
||||
<meta name="somefield" markup="html" content="Some <i>textual</i> data" />
|
||||
</programlisting>
|
||||
<meta name="somefield" markup="html" content="Some <i>textual</i> data" />
|
||||
</programlisting>
|
||||
|
||||
<para>As written above, the processing of fields is described
|
||||
in a <link linkend="RCL.PROGRAM.FIELDS">further
|
||||
@ -4677,17 +4759,17 @@ or
|
||||
features.</para>
|
||||
|
||||
<programlisting><![CDATA[
|
||||
#!/usr/bin/env python
|
||||
#!/usr/bin/env python
|
||||
|
||||
from recoll import recoll
|
||||
from recoll import recoll
|
||||
|
||||
db = recoll.connect()
|
||||
query = db.query()
|
||||
nres = query.execute("some query")
|
||||
results = query.fetchmany(20)
|
||||
for doc in results:
|
||||
db = recoll.connect()
|
||||
query = db.query()
|
||||
nres = query.execute("some query")
|
||||
results = query.fetchmany(20)
|
||||
for doc in results:
|
||||
print(doc.url, doc.title)
|
||||
]]></programlisting>
|
||||
]]></programlisting>
|
||||
|
||||
</sect2>
|
||||
|
||||
@ -5145,12 +5227,12 @@ for doc in results:
|
||||
text/html according to doc.mimetype. The typical use
|
||||
would be as follows:
|
||||
<programlisting>
|
||||
qdoc = query.fetchone()
|
||||
extractor = recoll.Extractor(qdoc)
|
||||
doc = extractor.textextract(qdoc.ipath)
|
||||
# use doc.text, e.g. for previewing
|
||||
</programlisting>
|
||||
</para></listitem>
|
||||
qdoc = query.fetchone()
|
||||
extractor = recoll.Extractor(qdoc)
|
||||
doc = extractor.textextract(qdoc.ipath)
|
||||
# use doc.text, e.g. for previewing
|
||||
</programlisting>
|
||||
</para></listitem>
|
||||
</varlistentry>
|
||||
<varlistentry>
|
||||
<term>Extractor.idoctofile(ipath, targetmtype, outfile='')</term>
|
||||
@ -5158,11 +5240,11 @@ doc = extractor.textextract(qdoc.ipath)
|
||||
which can be given explicitly or will be created as a
|
||||
temporary file to be deleted by the caller. Typical use:
|
||||
<programlisting>
|
||||
qdoc = query.fetchone()
|
||||
extractor = recoll.Extractor(qdoc)
|
||||
filename = extractor.idoctofile(qdoc.ipath, qdoc.mimetype)</programlisting>
|
||||
qdoc = query.fetchone()
|
||||
extractor = recoll.Extractor(qdoc)
|
||||
filename = extractor.idoctofile(qdoc.ipath, qdoc.mimetype)</programlisting>
|
||||
|
||||
</para></listitem>
|
||||
</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
@ -5182,9 +5264,9 @@ filename = extractor.idoctofile(qdoc.ipath, qdoc.mimetype)</programlisting>
|
||||
highlighting and data extraction functions.</para>
|
||||
|
||||
<programlisting>
|
||||
#!/usr/bin/env python
|
||||
<![CDATA[
|
||||
from recoll import recoll
|
||||
#!/usr/bin/env python
|
||||
<![CDATA[
|
||||
from recoll import recoll
|
||||
|
||||
db = recoll.connect()
|
||||
db.setAbstractParams(maxchars=80, contextwords=4)
|
||||
@ -5193,18 +5275,18 @@ query = db.query()
|
||||
nres = query.execute("some user question")
|
||||
print "Result count: ", nres
|
||||
if nres > 5:
|
||||
nres = 5
|
||||
nres = 5
|
||||
for i in range(nres):
|
||||
doc = query.fetchone()
|
||||
print "Result #%d" % (query.rownumber,)
|
||||
for k in ("title", "size"):
|
||||
print k, ":", getattr(doc, k).encode('utf-8')
|
||||
abs = db.makeDocAbstract(doc, query).encode('utf-8')
|
||||
print abs
|
||||
print
|
||||
doc = query.fetchone()
|
||||
print "Result #%d" % (query.rownumber,)
|
||||
for k in ("title", "size"):
|
||||
print k, ":", getattr(doc, k).encode('utf-8')
|
||||
abs = db.makeDocAbstract(doc, query).encode('utf-8')
|
||||
print abs
|
||||
print
|
||||
|
||||
]]>
|
||||
</programlisting>
|
||||
]]>
|
||||
</programlisting>
|
||||
|
||||
</sect3>
|
||||
</sect2>
|
||||
@ -5348,8 +5430,8 @@ for i in range(nres):
|
||||
indexing sample found in the Recoll source (which sets
|
||||
<literal>rclbes="MBOX"</literal>):</para>
|
||||
<programlisting>[MBOX]
|
||||
fetch = /path/to/recoll/src/python/samples/rclmbox.py fetch
|
||||
makesig = path/to/recoll/src/python/samples/rclmbox.py makesig
|
||||
fetch = /path/to/recoll/src/python/samples/rclmbox.py fetch
|
||||
makesig = path/to/recoll/src/python/samples/rclmbox.py makesig
|
||||
</programlisting>
|
||||
<para><literal>fetch</literal> and <literal>makesig</literal>
|
||||
define two commands to execute to respectively retrieve the
|
||||
@ -5390,15 +5472,15 @@ makesig = path/to/recoll/src/python/samples/rclmbox.py makesig
|
||||
|
||||
<para>Adapting to the new package structure:</para>
|
||||
<programlisting>
|
||||
<![CDATA[
|
||||
try:
|
||||
<![CDATA[
|
||||
try:
|
||||
from recoll import recoll
|
||||
from recoll import rclextract
|
||||
hasextract = True
|
||||
except:
|
||||
except:
|
||||
import recoll
|
||||
hasextract = False
|
||||
]]>
|
||||
]]>
|
||||
</programlisting>
|
||||
|
||||
<para>Adapting to the change of nature of
|
||||
@ -5408,10 +5490,10 @@ except:
|
||||
the <literal>next</literal> value (old).</para>
|
||||
|
||||
<programlisting>
|
||||
<![CDATA[
|
||||
<![CDATA[
|
||||
rownum = query.next if type(query.next) == int else \
|
||||
query.rownumber
|
||||
]]>
|
||||
]]>
|
||||
</programlisting>
|
||||
|
||||
</sect2> <!-- compat with previous version -->
|
||||
@ -5719,7 +5801,8 @@ except:
|
||||
very much welcome patches</ulink>.</para>
|
||||
|
||||
|
||||
<formalpara><title>Configure options:</title>
|
||||
<formalpara>
|
||||
<title>Configure options:</title>
|
||||
<para>
|
||||
<itemizedlist>
|
||||
|
||||
@ -5983,9 +6066,9 @@ except:
|
||||
character. Long lines can be continued by escaping the
|
||||
physical newline with backslash, even inside quoted strings.</para>
|
||||
<programlisting>
|
||||
astringlist = "some string \
|
||||
with spaces"
|
||||
thesame = "some string with spaces"
|
||||
astringlist = "some string \
|
||||
with spaces"
|
||||
thesame = "some string with spaces"
|
||||
</programlisting>
|
||||
|
||||
<para>Parameters which are not part of string lists can't be
|
||||
@ -6166,25 +6249,25 @@ thesame = "some string with spaces"
|
||||
only plain ASCII headers can be indexed, and only the
|
||||
first occurrence will be used for headers that occur several times).
|
||||
|
||||
<programlisting>[prefixes]
|
||||
# Index mailmytag contents (with the given prefix)
|
||||
mailmytag = XMTAG
|
||||
<programlisting>[prefixes]
|
||||
# Index mailmytag contents (with the given prefix)
|
||||
mailmytag = XMTAG
|
||||
|
||||
[stored]
|
||||
# Store mailmytag inside the document data record (so that it can be
|
||||
# displayed - as %(mailmytag) - in result lists).
|
||||
mailmytag =
|
||||
[stored]
|
||||
# Store mailmytag inside the document data record (so that it can be
|
||||
# displayed - as %(mailmytag) - in result lists).
|
||||
mailmytag =
|
||||
|
||||
[queryaliases]
|
||||
filename = fn
|
||||
containerfilename = cfn
|
||||
[queryaliases]
|
||||
filename = fn
|
||||
containerfilename = cfn
|
||||
|
||||
[mail]
|
||||
# Extract the X-My-Tag mail header, and use it internally with the
|
||||
# mailmytag field name
|
||||
x-my-tag = mailmytag
|
||||
</programlisting>
|
||||
</para>
|
||||
[mail]
|
||||
# Extract the X-My-Tag mail header, and use it internally with the
|
||||
# mailmytag field name
|
||||
x-my-tag = mailmytag
|
||||
</programlisting>
|
||||
</para>
|
||||
|
||||
|
||||
<sect3 id="RCL.INSTALL.CONFIG.FIELDS.XATTR">
|
||||
@ -6231,7 +6314,7 @@ x-my-tag = mailmytag
|
||||
should be handled specially, which is possible because they
|
||||
are usually all located in one place. Example:
|
||||
<programlisting>[~/.kde/share/apps/okular/docdata]
|
||||
.xml = application/x-okular-notes</programlisting></para>
|
||||
.xml = application/x-okular-notes</programlisting></para>
|
||||
|
||||
<para>The <varname>recoll_noindex</varname>
|
||||
<filename>mimemap</filename> variable has been moved to
|
||||
@ -6305,7 +6388,7 @@ x-my-tag = mailmytag
|
||||
application tag to specialize the choice for an area of the
|
||||
filesystem (using a <varname>localfields</varname> specification
|
||||
in <filename>mimeconf</filename>). The syntax for the key is
|
||||
<replaceable>mimetype</replaceable><literal>|</literal><replaceable>tag</replaceable></para>
|
||||
<replaceable>mimetype</replaceable><literal>|</literal><replaceable>tag</replaceable></para>
|
||||
|
||||
<para>The <varname>nouncompforviewmts</varname> entry (placed at
|
||||
the top level, outside of the <literal>[view]</literal> section),
|
||||
@ -6415,8 +6498,8 @@ x-my-tag = mailmytag
|
||||
<listitem><para>In <filename>$RECOLL_CONFDIR/mimemap</filename>
|
||||
(typically <filename>~/.recoll/mimemap</filename>), add the
|
||||
following line:<programlisting>
|
||||
.blob = application/x-blobapp
|
||||
</programlisting>
|
||||
.blob = application/x-blobapp
|
||||
</programlisting>
|
||||
Note that the MIME type is made up here, and you could
|
||||
call it <replaceable>diesel/oil</replaceable> just the
|
||||
same.</para>
|
||||
@ -6424,8 +6507,8 @@ x-my-tag = mailmytag
|
||||
<listitem><para>In <filename>$RECOLL_CONFDIR/mimeview</filename>
|
||||
under the <literal>[view]</literal> section, add:</para>
|
||||
<programlisting>
|
||||
application/x-blobapp = blobviewer %f
|
||||
</programlisting>
|
||||
application/x-blobapp = blobviewer %f
|
||||
</programlisting>
|
||||
<para>We are supposing
|
||||
that <replaceable>blobviewer</replaceable> wants a file
|
||||
name parameter here; you would use <literal>%u</literal> if
|
||||
@ -6458,8 +6541,8 @@ application/x-blobapp = blobviewer %f
|
||||
section, add the following line (more about the
|
||||
<replaceable>rclblob</replaceable> indexing script
|
||||
later):<programlisting>
|
||||
application/x-blobapp = exec rclblob
|
||||
</programlisting></para>
|
||||
application/x-blobapp = exec rclblob
|
||||
</programlisting></para>
|
||||
</listitem>
|
||||
<listitem><para>Under the <literal>[icons]</literal>
|
||||
section, you should choose an icon to be displayed for the
|
||||
@ -6489,4 +6572,3 @@ application/x-blobapp = exec rclblob
|
||||
</sect1>
|
||||
</chapter>
|
||||
</book>
|
||||
|
||||
|
||||
@ -571,6 +571,31 @@ logfilename = stderr
|
||||
# the log... values.</descr></var>
|
||||
#daemlogfilename = /dev/null
|
||||
|
||||
# <var name="orgidxconfdir" type="dfn">
|
||||
#
|
||||
# <brief>Original location of the configuration directory.</brief>
|
||||
# <descr>This is used exclusively for movable datasets. Locating the
|
||||
# configuration directory inside the directory tree makes it possible to
|
||||
# provide automatic query-time path translations once the dataset has
|
||||
# moved (for example, because it has been mounted on another
|
||||
# location).</descr></var>
|
||||
#orgidxconfdir =
|
||||
|
||||
# <var name="curidxconfdir" type="dfn">
|
||||
#
|
||||
# <brief>Current location of the configuration directory.</brief>
|
||||
# <descr>Complements orgidxconfdir for movable datasets. This should be used
|
||||
# if the configuration directory has been copied from the dataset to
|
||||
# another location, either because the dataset is read-only and an r/w copy
|
||||
# is desired, or for performance reasons. This records the original moved
|
||||
# location before copy, to allow path translation computations. For
|
||||
# example, if a dataset originally indexed with its configuration in '/home/me/mydata/config' has
|
||||
# been mounted to '/media/me/mydata', and the GUI is running from a copied
|
||||
# configuration, orgidxconfdir would be '/home/me/mydata/config', and
|
||||
# curidxconfdir (as set in the copied configuration) would be
|
||||
# '/media/me/mydata/config'.</descr></var>
|
||||
#curidxconfdir =
|
||||
|
||||
# <var name="idxrundir" type="dfn">
|
||||
#
|
||||
# <brief>Indexing process current directory.</brief> <descr>The input
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user