Movable datasets support

2017-12-06 11:34:04 +01:00 · 2017-12-06 11:34:04 +01:00 · 09acb5687c
commit 09acb5687c
parent 329ab7b90d
5 changed files with 4611 additions and 4270 deletions
--- a/src/common/rclconfig.cpp
+++ b/src/common/rclconfig.cpp
@ -1318,17 +1318,85 @@ string RclConfig::getPidfile() const
    return path_cat(getCacheDir(), "index.pid");
 }
 /* Strip the trailing components which paths p1 and p2 have in common
  * and hand back what remains of each in r1 and r2. Example:
  * /mnt1/common/part /mnt2/common/part -> /mnt1 /mnt2. The result is
  * used to compute path translations after a dataset has been moved.
  * Both outputs are cleared first; each remaining component is appended
  * to its output prefixed with "/".
  * Returns an empty string on success, else an explanatory message
  * (set when the paths share no trailing component or are empty).
  * Splitting into vectors is not the cheapest way to do this, but the
  * cost does not matter here. */
 static string path_diffstems(const string& p1, const string& p2,
                             string& r1, string& r2)
 {
    r1.clear();
    r2.clear();

    vector<string> elts1, elts2;
    stringToTokens(p1, elts1, "/");
    stringToTokens(p2, elts2, "/");
    const unsigned int cnt1 = elts1.size();
    const unsigned int cnt2 = elts2.size();

    // Walk both component lists from the end for as long as they agree
    unsigned int ncommon = 0;
    while (ncommon < cnt1 && ncommon < cnt2 &&
           elts1[cnt1 - 1 - ncommon] == elts2[cnt2 - 1 - ncommon]) {
        ncommon++;
    }
    if (ncommon == 0) {
        return "Input paths are empty or have no common part";
    }

    // Rebuild the differing leading parts
    for (unsigned int idx = 0; idx + ncommon < cnt1; idx++) {
        r1 += "/" + elts1[idx];
    }
    for (unsigned int idx = 0; idx + ncommon < cnt2; idx++) {
        r2 += "/" + elts2[idx];
    }
    return string();
 }
 void RclConfig::urlrewrite(const string& dbdir, string& url) const
 {
-    LOGDEB2("RclConfig::urlrewrite: dbdir [" << dbdir << "] url [" << url <<
+    LOGDEB("RclConfig::urlrewrite: dbdir [" << dbdir << "] url [" << url <<
            "]\n");
    // If orgidxconfdir is set, we assume that this index is for a
    // movable dataset, with the configuration directory stored inside
    // the dataset tree. This allows computing automatic path
    // translations if the dataset has been moved.
    string orig_confdir;
    string cur_confdir;
    string confstemorg, confstemrep;
    if (m_conf->get("orgidxconfdir", orig_confdir, "")) {
        if (!m_conf->get("curidxconfdir", cur_confdir, "")) {
            cur_confdir = m_confdir;
        }
        LOGDEB("RclConfig::urlrewrite: orgidxconfdir: " << orig_confdir <<
               " cur_confdir " << cur_confdir << endl);
        string reason = path_diffstems(orig_confdir, cur_confdir,
                                       confstemorg, confstemrep);
        if (!reason.empty()) {
            LOGERR("urlrewrite: path_diffstems failed: " << reason <<
                   " : orig_confdir [" << orig_confdir <<
                   "] cur_confdir [" << cur_confdir << endl);
            confstemorg = confstemrep = "";
        }
    }
    // Do path translations exist for this index ?
    bool needptrans = true;
    if (m_ptrans == 0 || !m_ptrans->hasSubKey(dbdir)) {
 	LOGDEB2("RclConfig::urlrewrite: no paths translations (m_ptrans " <<
                m_ptrans << ")\n");
        needptrans = false;
    }
    if (!needptrans && confstemorg.empty()) {
        return;
    }
    bool computeurl = false;
    string path = fileurltolocalpath(url);
    if (path.empty()) {
@ -1336,21 +1404,33 @@ void RclConfig::urlrewrite(const string& dbdir, string& url) const
 	return;
    }
    // Do the movable volume thing.
    if (!confstemorg.empty() && confstemorg.size() <= path.size() &&
        !path.compare(0, confstemorg.size(), confstemorg)) {
        path = path.replace(0, confstemorg.size(), confstemrep);
        computeurl = true;
    }
    if (needptrans) {
        // For each translation check if the prefix matches the input path,
        // replace and return the result if it does.
        vector<string> opaths = m_ptrans->getNames(dbdir);
-    for (vector<string>::const_iterator it = opaths.begin(); 
+        for (const auto& opath: opaths) {
-	 it != opaths.end(); it++) {
+            if (opath.size() <= path.size() &&
-	if (it->size() <= path.size() && !path.compare(0, it->size(), *it)) {
+                !path.compare(0, opath.size(), opath)) {
                string npath;
-	    // This call always succeeds because the key comes from getNames()
+                // Key comes from getNames()=> call must succeed
-	    if (m_ptrans->get(*it, npath, dbdir)) { 
+                if (m_ptrans->get(opath, npath, dbdir)) { 
-		path = path.replace(0, it->size(), npath);
+                    path = path.replace(0, opath.size(), npath);
-		url = path_pathtofileurl(path);
+                    computeurl = true;
                }
                break;
            }
        }
    }
    if (computeurl) {
        url = path_pathtofileurl(path);
    }
 }
 bool RclConfig::sourceChanged() const
--- a/src/doc/user/recoll.conf.xml
+++ b/src/doc/user/recoll.conf.xml
@ -471,6 +471,25 @@ the log... values.</para></listitem></varlistentry>
 <listitem><para>Override logfilename for the indexer in real time
 mode. The default is to use the idx... values if set, else
 the log... values.</para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.ORGIDXCONFDIR">
 <term><varname>orgidxconfdir</varname></term>
 <listitem><para>Original location of the configuration directory. This is used exclusively for movable datasets. Locating the
 configuration directory inside the directory tree makes it possible to
 provide automatic query time path translations once the data set has
 moved (for example, because it has been mounted on another
 location).</para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.CURIDXCONFDIR">
 <term><varname>curidxconfdir</varname></term>
 <listitem><para>Current location of the configuration directory. Complements orgidxconfdir for movable datasets. This should be used
 if the configuration directory has been copied from the dataset to
 another location, either because the dataset is readonly and an r/w copy
 is desired, or for performance reasons. This records the original moved
 location before copy, to allow path translation computations.  For
 example if a dataset originally indexed as '/home/me/mydata/config' has
 been mounted to '/media/me/mydata', and the GUI is running from a copied
 configuration, orgidxconfdir would be '/home/me/mydata/config', and
 curidxconfdir (as set in the copied configuration) would be
 '/media/me/mydata/config'.</para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.IDXRUNDIR">
 <term><varname>idxrundir</varname></term>
 <listitem><para>Indexing process current directory. The input
--- a/src/doc/user/usermanual.html
+++ b/src/doc/user/usermanual.html
--- a/src/doc/user/usermanual.xml
+++ b/src/doc/user/usermanual.xml
@ -498,12 +498,12 @@
        indexed (no others will be indexed), by setting 
        the <link linkend="RCL.INSTALL.CONFIG.RECOLLCONF.INDEXEDMIMETYPES">
        indexedmimetypes</link> configuration variable. Example:<programlisting>
-indexedmimetypes = text/html application/pdf
+        indexedmimetypes = text/html application/pdf
      </programlisting>
      It is possible to redefine this parameter for
      subdirectories. Example:<programlisting>
-[/path/to/my/dir]
+      [/path/to/my/dir]
-indexedmimetypes = application/pdf
+      indexedmimetypes = application/pdf
    </programlisting>
    (When using sections like this, don't forget that they remain
    in effect until the end of the file or another section
@ -920,10 +920,10 @@ indexedmimetypes = application/pdf
        processing their text, and one to update the index. This was
        tested to be the best configuration on the test system
        (quadri-processor with multiple disks).
-<programlisting>
+        <programlisting>
-thrQSizes = 2 2 2
+          thrQSizes = 2 2 2
-thrTCounts =  4 2 1
+          thrTCounts =  4 2 1
-</programlisting>
+        </programlisting>
        </para>
        <para>The following example would use a single queue, and the
@ -936,18 +936,18 @@ thrTCounts =  4 2 1
        would be performed purely sequentially), so the previous
        approach is preferred. YMMV...  The 2 last values for
        thrTCounts are ignored.
-<programlisting>
+        <programlisting>
-thrQSizes = 2 -1 -1
+          thrQSizes = 2 -1 -1
-thrTCounts =  6 1 1
+          thrTCounts =  6 1 1
-</programlisting>
+        </programlisting>
        </para>
        <para>The following example would disable
        multithreading. Indexing will be performed by a single
        thread.
-<programlisting>
+        <programlisting>
-thrQSizes = -1 -1 -1
+          thrQSizes = -1 -1 -1
-</programlisting>
+        </programlisting>
        </para>
      </sect2>
@ -1113,7 +1113,7 @@ thrQSizes = -1 -1 -1
      configuration file:</para>
      <programlisting>[/some/area/of/the/fs]
-metadatacmds = ; tags = tmsu tags %f
+      metadatacmds = ; tags = tmsu tags %f
      </programlisting>
      <note><para>Depending on the <application>tmsu</application> version,
@ -1154,7 +1154,7 @@ metadatacmds = ; tags = tmsu tags %f
      couple the tag update with a <literal>recollindex -e -i
      filename.</literal></para>
-</sect1>
+    </sect1>
    <sect1 id="RCL.INDEXING.PDF">
@ -1216,9 +1216,9 @@ metadatacmds = ; tags = tmsu tags %f
        the metadata fields (available for &RCL; 1.23.3 and later. 1.23.2
        has equivalent code inside the handler script). Example:</para>
        <programlisting>import sys
-import re
+        import re
-class MetaFixer(object):
+        class MetaFixer(object):
        def __init__(self):
        pass
@ -1367,13 +1367,13 @@ class MetaFixer(object):
        PATH):
        <screen><![CDATA[
-30 3 * * * recollindex > /some/tmp/dir/recolltrace 2>&1
+        30 3 * * * recollindex > /some/tmp/dir/recolltrace 2>&1
-]]></screen>
+        ]]></screen>
        Or, using <command>anacron</command>:
-<screen><![CDATA[
+        <screen><![CDATA[
-1  15  su mylogin -c "recollindex recollindex > /tmp/rcltraceme 2>&1"
+        1  15  su mylogin -c "recollindex recollindex > /tmp/rcltraceme 2>&1"
-]]></screen>
+        ]]></screen>
        </para>
        <para>As of version 1.17 the &RCL; GUI has dialogs to manage
@ -1435,12 +1435,12 @@ class MetaFixer(object):
      at the end:</para>
      <programlisting>recollconf=$HOME/.recoll-home
-recolldata=/usr/local/share/recoll
+      recolldata=/usr/local/share/recoll
-RECOLL_CONFDIR=$recollconf $recolldata/examples/rclmon.sh start
+      RECOLL_CONFDIR=$recollconf $recolldata/examples/rclmon.sh start
-fvwm 
+      fvwm 
-</programlisting>
+      </programlisting>
      <para>The indexing daemon gets started, then the window manager,
      for which the session waits.</para> <para>By default the
@ -1487,17 +1487,17 @@ fvwm
      increasing the resources available to inotify, which are
      normally defined in <filename>/etc/sysctl.conf</filename>.
      <programlisting>
-### inotify
+        ### inotify
-#
+        #
-# cat  /proc/sys/fs/inotify/max_queued_events   - 16384
+        # cat  /proc/sys/fs/inotify/max_queued_events   - 16384
-# cat  /proc/sys/fs/inotify/max_user_instances  - 128
+        # cat  /proc/sys/fs/inotify/max_user_instances  - 128
-# cat  /proc/sys/fs/inotify/max_user_watches    - 16384
+        # cat  /proc/sys/fs/inotify/max_user_watches    - 16384
-#
+        #
-# -- Change to:
+        # -- Change to:
-#
+        #
-fs.inotify.max_queued_events=32768
+        fs.inotify.max_queued_events=32768
-fs.inotify.max_user_instances=256
+        fs.inotify.max_user_instances=256
-fs.inotify.max_user_watches=32768
+        fs.inotify.max_user_watches=32768
      </programlisting>
      </para>
@ -1915,11 +1915,11 @@ fs.inotify.max_user_watches=32768
        <filename>~/.recoll/scripts/myscript.desktop</filename> (the exact
        file name inside the directory is irrelevant):
        <programlisting>
-[Desktop Entry]
+          [Desktop Entry]
-Type=Application
+          Type=Application
-Name=MyFirstScript
+          Name=MyFirstScript
-Exec=/home/me/bin/tryscript %F
+          Exec=/home/me/bin/tryscript %F
-MimeType=*/*
+          MimeType=*/*
        </programlisting>
        The <literal>Name</literal> attribute defines the label which will
        appear inside the <guilabel>Run Script</guilabel> menu. The
@ -2084,10 +2084,10 @@ MimeType=*/*
        history.</para>
        <para>Here follows an example:
-<programlisting>
+        <programlisting>
-&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot;?&gt;
+          &lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot;?&gt;
-&lt;fragbuts version=&quot;1.0&quot;&gt;
+          &lt;fragbuts version=&quot;1.0&quot;&gt;
          &lt;radiobuttons&gt;
@ -2121,8 +2121,8 @@ MimeType=*/*
          &lt;/fragbut&gt;
          &lt;/buttons&gt;
-&lt;/fragbuts&gt;
+          &lt;/fragbuts&gt;
-</programlisting>
+        </programlisting>
        </para>
        <para>Each <literal>radiobuttons</literal> or
@ -3162,27 +3162,27 @@ MimeType=*/*
            "<span style='white-space:nowrap'><i>%M</i>&nbsp;%D</span>&nbsp;&nbsp;&nbsp; <i>%U</i>&nbsp;%i<br>\n"
            "%A %K</td>\n"
            "</tr></table>\n"
-]]></screen>
+            ]]></screen>
            You may, for example, try the following for a more web-like
            experience:
            <screen><![CDATA[
-<u><b><a href="P%N">%T</a></b></u><br>
+            <u><b><a href="P%N">%T</a></b></u><br>
-%A<font color=#008000>%U - %S</font> - %L
+            %A<font color=#008000>%U - %S</font> - %L
-]]></screen>
+            ]]></screen>
            Note that the P%N link in the above paragraph makes the title a
            preview link. Or the clean looking:
            <screen><![CDATA[
-<img src="%I" align="left">%L <font color="#900000">%R</font>
+            <img src="%I" align="left">%L <font color="#900000">%R</font>
-&nbsp;&nbsp;<b>%T&</b><br>%S&nbsp;
+            &nbsp;&nbsp;<b>%T&</b><br>%S&nbsp;
-<font color="#808080"><i>%U</i></font>
+            <font color="#808080"><i>%U</i></font>
-<table bgcolor="#e0e0e0">
+            <table bgcolor="#e0e0e0">
-<tr><td><div>%A</div></td></tr>
+            <tr><td><div>%A</div></td></tr>
-</table>%K
+            </table>%K
-]]></screen>
+            ]]></screen>
            </para>
            <para>These samples, and some others are 
@ -3258,11 +3258,11 @@ MimeType=*/*
        window.location.href = 'recoll://search/query?qtp=a&amp;p=0&amp;q=' +
        encodeURIComponent(t);
        }
-&lt;/script>
+        &lt;/script>
        ....
-&lt;body ondblclick="recollsearch()">
+        &lt;body ondblclick="recollsearch()">
-</programlisting>
+        </programlisting>
      </sect2>
    </sect1>
@ -3303,8 +3303,8 @@ MimeType=*/*
      <para><command>recollq</command> has a man page (not installed by
      default, look in the <filename>doc/man</filename> directory). The
      Usage string is as follows:</para>
-<programlisting>
+      <programlisting>
-recollq: usage:
+        recollq: usage:
        -P: Show the date span for all the documents present in the index
        [-o|-a|-f] [-q] &lt;query string&gt;
        Runs a recoll query and displays result lines. 
@ -3317,7 +3317,7 @@ recollq: usage:
        -a Emulate the GUI simple search in ALL TERMS mode
        -f Emulate the GUI simple search in filename mode
        -q is just ignored (compatibility with the recoll GUI command line)
-Common options:
+        Common options:
        -c &lt;configdir&gt; : specify config directory, overriding $RECOLL_CONFDIR
        -d also dump file contents
        -n [first-]&lt;cnt&gt; define the result slice. The default value for [first]
@ -3338,18 +3338,18 @@ Common options:
        separated by one space character. This is the recommended format 
        for use by other programs. Use a normal query with option -m to 
        see the field names.
-</programlisting>
+      </programlisting>
      <para>Sample execution:</para>
-<programlisting>recollq 'ilur -nautique mime:text/html'
+      <programlisting>recollq 'ilur -nautique mime:text/html'
-Recoll query: ((((ilur:(wqf=11) OR ilurs) AND_NOT (nautique:(wqf=11)
+      Recoll query: ((((ilur:(wqf=11) OR ilurs) AND_NOT (nautique:(wqf=11)
      OR nautiques OR nautiqu OR nautiquement)) FILTER Ttext/html))
-4 results
+      4 results
-text/html       [file:///Users/uncrypted-dockes/projets/bateaux/ilur/comptes.html]      [comptes.html]  18593   bytes   
+      text/html       [file:///Users/uncrypted-dockes/projets/bateaux/ilur/comptes.html]      [comptes.html]  18593   bytes   
-text/html       [file:///Users/uncrypted-dockes/projets/nautique/webnautique/articles/ilur1/index.html] [Constructio...
+      text/html       [file:///Users/uncrypted-dockes/projets/nautique/webnautique/articles/ilur1/index.html] [Constructio...
-text/html       [file:///Users/uncrypted-dockes/projets/pagepers/index.html]    [psxtcl/writemime/recoll]...
+      text/html       [file:///Users/uncrypted-dockes/projets/pagepers/index.html]    [psxtcl/writemime/recoll]...
-text/html       [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/recu-chasse-maree....
+      text/html       [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/recu-chasse-maree....
-</programlisting>
+      </programlisting>
    </sect1>
    <sect1 id="RCL.SEARCH.SYNONYMS">
@ -3380,10 +3380,10 @@ text/html       [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/r
      <para>Example:
      <programlisting>
-hi hello "good morning"
+        hi hello "good morning"
-# not sure about "au revoir" though. Is this english ?
+        # not sure about "au revoir" though. Is this english ?
-bye goodbye "see you" \
+        bye goodbye "see you" \
        "au revoir" 
      </programlisting>
      </para>
@ -3680,7 +3680,7 @@ bye goodbye "see you" \
        <para>Several <literal>dir</literal> clauses can be specified,
        both positive and negative. For example the following makes sense:
        <programlisting>
-dir:recoll dir:src -dir:utils -dir:common
+          dir:recoll dir:src -dir:utils -dir:common
          </programlisting> This would select results which have both
          <filename>recoll</filename> and <filename>src</filename> in the
          path (in any order), and which have not either
@ -4118,6 +4118,88 @@ dir:recoll dir:src -dir:utils -dir:common
  </chapter> <!-- Search -->
  <chapter id="RCL.MOVABLE">
    <title>Movable datasets</title>
    <para>As of &RCL; 1.24, it has become easy to build self-contained
    datasets including a &RCL; configuration directory and index together
    with the indexed documents, and to move such a dataset around (for
    example copying it to a USB drive), without having to adjust the
    configuration for querying the index.</para>
    <note><para>This is a query-time feature only. The index must only be
    updated in its original location. If an update is necessary in a
    different location, the index must be reset.</para></note>
    <para>The examples below will assume that you have a dataset under
    <filename>/home/me/mydata/</filename>, with the index configuration and
    data stored inside
    <filename>/home/me/mydata/recoll-confdir</filename>.</para> 
    <para>In order to be able to run queries after the dataset has been
    moved, you must ensure the following:
    <itemizedlist>
      <listitem><para>The main configuration file must define the <link
      linkend="RCL.INSTALL.CONFIG.RECOLLCONF.ORGIDXCONFDIR">orgidxconfdir</link>
      variable to be the original location of the configuration directory
      (<filename>orgidxconfdir=/home/me/mydata/recoll-confdir</filename>
      must be set inside
      <filename>/home/me/mydata/recoll-confdir/recoll.conf</filename> in
      the example above).</para></listitem>
      <listitem><para>The configuration directory must exist with the
      documents, somewhere under the directory which will be
      moved. E.g. if you are moving <filename>/home/me/mydata</filename>
      around, the configuration directory must exist somewhere below this
      point, for example
      <filename>/home/me/mydata/recoll-confdir</filename>, or
      <filename>/home/me/mydata/sub/recoll-confdir</filename>.</para></listitem>
      <listitem><para>You should keep the default locations for the index
      elements (they are relative to the configuration directory by
      default). Only the paths referring to the documents themselves
      (e.g. <literal>topdirs</literal> values) should be
      absolute (in general, they are only used when indexing
      anyway).</para></listitem> 
    </itemizedlist>
    </para>
    <para>Only the first point needs an explicit user action, the &RCL;
    defaults are compatible with the second one, and the third is
    natural.</para>
    <para>If, after the move, the configuration directory needs to be
    copied out of the dataset (for example because the thumb drive is too
    slow), you can set the <link
    linkend="RCL.INSTALL.CONFIG.RECOLLCONF.CURIDXCONFDIR">
    curidxconfdir</link> variable inside the copied configuration to
    define the location of the moved one. For example if
    <filename>/home/me/mydata</filename> is now mounted onto
    <filename>/media/me/somelabel</filename>, but the configuration
    directory and index have been copied to
    <filename>/tmp/tempconfig</filename>, you would set
    <literal>curidxconfdir</literal> to
    <filename>/media/me/somelabel/recoll-confdir</filename> inside
    <filename>/tmp/tempconfig/recoll.conf</filename>.
    <literal>orgidxconfdir</literal> would still be
    <filename>/home/me/mydata/recoll-confdir</filename> in the original and
    the copy.</para>
    <para>If you are regularly copying the configuration out of the
    dataset, it will be useful to write a script to automate the
    procedure. This can't really be done inside &RCL; because there are
    probably many possible variants. One example would be to copy the
    configuration to make it writable, but keep the index data on the
    medium because it is too big - in this case, the script would also need
    to set <literal>dbdir</literal> in the copied configuration.</para>
    <para>The same set of modifications (&RCL; 1.24) has also made it
    possible to run queries from a readonly configuration directory (with
    slightly reduced function of course, such as not recording the query
    history).</para>
  </chapter>
  <chapter id="RCL.PROGRAM">
    <title>Programming interface</title>
@ -4329,10 +4411,10 @@ dir:recoll dir:src -dir:utils -dir:common
        name suffixes. The types are defined inside the
        <link linkend="RCL.INSTALL.CONFIG.MIMEMAP">
          <filename>mimemap</filename> file</link>. Example:
-<programlisting>
+          <programlisting>
-.doc = application/msword
+            .doc = application/msword
-</programlisting>
+          </programlisting>
          If no suffix association is found for the file name, &RCL; will try
          to execute a system command (typically <command>file -i</command> or
        <command>xdg-mime</command>) to determine a MIME type.</para>
@ -4341,18 +4423,18 @@ dir:recoll dir:src -dir:utils -dir:common
        in the <link linkend="RCL.INSTALL.CONFIG.MIMECONF">
        <filename>mimeconf</filename> file</link>. A sample will probably be
        better than a long explanation:</para>
-<programlisting>
+        <programlisting>
-[index]
+          [index]
-application/msword = exec antiword -t -i 1 -m UTF-8;\
+          application/msword = exec antiword -t -i 1 -m UTF-8;\
          mimetype = text/plain ; charset=utf-8
-application/ogg = exec rclogg
+          application/ogg = exec rclogg
-text/rtf = exec unrtf --nopict --html; charset=iso-8859-1; mimetype=text/html
+          text/rtf = exec unrtf --nopict --html; charset=iso-8859-1; mimetype=text/html
-application/x-chm = execm rclchm
+          application/x-chm = execm rclchm
-</programlisting>
+        </programlisting>
        <para>The fragment specifies that:
@ -4409,14 +4491,14 @@ application/x-chm = execm rclchm
        <para>For filters producing HTML, the output could be very minimal
        like the following example:
        <programlisting>
-&lt;html>
+          &lt;html>
          &lt;head>
          &lt;meta http-equiv="Content-Type" content="text/html;charset=UTF-8">
          &lt;/head>
          &lt;body>
          Some text content
          &lt;/body>
-&lt;/html>
+          &lt;/html>
        </programlisting>
        </para>
@ -4460,13 +4542,13 @@ application/x-chm = execm rclchm
        date (for display and sorting), in preference to the file
        modification date. The date format should be as follows:
        <programlisting>
-&lt;meta name="date" content="YYYY-mm-dd HH:MM:SS">
+          &lt;meta name="date" content="YYYY-mm-dd HH:MM:SS">
-or
+          or
-&lt;meta name="date" content="YYYY-mm-ddTHH:MM:SS">
+          &lt;meta name="date" content="YYYY-mm-ddTHH:MM:SS">
        </programlisting>
        Example:
        <programlisting>
-&lt;meta name="date" content="2013-02-24 17:50:00">
+          &lt;meta name="date" content="2013-02-24 17:50:00">
        </programlisting>
        </para>
@ -4474,8 +4556,8 @@ or
        names. This should also be output as meta tags:</para>
        <programlisting>
-&lt;meta name="somefield" content="Some textual data" /&gt;
+          &lt;meta name="somefield" content="Some textual data" /&gt;
-</programlisting>
+        </programlisting>
        <para>You can embed HTML markup inside the content of custom
        fields, for improving the display inside result lists. In this
@ -4484,8 +4566,8 @@ or
        be escaped for display.</para>
        <programlisting>
-&lt;meta name="somefield" markup="html" content="Some &lt;i>textual&lt;/i> data" /&gt;
+          &lt;meta name="somefield" markup="html" content="Some &lt;i>textual&lt;/i> data" /&gt;
-</programlisting>
+        </programlisting>
        <para>As written above, the processing of fields is described
        in a <link linkend="RCL.PROGRAM.FIELDS">further
@ -4677,17 +4759,17 @@ or
        features.</para>
        <programlisting><![CDATA[
-#!/usr/bin/env python
+        #!/usr/bin/env python
-from recoll import recoll
+        from recoll import recoll
-db = recoll.connect()
+        db = recoll.connect()
-query = db.query()
+        query = db.query()
-nres = query.execute("some query")
+        nres = query.execute("some query")
-results = query.fetchmany(20)
+        results = query.fetchmany(20)
-for doc in results:
+        for doc in results:
        print(doc.url, doc.title)
-]]></programlisting>
+        ]]></programlisting>
      </sect2>
@ -5145,12 +5227,12 @@ for doc in results:
                  text/html according to doc.mimetype. The typical use
                  would be as follows:
                  <programlisting>
-qdoc = query.fetchone()
+                    qdoc = query.fetchone()
-extractor = recoll.Extractor(qdoc)
+                    extractor = recoll.Extractor(qdoc)
-doc = extractor.textextract(qdoc.ipath)
+                    doc = extractor.textextract(qdoc.ipath)
-# use doc.text, e.g. for previewing
+                    # use doc.text, e.g. for previewing
-</programlisting>
+                  </programlisting>
-</para></listitem>
+                  </para></listitem>
                </varlistentry>
                <varlistentry>
                  <term>Extractor.idoctofile(ipath, targetmtype, outfile='')</term>
@ -5158,11 +5240,11 @@ doc = extractor.textextract(qdoc.ipath)
                  which can be given explicitly or will be created as a
                  temporary file to be deleted by the caller. Typical use:
                  <programlisting>
-qdoc = query.fetchone()
+                    qdoc = query.fetchone()
-extractor = recoll.Extractor(qdoc)
+                    extractor = recoll.Extractor(qdoc)
-filename = extractor.idoctofile(qdoc.ipath, qdoc.mimetype)</programlisting>
+                  filename = extractor.idoctofile(qdoc.ipath, qdoc.mimetype)</programlisting>
-</para></listitem>
+                  </para></listitem>
                </varlistentry>
              </variablelist>
@ -5182,9 +5264,9 @@ filename = extractor.idoctofile(qdoc.ipath, qdoc.mimetype)</programlisting>
          highlighting and data extraction functions.</para>
          <programlisting>
-#!/usr/bin/env python
+            #!/usr/bin/env python
-<![CDATA[
+            <![CDATA[
-from recoll import recoll
+                     from recoll import recoll
 db = recoll.connect()
 db.setAbstractParams(maxchars=80, contextwords=4)
@ -5193,18 +5275,18 @@ query = db.query()
 nres = query.execute("some user question")
 print "Result count: ", nres
 if nres > 5:
-    nres = 5
+nres = 5
 for i in range(nres):
-    doc = query.fetchone()
+doc = query.fetchone()
-    print "Result #%d" % (query.rownumber,)
+print "Result #%d" % (query.rownumber,)
-    for k in ("title", "size"):
+for k in ("title", "size"):
-        print k, ":", getattr(doc, k).encode('utf-8')
+print k, ":", getattr(doc, k).encode('utf-8')
-    abs = db.makeDocAbstract(doc, query).encode('utf-8')
+abs = db.makeDocAbstract(doc, query).encode('utf-8')
-    print abs
+print abs
-    print
+print
-]]>
+            ]]>
-</programlisting>
+          </programlisting>
        </sect3>
      </sect2>
@ -5348,8 +5430,8 @@ for i in range(nres):
          indexing sample found in the Recoll source (which sets
          <literal>rclbes="MBOX"</literal>):</para>
          <programlisting>[MBOX]
-fetch = /path/to/recoll/src/python/samples/rclmbox.py fetch
+          fetch = /path/to/recoll/src/python/samples/rclmbox.py fetch
-makesig = path/to/recoll/src/python/samples/rclmbox.py makesig
+          makesig = path/to/recoll/src/python/samples/rclmbox.py makesig
          </programlisting>
          <para><literal>fetch</literal> and <literal>makesig</literal>
          define two commands to execute to respectively retrieve the
@ -5390,15 +5472,15 @@ makesig = path/to/recoll/src/python/samples/rclmbox.py makesig
        <para>Adapting to the new package structure:</para>
        <programlisting>
-<![CDATA[
+          <![CDATA[
-try:
+                   try:
                   from recoll import recoll
                   from recoll import rclextract
                   hasextract = True
-except:
+                   except:
                   import recoll
                   hasextract = False
-]]>
+          ]]>
        </programlisting>
        <para>Adapting to the change of nature of
@ -5408,10 +5490,10 @@ except:
        the <literal>next</literal> value (old).</para>
        <programlisting>
-<![CDATA[
+          <![CDATA[
                   rownum = query.next if type(query.next) == int else \
                   query.rownumber
-]]>
+          ]]>
        </programlisting>
      </sect2> <!-- compat with previous version -->
@ -5719,7 +5801,8 @@ except:
        very much welcome patches</ulink>.</para>
-        <formalpara><title>Configure options:</title>
+        <formalpara>
          <title>Configure options:</title>
          <para>
            <itemizedlist>
@ -5983,9 +6066,9 @@ except:
      character. Long lines can be continued by escaping the
      physical newline with backslash, even inside quoted strings.</para>
      <programlisting>
-astringlist =  "some string \
+        astringlist =  "some string \
-with spaces"
+        with spaces"
-thesame = "some string with spaces"        
+        thesame = "some string with spaces"        
      </programlisting>
      <para>Parameters which are not part of string lists can't be
@ -6166,25 +6249,25 @@ thesame = "some string with spaces"
        only plain ascii headers can be indexed, and only the
        first occurrence will be used for headers that occur several times).
-<programlisting>[prefixes]
+        <programlisting>[prefixes]
-# Index mailmytag contents (with the given prefix)
+        # Index mailmytag contents (with the given prefix)
-mailmytag = XMTAG
+        mailmytag = XMTAG
-[stored]
+        [stored]
-# Store mailmytag inside the document data record (so that it can be
+        # Store mailmytag inside the document data record (so that it can be
-# displayed - as %(mailmytag) - in result lists).
+        # displayed - as %(mailmytag) - in result lists).
-mailmytag = 
+        mailmytag = 
-[queryaliases]
+        [queryaliases]
-filename = fn
+        filename = fn
-containerfilename = cfn
+        containerfilename = cfn
-[mail]
+        [mail]
-# Extract the X-My-Tag mail header, and use it internally with the
+        # Extract the X-My-Tag mail header, and use it internally with the
-# mailmytag field name
+        # mailmytag field name
-x-my-tag = mailmytag
+        x-my-tag = mailmytag
-</programlisting>
+        </programlisting>
-</para>
+        </para>
        <sect3 id="RCL.INSTALL.CONFIG.FIELDS.XATTR">
@ -6231,7 +6314,7 @@ x-my-tag = mailmytag
        should be handled specially, which is possible because they
        are usually all located in one place. Example:
        <programlisting>[~/.kde/share/apps/okular/docdata]
-.xml = application/x-okular-notes</programlisting></para>
+        .xml = application/x-okular-notes</programlisting></para>
        <para>The <varname>recoll_noindex</varname>
        <filename>mimemap</filename> variable has been moved to
@ -6305,7 +6388,7 @@ x-my-tag = mailmytag
        application tag to specialize the choice for an area of the
        filesystem (using a <varname>localfields</varname> specification
        in <filename>mimeconf</filename>). The syntax for the key is 
-<replaceable>mimetype</replaceable><literal>|</literal><replaceable>tag</replaceable></para>
+        <replaceable>mimetype</replaceable><literal>|</literal><replaceable>tag</replaceable></para>
        <para>The <varname>nouncompforviewmts</varname> entry (placed at
        the top level, outside of the <literal>[view]</literal> section),
@ -6415,8 +6498,8 @@ x-my-tag = mailmytag
            <listitem><para>In <filename>$RECOLL_CONFDIR/mimemap</filename>
            (typically <filename>~/.recoll/mimemap</filename>), add the
            following line:<programlisting>
-.blob = application/x-blobapp
+            .blob = application/x-blobapp
-</programlisting>
+          </programlisting>
          Note that the MIME type is made up here, and you could
          call it <replaceable>diesel/oil</replaceable> just the
          same.</para>
@ -6424,8 +6507,8 @@ x-my-tag = mailmytag
            <listitem><para>In <filename>$RECOLL_CONFDIR/mimeview</filename>
            under the <literal>[view]</literal> section, add:</para>
            <programlisting>
-application/x-blobapp = blobviewer %f
+              application/x-blobapp = blobviewer %f
-</programlisting>
+            </programlisting>
            <para>We are supposing
            that <replaceable>blobviewer</replaceable> wants a file
            name parameter here; you would use <literal>%u</literal> if
@ -6458,8 +6541,8 @@ application/x-blobapp = blobviewer %f
            section, add the following line (more about the
            <replaceable>rclblob</replaceable> indexing script
            later):<programlisting>
-application/x-blobapp = exec rclblob
+            application/x-blobapp = exec rclblob
-</programlisting></para>
+          </programlisting></para>
            </listitem>
            <listitem><para>Under the <literal>[icons]</literal>
            section, you should choose an icon to be displayed for the
@ -6489,4 +6572,3 @@ application/x-blobapp = exec rclblob
    </sect1>
  </chapter>
 </book>
--- a/src/sampleconf/recoll.conf
+++ b/src/sampleconf/recoll.conf
@ -571,6 +571,31 @@ logfilename = stderr
 # the log... values.</descr></var>
 #daemlogfilename = /dev/null
 # <var name="orgidxconfdir" type="dfn">
 #
 # <brief>Original location of the configuration directory.</brief>
 # <descr>This is used exclusively for movable datasets. Locating the
 # configuration directory inside the dataset directory tree makes it
 # possible to provide automatic query-time path translations once the
 # dataset has been moved (for example, because it has been mounted at
 # another location).</descr></var>
 #orgidxconfdir = 
 # <var name="curidxconfdir" type="dfn">
 #
 # <brief>Current location of the configuration directory.</brief>
 # <descr>Complements orgidxconfdir for movable datasets. This should be used
 # if the configuration directory has been copied from the dataset to
 # another location, either because the dataset is read-only and an r/w copy
 # is desired, or for performance reasons. This records the location of the
 # configuration directory inside the moved dataset, before it was copied,
 # to allow path translation computations. For example, if a dataset
 # originally indexed with its configuration in '/home/me/mydata/config' has
 # been mounted to '/media/me/mydata', and the GUI is running from a copied
 # configuration, orgidxconfdir would be '/home/me/mydata/config', and
 # curidxconfdir (as set in the copied configuration) would be
 # '/media/me/mydata/config'.</descr></var>
 #curidxconfdir = 
 # <var name="idxrundir" type="dfn">
 #
 # <brief>Indexing process current directory.</brief> <descr>The input