Movable datasets support

2017-12-06 11:34:04 +01:00 · 2017-12-06 11:34:04 +01:00 · 09acb5687c
commit 09acb5687c
parent 329ab7b90d
5 changed files with 4611 additions and 4270 deletions
--- a/src/common/rclconfig.cpp
+++ b/src/common/rclconfig.cpp
@ -1318,17 +1318,85 @@ string RclConfig::getPidfile() const
    return path_cat(getCacheDir(), "index.pid");
 }

+/* Strip the common trailing (leaf) components of file paths p1 and p2 and
+ * store the remaining, differing stems in r1 and r2. Example:
+ * /mnt1/common/part /mnt2/common/part -> /mnt1 /mnt2. This is used
+ * for computing translations for paths when the dataset has been moved.
+ * Returns an empty string on success, else an error message. */
+static string path_diffstems(const string& p1, const string& p2,
+                            string& r1, string& r2)
+{
+    string reason;
+    r1.clear(); // Outputs start out empty, and stay so on error
+    r2.clear();
+    vector<string> v1, v2; // Could be done more efficiently than by splitting into vectors, but we don't care
+    stringToTokens(p1, v1, "/");
+    stringToTokens(p2, v2, "/");
+    unsigned int l1 = v1.size();
+    unsigned int l2 = v2.size();
+        
+    // Count (cl) the path elements which are identical, comparing from the end
+    unsigned int cl = 0;
+    for (; cl < MIN(l1, l2); cl++) {
+        if (v1[l1-cl-1] != v2[l2-cl-1]) {
+            break;
+        }
+    }
+    //cerr << "Common length = " << cl << endl;
+    if (cl == 0) {
+        reason = "Input paths are empty or have no common part";
+        return reason;
+    }
+    for (unsigned i = 0; i < l1 - cl; i++) { // Rebuild each stem from the elements before the common part
+        r1 += "/" + v1[i];
+    }
+    for (unsigned i = 0; i < l2 - cl; i++) {
+        r2 += "/" + v2[i];
+    }
+        
+    return reason; // Still empty here. Note: a stem is left empty if all of its path is common
+}
+
 void RclConfig::urlrewrite(const string& dbdir, string& url) const
 {
-    LOGDEB2("RclConfig::urlrewrite: dbdir [" << dbdir << "] url [" << url <<
+    LOGDEB("RclConfig::urlrewrite: dbdir [" << dbdir << "] url [" << url <<
            "]\n");

+    // If orgidxconfdir is set, we assume that this index is for a
+    // movable dataset, with the configuration directory stored inside
+    // the dataset tree. This allows computing automatic path
+    // translations if the dataset has been moved.
+    string orig_confdir;
+    string cur_confdir;
+    string confstemorg, confstemrep;
+    if (m_conf->get("orgidxconfdir", orig_confdir, "")) {
+        if (!m_conf->get("curidxconfdir", cur_confdir, "")) {
+            cur_confdir = m_confdir;
+        }
+        LOGDEB("RclConfig::urlrewrite: orgidxconfdir: " << orig_confdir <<
+               " cur_confdir " << cur_confdir << endl);
+        string reason = path_diffstems(orig_confdir, cur_confdir,
+                                       confstemorg, confstemrep);
+        if (!reason.empty()) {
+            LOGERR("urlrewrite: path_diffstems failed: " << reason <<
+                   " : orig_confdir [" << orig_confdir <<
+                   "] cur_confdir [" << cur_confdir << endl);
+            confstemorg = confstemrep = "";
+        }
+    }
+    
    // Do path translations exist for this index ?
+    bool needptrans = true;
    if (m_ptrans == 0 || !m_ptrans->hasSubKey(dbdir)) {
 	LOGDEB2("RclConfig::urlrewrite: no paths translations (m_ptrans " <<
                m_ptrans << ")\n");
+        needptrans = false;
+    }
+
+    if (!needptrans && confstemorg.empty()) {
        return;
    }
+    bool computeurl = false;
    
    string path = fileurltolocalpath(url);
    if (path.empty()) {
@ -1336,21 +1404,33 @@ void RclConfig::urlrewrite(const string& dbdir, string& url) const
 	return;
    }
    
+    // Do the movable volume thing.
+    if (!confstemorg.empty() && confstemorg.size() <= path.size() &&
+        !path.compare(0, confstemorg.size(), confstemorg)) {
+        path = path.replace(0, confstemorg.size(), confstemrep);
+        computeurl = true;
+    }
+
+    if (needptrans) {
        // For each translation check if the prefix matches the input path,
        // replace and return the result if it does.
        vector<string> opaths = m_ptrans->getNames(dbdir);
-    for (vector<string>::const_iterator it = opaths.begin(); 
-	 it != opaths.end(); it++) {
-	if (it->size() <= path.size() && !path.compare(0, it->size(), *it)) {
+        for (const auto& opath: opaths) {
+            if (opath.size() <= path.size() &&
+                !path.compare(0, opath.size(), opath)) {
                string npath;
-	    // This call always succeeds because the key comes from getNames()
-	    if (m_ptrans->get(*it, npath, dbdir)) { 
-		path = path.replace(0, it->size(), npath);
-		url = path_pathtofileurl(path);
+                // Key comes from getNames()=> call must succeed
+                if (m_ptrans->get(opath, npath, dbdir)) { 
+                    path = path.replace(0, opath.size(), npath);
+                    computeurl = true;
                }
                break;
            }
        }
+    }
+    if (computeurl) {
+        url = path_pathtofileurl(path);
+    }
 }

 bool RclConfig::sourceChanged() const
--- a/src/doc/user/recoll.conf.xml
+++ b/src/doc/user/recoll.conf.xml
@ -471,6 +471,25 @@ the log... values.</para></listitem></varlistentry>
 <listitem><para>Override logfilename for the indexer in real time
 mode. The default is to use the idx... values if set, else
 the log... values.</para></listitem></varlistentry>
+<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.ORGIDXCONFDIR">
+<term><varname>orgidxconfdir</varname></term>
+<listitem><para>Original location of the configuration directory. This is used exclusively for movable datasets. Locating the
+configuration directory inside the directory tree makes it possible to
+provide automatic query time path translations once the data set has
+moved (for example, because it has been mounted on another
+location).</para></listitem></varlistentry>
+<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.CURIDXCONFDIR">
+<term><varname>curidxconfdir</varname></term>
+<listitem><para>Current location of the configuration directory. Complements orgidxconfdir for movable datasets. This should be used
+if the configuration directory has been copied from the dataset to
+another location, either because the dataset is readonly and an r/w copy
+is desired, or for performance reasons. This records the location of the
+moved configuration directory before the copy, to allow path translation computations.  For
+example if a dataset originally indexed under '/home/me/mydata' (with its configuration in '/home/me/mydata/config') has
+been mounted to '/media/me/mydata', and the GUI is running from a copied
+configuration, orgidxconfdir would be '/home/me/mydata/config', and
+curidxconfdir (as set in the copied configuration) would be
+'/media/me/mydata/config'.</para></listitem></varlistentry>
 <varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.IDXRUNDIR">
 <term><varname>idxrundir</varname></term>
 <listitem><para>Indexing process current directory. The input
--- a/src/doc/user/usermanual.html
+++ b/src/doc/user/usermanual.html
--- a/src/doc/user/usermanual.xml
+++ b/src/doc/user/usermanual.xml
@ -498,12 +498,12 @@
        indexed (no others will be indexed), by setting 
        the <link linkend="RCL.INSTALL.CONFIG.RECOLLCONF.INDEXEDMIMETYPES">
        indexedmimetypes</link> configuration variable. Example:<programlisting>
-indexedmimetypes = text/html application/pdf
+        indexedmimetypes = text/html application/pdf
      </programlisting>
      It is possible to redefine this parameter for
      subdirectories. Example:<programlisting>
-[/path/to/my/dir]
-indexedmimetypes = application/pdf
+      [/path/to/my/dir]
+      indexedmimetypes = application/pdf
    </programlisting>
    (When using sections like this, don't forget that they remain
    in effect until the end of the file or another section
@ -920,10 +920,10 @@ indexedmimetypes = application/pdf
        processing their text, and one to update the index. This was
        tested to be the best configuration on the test system
        (quadri-processor with multiple disks).
-<programlisting>
-thrQSizes = 2 2 2
-thrTCounts =  4 2 1
-</programlisting>
+        <programlisting>
+          thrQSizes = 2 2 2
+          thrTCounts =  4 2 1
+        </programlisting>
        </para>

        <para>The following example would use a single queue, and the
@ -936,18 +936,18 @@ thrTCounts =  4 2 1
        would be performed purely sequentially), so the previous
        approach is preferred. YMMV...  The 2 last values for
        thrTCounts are ignored.
-<programlisting>
-thrQSizes = 2 -1 -1
-thrTCounts =  6 1 1
-</programlisting>
+        <programlisting>
+          thrQSizes = 2 -1 -1
+          thrTCounts =  6 1 1
+        </programlisting>
        </para>

        <para>The following example would disable
        multithreading. Indexing will be performed by a single
        thread.
-<programlisting>
-thrQSizes = -1 -1 -1
-</programlisting>
+        <programlisting>
+          thrQSizes = -1 -1 -1
+        </programlisting>
        </para>

      </sect2>
@ -1113,7 +1113,7 @@ thrQSizes = -1 -1 -1
      configuration file:</para>

      <programlisting>[/some/area/of/the/fs]
-metadatacmds = ; tags = tmsu tags %f
+      metadatacmds = ; tags = tmsu tags %f
      </programlisting>

      <note><para>Depending on the <application>tmsu</application> version,
@ -1154,7 +1154,7 @@ metadatacmds = ; tags = tmsu tags %f
      couple the tag update with a <literal>recollindex -e -i
      filename.</literal></para>

-</sect1>
+    </sect1>


    <sect1 id="RCL.INDEXING.PDF">
@ -1216,9 +1216,9 @@ metadatacmds = ; tags = tmsu tags %f
        the metadata fields (available for &RCL; 1.23.3 and later. 1.23.2
        has equivalent code inside the handler script). Example:</para>
        <programlisting>import sys
-import re
+        import re

-class MetaFixer(object):
+        class MetaFixer(object):
        def __init__(self):
        pass

@ -1367,13 +1367,13 @@ class MetaFixer(object):
        PATH):

        <screen><![CDATA[
-30 3 * * * recollindex > /some/tmp/dir/recolltrace 2>&1
-]]></screen>
+        30 3 * * * recollindex > /some/tmp/dir/recolltrace 2>&1
+        ]]></screen>

        Or, using <command>anacron</command>:
-<screen><![CDATA[
-1  15  su mylogin -c "recollindex recollindex > /tmp/rcltraceme 2>&1"
-]]></screen>
+        <screen><![CDATA[
+        1  15  su mylogin -c "recollindex recollindex > /tmp/rcltraceme 2>&1"
+        ]]></screen>
        </para>

        <para>As of version 1.17 the &RCL; GUI has dialogs to manage
@ -1435,12 +1435,12 @@ class MetaFixer(object):
      at the end:</para>

      <programlisting>recollconf=$HOME/.recoll-home
-recolldata=/usr/local/share/recoll
-RECOLL_CONFDIR=$recollconf $recolldata/examples/rclmon.sh start
+      recolldata=/usr/local/share/recoll
+      RECOLL_CONFDIR=$recollconf $recolldata/examples/rclmon.sh start

-fvwm 
+      fvwm 

-</programlisting>
+      </programlisting>

      <para>The indexing daemon gets started, then the window manager,
      for which the session waits.</para> <para>By default the
@ -1487,17 +1487,17 @@ fvwm
      increasing the resources available to inotify, which are
      normally defined in <filename>/etc/sysctl.conf</filename>.
      <programlisting>
-### inotify
-#
-# cat  /proc/sys/fs/inotify/max_queued_events   - 16384
-# cat  /proc/sys/fs/inotify/max_user_instances  - 128
-# cat  /proc/sys/fs/inotify/max_user_watches    - 16384
-#
-# -- Change to:
-#
-fs.inotify.max_queued_events=32768
-fs.inotify.max_user_instances=256
-fs.inotify.max_user_watches=32768
+        ### inotify
+        #
+        # cat  /proc/sys/fs/inotify/max_queued_events   - 16384
+        # cat  /proc/sys/fs/inotify/max_user_instances  - 128
+        # cat  /proc/sys/fs/inotify/max_user_watches    - 16384
+        #
+        # -- Change to:
+        #
+        fs.inotify.max_queued_events=32768
+        fs.inotify.max_user_instances=256
+        fs.inotify.max_user_watches=32768
      </programlisting>
      
      </para>
@ -1915,11 +1915,11 @@ fs.inotify.max_user_watches=32768
        <filename>~/.recoll/scripts/myscript.desktop</filename> (the exact
        file name inside the directory is irrelevant):
        <programlisting>
-[Desktop Entry]
-Type=Application
-Name=MyFirstScript
-Exec=/home/me/bin/tryscript %F
-MimeType=*/*
+          [Desktop Entry]
+          Type=Application
+          Name=MyFirstScript
+          Exec=/home/me/bin/tryscript %F
+          MimeType=*/*
        </programlisting>
        The <literal>Name</literal> attribute defines the label which will
        appear inside the <guilabel>Run Script</guilabel> menu. The
@ -2084,10 +2084,10 @@ MimeType=*/*
        history.</para>

        <para>Here follows an example:
-<programlisting>
-&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot;?&gt;
+        <programlisting>
+          &lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot;?&gt;

-&lt;fragbuts version=&quot;1.0&quot;&gt;
+          &lt;fragbuts version=&quot;1.0&quot;&gt;

          &lt;radiobuttons&gt;

@ -2121,8 +2121,8 @@ MimeType=*/*
          &lt;/fragbut&gt;

          &lt;/buttons&gt;
-&lt;/fragbuts&gt;
-</programlisting>
+          &lt;/fragbuts&gt;
+        </programlisting>
        </para>
        
        <para>Each <literal>radiobuttons</literal> or
@ -3162,27 +3162,27 @@ MimeType=*/*
            "<span style='white-space:nowrap'><i>%M</i>&nbsp;%D</span>&nbsp;&nbsp;&nbsp; <i>%U</i>&nbsp;%i<br>\n"
            "%A %K</td>\n"
            "</tr></table>\n"
-]]></screen>
+            ]]></screen>

            You may, for example, try the following for a more web-like
            experience:

            <screen><![CDATA[
-<u><b><a href="P%N">%T</a></b></u><br>
-%A<font color=#008000>%U - %S</font> - %L
-]]></screen>
+            <u><b><a href="P%N">%T</a></b></u><br>
+            %A<font color=#008000>%U - %S</font> - %L
+            ]]></screen>

            Note that the P%N link in the above paragraph makes the title a
            preview link. Or the clean looking:

            <screen><![CDATA[
-<img src="%I" align="left">%L <font color="#900000">%R</font>
-&nbsp;&nbsp;<b>%T&</b><br>%S&nbsp;
-<font color="#808080"><i>%U</i></font>
-<table bgcolor="#e0e0e0">
-<tr><td><div>%A</div></td></tr>
-</table>%K
-]]></screen>
+            <img src="%I" align="left">%L <font color="#900000">%R</font>
+            &nbsp;&nbsp;<b>%T&</b><br>%S&nbsp;
+            <font color="#808080"><i>%U</i></font>
+            <table bgcolor="#e0e0e0">
+            <tr><td><div>%A</div></td></tr>
+            </table>%K
+            ]]></screen>
            </para>

            <para>These samples, and some others are 
@ -3258,11 +3258,11 @@ MimeType=*/*
        window.location.href = 'recoll://search/query?qtp=a&amp;p=0&amp;q=' +
        encodeURIComponent(t);
        }
-&lt;/script>
+        &lt;/script>
        ....
-&lt;body ondblclick="recollsearch()">
+        &lt;body ondblclick="recollsearch()">

-</programlisting>
+        </programlisting>
      </sect2>
    </sect1>

@ -3303,8 +3303,8 @@ MimeType=*/*
      <para><command>recollq</command> has a man page (not installed by
      default, look in the <filename>doc/man</filename> directory). The
      Usage string is as follows:</para>
-<programlisting>
-recollq: usage:
+      <programlisting>
+        recollq: usage:
        -P: Show the date span for all the documents present in the index
        [-o|-a|-f] [-q] &lt;query string&gt;
        Runs a recoll query and displays result lines. 
@ -3317,7 +3317,7 @@ recollq: usage:
        -a Emulate the GUI simple search in ALL TERMS mode
        -f Emulate the GUI simple search in filename mode
        -q is just ignored (compatibility with the recoll GUI command line)
-Common options:
+        Common options:
        -c &lt;configdir&gt; : specify config directory, overriding $RECOLL_CONFDIR
        -d also dump file contents
        -n [first-]&lt;cnt&gt; define the result slice. The default value for [first]
@ -3338,18 +3338,18 @@ Common options:
        separated by one space character. This is the recommended format 
        for use by other programs. Use a normal query with option -m to 
        see the field names.
-</programlisting>
+      </programlisting>

      <para>Sample execution:</para>
-<programlisting>recollq 'ilur -nautique mime:text/html'
-Recoll query: ((((ilur:(wqf=11) OR ilurs) AND_NOT (nautique:(wqf=11)
+      <programlisting>recollq 'ilur -nautique mime:text/html'
+      Recoll query: ((((ilur:(wqf=11) OR ilurs) AND_NOT (nautique:(wqf=11)
      OR nautiques OR nautiqu OR nautiquement)) FILTER Ttext/html))
-4 results
-text/html       [file:///Users/uncrypted-dockes/projets/bateaux/ilur/comptes.html]      [comptes.html]  18593   bytes   
-text/html       [file:///Users/uncrypted-dockes/projets/nautique/webnautique/articles/ilur1/index.html] [Constructio...
-text/html       [file:///Users/uncrypted-dockes/projets/pagepers/index.html]    [psxtcl/writemime/recoll]...
-text/html       [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/recu-chasse-maree....
-</programlisting>
+      4 results
+      text/html       [file:///Users/uncrypted-dockes/projets/bateaux/ilur/comptes.html]      [comptes.html]  18593   bytes   
+      text/html       [file:///Users/uncrypted-dockes/projets/nautique/webnautique/articles/ilur1/index.html] [Constructio...
+      text/html       [file:///Users/uncrypted-dockes/projets/pagepers/index.html]    [psxtcl/writemime/recoll]...
+      text/html       [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/recu-chasse-maree....
+      </programlisting>
    </sect1>

    <sect1 id="RCL.SEARCH.SYNONYMS">
@ -3380,10 +3380,10 @@ text/html       [file:///Users/uncrypted-dockes/projets/bateaux/ilur/factEtCie/r

      <para>Example:
      <programlisting>
-hi hello "good morning"
+        hi hello "good morning"

-# not sure about "au revoir" though. Is this english ?
-bye goodbye "see you" \
+        # not sure about "au revoir" though. Is this english ?
+        bye goodbye "see you" \
        "au revoir" 
      </programlisting>
      </para>
@ -3680,7 +3680,7 @@ bye goodbye "see you" \
        <para>Several <literal>dir</literal> clauses can be specified,
        both positive and negative. For example the following makes sense:
        <programlisting>
-dir:recoll dir:src -dir:utils -dir:common
+          dir:recoll dir:src -dir:utils -dir:common
          </programlisting> This would select results which have both
          <filename>recoll</filename> and <filename>src</filename> in the
          path (in any order), and which have not either
@ -4118,6 +4118,88 @@ dir:recoll dir:src -dir:utils -dir:common

  </chapter> <!-- Search -->

+  <chapter id="RCL.MOVABLE">
+    <title>Movable datasets</title>
+
+    <para>As of &RCL; 1.24, it has become easy to build self-contained
+    datasets including a &RCL; configuration directory and index together
+    with the indexed documents, and to move such a dataset around (for
+    example copying it to a USB drive), without having to adjust the
+    configuration for querying the index.</para>
+
+    <note><para>This is a query-time feature only. The index must only be
+    updated in its original location. If an update is necessary in a
+    different location, the index must be reset.</para></note>
+
+    <para>The examples below will assume that you have a dataset under
+    <filename>/home/me/mydata/</filename>, with the index configuration and
+    data stored inside
+    <filename>/home/me/mydata/recoll-confdir</filename>.</para> 
+    
+    <para>In order to be able to run queries after the dataset has been
+    moved, you must ensure the following:
+    <itemizedlist>
+      <listitem><para>The main configuration file must define the <link
+      linkend="RCL.INSTALL.CONFIG.RECOLLCONF.ORGIDXCONFDIR">orgidxconfdir</link>
+      variable to be the original location of the configuration directory
+      (<filename>orgidxconfdir=/home/me/mydata/recoll-confdir</filename>
+      must be set inside
+      <filename>/home/me/mydata/recoll-confdir/recoll.conf</filename> in
+      the example above).</para></listitem>
+
+      <listitem><para>The configuration directory must exist with the
+      documents, somewhere under the directory which will be
+      moved. E.g. if you are moving <filename>/home/me/mydata</filename>
+      around, the configuration directory must exist somewhere below this
+      point, for example
+      <filename>/home/me/mydata/recoll-confdir</filename>, or
+      <filename>/home/me/mydata/sub/recoll-confdir</filename>.</para></listitem>
+
+      <listitem><para>You should keep the default locations for the index
+      elements (they are relative to the configuration directory by
+      default). Only the paths referring to the documents themselves
+      (e.g. <literal>topdirs</literal> values) should be
+      absolute (in general, they are only used when indexing
+      anyway).</para></listitem> 
+      
+    </itemizedlist>
+    </para>
+
+    <para>Only the first point needs an explicit user action, the &RCL;
+    defaults are compatible with the second one, and the third is
+    natural.</para>
+    
+    <para>If, after the move, the configuration directory needs to be
+    copied out of the dataset (for example because the thumb drive is too
+    slow), you can set the <link
+    linkend="RCL.INSTALL.CONFIG.RECOLLCONF.CURIDXCONFDIR">
+    curidxconfdir</link> variable inside the copied configuration to
+    define the location of the moved one. For example if
+    <filename>/home/me/mydata</filename> is now mounted onto
+    <filename>/media/me/somelabel</filename>, but the configuration
+    directory and index have been copied to
+    <filename>/tmp/tempconfig</filename>, you would set
+    <literal>curidxconfdir</literal> to
+    <filename>/media/me/somelabel/recoll-confdir</filename> inside
+    <filename>/tmp/tempconfig/recoll.conf</filename>.
+    <literal>orgidxconfdir</literal> would still be
+    <filename>/home/me/mydata/recoll-confdir</filename> in the original and
+    the copy.</para>
+
+    <para>If you are regularly copying the configuration out of the
+    dataset, it will be useful to write a script to automate the
+    procedure. This can't really be done inside &RCL; because there are
+    probably many possible variants. One example would be to copy the
+    configuration to make it writable, but keep the index data on the
+    medium because it is too big - in this case, the script would also need
+    to set <literal>dbdir</literal> in the copied configuration.</para>
+    
+    <para>The same set of modifications (&RCL; 1.24) has also made it
+    possible to run queries from a readonly configuration directory (with
+    slightly reduced function of course, such as not recording the query
+    history).</para>
+
+  </chapter>

  <chapter id="RCL.PROGRAM">
    <title>Programming interface</title>
@ -4329,10 +4411,10 @@ dir:recoll dir:src -dir:utils -dir:common
        name suffixes. The types are defined inside the
        <link linkend="RCL.INSTALL.CONFIG.MIMEMAP">
          <filename>mimemap</filename> file</link>. Example:
-<programlisting>
+          <programlisting>

-.doc = application/msword
-</programlisting>
+            .doc = application/msword
+          </programlisting>
          If no suffix association is found for the file name, &RCL; will try
          to execute a system command (typically <command>file -i</command> or
        <command>xdg-mime</command>) to determine a MIME type.</para>
@ -4341,18 +4423,18 @@ dir:recoll dir:src -dir:utils -dir:common
        in the <link linkend="RCL.INSTALL.CONFIG.MIMECONF">
        <filename>mimeconf</filename> file</link>. A sample will probably be
        better than a long explanation:</para>
-<programlisting>
+        <programlisting>

-[index]
-application/msword = exec antiword -t -i 1 -m UTF-8;\
+          [index]
+          application/msword = exec antiword -t -i 1 -m UTF-8;\
          mimetype = text/plain ; charset=utf-8

-application/ogg = exec rclogg
+          application/ogg = exec rclogg

-text/rtf = exec unrtf --nopict --html; charset=iso-8859-1; mimetype=text/html
+          text/rtf = exec unrtf --nopict --html; charset=iso-8859-1; mimetype=text/html

-application/x-chm = execm rclchm
-</programlisting>
+          application/x-chm = execm rclchm
+        </programlisting>

        <para>The fragment specifies that:

@ -4409,14 +4491,14 @@ application/x-chm = execm rclchm
        <para>For filters producing HTML, the output could be very minimal
        like the following example:
        <programlisting>
-&lt;html>
+          &lt;html>
          &lt;head>
          &lt;meta http-equiv="Content-Type" content="text/html;charset=UTF-8">
          &lt;/head>
          &lt;body>
          Some text content
          &lt;/body>
-&lt;/html>
+          &lt;/html>
        </programlisting>
        </para>

@ -4460,13 +4542,13 @@ application/x-chm = execm rclchm
        date (for display and sorting), in preference to the file
        modification date. The date format should be as follows:
        <programlisting>
-&lt;meta name="date" content="YYYY-mm-dd HH:MM:SS">
-or
-&lt;meta name="date" content="YYYY-mm-ddTHH:MM:SS">
+          &lt;meta name="date" content="YYYY-mm-dd HH:MM:SS">
+          or
+          &lt;meta name="date" content="YYYY-mm-ddTHH:MM:SS">
        </programlisting>
        Example:
        <programlisting>
-&lt;meta name="date" content="2013-02-24 17:50:00">
+          &lt;meta name="date" content="2013-02-24 17:50:00">
        </programlisting>
        </para>

@ -4474,8 +4556,8 @@ or
        names. This should also be output as meta tags:</para>

        <programlisting>
-&lt;meta name="somefield" content="Some textual data" /&gt;
-</programlisting>
+          &lt;meta name="somefield" content="Some textual data" /&gt;
+        </programlisting>

        <para>You can embed HTML markup inside the content of custom
        fields, for improving the display inside result lists. In this
@ -4484,8 +4566,8 @@ or
        be escaped for display.</para>

        <programlisting>
-&lt;meta name="somefield" markup="html" content="Some &lt;i>textual&lt;/i> data" /&gt;
-</programlisting>
+          &lt;meta name="somefield" markup="html" content="Some &lt;i>textual&lt;/i> data" /&gt;
+        </programlisting>

        <para>As written above, the processing of fields is described
        in a <link linkend="RCL.PROGRAM.FIELDS">further
@ -4677,17 +4759,17 @@ or
        features.</para>

        <programlisting><![CDATA[
-#!/usr/bin/env python
+        #!/usr/bin/env python

-from recoll import recoll
+        from recoll import recoll

-db = recoll.connect()
-query = db.query()
-nres = query.execute("some query")
-results = query.fetchmany(20)
-for doc in results:
+        db = recoll.connect()
+        query = db.query()
+        nres = query.execute("some query")
+        results = query.fetchmany(20)
+        for doc in results:
        print(doc.url, doc.title)
-]]></programlisting>
+        ]]></programlisting>

      </sect2>
      
@ -5145,12 +5227,12 @@ for doc in results:
                  text/html according to doc.mimetype. The typical use
                  would be as follows:
                  <programlisting>
-qdoc = query.fetchone()
-extractor = recoll.Extractor(qdoc)
-doc = extractor.textextract(qdoc.ipath)
-# use doc.text, e.g. for previewing
-</programlisting>
-</para></listitem>
+                    qdoc = query.fetchone()
+                    extractor = recoll.Extractor(qdoc)
+                    doc = extractor.textextract(qdoc.ipath)
+                    # use doc.text, e.g. for previewing
+                  </programlisting>
+                  </para></listitem>
                </varlistentry>
                <varlistentry>
                  <term>Extractor.idoctofile(ipath, targetmtype, outfile='')</term>
@ -5158,11 +5240,11 @@ doc = extractor.textextract(qdoc.ipath)
                  which can be given explicitly or will be created as a
                  temporary file to be deleted by the caller. Typical use:
                  <programlisting>
-qdoc = query.fetchone()
-extractor = recoll.Extractor(qdoc)
-filename = extractor.idoctofile(qdoc.ipath, qdoc.mimetype)</programlisting>
+                    qdoc = query.fetchone()
+                    extractor = recoll.Extractor(qdoc)
+                  filename = extractor.idoctofile(qdoc.ipath, qdoc.mimetype)</programlisting>

-</para></listitem>
+                  </para></listitem>
                </varlistentry>

              </variablelist>
@ -5182,9 +5264,9 @@ filename = extractor.idoctofile(qdoc.ipath, qdoc.mimetype)</programlisting>
          highlighting and data extraction functions.</para>

          <programlisting>
-#!/usr/bin/env python
-<![CDATA[
-from recoll import recoll
+            #!/usr/bin/env python
+            <![CDATA[
+                     from recoll import recoll

 db = recoll.connect()
 db.setAbstractParams(maxchars=80, contextwords=4)
@ -5193,18 +5275,18 @@ query = db.query()
 nres = query.execute("some user question")
 print "Result count: ", nres
 if nres > 5:
-    nres = 5
+nres = 5
 for i in range(nres):
-    doc = query.fetchone()
-    print "Result #%d" % (query.rownumber,)
-    for k in ("title", "size"):
-        print k, ":", getattr(doc, k).encode('utf-8')
-    abs = db.makeDocAbstract(doc, query).encode('utf-8')
-    print abs
-    print
+doc = query.fetchone()
+print "Result #%d" % (query.rownumber,)
+for k in ("title", "size"):
+print k, ":", getattr(doc, k).encode('utf-8')
+abs = db.makeDocAbstract(doc, query).encode('utf-8')
+print abs
+print

-]]>
-</programlisting>
+            ]]>
+          </programlisting>

        </sect3>
      </sect2>
@ -5348,8 +5430,8 @@ for i in range(nres):
          indexing sample found in the Recoll source (which sets
          <literal>rclbes="MBOX"</literal>):</para>
          <programlisting>[MBOX]
-fetch = /path/to/recoll/src/python/samples/rclmbox.py fetch
-makesig = path/to/recoll/src/python/samples/rclmbox.py makesig
+          fetch = /path/to/recoll/src/python/samples/rclmbox.py fetch
+          makesig = path/to/recoll/src/python/samples/rclmbox.py makesig
          </programlisting>
          <para><literal>fetch</literal> and <literal>makesig</literal>
          define two commands to execute to respectively retrieve the
@ -5390,15 +5472,15 @@ makesig = path/to/recoll/src/python/samples/rclmbox.py makesig

        <para>Adapting to the new package structure:</para>
        <programlisting>
-<![CDATA[
-try:
+          <![CDATA[
+                   try:
                   from recoll import recoll
                   from recoll import rclextract
                   hasextract = True
-except:
+                   except:
                   import recoll
                   hasextract = False
-]]>
+          ]]>
        </programlisting>

        <para>Adapting to the change of nature of
@ -5408,10 +5490,10 @@ except:
        the <literal>next</literal> value (old).</para>

        <programlisting>
-<![CDATA[
+          <![CDATA[
                   rownum = query.next if type(query.next) == int else \
                   query.rownumber
-]]>
+          ]]>
        </programlisting>

      </sect2> <!-- compat with previous version -->
@ -5719,7 +5801,8 @@ except:
        very much welcome patches</ulink>.</para>


-        <formalpara><title>Configure options:</title>
+        <formalpara>
+          <title>Configure options:</title>
          <para>
            <itemizedlist>

@ -5983,9 +6066,9 @@ except:
      character. Long lines can be continued by escaping the
      physical newline with backslash, even inside quoted strings.</para>
      <programlisting>
-astringlist =  "some string \
-with spaces"
-thesame = "some string with spaces"        
+        astringlist =  "some string \
+        with spaces"
+        thesame = "some string with spaces"        
      </programlisting>

      <para>Parameters which are not part of string lists can't be
@ -6166,25 +6249,25 @@ thesame = "some string with spaces"
        only plain ascii headers can be indexed, and only the
        first occurrence will be used for headers that occur several times).

-<programlisting>[prefixes]
-# Index mailmytag contents (with the given prefix)
-mailmytag = XMTAG
+        <programlisting>[prefixes]
+        # Index mailmytag contents (with the given prefix)
+        mailmytag = XMTAG

-[stored]
-# Store mailmytag inside the document data record (so that it can be
-# displayed - as %(mailmytag) - in result lists).
-mailmytag = 
+        [stored]
+        # Store mailmytag inside the document data record (so that it can be
+        # displayed - as %(mailmytag) - in result lists).
+        mailmytag = 

-[queryaliases]
-filename = fn
-containerfilename = cfn
+        [queryaliases]
+        filename = fn
+        containerfilename = cfn

-[mail]
-# Extract the X-My-Tag mail header, and use it internally with the
-# mailmytag field name
-x-my-tag = mailmytag
-</programlisting>
-</para>
+        [mail]
+        # Extract the X-My-Tag mail header, and use it internally with the
+        # mailmytag field name
+        x-my-tag = mailmytag
+        </programlisting>
+        </para>


        <sect3 id="RCL.INSTALL.CONFIG.FIELDS.XATTR">
@ -6231,7 +6314,7 @@ x-my-tag = mailmytag
        should be handled specially, which is possible because they
        are usually all located in one place. Example:
        <programlisting>[~/.kde/share/apps/okular/docdata]
-.xml = application/x-okular-notes</programlisting></para>
+        .xml = application/x-okular-notes</programlisting></para>

        <para>The <varname>recoll_noindex</varname>
        <filename>mimemap</filename> variable has been moved to
@ -6305,7 +6388,7 @@ x-my-tag = mailmytag
        application tag to specialize the choice for an area of the
        filesystem (using a <varname>localfields</varname> specification
        in <filename>mimeconf</filename>). The syntax for the key is 
-<replaceable>mimetype</replaceable><literal>|</literal><replaceable>tag</replaceable></para>
+        <replaceable>mimetype</replaceable><literal>|</literal><replaceable>tag</replaceable></para>

        <para>The <varname>nouncompforviewmts</varname> entry, (placed at
        the top level, outside of the <literal>[view]</literal> section),
@ -6415,8 +6498,8 @@ x-my-tag = mailmytag
            <listitem><para>In <filename>$RECOLL_CONFDIR/mimemap</filename>
            (typically <filename>~/.recoll/mimemap</filename>), add the
            following line:<programlisting>
-.blob = application/x-blobapp
-</programlisting>
+            .blob = application/x-blobapp
+          </programlisting>
          Note that the MIME type is made up here, and you could
          call it <replaceable>diesel/oil</replaceable> just the
          same.</para>
@ -6424,8 +6507,8 @@ x-my-tag = mailmytag
            <listitem><para>In <filename>$RECOLL_CONFDIR/mimeview</filename>
            under the <literal>[view]</literal> section, add:</para>
            <programlisting>
-application/x-blobapp = blobviewer %f
-</programlisting>
+              application/x-blobapp = blobviewer %f
+            </programlisting>
            <para>We are supposing
            that <replaceable>blobviewer</replaceable> wants a file
            name parameter here, you would use <literal>%u</literal> if
@ -6458,8 +6541,8 @@ application/x-blobapp = blobviewer %f
            section, add the following line (more about the
            <replaceable>rclblob</replaceable> indexing script
            later):<programlisting>
-application/x-blobapp = exec rclblob
-</programlisting></para>
+            application/x-blobapp = exec rclblob
+          </programlisting></para>
            </listitem>
            <listitem><para>Under the <literal>[icons]</literal>
            section, you should choose an icon to be displayed for the
@ -6489,4 +6572,3 @@ application/x-blobapp = exec rclblob
    </sect1>
  </chapter>
 </book>
-
--- a/src/sampleconf/recoll.conf
+++ b/src/sampleconf/recoll.conf
@ -571,6 +571,31 @@ logfilename = stderr
 # the log... values.</descr></var>
 #daemlogfilename = /dev/null

+# <var name="orgidxconfdir" type="dfn">
+#
+# <brief>Original location of the configuration directory.</brief>
+# <descr>This is used exclusively for movable datasets. Locating the
+# configuration directory inside the dataset directory tree makes it
+# possible to provide automatic query-time path translations once the
+# dataset has moved (for example, because it has been mounted at another
+# location).</descr></var>
+#orgidxconfdir = 
+
+# <var name="curidxconfdir" type="dfn">
+#
+# <brief>Current location of the configuration directory.</brief>
+# <descr>Complements orgidxconfdir for movable datasets. This should be used
+# if the configuration directory has been copied from the dataset to
+# another location, either because the dataset is read-only and a writable
+# copy is desired, or for performance reasons. It records the location of
+# the configuration directory inside the moved dataset, before the copy,
+# to allow path translation computations. For example, if a dataset
+# originally indexed with its configuration in '/home/me/mydata/config'
+# has been mounted on '/media/me/mydata', and the GUI is running from a
+# copied configuration, orgidxconfdir would be '/home/me/mydata/config',
+# and curidxconfdir (as set in the copied configuration) would be
+# '/media/me/mydata/config'.</descr></var>
+#curidxconfdir = 
+
 # <var name="idxrundir" type="dfn">
 #
 # <brief>Indexing process current directory.</brief> <descr>The input