web page indexing doc and indents

This commit is contained in:
Jean-Francois Dockes 2021-03-11 10:00:11 +01:00
parent f57530e2a6
commit 5c7d0ff96d
5 changed files with 108 additions and 121 deletions

View File

@ -37,15 +37,15 @@ WebStore::WebStore(RclConfig *cnf)
int maxmbs = 40; int maxmbs = 40;
cnf->getConfParam("webcachemaxmbs", &maxmbs); cnf->getConfParam("webcachemaxmbs", &maxmbs);
if ((m_cache = new CirCache(ccdir)) == 0) { if ((m_cache = new CirCache(ccdir)) == 0) {
LOGERR("WebStore: cant create CirCache object\n" ); LOGERR("WebStore: cant create CirCache object\n" );
return; return;
} }
if (!m_cache->create(int64_t(maxmbs)*1000*1024, CirCache::CC_CRUNIQUE)) { if (!m_cache->create(int64_t(maxmbs)*1000*1024, CirCache::CC_CRUNIQUE)) {
LOGERR("WebStore: cache file creation failed: " << LOGERR("WebStore: cache file creation failed: " <<
m_cache->getReason() << "\n"); m_cache->getReason() << "\n");
delete m_cache; delete m_cache;
m_cache = 0; m_cache = 0;
return; return;
} }
} }
@ -57,17 +57,17 @@ WebStore::~WebStore()
// Read document from cache. Return the metadata as an Rcl::Doc // Read document from cache. Return the metadata as an Rcl::Doc
// @param htt Web Hit Type // @param htt Web Hit Type
bool WebStore::getFromCache(const string& udi, Rcl::Doc &dotdoc, bool WebStore::getFromCache(const string& udi, Rcl::Doc &dotdoc,
string& data, string *htt) string& data, string *htt)
{ {
string dict; string dict;
if (m_cache == 0) { if (m_cache == 0) {
LOGERR("WebStore::getFromCache: cache is null\n"); LOGERR("WebStore::getFromCache: cache is null\n");
return false; return false;
} }
if (!m_cache->get(udi, dict, &data)) { if (!m_cache->get(udi, dict, &data)) {
LOGDEB("WebStore::getFromCache: get failed\n"); LOGDEB("WebStore::getFromCache: get failed\n");
return false; return false;
} }
ConfSimple cf(dict, 1); ConfSimple cf(dict, 1);
@ -89,4 +89,3 @@ bool WebStore::getFromCache(const string& udi, Rcl::Doc &dotdoc,
dotdoc.meta[Rcl::Doc::keyudi] = udi; dotdoc.meta[Rcl::Doc::keyudi] = udi;
return true; return true;
} }

View File

@ -21,8 +21,8 @@
class RclConfig; class RclConfig;
namespace Rcl { namespace Rcl {
class Db; class Db;
class Doc; class Doc;
} }
class CirCache; class CirCache;

View File

@ -140,9 +140,9 @@ alink="#0000FF">
"#RCL.INDEXING.REMOVABLE">Removable "#RCL.INDEXING.REMOVABLE">Removable
volumes</a></span></dt> volumes</a></span></dt>
<dt><span class="sect1">2.5. <a href= <dt><span class="sect1">2.5. <a href=
"#RCL.INDEXING.WEBQUEUE"><span class= "#RCL.INDEXING.WebQUEUE"><span class=
"application">Unix</span>-like systems: indexing "application">Unix</span>-like systems: indexing
visited WEB pages</a></span></dt> visited Web pages</a></span></dt>
<dt><span class="sect1">2.6. <a href= <dt><span class="sect1">2.6. <a href=
"#RCL.INDEXING.EXTATTR"><span class= "#RCL.INDEXING.EXTATTR"><span class=
"application">Unix</span>-like systems: using extended "application">Unix</span>-like systems: using extended
@ -423,7 +423,7 @@ alink="#0000FF">
<div class="list-of-tables"> <div class="list-of-tables">
<p><b>List of Tables</b></p> <p><b>List of Tables</b></p>
<dl> <dl>
<dt>3.1. <a href="#idm1437">Keyboard shortcuts</a></dt> <dt>3.1. <a href="#idm1438">Keyboard shortcuts</a></dt>
</dl> </dl>
</div> </div>
<div class="chapter"> <div class="chapter">
@ -720,7 +720,7 @@ alink="#0000FF">
<li class="listitem"> <li class="listitem">
<p>A <a class="ulink" href= <p>A <a class="ulink" href=
"https://framagit.org/medoc92/recollwebui" target= "https://framagit.org/medoc92/recollwebui" target=
"_top">WEB interface</a>.</p> "_top">Web interface</a>.</p>
</li> </li>
</ul> </ul>
</div> </div>
@ -1949,10 +1949,10 @@ recollindex -c "$confdir"
<div> <div>
<div> <div>
<h2 class="title" style="clear: both"><a name= <h2 class="title" style="clear: both"><a name=
"RCL.INDEXING.WEBQUEUE" id= "RCL.INDEXING.WebQUEUE" id=
"RCL.INDEXING.WEBQUEUE"></a>2.5.&nbsp;<span class= "RCL.INDEXING.WebQUEUE"></a>2.5.&nbsp;<span class=
"application">Unix</span>-like systems: indexing "application">Unix</span>-like systems: indexing
visited WEB pages</h2> visited Web pages</h2>
</div> </div>
</div> </div>
</div> </div>
@ -1964,57 +1964,48 @@ recollindex -c "$confdir"
"application">Beagle</span> indexer, then adapted to "application">Beagle</span> indexer, then adapted to
<span class="application">Recoll</span> and the <span class="application">Recoll</span> and the
<span class="application">Firefox</span> <span class= <span class="application">Firefox</span> <span class=
"application">XUL</span> API. A new version of the addon "application">XUL</span> API. The current version of the
has been written to work with the <span class= extension is located in the <a class="ulink" href=
"application">WebExtensions</span> API, which is the only "https://addons.mozilla.org/en-US/firefox/addon/recoll-we/"
one supported after <span class= target="_top">Mozilla add-ons repository</a>. It uses the
"application">Firefox</span> version 57.</p> <span class="application">WebExtensions</span> API, and
<p>The extension works by copying visited WEB pages to an works with current <span class="application">Firefox</span>
versions.</p>
<p>The extension works by copying visited Web pages to an
indexing queue directory, which <span class= indexing queue directory, which <span class=
"application">Recoll</span> then processes, indexing the "application">Recoll</span> then processes, storing the
data, storing it into a local cache, then removing the file data into a local cache, then indexing it, then removing
from the queue.</p> the file from the queue.</p>
<p>Because the WebExtensions API introduces more <p>The visited Web pages indexing feature can be enabled on
constraints to what extensions can do, the new version
works with one more step: the files are first created in
the browser default downloads location (typically
<code class="filename">$HOME/Downloads</code> ), then moved
by a script in the old queue location. The script is
automatically executed by the <span class=
"application">Recoll</span> indexer versions 1.23.5 and
newer. It could conceivably be executed independently to
make the new browser extension compatible with an older
<span class="application">Recoll</span> version (the script
is named <span class=
"command"><strong>recoll-we-move-files.py</strong></span>).</p>
<div class="note" style=
"margin-left: 0.5in; margin-right: 0.5in;">
<h3 class="title">Note</h3>
<p>For the WebExtensions-based version to work, it is
necessary to set the <code class=
"literal">webdownloadsdir</code> value in the
configuration if it was changed from the default
<code class="filename">$HOME/Downloads</code> in the
browser preferences.</p>
</div>
<p>The visited WEB pages indexing feature can be enabled on
the <span class="application">Recoll</span> side from the the <span class="application">Recoll</span> side from the
GUI <span class="guilabel">Index configuration</span> GUI <span class="guilabel">Index configuration</span>
panel, or by editing the configuration file (set panel, or by editing the configuration file (set
<code class="varname">processwebqueue</code> to 1).</p> <code class="varname">processwebqueue</code> to 1).</p>
<p>A current pointer to the extension can be found, along <p>The <span class="application">Recoll</span> GUI has a
with up-to-date instructions, on the <a class="ulink" href= tool to list and edit the contents of the Web cache.
(<span class="guimenu">Tools</span><span class=
"guimenuitem">Webcache editor</span>)</p>
<p>You can find more details on Web indexing, its usage and
configuration in a <a class="ulink" href=
"https://www.lesbonscomptes.com/recoll/faqsandhowtos/IndexWebHistory" "https://www.lesbonscomptes.com/recoll/faqsandhowtos/IndexWebHistory"
target="_top">Recoll wiki</a>.</p> target="_top">Recoll 'Howto' entry</a>.</p>
<p>A copy of the indexed WEB pages is retained by Recoll in <div class="note" style=
a local cache (from which previews can be fetched). The "margin-left: 0.5in; margin-right: 0.5in;">
cache size can be adjusted from the <span class= <h3 class="title">The cache is not an archive</h3>
"guilabel">Index configuration</span> / <span class= <p>A copy of the indexed Web pages is retained by Recoll
"guilabel">Web history</span> panel. Once the maximum size in a local cache (from which data is fetched for
is reached, old pages are purged - both from the cache and previews, or when resetting the index). The cache has a
the index - to make room for new ones, so you need to maximum size, which can be adjusted from the <span class=
explicitly archive in some other place the pages that you "guilabel">Index configuration</span> / <span class=
want to keep indefinitely.</p> "guilabel">Web history</span> panel (<code class=
"literal">webcachemaxmbs</code> parameter in <code class=
"filename">recoll.conf</code>). Once the maximum size is
reached, old pages are erased to make room for new ones.
The pages which you want to keep indefinitely need to be
explicitly archived elsewhere. Using a very high value
for the cache size can avoid data erasure, but see the
above 'Howto' page for more details and gotchas.</p>
</div>
</div> </div>
<div class="sect1"> <div class="sect1">
<div class="titlepage"> <div class="titlepage">
@ -3473,14 +3464,14 @@ fs.inotify.max_user_watches=32768
be able to find under <code class= be able to find under <code class=
"filename">/usr/share/recoll/examples/fragbuts.xml</code>), "filename">/usr/share/recoll/examples/fragbuts.xml</code>),
contains an example which filters the results from the contains an example which filters the results from the
WEB history.</p> Web history.</p>
<p>Here follows an example:</p> <p>Here follows an example:</p>
<pre class="programlisting"> <pre class="programlisting">
&lt;?xml version="1.0" encoding="UTF-8"?&gt; &lt;?xml version="1.0" encoding="UTF-8"?&gt;
&lt;fragbuts version="1.0"&gt; &lt;fragbuts version="1.0"&gt;
&lt;radiobuttons&gt; &lt;radiobuttons&gt;
&lt;!-- Actually useful: toggle WEB queue results inclusion --&gt; &lt;!-- Actually useful: toggle Web queue results inclusion --&gt;
&lt;fragbut&gt; &lt;fragbut&gt;
&lt;label&gt;Include Web Results&lt;/label&gt; &lt;label&gt;Include Web Results&lt;/label&gt;
&lt;frag&gt;&lt;/frag&gt; &lt;frag&gt;&lt;/frag&gt;
@ -3996,7 +3987,7 @@ fs.inotify.max_user_watches=32768
given context (e.g. within a preview window, within the given context (e.g. within a preview window, within the
result table).</p> result table).</p>
<div class="table"> <div class="table">
<a name="idm1437" id="idm1437"></a> <a name="idm1438" id="idm1438"></a>
<p class="title"><b>Table&nbsp;3.1.&nbsp;Keyboard <p class="title"><b>Table&nbsp;3.1.&nbsp;Keyboard
shortcuts</b></p> shortcuts</b></p>
<div class="table-contents"> <div class="table-contents">
@ -7940,11 +7931,11 @@ hasextract = False
"application">FreeBSD</span> ports, etc.), or from some "application">FreeBSD</span> ports, etc.), or from some
type of "backports" repository providing versions newer type of "backports" repository providing versions newer
than the standard ones, or found on the <span class= than the standard ones, or found on the <span class=
"application">Recoll</span> WEB site in some cases. The "application">Recoll</span> Web site in some cases. The
most up-to-date information about Recoll packages can most up-to-date information about Recoll packages can
usually be found on the <a class="ulink" href= usually be found on the <a class="ulink" href=
"http://www.recoll.org/pages/download.html" target= "http://www.recoll.org/pages/download.html" target=
"_top"><span class="application">Recoll</span> WEB site "_top"><span class="application">Recoll</span> Web site
downloads page</a></p> downloads page</a></p>
<p>The <span class="application">Windows</span> version of <p>The <span class="application">Windows</span> version of
Recoll comes in a self-contained setup file, there is Recoll comes in a self-contained setup file, there is

View File

@ -282,7 +282,7 @@
<ulink url="https://www.lesbonscomptes.com/recoll/pages/download.html">Search Provider</ulink>. <ulink url="https://www.lesbonscomptes.com/recoll/pages/download.html">Search Provider</ulink>.
</para></listitem> </para></listitem>
<listitem><para>A <listitem><para>A
<ulink url="https://framagit.org/medoc92/recollwebui">WEB interface</ulink>. <ulink url="https://framagit.org/medoc92/recollwebui">Web interface</ulink>.
</para></listitem> </para></listitem>
</itemizedlist> </itemizedlist>
</para> </para>
@ -1257,56 +1257,51 @@ recollindex -c "$confdir"
</simplesect> </simplesect>
</sect1> </sect1>
<sect1 id="RCL.INDEXING.WEBQUEUE"> <sect1 id="RCL.INDEXING.WebQUEUE">
<title>&LIN;: indexing visited WEB pages</title> <title>&LIN;: indexing visited Web pages</title>
<para>With the help of a <application>Firefox</application> <para>With the help of a <application>Firefox</application>
extension, &RCL; can index the Internet pages that you visit. The extension, &RCL; can index the Internet pages that you visit. The
extension has a long history: it was initially designed for the extension has a long history: it was initially designed for
<application>Beagle</application> indexer, then adapted to &RCL; and the <application>Beagle</application> indexer, then adapted to
the <application>Firefox</application> <application>XUL</application> &RCL; and
API. A new version of the addon has been written to work with the the <application>Firefox</application> <application>XUL</application>
<application>WebExtensions</application> API, which is the only one API. The current version of the extension is located in
supported after <application>Firefox</application> version 57.</para> the <ulink url="https://addons.mozilla.org/en-US/firefox/addon/recoll-we/">Mozilla
add-ons repository</ulink>. It uses
the <application>WebExtensions</application> API, and works with
current <application>Firefox</application> versions.</para>
<para>The extension works by copying visited WEB pages to an indexing <para>The extension works by copying visited Web pages to an indexing
queue directory, which &RCL; then processes, indexing the data, queue directory, which &RCL; then processes, storing the data into a
storing it into a local cache, then removing the file from the local cache, then indexing it, then removing the file from the
queue.</para> queue.</para>
<para>Because the WebExtensions API introduces more constraints to <para>The visited Web pages indexing feature can be enabled on the
what extensions can do, the new version works with one &RCL; side from the GUI <guilabel>Index configuration</guilabel>
more step: the files are first created in the browser default panel, or by editing the configuration file (set
downloads location (typically <filename>$HOME/Downloads</filename> ), <varname>processwebqueue</varname> to 1).</para>
then moved by a script in the old queue location. The script is
automatically executed by the &RCL; indexer versions 1.23.5 and
newer. It could conceivably be executed independently to make the new
browser extension compatible with an older &RCL; version (the script
is named <command>recoll-we-move-files.py</command>).</para>
<note><para>For the WebExtensions-based version to work, it is <para>The &RCL; GUI has a tool to list and edit the contents of the
necessary to set the <literal>webdownloadsdir</literal> value in the Web
configuration if it was changed from the default cache. (<menuchoice><guimenu>Tools</guimenu><guimenuitem>Webcache
<filename>$HOME/Downloads</filename> in the browser editor</guimenuitem></menuchoice>)</para>
preferences.</para></note>
<para>The visited WEB pages indexing feature can be enabled on the <para>You can find more details on Web indexing, its usage and configuration
&RCL; side from the GUI <guilabel>Index configuration</guilabel> in a <ulink url="&FAQS;IndexWebHistory">Recoll 'Howto' entry</ulink>.</para>
panel, or by editing the configuration file (set
<varname>processwebqueue</varname> to 1).</para>
<para>A current pointer to the extension can be found, along with <note><title>The cache is not an archive</title><para>A copy of
up-to-date instructions, on the the indexed Web pages is retained by Recoll in a local cache
<ulink url="&FAQS;IndexWebHistory">Recoll wiki</ulink>.</para> (from which data is fetched for previews, or when resetting the
index). The cache has a maximum size, which can be adjusted from
<para>A copy of the indexed WEB pages is retained by Recoll in a the <guilabel>Index configuration</guilabel> / <guilabel>Web
local cache (from which previews can be fetched). The cache size can history</guilabel> panel (<literal>webcachemaxmbs</literal>
be adjusted from the <guilabel>Index configuration</guilabel> / parameter in <filename>recoll.conf</filename>). Once the maximum
<guilabel>Web history</guilabel> panel. Once the maximum size size is reached, old pages are erased to make room for new ones.
is reached, old pages are purged - both from the cache and the index The pages which you want to keep indefinitely need to be
- to make room for new ones, so you need to explicitly archive in explicitly archived elsewhere. Using a very high value for
some other place the pages that you want to keep the cache size can avoid data erasure, but see the above 'Howto'
indefinitely.</para> page for more details and gotchas.</para></note>
</sect1> </sect1>
@ -2475,7 +2470,7 @@ fs.inotify.max_user_watches=32768
file inside the configuration directory. The sample file file inside the configuration directory. The sample file
distributed with &RCL; (which you should be able to find under distributed with &RCL; (which you should be able to find under
<filename>/usr/share/recoll/examples/fragbuts.xml</filename>), <filename>/usr/share/recoll/examples/fragbuts.xml</filename>),
contains an example which filters the results from the WEB contains an example which filters the results from the Web
history.</para> history.</para>
@ -2485,7 +2480,7 @@ fs.inotify.max_user_watches=32768
<fragbuts version="1.0"> <fragbuts version="1.0">
<radiobuttons> <radiobuttons>
<!-- Actually useful: toggle WEB queue results inclusion --> <!-- Actually useful: toggle Web queue results inclusion -->
<fragbut> <fragbut>
<label>Include Web Results</label> <label>Include Web Results</label>
<frag></frag> <frag></frag>
@ -6115,11 +6110,11 @@ hasextract = False
<application>Debian/Ubuntu apt</application>, <application>Debian/Ubuntu apt</application>,
<application>FreeBSD</application> ports, etc.), or from some type <application>FreeBSD</application> ports, etc.), or from some type
of "backports" repository providing versions newer than the standard of "backports" repository providing versions newer than the standard
ones, or found on the &RCL; WEB site in some ones, or found on the &RCL; Web site in some
cases. The most up-to-date information about Recoll packages can cases. The most up-to-date information about Recoll packages can
usually be found on the usually be found on the
<ulink url="http://www.recoll.org/pages/download.html"> <ulink url="http://www.recoll.org/pages/download.html">
<application>Recoll</application> WEB site downloads <application>Recoll</application> Web site downloads
page</ulink></para> page</ulink></para>
<para>The &WIN; version of Recoll comes in a self-contained setup <para>The &WIN; version of Recoll comes in a self-contained setup

View File

@ -331,7 +331,7 @@ bool ConfIndexW::setupWebHistoryPanel(int idx)
{ {
ConfParamW *bparam = m_w->addParam( ConfParamW *bparam = m_w->addParam(
idx, ConfTabsW::CFPT_BOOL, "processwebqueue", idx, ConfTabsW::CFPT_BOOL, "processwebqueue",
tr("Process the WEB history queue"), tr("Process the Web history queue"),
tr("Enables indexing Firefox visited pages.<br>" tr("Enables indexing Firefox visited pages.<br>"
"(you need also install the Firefox Recoll plugin)")); "(you need also install the Firefox Recoll plugin)"));
ConfParamW *cparam = m_w->addParam( ConfParamW *cparam = m_w->addParam(
@ -353,6 +353,8 @@ bool ConfIndexW::setupWebHistoryPanel(int idx)
"file (only waste space at the end)." "file (only waste space at the end)."
), -1, 1000*1000); // Max 1TB... ), -1, 1000*1000); // Max 1TB...
m_w->enableLink(bparam, cparam); m_w->enableLink(bparam, cparam);
m_w->addBlurb(idx, tr("Note: old pages will be erased to make space for "
"new ones when the maximum size is reached"));
m_w->endOfList(idx); m_w->endOfList(idx);
return true; return true;
} }