diff --git a/src/common/webstore.cpp b/src/common/webstore.cpp
index 5a070f6d..9361da29 100644
--- a/src/common/webstore.cpp
+++ b/src/common/webstore.cpp
@@ -37,15 +37,15 @@ WebStore::WebStore(RclConfig *cnf)
     int maxmbs = 40;
     cnf->getConfParam("webcachemaxmbs", &maxmbs);
     if ((m_cache = new CirCache(ccdir)) == 0) {
-	LOGERR("WebStore: cant create CirCache object\n" );
-	return;
+        LOGERR("WebStore: can't create CirCache object\n");
+        return;
     }
     if (!m_cache->create(int64_t(maxmbs)*1000*1024, CirCache::CC_CRUNIQUE)) {
-	LOGERR("WebStore: cache file creation failed: " <<
+        LOGERR("WebStore: cache file creation failed: " <<
               m_cache->getReason() << "\n");
-	delete m_cache;
-	m_cache = 0;
-	return;
+        delete m_cache;
+        m_cache = 0;
+        return;
     }
 }
@@ -57,17 +57,17 @@ WebStore::~WebStore()
 // Read document from cache. Return the metadata as an Rcl::Doc
 // @param htt Web Hit Type
 bool WebStore::getFromCache(const string& udi, Rcl::Doc &dotdoc,
-			    string& data, string *htt)
+                            string& data, string *htt)
 {
     string dict;
 
     if (m_cache == 0) {
-	LOGERR("WebStore::getFromCache: cache is null\n");
-	return false;
+        LOGERR("WebStore::getFromCache: cache is null\n");
+        return false;
     }
     if (!m_cache->get(udi, dict, &data)) {
-	LOGDEB("WebStore::getFromCache: get failed\n");
-	return false;
+        LOGDEB("WebStore::getFromCache: get failed\n");
+        return false;
     }
 
     ConfSimple cf(dict, 1);
@@ -89,4 +89,3 @@ bool WebStore::getFromCache(const string& udi, Rcl::Doc &dotdoc,
     dotdoc.meta[Rcl::Doc::keyudi] = udi;
     return true;
 }
-
diff --git a/src/common/webstore.h b/src/common/webstore.h
index a720ba50..70386035 100644
--- a/src/common/webstore.h
+++ b/src/common/webstore.h
@@ -21,8 +21,8 @@
 class RclConfig;
 
 namespace Rcl {
-    class Db;
-    class Doc;
+class Db;
+class Doc;
 }
 
 class CirCache;
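For context on the class touched above (not part of the patch): WebStore wraps a CirCache file which holds the captured pages together with their metadata. Here is a minimal sketch of calling the getFromCache() method visible in the second hunk; the include names and the surrounding setup are assumptions, only the WebStore signatures come from this patch:

    // Sketch: fetch one cached Web page by its unique document identifier (udi).
    #include <string>
    #include "webstore.h"   // assumed header locations (src/common/)
    #include "rcldoc.h"

    static bool dumpCachedPage(RclConfig *config, const std::string& udi)
    {
        WebStore store(config);   // opens the cache, as in WebStore::WebStore()
        Rcl::Doc doc;             // receives the stored metadata
        std::string data;         // receives the raw page content
        std::string hittype;      // receives the "Web Hit Type" field
        if (!store.getFromCache(udi, doc, data, &hittype)) {
            return false;         // cache unusable, or udi not present
        }
        // On success, doc.meta[Rcl::Doc::keyudi] holds the udi back.
        return true;
    }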
diff --git a/src/doc/user/usermanual.html b/src/doc/user/usermanual.html
index 16885cc8..85405efa 100644
--- a/src/doc/user/usermanual.html
+++ b/src/doc/user/usermanual.html
@@ -140,9 +140,9 @@ alink="#0000FF">
     Removable volumes
     2.5. Unix-like systems: indexing
-    visited WEB pages
+    visited Web pages
     2.6. Unix-like systems: using extended
@@ -423,7 +423,7 @@ alink="#0000FF">
@@ -720,7 +720,7 @@ alink="#0000FF">
   • A
-    WEB interface.
+    Web interface.
   •
@@ -1949,10 +1949,10 @@ recollindex -c "$confdir"
     2.5. Unix-like systems: indexing
-    visited WEB pages
+    visited Web pages
@@ -1964,57 +1964,48 @@ recollindex -c "$confdir"
     Beagle
     indexer, then adapted to Recoll and the Firefox
-    XUL API. A new version of the addon
-    has been written to work with the WebExtensions API, which is the only
-    one supported after Firefox version 57.
+    XUL API. The current version of the
+    extension, located in the Mozilla add-ons repository, uses the
+    WebExtensions API, and
+    works with current Firefox
+    versions.

-    The extension works by copying visited WEB pages to an
-    indexing queue directory, which Recoll then processes, indexing the
-    data, storing it into a local cache, then removing the file
-    from the queue.
+    The extension works by copying visited Web pages to an
+    indexing queue directory, which
+    Recoll then processes, storing the
+    data into a local cache, then indexing it, then removing
+    the file from the queue.

-    Because the WebExtensions API introduces more
-    constraints to what extensions can do, the new version
-    works with one more step: the files are first created in
-    the browser default downloads location (typically
-    $HOME/Downloads), then moved
-    by a script in the old queue location. The script is
-    automatically executed by the Recoll indexer versions 1.23.5 and
-    newer. It could conceivably be executed independently to
-    make the new browser extension compatible with an older
-    Recoll version (the script
-    is named recoll-we-move-files.py).
-
-    Note
-
-    For the WebExtensions-based version to work, it is
-    necessary to set the webdownloadsdir value in the
-    configuration if it was changed from the default
-    $HOME/Downloads in the
-    browser preferences.

-    The visited WEB pages indexing feature can be enabled on
+    The visited Web pages indexing feature can be enabled on
     the Recoll side from the GUI Index configuration
     panel, or by editing the configuration file (set
     processwebqueue to 1).
-    A current pointer to the extension can be found, along
-    with up-to-date instructions, on the
-    Recoll wiki.
+    The Recoll GUI has a
+    tool to list and edit the contents of the Web cache.
+    (Tools → Webcache editor)

+    You can find more details on Web indexing, its usage and
+    configuration in a
+    Recoll 'Howto' entry.

-    A copy of the indexed WEB pages is retained by Recoll in
-    a local cache (from which previews can be fetched). The
-    cache size can be adjusted from the
-    Index configuration / Web history panel. Once the maximum size
-    is reached, old pages are purged - both from the cache and
-    the index - to make room for new ones, so you need to
-    explicitly archive in some other place the pages that you
-    want to keep indefinitely.

+    The cache is not an archive
+
+    A copy of the indexed Web pages is retained by Recoll
+    in a local cache (from which data is fetched for
+    previews, or when resetting the index). The cache has a
+    maximum size, which can be adjusted from the
+    Index configuration / Web history panel (webcachemaxmbs
+    parameter in recoll.conf). Once the maximum size is
+    reached, old pages are erased to make room for new ones.
+    The pages which you want to keep indefinitely need to be
+    explicitly archived elsewhere. Using a very high value
+    for the cache size can avoid data erasure, but see the
+    above 'Howto' page for more details and gotchas.
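The size cap discussed in the note above maps directly onto the CirCache creation call in the webstore.cpp hunk at the top of this patch. A small illustration of the arithmetic; the expressions are copied from that hunk:

    // How webcachemaxmbs becomes a byte limit (see WebStore::WebStore() above).
    int maxmbs = 40;                                // default when unset
    int64_t limit = int64_t(maxmbs) * 1000 * 1024;  // = 40,960,000 bytes, ~39 MiB
    // Once the circular cache file reaches this limit, the oldest pages are
    // overwritten: the "old pages are erased" behavior described above.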
@@ -3473,14 +3464,14 @@ fs.inotify.max_user_watches=32768
     be able to find under
     /usr/share/recoll/examples/fragbuts.xml),
     contains an example which filters the results from the
-    WEB history.
+    Web history.

     Here follows an example:

     <?xml version="1.0" encoding="UTF-8"?>
     <fragbuts version="1.0">
     
       <radiobuttons>
    -    <!-- Actually useful: toggle WEB queue results inclusion -->
    +    <!-- Actually useful: toggle Web queue results inclusion -->
         <fragbut>
           <label>Include Web Results</label>
           <frag></frag>
    @@ -3996,7 +3987,7 @@ fs.inotify.max_user_watches=32768
               given context (e.g. within a preview window, within the
               result table).

-
+

    Table 3.1. Keyboard shortcuts

@@ -7940,11 +7931,11 @@ hasextract = False
     FreeBSD ports, etc.), or from some type of
     "backports" repository providing versions newer than the
     standard ones, or found on the
-    Recoll WEB site in some cases. The
+    Recoll Web site in some cases. The
     most up-to-date information about
     Recoll packages can usually be found on the
-    Recoll WEB site
+    Recoll Web site
     downloads page

     The Windows version of Recoll comes in a self-contained setup
     file, there is
diff --git a/src/doc/user/usermanual.xml b/src/doc/user/usermanual.xml
index 385ead62..9526a320 100644
--- a/src/doc/user/usermanual.xml
+++ b/src/doc/user/usermanual.xml
@@ -282,7 +282,7 @@
     Search Provider.
     A
-    WEB interface.
+    Web interface.
@@ -1257,56 +1257,51 @@ recollindex -c "$confdir"
-
-    &LIN;: indexing visited WEB pages
+
+    &LIN;: indexing visited Web pages
 
     With the help of a Firefox
-    extension, &RCL; can index the Internet pages that you visit. The
-    extension has a long history: it was initially designed for the
-    Beagle indexer, then adapted to &RCL; and
-    the Firefox XUL
-    API. A new version of the addon has been written to work with the
-    WebExtensions API, which is the only one
-    supported after Firefox version 57.
+    extension, &RCL; can index the Internet pages that you visit. The
+    extension has a long history: it was initially designed for
+    the Beagle indexer, then adapted to
+    &RCL; and
+    the Firefox XUL
+    API. The current version of the extension, located in
+    the Mozilla
+    add-ons repository, uses
+    the WebExtensions API, and works with
+    current Firefox versions.

-    The extension works by copying visited WEB pages to an indexing
-    queue directory, which &RCL; then processes, indexing the data,
-    storing it into a local cache, then removing the file from the
-    queue.
-
-    Because the WebExtensions API introduces more constraints to
-    what extensions can do, the new version works with one
-    more step: the files are first created in the browser default
-    downloads location (typically $HOME/Downloads ),
-    then moved by a script in the old queue location. The script is
-    automatically executed by the &RCL; indexer versions 1.23.5 and
-    newer. It could conceivably be executed independently to make the new
-    browser extension compatible with an older &RCL; version (the script
-    is named recoll-we-move-files.py).
-
-    For the WebExtensions-based version to work, it is
-    necessary to set the webdownloadsdir value in the
-    configuration if it was changed from the default
-    $HOME/Downloads in the browser
-    preferences.
+    The extension works by copying visited Web pages to an indexing
+    queue directory, which &RCL; then processes, storing the data into a
+    local cache, then indexing it, then removing the file from the
+    queue.

-    The visited WEB pages indexing feature can be enabled on the
-    &RCL; side from the GUI Index configuration
-    panel, or by editing the configuration file (set
-    processwebqueue to 1).
+    The visited Web pages indexing feature can be enabled on the
+    &RCL; side from the GUI Index configuration
+    panel, or by editing the configuration file (set
+    processwebqueue to 1).

-    A current pointer to the extension can be found, along with
-    up-to-date instructions, on the
-    Recoll wiki.
+    The &RCL; GUI has a tool to list and edit the contents of the
+    Web
+    cache. (Tools → Webcache
+    editor)

-    A copy of the indexed WEB pages is retained by Recoll in a
-    local cache (from which previews can be fetched). The cache size can
-    be adjusted from the Index configuration /
-    Web history panel. Once the maximum size
-    is reached, old pages are purged - both from the cache and the index -
-    to make room for new ones, so you need to explicitly archive in
-    some other place the pages that you want to keep
-    indefinitely.
+    You can find more details on Web indexing, its usage and configuration
+    in a Recoll 'Howto' entry.
+
+    The cache is not an archive
+
+    A copy of
+    the indexed Web pages is retained by Recoll in a local cache
+    (from which data is fetched for previews, or when resetting the
+    index). The cache has a maximum size, which can be adjusted from
+    the Index configuration / Web
+    history panel (webcachemaxmbs
+    parameter in recoll.conf). Once the maximum
+    size is reached, old pages are erased to make room for new ones.
+    The pages which you want to keep indefinitely need to be
+    explicitly archived elsewhere. Using a very high value for
+    the cache size can avoid data erasure, but see the above 'Howto'
+    page for more details and gotchas.
@@ -2475,7 +2470,7 @@ fs.inotify.max_user_watches=32768
     file inside the configuration directory. The sample file
     distributed with &RCL; (which you should be able to find under
     /usr/share/recoll/examples/fragbuts.xml),
-    contains an example which filters the results from the WEB
+    contains an example which filters the results from the Web
     history.
@@ -2485,7 +2480,7 @@ fs.inotify.max_user_watches=32768
-
+
@@ -6115,11 +6110,11 @@ hasextract = False
     Debian/Ubuntu apt,
     FreeBSD ports, etc.), or from some type of
     "backports" repository providing versions newer than the standard
-    ones, or found on the &RCL; WEB site in some
+    ones, or found on the &RCL; Web site in some
     cases. The most up-to-date information about Recoll packages can
     usually be found on the
-    Recoll WEB site downloads
+    Recoll Web site downloads
     page

     The &WIN; version of Recoll comes in a self-contained setup
diff --git a/src/qtgui/confgui/confguiindex.cpp b/src/qtgui/confgui/confguiindex.cpp
index 4f9e7e57..a915b9a4 100644
--- a/src/qtgui/confgui/confguiindex.cpp
+++ b/src/qtgui/confgui/confguiindex.cpp
@@ -331,7 +331,7 @@ bool ConfIndexW::setupWebHistoryPanel(int idx)
 {
     ConfParamW *bparam = m_w->addParam(
         idx, ConfTabsW::CFPT_BOOL, "processwebqueue",
-        tr("Process the WEB history queue"),
+        tr("Process the Web history queue"),
         tr("Enables indexing Firefox visited pages.
    " "(you need also install the Firefox Recoll plugin)")); ConfParamW *cparam = m_w->addParam( @@ -353,6 +353,8 @@ bool ConfIndexW::setupWebHistoryPanel(int idx) "file (only waste space at the end)." ), -1, 1000*1000); // Max 1TB... m_w->enableLink(bparam, cparam); + m_w->addBlurb(idx, tr("Note: old pages will be erased to make space for " + "new ones when the maximum size is reached")); m_w->endOfList(idx); return true; }