diff --git a/src/common/webstore.cpp b/src/common/webstore.cpp
index 5a070f6d..9361da29 100644
--- a/src/common/webstore.cpp
+++ b/src/common/webstore.cpp
@@ -37,15 +37,15 @@ WebStore::WebStore(RclConfig *cnf)
     int maxmbs = 40;
     cnf->getConfParam("webcachemaxmbs", &maxmbs);
     if ((m_cache = new CirCache(ccdir)) == 0) {
-	LOGERR("WebStore: cant create CirCache object\n" );
-	return;
+        LOGERR("WebStore: cant create CirCache object\n" );
+        return;
     }
     if (!m_cache->create(int64_t(maxmbs)*1000*1024, CirCache::CC_CRUNIQUE)) {
-	LOGERR("WebStore: cache file creation failed: " <<
+        LOGERR("WebStore: cache file creation failed: " <<
               m_cache->getReason() << "\n");
-	delete m_cache;
-	m_cache = 0;
-	return;
+        delete m_cache;
+        m_cache = 0;
+        return;
     }
 }
@@ -57,17 +57,17 @@ WebStore::~WebStore()
 // Read document from cache. Return the metadata as an Rcl::Doc
 // @param htt Web Hit Type
 bool WebStore::getFromCache(const string& udi, Rcl::Doc &dotdoc,
-                           string& data, string *htt)
+                            string& data, string *htt)
 {
     string dict;
     if (m_cache == 0) {
-	LOGERR("WebStore::getFromCache: cache is null\n");
-	return false;
+        LOGERR("WebStore::getFromCache: cache is null\n");
+        return false;
     }
     if (!m_cache->get(udi, dict, &data)) {
-	LOGDEB("WebStore::getFromCache: get failed\n");
-	return false;
+        LOGDEB("WebStore::getFromCache: get failed\n");
+        return false;
     }
 
     ConfSimple cf(dict, 1);
@@ -89,4 +89,3 @@ bool WebStore::getFromCache(const string& udi, Rcl::Doc &dotdoc,
     dotdoc.meta[Rcl::Doc::keyudi] = udi;
     return true;
 }
-
diff --git a/src/common/webstore.h b/src/common/webstore.h
index a720ba50..70386035 100644
--- a/src/common/webstore.h
+++ b/src/common/webstore.h
@@ -21,8 +21,8 @@ class RclConfig;
 
 namespace Rcl {
-    class Db;
-    class Doc;
+class Db;
+class Doc;
 }
 
 class CirCache;
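A minimal caller sketch for the interface patched above. The WebStore constructor and the getFromCache() signature are taken from webstore.h as shown in this diff; the include paths and the way the RclConfig pointer is obtained are assumptions for illustration, not Recoll's canonical setup code.

    // Sketch only: exercises WebStore::getFromCache() as declared in
    // webstore.h above. Header names and RclConfig handling are assumed.
    #include <iostream>
    #include <string>
    #include "rclconfig.h"   // assumed location of the RclConfig declaration
    #include "webstore.h"

    bool dumpCachedPage(RclConfig *config, const std::string& udi) {
        WebStore store(config);   // opens the CirCache sized by webcachemaxmbs
        Rcl::Doc doc;             // receives the stored metadata
        std::string data;         // receives the raw page bytes
        std::string htt;          // receives the "Web Hit Type"
        if (!store.getFromCache(udi, doc, data, &htt)) {
            return false;         // not cached, or the cache failed to open
        }
        std::cout << "hit type: " << htt << ", size: " << data.size() << "\n";
        return true;
    }

Note that getFromCache() already handles the m_cache == 0 case, so a caller does not need to test separately whether the constructor managed to create the CirCache.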
diff --git a/src/doc/user/usermanual.html b/src/doc/user/usermanual.html
index 16885cc8..85405efa 100644
--- a/src/doc/user/usermanual.html
+++ b/src/doc/user/usermanual.html
@@ -140,9 +140,9 @@ alink="#0000FF">
 Removable volumes
-Indexing visited WEB pages
+Indexing visited Web pages
 List of Tables
 With the help of a Firefox
 extension, Recoll can index the
 Internet pages that you visit. The extension has a long
 history: it was initially designed for the
 Beagle indexer, then adapted to
 Recoll and the Firefox
-XUL API. A new version of the addon has
-been written to work with the
-WebExtensions API, which is the
-only one supported after Firefox version 57.
+XUL API. The current version of the
+extension is located in the
+Mozilla add-ons repository, uses the
+WebExtensions API, and
+works with current Firefox
+versions.
-The extension works by copying visited WEB pages to an
+The extension works by copying visited Web pages to an
 indexing queue directory, which
-Recoll then processes, indexing the
-data, storing it into a local cache, then removing the file
-from the queue.
+Recoll then processes, storing the
+data into a local cache, then indexing it, then removing
+the file from the queue.
-Because the WebExtensions API introduces more
-constraints to what extensions can do, the new version
-works with one more step: the files are first created in
-the browser default downloads location (typically
-$HOME/Downloads), then moved
-by a script in the old queue location. The script is
-automatically executed by the Recoll indexer versions 1.23.5 and
-newer. It could conceivably be executed independently to
-make the new browser extension compatible with an older
-Recoll version (the script
-is named recoll-we-move-files.py).
-For the WebExtensions-based version to work, it is
-necessary to set the webdownloadsdir value in the
-configuration if it was changed from the default
-$HOME/Downloads in the
-browser preferences.
-The visited WEB pages indexing feature can be enabled on
+The visited Web pages indexing feature can be enabled on
 the Recoll side from the
 GUI Index configuration
 panel, or by editing the configuration file (set
 processwebqueue to 1).
-A current pointer to the extension can be found, along
-with up-to-date instructions, on the
-Recoll wiki.
+The Recoll GUI has a
+tool to list and edit the contents of the Web cache.
+(Tools → Webcache editor)
-A copy of the indexed WEB pages is retained by Recoll in
-a local cache (from which previews can be fetched). The
-cache size can be adjusted from the
-Index configuration / Web history panel. Once the maximum size
-is reached, old pages are purged - both from the cache and
-the index - to make room for new ones, so you need to
-explicitly archive in some other place the pages that you
-want to keep indefinitely.
+You can find more details on Web indexing, its usage and
+configuration in a
+Recoll 'Howto' entry.
+A copy of the indexed Web pages is retained by Recoll
+in a local cache (from which data is fetched for
+previews, or when resetting the index). The cache has a
+maximum size, which can be adjusted from the
+Index configuration / Web history panel
+(webcachemaxmbs parameter in
+recoll.conf). Once the maximum size is
+reached, old pages are erased to make room for new ones.
+The pages which you want to keep indefinitely need to be
+explicitly archived elsewhere. Using a very high value
+for the cache size can avoid data erasure, but see the
+above 'Howto' page for more details and gotchas.
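To make the settings named in this section concrete, a recoll.conf fragment enabling the feature might read as follows. The parameter names processwebqueue and webcachemaxmbs are the ones used throughout this changeset; the values are illustrative, 40 MB being the compiled-in default visible in webstore.cpp above. The file lives in the index configuration directory (typically ~/.recoll).

    # Illustrative recoll.conf fragment (values are examples)
    # Process the Firefox visited-pages queue:
    processwebqueue = 1
    # Maximum size of the Web page cache, in megabytes (default 40):
    webcachemaxmbs = 40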
/usr/share/recoll/examples/fragbuts.xml),
contains an example which filters the results from the
- WEB history.
+ Web history.
Here follows an example:
<?xml version="1.0" encoding="UTF-8"?>
<fragbuts version="1.0">
  <radiobuttons>
-   <!-- Actually useful: toggle WEB queue results inclusion -->
+   <!-- Actually useful: toggle Web queue results inclusion -->
    <fragbut>
      <label>Include Web Results</label>
      <frag></frag>
@@ -3996,7 +3987,7 @@ fs.inotify.max_user_watches=32768
given context (e.g. within a preview window, within the
result table).
-
+
Table 3.1. Keyboard
shortcuts
@@ -7940,11 +7931,11 @@ hasextract = False
"application">FreeBSD ports, etc.), or from some
type of "backports" repository providing versions newer
than the standard ones, or found on the Recoll WEB site in some cases. The
+ "application">Recoll Web site in some cases. The
most up-to-date information about Recoll packages can
usually be found on the Recoll WEB site
+ "_top">Recoll Web site
downloads page
The Windows version of
Recoll comes in a self-contained setup file, there is
diff --git a/src/doc/user/usermanual.xml b/src/doc/user/usermanual.xml
index 385ead62..9526a320 100644
--- a/src/doc/user/usermanual.xml
+++ b/src/doc/user/usermanual.xml
@@ -282,7 +282,7 @@
Search Provider.
A
- WEB interface.
+ Web interface.
@@ -1257,56 +1257,51 @@ recollindex -c "$confdir"
-
- &LIN;: indexing visited WEB pages
+
+ &LIN;: indexing visited Web pages
With the help of a Firefox
- extension, &RCL; can index the Internet pages that you visit. The
- extension has a long history: it was initially designed for the
- Beagle indexer, then adapted to &RCL; and
- the Firefox XUL
- API. A new version of the addon has been written to work with the
- WebExtensions API, which is the only one
- supported after Firefox version 57.
+ extension, &RCL; can index the Internet pages that you visit. The
+ extension has a long history: it was initially designed for
+ the Beagle indexer, then adapted to
+ &RCL; and
+ the Firefox XUL
+ API. The current version of the extension is located in
+ the Mozilla
+ add-ons repository, uses
+ the WebExtensions API, and works with
+ current Firefox versions.
- The extension works by copying visited WEB pages to an indexing
- queue directory, which &RCL; then processes, indexing the data,
- storing it into a local cache, then removing the file from the
- queue.
-
- Because the WebExtensions API introduces more constraints to
- what extensions can do, the new version works with one
- more step: the files are first created in the browser default
- downloads location (typically $HOME/Downloads ),
- then moved by a script in the old queue location. The script is
- automatically executed by the &RCL; indexer versions 1.23.5 and
- newer. It could conceivably be executed independently to make the new
- browser extension compatible with an older &RCL; version (the script
- is named recoll-we-move-files.py ).
-
- For the WebExtensions-based version to work, it is
- necessary to set the webdownloadsdir value in the
- configuration if it was changed from the default
- $HOME/Downloads in the browser
- preferences.
+ The extension works by copying visited Web pages to an indexing
+ queue directory, which &RCL; then processes, storing the data into a
+ local cache, then indexing it, then removing the file from the
+ queue.
- The visited WEB pages indexing feature can be enabled on the
- &RCL; side from the GUI Index configuration
- panel, or by editing the configuration file (set
- processwebqueue to 1).
+ The visited Web pages indexing feature can be enabled on the
+ &RCL; side from the GUI Index configuration
+ panel, or by editing the configuration file (set
+ processwebqueue to 1).
- A current pointer to the extension can be found, along with
- up-to-date instructions, on the
- Recoll wiki .
+ The &RCL; GUI has a tool to list and edit the contents of the
+ Web
+ cache. (Tools → Webcache
+ editor )
- A copy of the indexed WEB pages is retained by Recoll in a
- local cache (from which previews can be fetched). The cache size can
- be adjusted from the Index configuration /
- Web history panel. Once the maximum size
- is reached, old pages are purged - both from the cache and the index
- - to make room for new ones, so you need to explicitly archive in
- some other place the pages that you want to keep
- indefinitely.
+ You can find more details on Web indexing, its usage and configuration
+ in a Recoll 'Howto' entry .
+
+ The cache is not an archive. A copy of
+ the indexed Web pages is retained by Recoll in a local cache
+ (from which data is fetched for previews, or when resetting the
+ index). The cache has a maximum size, which can be adjusted from
+ the Index configuration / Web
+ history panel (webcachemaxmbs
+ parameter in recoll.conf ). Once the maximum
+ size is reached, old pages are erased to make room for new ones.
+ The pages which you want to keep indefinitely need to be
+ explicitly archived elsewhere. Using a very high value for
+ the cache size can avoid data erasure, but see the above 'Howto'
+ page for more details and gotchas.
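One arithmetic detail ties this paragraph to the code change at the top of this diff: webstore.cpp converts webcachemaxmbs into a byte limit as int64_t(maxmbs)*1000*1024, so the default of 40 yields 40,960,000 bytes. A restatement in isolation (the function name is ours, not Recoll's):

    #include <cstdint>

    // Mirrors the expression passed to CirCache::create() in
    // WebStore::WebStore() above: one "MB" here is 1000 * 1024 bytes.
    int64_t webcacheByteLimit(int maxmbs) {
        return int64_t(maxmbs) * 1000 * 1024;   // 40 -> 40960000
    }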
@@ -2475,7 +2470,7 @@ fs.inotify.max_user_watches=32768
file inside the configuration directory. The sample file
distributed with &RCL; (which you should be able to find under
/usr/share/recoll/examples/fragbuts.xml ),
- contains an example which filters the results from the WEB
+ contains an example which filters the results from the Web
history.
@@ -2485,7 +2480,7 @@ fs.inotify.max_user_watches=32768
-
+
@@ -6115,11 +6110,11 @@ hasextract = False
Debian/Ubuntu apt ,
FreeBSD ports, etc.), or from some type
of "backports" repository providing versions newer than the standard
- ones, or found on the &RCL; WEB site in some
+ ones, or found on the &RCL; Web site in some
cases. The most up-to-date information about Recoll packages can
usually be found on the
- Recoll WEB site downloads
+ Recoll Web site downloads
page
The &WIN; version of Recoll comes in a self-contained setup
diff --git a/src/qtgui/confgui/confguiindex.cpp b/src/qtgui/confgui/confguiindex.cpp
index 4f9e7e57..a915b9a4 100644
--- a/src/qtgui/confgui/confguiindex.cpp
+++ b/src/qtgui/confgui/confguiindex.cpp
@@ -331,7 +331,7 @@ bool ConfIndexW::setupWebHistoryPanel(int idx)
 {
     ConfParamW *bparam = m_w->addParam(
         idx, ConfTabsW::CFPT_BOOL, "processwebqueue",
-        tr("Process the WEB history queue"),
+        tr("Process the Web history queue"),
         tr("Enables indexing Firefox visited pages.\n"
            "(you need also install the Firefox Recoll plugin)"));
     ConfParamW *cparam = m_w->addParam(
@@ -353,6 +353,8 @@ bool ConfIndexW::setupWebHistoryPanel(int idx)
"file (only waste space at the end)."
), -1, 1000*1000); // Max 1TB...
m_w->enableLink(bparam, cparam);
+ m_w->addBlurb(idx, tr("Note: old pages will be erased to make space for "
+ "new ones when the maximum size is reached"));
m_w->endOfList(idx);
return true;
}
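For context on what setupWebHistoryPanel() wires together: judging by the names and the panel layout, the boolean processwebqueue parameter gates the webcachemaxmbs field via enableLink(bparam, cparam). A plain-Qt sketch of that gating pattern, independent of Recoll's own ConfTabsW API (the widget names and layout here are ours):

    #include <QApplication>
    #include <QCheckBox>
    #include <QSpinBox>
    #include <QVBoxLayout>
    #include <QWidget>

    int main(int argc, char **argv) {
        QApplication app(argc, argv);
        QWidget panel;
        auto *layout = new QVBoxLayout(&panel);
        auto *process = new QCheckBox("Process the Web history queue");
        auto *maxmbs = new QSpinBox;
        maxmbs->setRange(-1, 1000 * 1000);  // cap ~1 TB, as in the hunk above
        maxmbs->setValue(40);               // default from webstore.cpp
        maxmbs->setEnabled(process->isChecked());
        // The gating that enableLink() presumably provides in Recoll's code:
        QObject::connect(process, &QCheckBox::toggled, maxmbs, &QWidget::setEnabled);
        layout->addWidget(process);
        layout->addWidget(maxmbs);
        panel.show();
        return app.exec();
    }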