Add and interface a script to move the files generated by the WebExtensions new browser extension into the web input queue
This commit is contained in:
parent
c123b17f19
commit
5afe1aa631
@ -636,6 +636,7 @@ filters/rclxml.py \
|
|||||||
filters/rclxmp.py \
|
filters/rclxmp.py \
|
||||||
filters/rclxslt.py \
|
filters/rclxslt.py \
|
||||||
filters/rclzip \
|
filters/rclzip \
|
||||||
|
filters/recoll-we-move-files.py \
|
||||||
filters/ppt-dump.py \
|
filters/ppt-dump.py \
|
||||||
filters/xls-dump.py \
|
filters/xls-dump.py \
|
||||||
filters/xlsxmltocsv.py \
|
filters/xlsxmltocsv.py \
|
||||||
@ -645,6 +646,7 @@ python/recoll/recoll/rclconfig.py
|
|||||||
install-data-hook:
|
install-data-hook:
|
||||||
(cd $(DESTDIR)/$(filterdir); \
|
(cd $(DESTDIR)/$(filterdir); \
|
||||||
chmod a+x rcl* ppt-dump.py xls-dump.py xlsxmltocsv.py hotrecoll.py; \
|
chmod a+x rcl* ppt-dump.py xls-dump.py xlsxmltocsv.py hotrecoll.py; \
|
||||||
|
chmod a+x recoll-we-move-files.py; \
|
||||||
chmod 0644 msodump.zip rclexecm.py rcllatinstops.zip rclconfig.py)
|
chmod 0644 msodump.zip rclexecm.py rcllatinstops.zip rclconfig.py)
|
||||||
|
|
||||||
if MAKEUSERDOC
|
if MAKEUSERDOC
|
||||||
|
|||||||
@ -1 +1 @@
|
|||||||
1.23.4
|
1.23.5
|
||||||
|
|||||||
@ -39,7 +39,7 @@ index.html: usermanual.xml
|
|||||||
usermanual.pdf: usermanual.xml
|
usermanual.pdf: usermanual.xml
|
||||||
dblatex $<
|
dblatex $<
|
||||||
|
|
||||||
UTILBUILDS=/home/dockes/projets/builds/medocutils/
|
UTILBUILDS=/home/dockes/tmp/builds/medocutils/
|
||||||
recoll-conf-xml:
|
recoll-conf-xml:
|
||||||
$(UTILBUILDS)/confxml --docbook \
|
$(UTILBUILDS)/confxml --docbook \
|
||||||
--idprefix=RCL.INSTALL.CONFIG.RECOLLCONF \
|
--idprefix=RCL.INSTALL.CONFIG.RECOLLCONF \
|
||||||
|
|||||||
@ -83,7 +83,7 @@ be ignored inside zip archives. This is used directly by
|
|||||||
the zip handler, and has a function similar to skippedNames, but works
|
the zip handler, and has a function similar to skippedNames, but works
|
||||||
independently. Can be redefined for subdirectories. Supported by recoll
|
independently. Can be redefined for subdirectories. Supported by recoll
|
||||||
1.20 and newer. See
|
1.20 and newer. See
|
||||||
https://www.lesbonscomptes.com/recoll/faqsandhowtos/Filtering%20out%20Zip%20archive%20members
|
https://www.lesbonscomptes.com/recoll/faqsandhowtos/FilteringOutZipArchiveMembers.html
|
||||||
</para></listitem></varlistentry>
|
</para></listitem></varlistentry>
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.FOLLOWLINKS">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.FOLLOWLINKS">
|
||||||
<term><varname>followLinks</varname></term>
|
<term><varname>followLinks</varname></term>
|
||||||
@ -362,9 +362,17 @@ Default: 40 MB.
|
|||||||
Reducing the size will not physically truncate the file.</para></listitem></varlistentry>
|
Reducing the size will not physically truncate the file.</para></listitem></varlistentry>
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.WEBQUEUEDIR">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.WEBQUEUEDIR">
|
||||||
<term><varname>webqueuedir</varname></term>
|
<term><varname>webqueuedir</varname></term>
|
||||||
<listitem><para>The path to the Web indexing queue. This is
|
<listitem><para>The path to the Web indexing queue. This used to be
|
||||||
hard-coded in the plugin as ~/.recollweb/ToIndex so there should be no
|
hard-coded in the old plugin as ~/.recollweb/ToIndex so there would be no
|
||||||
need or possibility to change it.</para></listitem></varlistentry>
|
need or possibility to change it, but the WebExtensions plugin now downloads
|
||||||
|
the files to the user Downloads directory, and a script moves them to
|
||||||
|
webqueuedir. The script reads this value from the config so it has become
|
||||||
|
possible to change it.</para></listitem></varlistentry>
|
||||||
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.WEBDOWNLOADSDIR">
|
||||||
|
<term><varname>webdownloadsdir</varname></term>
|
||||||
|
<listitem><para>The path to the browser downloads directory. This is
|
||||||
|
where the new browser add-on extension has to create the files. They are
|
||||||
|
then moved by a script to webqueuedir.</para></listitem></varlistentry>
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.ASPELLDICDIR">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.ASPELLDICDIR">
|
||||||
<term><varname>aspellDicDir</varname></term>
|
<term><varname>aspellDicDir</varname></term>
|
||||||
<listitem><para>Aspell dictionary storage directory location. The
|
<listitem><para>Aspell dictionary storage directory location. The
|
||||||
|
|||||||
@ -131,8 +131,8 @@ alink="#0000FF">
|
|||||||
</dl>
|
</dl>
|
||||||
</dd>
|
</dd>
|
||||||
<dt><span class="sect1">2.4. <a href=
|
<dt><span class="sect1">2.4. <a href=
|
||||||
"#RCL.INDEXING.WEBQUEUE">Indexing WEB pages you
|
"#RCL.INDEXING.WEBQUEUE">Indexing the WEB pages which
|
||||||
wisit</a></span></dt>
|
you visit.</a></span></dt>
|
||||||
<dt><span class="sect1">2.5. <a href=
|
<dt><span class="sect1">2.5. <a href=
|
||||||
"#RCL.INDEXING.EXTATTR">Extended attributes
|
"#RCL.INDEXING.EXTATTR">Extended attributes
|
||||||
data</a></span></dt>
|
data</a></span></dt>
|
||||||
@ -1505,27 +1505,56 @@ thrQSizes = -1 -1 -1
|
|||||||
<div>
|
<div>
|
||||||
<h2 class="title" style="clear: both"><a name=
|
<h2 class="title" style="clear: both"><a name=
|
||||||
"RCL.INDEXING.WEBQUEUE" id=
|
"RCL.INDEXING.WEBQUEUE" id=
|
||||||
"RCL.INDEXING.WEBQUEUE"></a>2.4.&nbsp;Indexing WEB pages
|
"RCL.INDEXING.WEBQUEUE"></a>2.4.&nbsp;Indexing the WEB
|
||||||
you wisit</h2>
|
pages which you visit.</h2>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<p>With the help of a <span class=
|
<p>With the help of a <span class=
|
||||||
"application">Firefox</span> extension, <span class=
|
"application">Firefox</span> extension, <span class=
|
||||||
"application">Recoll</span> can index the Internet pages
|
"application">Recoll</span> can index the Internet pages
|
||||||
that you visit. The extension was initially designed for
|
that you visit. The extension has a long history: it was
|
||||||
the <span class="application">Beagle</span> indexer, but it
|
initially designed for the <span class=
|
||||||
has recently be renamed and better adapted to <span class=
|
"application">Beagle</span> indexer, then adapted to
|
||||||
"application">Recoll</span>.</p>
|
<span class="application">Recoll</span> and the
|
||||||
|
<span class="application">Firefox</span> <span class=
|
||||||
|
"application">XUL</span> API. A new version of the addon
|
||||||
|
has been written to work with the <span class=
|
||||||
|
"application">WebExtensions</span> API, which is the only
|
||||||
|
one supported after <span class=
|
||||||
|
"application">Firefox</span> version 57.</p>
|
||||||
<p>The extension works by copying visited WEB pages to an
|
<p>The extension works by copying visited WEB pages to an
|
||||||
indexing queue directory, which <span class=
|
indexing queue directory, which <span class=
|
||||||
"application">Recoll</span> then processes, indexing the
|
"application">Recoll</span> then processes, indexing the
|
||||||
data, storing it into a local cache, then removing the file
|
data, storing it into a local cache, then removing the file
|
||||||
from the queue.</p>
|
from the queue.</p>
|
||||||
<p>This feature can be enabled in the GUI <span class=
|
<p>Because the WebExtensions API introduces more
|
||||||
"guilabel">Index configuration</span> panel, or by editing
|
constraints to what extensions can do, the new version
|
||||||
the configuration file (set <code class=
|
works with one more step: the files are first created in
|
||||||
"varname">processwebqueue</code> to 1).</p>
|
the browser default downloads location (typically
|
||||||
|
<code class="filename">$HOME/Downloads</code> ), then moved
|
||||||
|
by a script to the old queue location. The script is
|
||||||
|
automatically executed by the <span class=
|
||||||
|
"application">Recoll</span> indexer versions 1.23.5 and
|
||||||
|
newer. It could conceivably be executed independently to
|
||||||
|
make the new browser extension compatible with an older
|
||||||
|
<span class="application">Recoll</span> version (the script
|
||||||
|
is named <span class=
|
||||||
|
"command"><strong>recoll-we-move-files.py</strong></span>).</p>
|
||||||
|
<div class="note" style=
|
||||||
|
"margin-left: 0.5in; margin-right: 0.5in;">
|
||||||
|
<h3 class="title">Note</h3>
|
||||||
|
<p>For the WebExtensions-based version to work, it is
|
||||||
|
necessary to set the <code class=
|
||||||
|
"literal">webdownloadsdir</code> value in the
|
||||||
|
configuration if it was changed from the default
|
||||||
|
<code class="filename">$HOME/Downloads</code> in the
|
||||||
|
browser preferences.</p>
|
||||||
|
</div>
|
||||||
|
<p>The visited WEB pages indexing feature can be enabled in
|
||||||
|
the GUI <span class="guilabel">Index configuration</span>
|
||||||
|
panel, or by editing the configuration file (set
|
||||||
|
<code class="varname">processwebqueue</code> to 1).</p>
|
||||||
<p>A current pointer to the extension can be found, along
|
<p>A current pointer to the extension can be found, along
|
||||||
with up-to-date instructions, on the <a class="ulink" href=
|
with up-to-date instructions, on the <a class="ulink" href=
|
||||||
"https://www.lesbonscomptes.com/recoll/faqsandhowtos/IndexWebHistory"
|
"https://www.lesbonscomptes.com/recoll/faqsandhowtos/IndexWebHistory"
|
||||||
@ -1570,8 +1599,10 @@ thrQSizes = -1 -1 -1
|
|||||||
the file MIME type.</p>
|
the file MIME type.</p>
|
||||||
</dd>
|
</dd>
|
||||||
<dt><span class="term">charset</span></dt>
|
<dt><span class="term">charset</span></dt>
|
||||||
<dd>If set, this defines the file character set (mostly
|
<dd>
|
||||||
useful for plain text files).</dd>
|
<p>If set, this defines the file character set
|
||||||
|
(mostly useful for plain text files).</p>
|
||||||
|
</dd>
|
||||||
</dl>
|
</dl>
|
||||||
</div>
|
</div>
|
||||||
<p>By default, other attributes are handled as <span class=
|
<p>By default, other attributes are handled as <span class=
|
||||||
@ -7854,7 +7885,7 @@ thesame = "some string with spaces"
|
|||||||
function similar to skippedNames, but works
|
function similar to skippedNames, but works
|
||||||
independently. Can be redefined for subdirectories.
|
independently. Can be redefined for subdirectories.
|
||||||
Supported by recoll 1.20 and newer. See
|
Supported by recoll 1.20 and newer. See
|
||||||
https://www.lesbonscomptes.com/recoll/faqsandhowtos/Filtering%20out%20Zip%20archive%20members</p>
|
https://www.lesbonscomptes.com/recoll/faqsandhowtos/FilteringOutZipArchiveMembers.html</p>
|
||||||
</dd>
|
</dd>
|
||||||
<dt>
|
<dt>
|
||||||
<a name="RCL.INSTALL.CONFIG.RECOLLCONF.FOLLOWLINKS"
|
<a name="RCL.INSTALL.CONFIG.RECOLLCONF.FOLLOWLINKS"
|
||||||
@ -8370,10 +8401,25 @@ thesame = "some string with spaces"
|
|||||||
"RCL.INSTALL.CONFIG.RECOLLCONF.WEBQUEUEDIR"></a><span class="term"><code class="varname">webqueuedir</code></span>
|
"RCL.INSTALL.CONFIG.RECOLLCONF.WEBQUEUEDIR"></a><span class="term"><code class="varname">webqueuedir</code></span>
|
||||||
</dt>
|
</dt>
|
||||||
<dd>
|
<dd>
|
||||||
<p>The path to the Web indexing queue. This is
|
<p>The path to the Web indexing queue. This used to
|
||||||
hard-coded in the plugin as ~/.recollweb/ToIndex so
|
be hard-coded in the old plugin as
|
||||||
there should be no need or possibility to change
|
~/.recollweb/ToIndex so there would be no need or
|
||||||
it.</p>
|
possibility to change it, but the WebExtensions
|
||||||
|
plugin now downloads the files to the user
|
||||||
|
Downloads directory, and a script moves them to
|
||||||
|
webqueuedir. The script reads this value from the
|
||||||
|
config so it has become possible to change it.</p>
|
||||||
|
</dd>
|
||||||
|
<dt>
|
||||||
|
<a name=
|
||||||
|
"RCL.INSTALL.CONFIG.RECOLLCONF.WEBDOWNLOADSDIR" id=
|
||||||
|
"RCL.INSTALL.CONFIG.RECOLLCONF.WEBDOWNLOADSDIR"></a><span class="term"><code class="varname">webdownloadsdir</code></span>
|
||||||
|
</dt>
|
||||||
|
<dd>
|
||||||
|
<p>The path to the browser downloads directory. This is
|
||||||
|
where the new browser add-on extension has to
|
||||||
|
create the files. They are then moved by a script
|
||||||
|
to webqueuedir.</p>
|
||||||
</dd>
|
</dd>
|
||||||
<dt>
|
<dt>
|
||||||
<a name=
|
<a name=
|
||||||
|
|||||||
@ -965,27 +965,46 @@ thrQSizes = -1 -1 -1
|
|||||||
</sect1>
|
</sect1>
|
||||||
|
|
||||||
<sect1 id="RCL.INDEXING.WEBQUEUE">
|
<sect1 id="RCL.INDEXING.WEBQUEUE">
|
||||||
<title>Indexing WEB pages you wisit</title>
|
<title>Indexing the WEB pages which you visit.</title>
|
||||||
|
|
||||||
<para>With the help of a <application>Firefox</application>
|
<para>With the help of a <application>Firefox</application>
|
||||||
extension, &RCL; can index the Internet pages that you visit. The
|
extension, &RCL; can index the Internet pages that you visit. The
|
||||||
extension was initially designed for the
|
extension has a long history: it was initially designed for the
|
||||||
<application>Beagle</application> indexer, but it has recently be
|
<application>Beagle</application> indexer, then adapted to &RCL; and
|
||||||
renamed and better adapted to &RCL;.</para>
|
the <application>Firefox</application> <application>XUL</application>
|
||||||
|
API. A new version of the addon has been written to work with the
|
||||||
|
<application>WebExtensions</application> API, which is the only one
|
||||||
|
supported after <application>Firefox</application> version 57.</para>
|
||||||
|
|
||||||
<para>The extension works by copying visited WEB pages to an indexing
|
<para>The extension works by copying visited WEB pages to an indexing
|
||||||
queue directory, which &RCL; then processes, indexing the data,
|
queue directory, which &RCL; then processes, indexing the data,
|
||||||
storing it into a local cache, then removing the file from the
|
storing it into a local cache, then removing the file from the
|
||||||
queue.</para>
|
queue.</para>
|
||||||
|
|
||||||
<para>This feature can be enabled in the GUI
|
<para>Because the WebExtensions API introduces more constraints to
|
||||||
<guilabel>Index configuration</guilabel>
|
what extensions can do, the new version works with one
|
||||||
panel, or by editing the configuration file (set
|
more step: the files are first created in the browser default
|
||||||
<varname>processwebqueue</varname> to 1).</para>
|
downloads location (typically <filename>$HOME/Downloads</filename> ),
|
||||||
|
then moved by a script to the old queue location. The script is
|
||||||
|
automatically executed by the &RCL; indexer versions 1.23.5 and
|
||||||
|
newer. It could conceivably be executed independently to make the new
|
||||||
|
browser extension compatible with an older &RCL; version (the script
|
||||||
|
is named <command>recoll-we-move-files.py</command>).</para>
|
||||||
|
|
||||||
|
<note><para>For the WebExtensions-based version to work, it is
|
||||||
|
necessary to set the <literal>webdownloadsdir</literal> value in the
|
||||||
|
configuration if it was changed from the default
|
||||||
|
<filename>$HOME/Downloads</filename> in the browser
|
||||||
|
preferences.</para></note>
|
||||||
|
|
||||||
|
<para>The visited WEB pages indexing feature can be enabled in the
|
||||||
|
GUI <guilabel>Index configuration</guilabel> panel, or by editing the
|
||||||
|
configuration file (set <varname>processwebqueue</varname> to
|
||||||
|
1).</para>
|
||||||
|
|
||||||
<para>A current pointer to the extension can be found, along with
|
<para>A current pointer to the extension can be found, along with
|
||||||
up-to-date instructions, on the
|
up-to-date instructions, on the
|
||||||
<ulink url="&FAQS;IndexWebHistory">Recoll wiki</ulink>.</para>
|
<ulink url="&FAQS;IndexWebHistory">Recoll wiki</ulink>.</para>
|
||||||
|
|
||||||
<para>A copy of the indexed WEB pages is retained by Recoll in a
|
<para>A copy of the indexed WEB pages is retained by Recoll in a
|
||||||
local cache (from which previews can be fetched). The cache size can
|
local cache (from which previews can be fetched). The cache size can
|
||||||
@ -1020,8 +1039,8 @@ thrQSizes = -1 -1 -1
|
|||||||
</varlistentry>
|
</varlistentry>
|
||||||
<varlistentry>
|
<varlistentry>
|
||||||
<term>charset</term>
|
<term>charset</term>
|
||||||
<listitem>If set, this defines the file character set
|
<listitem><para>If set, this defines the file character set
|
||||||
(mostly useful for plain text files).</listitem>
|
(mostly useful for plain text files).</para></listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
</variablelist>
|
</variablelist>
|
||||||
</para>
|
</para>
|
||||||
|
|||||||
@ -42,8 +42,10 @@ try:
|
|||||||
except:
|
except:
|
||||||
import rclconfig
|
import rclconfig
|
||||||
|
|
||||||
|
verbosity = 0
|
||||||
def logdeb(s):
|
def logdeb(s):
|
||||||
print("%s"%s, file=sys.stderr)
|
if verbosity >= 4:
|
||||||
|
print("%s"%s, file=sys.stderr)
|
||||||
|
|
||||||
# Downloaded instances of the same page are suffixed with (nn) by the
|
# Downloaded instances of the same page are suffixed with (nn) by the
|
||||||
# browser. We are passed a list of (hash, instancenum, filename)
|
# browser. We are passed a list of (hash, instancenum, filename)
|
||||||
@ -94,8 +96,12 @@ def usage():
|
|||||||
print("Usage: recoll-we-move-files.py [<downloaddir>]", file=sys.stderr)
|
print("Usage: recoll-we-move-files.py [<downloaddir>]", file=sys.stderr)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
# Source dir is parameter, else default Downloads directory
|
config = rclconfig.RclConfig()
|
||||||
downloadsdir = os.path.expanduser("~/Downloads")
|
|
||||||
|
# Source dir is parameter, else from config else default Downloads directory
|
||||||
|
downloadsdir = config.getConfParam("webdownloadsdir")
|
||||||
|
if not downloadsdir:
|
||||||
|
downloadsdir = os.path.expanduser("~/Downloads")
|
||||||
if len(sys.argv) == 2:
|
if len(sys.argv) == 2:
|
||||||
mydir = sys.argv[1]
|
mydir = sys.argv[1]
|
||||||
elif len(sys.argv) == 1:
|
elif len(sys.argv) == 1:
|
||||||
@ -106,12 +112,13 @@ if not os.path.isdir(mydir):
|
|||||||
usage()
|
usage()
|
||||||
|
|
||||||
# Get target webqueue recoll directory from recoll configuration
|
# Get target webqueue recoll directory from recoll configuration
|
||||||
config = rclconfig.RclConfig()
|
|
||||||
webqueuedir = config.getConfParam("webqueuedir")
|
webqueuedir = config.getConfParam("webqueuedir")
|
||||||
if not webqueuedir:
|
if not webqueuedir:
|
||||||
webqueuedir = "~/.recollweb/ToIndex"
|
webqueuedir = "~/.recollweb/ToIndex"
|
||||||
webqueuedir = os.path.expanduser(webqueuedir)
|
webqueuedir = os.path.expanduser(webqueuedir)
|
||||||
logdeb("webqueuedir is %s" % webqueuedir)
|
os.makedirs(webqueuedir, exist_ok = True)
|
||||||
|
|
||||||
|
# logdeb("webqueuedir is %s" % webqueuedir)
|
||||||
|
|
||||||
# Get the lists of all files created by the browser addon
|
# Get the lists of all files created by the browser addon
|
||||||
mfiles, cfiles = list_all_files(mydir)
|
mfiles, cfiles = list_all_files(mydir)
|
||||||
@ -130,7 +137,9 @@ cfiles = delete_previous_instances(cfiles, downloadsdir)
|
|||||||
for hash in cfiles.keys():
|
for hash in cfiles.keys():
|
||||||
if hash in mfiles.keys():
|
if hash in mfiles.keys():
|
||||||
newname = "firefox-recoll-web-"+hash
|
newname = "firefox-recoll-web-"+hash
|
||||||
shutil.move(cfiles[hash], os.path.join(webqueuedir, newname))
|
shutil.move(os.path.join(downloadsdir, cfiles[hash]),
|
||||||
shutil.move(mfiles[hash], os.path.join(webqueuedir, "." + newname))
|
os.path.join(webqueuedir, newname))
|
||||||
|
shutil.move(os.path.join(downloadsdir, mfiles[hash]),
|
||||||
|
os.path.join(webqueuedir, "." + newname))
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -125,6 +125,7 @@ bool ConfIndexer::index(bool resetbefore, ixType typestorun, int flags)
|
|||||||
}
|
}
|
||||||
#ifndef DISABLE_WEB_INDEXER
|
#ifndef DISABLE_WEB_INDEXER
|
||||||
if (m_doweb && (typestorun & IxTWebQueue)) {
|
if (m_doweb && (typestorun & IxTWebQueue)) {
|
||||||
|
runWebFilesMoverScript(m_config);
|
||||||
deleteZ(m_webindexer);
|
deleteZ(m_webindexer);
|
||||||
m_webindexer = new BeagleQueueIndexer(m_config, &m_db, m_updater);
|
m_webindexer = new BeagleQueueIndexer(m_config, &m_db, m_updater);
|
||||||
if (!m_webindexer || !m_webindexer->index()) {
|
if (!m_webindexer || !m_webindexer->index()) {
|
||||||
|
|||||||
@ -464,15 +464,22 @@ bool startMonitor(RclConfig *conf, int opts)
|
|||||||
bool timedout;
|
bool timedout;
|
||||||
time_t lastauxtime = time(0);
|
time_t lastauxtime = time(0);
|
||||||
time_t lastixtime = lastauxtime;
|
time_t lastixtime = lastauxtime;
|
||||||
|
time_t lastmovetime = 0;
|
||||||
bool didsomething = false;
|
bool didsomething = false;
|
||||||
list<string> modified;
|
list<string> modified;
|
||||||
list<string> deleted;
|
list<string> deleted;
|
||||||
|
|
||||||
;
|
|
||||||
|
|
||||||
// Set a relatively short timeout for better monitoring of exit requests
|
|
||||||
while (true) {
|
while (true) {
|
||||||
|
time_t now = time(0);
|
||||||
|
if (now - lastmovetime > ixinterval) {
|
||||||
|
lastmovetime = now;
|
||||||
|
runWebFilesMoverScript(conf);
|
||||||
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
|
// Wait for event or timeout.
|
||||||
|
// Set a relatively short timeout for better monitoring of
|
||||||
|
// exit requests.
|
||||||
std::unique_lock<std::mutex> lock = rclEQ.wait(2, &timedout);
|
std::unique_lock<std::mutex> lock = rclEQ.wait(2, &timedout);
|
||||||
|
|
||||||
// x11IsAlive() can't be called from ok() because both
|
// x11IsAlive() can't be called from ok() because both
|
||||||
@ -525,9 +532,9 @@ bool startMonitor(RclConfig *conf, int opts)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
now = time(0);
|
||||||
// Process. We don't do this every time but let the lists accumulate
|
// Process. We don't do this every time but let the lists accumulate
|
||||||
// a little, this saves processing. Start at once if list is big.
|
// a little, this saves processing. Start at once if list is big.
|
||||||
time_t now = time(0);
|
|
||||||
if (expeditedIndexingRequested(conf) ||
|
if (expeditedIndexingRequested(conf) ||
|
||||||
(now - lastixtime > ixinterval) ||
|
(now - lastixtime > ixinterval) ||
|
||||||
(deleted.size() + modified.size() > 20)) {
|
(deleted.size() + modified.size() > 20)) {
|
||||||
@ -553,8 +560,9 @@ bool startMonitor(RclConfig *conf, int opts)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Recreate the auxiliary dbs every hour at most.
|
// Recreate the auxiliary dbs every hour at most.
|
||||||
if (didsomething && time(0) - lastauxtime > auxinterval) {
|
now = time(0);
|
||||||
lastauxtime = time(0);
|
if (didsomething && now - lastauxtime > auxinterval) {
|
||||||
|
lastauxtime = now;
|
||||||
didsomething = false;
|
didsomething = false;
|
||||||
if (!createAuxDbs(conf)) {
|
if (!createAuxDbs(conf)) {
|
||||||
// We used to bail out on error here. Not anymore,
|
// We used to bail out on error here. Not anymore,
|
||||||
|
|||||||
@ -337,6 +337,48 @@ static bool checktopdirs(RclConfig *config, vector<string>& nonexist)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Run the recoll-we-move-files.py helper script, which moves the files
 * created by the WebExtensions browser add-on from the browser downloads
 * directory to the web queue directory.
 *
 * The script is only executed when the modification time of the downloads
 * directory differs from the one recorded at the previous call.
 *
 * @param config the recoll configuration, used to read "webdownloadsdir"
 *        and to locate the script.
 * @return false if the script could not be found or if its execution
 *         returned a non-zero status, true otherwise (including when the
 *         script did not need to run).
 */
bool runWebFilesMoverScript(RclConfig *config)
{
    // Downloads directory: computed on first call, then cached.
    static string downloadsdir;
    if (downloadsdir.empty()) {
        // The parameter is named "webdownloadsdir" in the configuration
        // (same name as read by the mover script). Fall back to the
        // default location if it is unset or empty.
        if (!config->getConfParam("webdownloadsdir", downloadsdir) ||
            downloadsdir.empty()) {
            downloadsdir = "~/Downloads";
        }
        // The configured value may begin with '~' (the sample value
        // does), so expand in all cases, not only for the default.
        downloadsdir = path_tildexpand(downloadsdir);
    }

    // Script location: looked up until found, then cached.
    static string cmdpath;
    if (cmdpath.empty()) {
        cmdpath = config->findFilter("recoll-we-move-files.py");
        if (cmdpath.empty()) {
            LOGERR("runWFMoverScript: recoll-we-move-files.py not found\n");
            return false;
        }
    }

    /* Arrange to not actually run the script if the directory did not change */
    static time_t dirmtime;
    time_t ndirmtime = 0;
    struct stat st;
    if (::stat(downloadsdir.c_str(), &st) == 0) {
        ndirmtime = st.st_mtime;
    }
    /* If stat fails, presumably Downloads does not exist or is not
       accessible, dirmtime and ndirmtime stay at 0, and we never
       execute the script, which is the right thing. */
    if (dirmtime == ndirmtime) {
        return true;
    }

    /* The script is going to change the directory, so updating
       dirmtime before it runs means that we are going to execute
       it one time too many (it will run without doing anything),
       but we can't set the mtime to after the run in case files
       are created during the run. */
    dirmtime = ndirmtime;
    ExecCmd cmd;
    int status = cmd.doexec(cmdpath, {});
    return status == 0;
}
|
||||||
|
|
||||||
|
|
||||||
static const char *thisprog;
|
static const char *thisprog;
|
||||||
|
|
||||||
static const char usage [] =
|
static const char usage [] =
|
||||||
|
|||||||
@ -26,6 +26,16 @@ extern bool indexfiles(RclConfig *config, std::list<std::string> &filenames);
|
|||||||
extern bool purgefiles(RclConfig *config, std::list<std::string> &filenames);
|
extern bool purgefiles(RclConfig *config, std::list<std::string> &filenames);
|
||||||
extern bool createAuxDbs(RclConfig *config);
|
extern bool createAuxDbs(RclConfig *config);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper method for executing the recoll-we (new WebExtensions plugin) helper
|
||||||
|
* script. This moves files from the browser download directory (only
|
||||||
|
* place where the browser accepts to create them), to the web queue
|
||||||
|
* dir. This keeps the C++ code compatible with both the old and new addons.
|
||||||
|
* The script is executed before a batch pass, or from time to time in
|
||||||
|
* the monitor, if web processing is enabled.
|
||||||
|
*/
|
||||||
|
extern bool runWebFilesMoverScript(RclConfig *);
|
||||||
|
|
||||||
extern int stopindexing;
|
extern int stopindexing;
|
||||||
|
|
||||||
// Try to explain what went wrong...
|
// Try to explain what went wrong...
|
||||||
|
|||||||
@ -438,6 +438,13 @@ webcachemaxmbs = 40
|
|||||||
# possible to change it.</descr></var>
|
# possible to change it.</descr></var>
|
||||||
#webqueuedir = ~/.recollweb/ToIndex
|
#webqueuedir = ~/.recollweb/ToIndex
|
||||||
|
|
||||||
|
# <var name="webdownloadsdir" type="fn">
|
||||||
|
#
|
||||||
|
# <brief>The path to the browser downloads directory.</brief><descr>This is
|
||||||
|
# where the new browser add-on extension has to create the files. They are
|
||||||
|
# then moved by a script to webqueuedir.</descr></var>
|
||||||
|
#webdownloadsdir = ~/Downloads
|
||||||
|
|
||||||
# <var name="aspellDicDir" type="dfn">
|
# <var name="aspellDicDir" type="dfn">
|
||||||
#
|
#
|
||||||
# <brief>Aspell dictionary storage directory location.</brief> <descr>The
|
# <brief>Aspell dictionary storage directory location.</brief> <descr>The
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user