added adaptor script for new browser plugin
This commit is contained in:
parent
efcb4e0947
commit
cd44aa33e1
136
src/filters/recoll-we-move-files.py
Executable file
136
src/filters/recoll-we-move-files.py
Executable file
@ -0,0 +1,136 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# Copyright (C) 2017 J.F.Dockes
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the
|
||||||
|
# Free Software Foundation, Inc.,
|
||||||
|
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
|
||||||
|
""" Monitor download directory for pages created by the recoll-we extension.
|
||||||
|
|
||||||
|
The files (content file and metadata side file) are then renamed and
|
||||||
|
moved to the Recoll web queue directory.
|
||||||
|
|
||||||
|
We recognize recoll files by their pattern:
|
||||||
|
recoll-we-[cm]-<md5>.rclwe, and the fact that both a c and an m file
|
||||||
|
must exist. While not absolutely foolproof, this should be quite robust.
|
||||||
|
|
||||||
|
The script is normally executed by recollindex at appropriate times,
|
||||||
|
but it can also be run by hand.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
try:
|
||||||
|
from hashlib import md5 as md5
|
||||||
|
except:
|
||||||
|
import md5
|
||||||
|
import shutil
|
||||||
|
|
||||||
|
try:
|
||||||
|
from recoll import rclconfig
|
||||||
|
except:
|
||||||
|
import rclconfig
|
||||||
|
|
||||||
|
def logdeb(s):
    """Write the debug message `s`, followed by a newline, to stderr."""
    print("%s" % s, file=sys.stderr)


# Downloaded instances of the same page are suffixed with (nn) by the
# browser. We are passed a list of (hash, instancenum, filename)
# triplets, sort it, and keep only the latest file.
def delete_previous_instances(l, downloadsdir):
    """Keep the newest download for each hash and delete the older ones.

    Parameters:
      l: list of (hash, instancenum, filename) triplets. The list itself
         is left unmodified.
      downloadsdir: directory containing the files named in `l`; the
         files to delete are looked up there.

    Returns a dict mapping each hash to the file name of its highest
    numbered instance. Every other instance is removed from downloadsdir.
    """
    # Order by hash, then by numeric instance number, newest first. The
    # instance number is compared as an integer so that, e.g., 100000
    # correctly ranks after 99999.
    ordered = sorted(l, key=lambda e: (e[0], e[1]), reverse=True)
    ret = {}
    for filehash, _num, fn in ordered:
        if filehash not in ret:
            # First entry seen for this hash: the most recent instance.
            logdeb("Found %s" % fn)
            ret[filehash] = fn
            continue
        logdeb("Deleting %s" % fn)
        try:
            os.unlink(os.path.join(downloadsdir, fn))
        except FileNotFoundError:
            # Already gone (e.g. removed by the user in the meantime):
            # nothing left to delete, keep processing the other files.
            logdeb("Already deleted: %s" % fn)
    return ret
|
||||||
|
|
||||||
|
# Names of the files created by the browser extension:
# recoll-we-<m|c>-<hex hash>[(<instance number>)].rclwe
# Raw string so that the regex escapes are not interpreted as (invalid)
# Python string escapes.
fn_re = re.compile(r'''recoll-we-([mc])-([0-9a-f]+)(\([0-9]+\))?\.rclwe''')


def list_all_files(dir):
    """List the recoll-we files found in directory `dir`.

    Only names which match fn_re in full are retained, so that names with
    a trailing suffix (e.g. "xxx.rclwe.part") are ignored.

    Returns a (mfiles, cfiles) pair: the entries for the 'm' files and the
    entries for the 'c' files. Each entry is a [hash, instancenum, filename]
    list, where instancenum is the integer found between parentheses in
    the file name, or 0 if there is none.
    """
    mfiles = []
    cfiles = []
    for fn in os.listdir(dir):
        mo = fn_re.fullmatch(fn)
        if not mo:
            continue
        kind, filehash, suffix = mo.groups()
        # No "(nn)" suffix means that this is the first download.
        num = int(suffix.strip("()")) if suffix else 0
        target = mfiles if kind == 'm' else cfiles
        target.append([filehash, num, fn])
    return mfiles, cfiles
|
||||||
|
|
||||||
|
#######################
|
||||||
|
def usage():
    """Print the command line synopsis to stderr and exit with status 1."""
    sys.stderr.write("Usage: recoll-we-move-files.py [<downloaddir>]" + "\n")
    raise SystemExit(1)
|
||||||
|
|
||||||
|
# Source dir is parameter, else default Downloads directory
downloadsdir = os.path.expanduser("~/Downloads")
if len(sys.argv) == 2:
    mydir = sys.argv[1]
elif len(sys.argv) == 1:
    mydir = downloadsdir
else:
    usage()
if not os.path.isdir(mydir):
    usage()

# Get target webqueue recoll directory from recoll configuration
config = rclconfig.RclConfig()
webqueuedir = config.getConfParam("webqueuedir")
if not webqueuedir:
    webqueuedir = "~/.recollweb/ToIndex"
webqueuedir = os.path.expanduser(webqueuedir)
logdeb("webqueuedir is %s" % webqueuedir)

# Get the lists of all files created by the browser addon
mfiles, cfiles = list_all_files(mydir)

# Only keep the last version. The files were listed from mydir, so this
# is where the older instances must be deleted (not the default
# Downloads directory, which may be a different place).
mfiles = delete_previous_instances(mfiles, mydir)
cfiles = delete_previous_instances(cfiles, mydir)

#logdeb("Mfiles: %s"% mfiles)
#logdeb("Cfiles: %s"% cfiles)

# Move files to webqueuedir target directory
# The webextensions plugin creates the metadata files first. So it may
# happen that a data file is missing, keep them for next pass.
# The old plugin created the data first, so we move data then meta
for pagehash, cfn in cfiles.items():
    mfn = mfiles.get(pagehash)
    if mfn is None:
        # No metadata file for this content file: leave it in place.
        continue
    newname = "firefox-recoll-web-" + pagehash
    # The lists hold bare file names: make them relative to the source
    # directory, else they would be resolved against the current
    # working directory.
    shutil.move(os.path.join(mydir, cfn),
                os.path.join(webqueuedir, newname))
    shutil.move(os.path.join(mydir, mfn),
                os.path.join(webqueuedir, "." + newname))
|
||||||
|
|
||||||
|
|
||||||
@ -430,9 +430,12 @@ webcachemaxmbs = 40
|
|||||||
|
|
||||||
# <var name="webqueuedir" type="fn">
|
# <var name="webqueuedir" type="fn">
|
||||||
#
|
#
|
||||||
# <brief>The path to the Web indexing queue.</brief><descr>This is
|
# <brief>The path to the Web indexing queue.</brief><descr>This used to be
|
||||||
# hard-coded in the plugin as ~/.recollweb/ToIndex so there should be no
|
# hard-coded in the old plugin as ~/.recollweb/ToIndex so there would be no
|
||||||
# need or possibility to change it.</descr></var>
|
# need or possibility to change it, but the WebExtensions plugin now downloads
|
||||||
|
# the files to the user Downloads directory, and a script moves them to
|
||||||
|
# webqueuedir. The script reads this value from the config so it has become
|
||||||
|
# possible to change it.</descr></var>
|
||||||
#webqueuedir = ~/.recollweb/ToIndex
|
#webqueuedir = ~/.recollweb/ToIndex
|
||||||
|
|
||||||
# <var name="aspellDicDir" type="dfn">
|
# <var name="aspellDicDir" type="dfn">
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user