Compare commits
301 Commits
2a92200012
...
87256b6a69
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
87256b6a69 | ||
|
|
04335a11d7 | ||
|
|
63f9e278da | ||
|
|
394264a165 | ||
|
|
21fedc1fb4 | ||
|
|
06cd2bfd87 | ||
|
|
b1b0a41973 | ||
|
|
5087447ef6 | ||
|
|
20c3a7ed12 | ||
|
|
9e0018034c | ||
|
|
cbcefc1517 | ||
|
|
2882acaa43 | ||
|
|
bd758648d2 | ||
|
|
7bf224df10 | ||
|
|
69ebfd3d95 | ||
|
|
4fe4a34949 | ||
|
|
d682348b7f | ||
|
|
e2901a5506 | ||
|
|
cadfabc3b1 | ||
|
|
5fc0066a34 | ||
|
|
78183a8e00 | ||
|
|
34003390f2 | ||
|
|
dcbfe05296 | ||
|
|
7b5a87df38 | ||
|
|
30bf5df03e | ||
|
|
84159dd54a | ||
|
|
c96d29e3a3 | ||
|
|
6aa5ef9064 | ||
|
|
c86cb9438b | ||
|
|
27f4d1f321 | ||
|
|
e0e1811e83 | ||
|
|
422d24e94e | ||
|
|
8deed63be6 | ||
|
|
35753d17d0 | ||
|
|
c218d5bbcc | ||
|
|
db8961740e | ||
|
|
c1bec06711 | ||
|
|
3e3711da51 | ||
|
|
5608a8f79c | ||
|
|
758f468e3a | ||
|
|
32b74f2ac2 | ||
|
|
d00ea4e420 | ||
|
|
565a1115f7 | ||
|
|
bed57aeaf6 | ||
|
|
708a57de1e | ||
|
|
a052278846 | ||
|
|
489e88c87d | ||
|
|
02a4a69482 | ||
|
|
b8e062f66c | ||
|
|
bf46e6ca0e | ||
|
|
16467900bd | ||
|
|
dab46b67fe | ||
|
|
4ad7c71ff0 | ||
|
|
44530f277b | ||
|
|
c746d044c4 | ||
|
|
a7d86cb64c | ||
|
|
64a72be069 | ||
|
|
cebef3cadf | ||
|
|
8bd38a858d | ||
|
|
426fee8a86 | ||
|
|
3404c2f726 | ||
|
|
ea133d7d60 | ||
|
|
7d028e7ca9 | ||
|
|
18c7c54bbf | ||
|
|
225081563d | ||
|
|
a72045e8a3 | ||
|
|
a1501359e3 | ||
|
|
38fa35402a | ||
|
|
00f29bf3ed | ||
|
|
d920fd377a | ||
|
|
45324033b3 | ||
|
|
6bb1eec06b | ||
|
|
b50bdc5cab | ||
|
|
e43e223ef8 | ||
|
|
2e53989d8f | ||
|
|
5a3b366911 | ||
|
|
4954c1d855 | ||
|
|
34e88e587d | ||
|
|
5ef20d0d69 | ||
|
|
9b06d05be7 | ||
|
|
70723a5280 | ||
|
|
4f66e7688d | ||
|
|
ae04d0692c | ||
|
|
6c777f8fe7 | ||
|
|
3560b9e3b0 | ||
|
|
e7074329b5 | ||
|
|
ab0aba8c36 | ||
|
|
03f82fa115 | ||
|
|
7ceb9c6837 | ||
|
|
96ebb369a5 | ||
|
|
2831993e19 | ||
|
|
acc7cdc189 | ||
|
|
6f7f1238f7 | ||
|
|
8b129f6058 | ||
|
|
389daa134c | ||
|
|
eb96263ab9 | ||
|
|
9a7561517f | ||
|
|
3071ea203e | ||
|
|
5325c0a533 | ||
|
|
6575476d47 | ||
|
|
435620dbb1 | ||
|
|
45f387e791 | ||
|
|
5b20e64e96 | ||
|
|
e6596cb26d | ||
|
|
4e8cc666f0 | ||
|
|
8bd1494fd3 | ||
|
|
d127fbc738 | ||
|
|
08aea6a95a | ||
|
|
a6a2abd251 | ||
|
|
e87d7f0683 | ||
|
|
42925fc238 | ||
|
|
f1b0db7226 | ||
|
|
1be54c2709 | ||
|
|
b9de1ed07e | ||
|
|
be12db218b | ||
|
|
fbfa818a3a | ||
|
|
9466b86b7b | ||
|
|
f4a5fe38c6 | ||
|
|
e34ccb69b8 | ||
|
|
638450c8d4 | ||
|
|
c8362269f4 | ||
|
|
64bff3310c | ||
|
|
51df3e834b | ||
|
|
5437c803ae | ||
|
|
0250e64b5a | ||
|
|
28fa7387ec | ||
|
|
2ebb0a689d | ||
|
|
be758e2c94 | ||
|
|
baf0df0c7e | ||
|
|
8cec995573 | ||
|
|
d08a59dd98 | ||
|
|
6fdb19700b | ||
|
|
48c447bd48 | ||
|
|
f2b24cf22d | ||
|
|
561592b618 | ||
|
|
ad2db6cd1c | ||
|
|
6643f48a7f | ||
|
|
7ef02a3b09 | ||
|
|
4ebffd1a31 | ||
|
|
adde2e5d4e | ||
|
|
8dacf0769a | ||
|
|
26c4c778a4 | ||
|
|
4a566b1540 | ||
|
|
e892d0db44 | ||
|
|
c323e1468d | ||
|
|
63d1318d6b | ||
|
|
3bbe5069d6 | ||
|
|
53b8b7e237 | ||
|
|
f201f5ee1b | ||
|
|
64080df169 | ||
|
|
77024d3f6a | ||
|
|
03149a57b4 | ||
|
|
11865812a4 | ||
|
|
064018ea69 | ||
|
|
4ca4bf21db | ||
|
|
5666ec568a | ||
|
|
b49696653b | ||
|
|
7692007939 | ||
|
|
1420533ceb | ||
|
|
32195d0899 | ||
|
|
44c0ec93e4 | ||
|
|
d1db0380f2 | ||
|
|
19ffb9b557 | ||
|
|
67e158b7f4 | ||
|
|
b5b48e4f50 | ||
|
|
fdb18a7c4b | ||
|
|
fbcf0f42e9 | ||
|
|
497b61e017 | ||
|
|
266967bea7 | ||
|
|
b118c93b4f | ||
|
|
f0efed545d | ||
|
|
0673f0f03e | ||
|
|
b51f9efcf7 | ||
|
|
87f86ac268 | ||
|
|
754a5b87a7 | ||
|
|
265bbd0c6e | ||
|
|
47ea12353a | ||
|
|
844b4e8b03 | ||
|
|
8b3792026f | ||
|
|
667e661c46 | ||
|
|
3f79ec7bd4 | ||
|
|
4c3ef66018 | ||
|
|
1acc059eea | ||
|
|
a777995946 | ||
|
|
6eb4f3681d | ||
|
|
d2d2cbff14 | ||
|
|
4e710f239a | ||
|
|
d883500c17 | ||
|
|
3a9d7f7cb6 | ||
|
|
03378c55a4 | ||
|
|
9eebfd24ec | ||
|
|
f8e556fcee | ||
|
|
7b42907441 | ||
|
|
25d6d78902 | ||
|
|
8c122d7de8 | ||
|
|
71ec75c2cc | ||
|
|
b3bb3784fc | ||
|
|
a7a66ebc03 | ||
|
|
bb06a94a52 | ||
|
|
10f5240b7c | ||
|
|
1f422eb023 | ||
|
|
d55decef20 | ||
|
|
b07ae087d9 | ||
|
|
078675730c | ||
|
|
0b6aa25dd4 | ||
|
|
3a5c1a0b6b | ||
|
|
4696d80cec | ||
|
|
15cc242e1f | ||
|
|
50160f3fa7 | ||
|
|
ddb0caddc1 | ||
|
|
213589f00f | ||
|
|
c51f6ee10d | ||
|
|
1072f88f11 | ||
|
|
5fcffb7654 | ||
|
|
c37765524d | ||
|
|
e121695a3c | ||
|
|
d942b23c85 | ||
|
|
1593b1d87f | ||
|
|
60538af5de | ||
|
|
2b1948d2c6 | ||
|
|
fd15540895 | ||
|
|
fb1876dfd0 | ||
|
|
5ea81045ef | ||
|
|
02c9a6f3f7 | ||
|
|
5f2716e628 | ||
|
|
756a944ef3 | ||
|
|
f813c1356e | ||
|
|
ffd01adec2 | ||
|
|
c750beb26a | ||
|
|
868643d3a2 | ||
|
|
1d96034869 | ||
|
|
65ff46990e | ||
|
|
996a8fd5b8 | ||
|
|
25e3f39f81 | ||
|
|
cdaa86b9a0 | ||
|
|
2f6a34d2b4 | ||
|
|
83b337fc50 | ||
|
|
310d28a4a1 | ||
|
|
2c60e382f8 | ||
|
|
58a6d2bd70 | ||
|
|
62252daffe | ||
|
|
43f2692d1d | ||
|
|
9e2e73a995 | ||
|
|
a1e98c1bdc | ||
|
|
728129e5ce | ||
|
|
a24fc7bacc | ||
|
|
4cc0bc90b6 | ||
|
|
e3e270fe81 | ||
|
|
fa790f52de | ||
|
|
00085e70a3 | ||
|
|
58d98b5626 | ||
|
|
1d158f329a | ||
|
|
1f92478daf | ||
|
|
1a20debf56 | ||
|
|
0226988817 | ||
|
|
3ea7c0fe2a | ||
|
|
d00c300c03 | ||
|
|
8a98635c3a | ||
|
|
687022d2ae | ||
|
|
7b81c16ea0 | ||
|
|
c182f13c96 | ||
|
|
80146437d8 | ||
|
|
ffcd856353 | ||
|
|
6dadb83951 | ||
|
|
a7626487cc | ||
|
|
6e148c164a | ||
|
|
dc0e3ef04a | ||
|
|
f7f693f437 | ||
|
|
b5013b41e1 | ||
|
|
feb2a3ec59 | ||
|
|
02fe696406 | ||
|
|
7c93f3a057 | ||
|
|
832f30dc69 | ||
|
|
bad5d41439 | ||
|
|
4aeb9a8b78 | ||
|
|
7179e0dbf8 | ||
|
|
f20c189fa5 | ||
|
|
31348b5470 | ||
|
|
2e6b25b1a8 | ||
|
|
bce780be88 | ||
|
|
9d3869c2b9 | ||
|
|
ae3480eab4 | ||
|
|
d5060a132c | ||
|
|
6b22d38a3e | ||
|
|
275a1210e0 | ||
|
|
aae1f07a9f | ||
|
|
3cd8506ff8 | ||
|
|
ce0352eff4 | ||
|
|
f91185fd53 | ||
|
|
03e277c0c9 | ||
|
|
b77ca3fe56 | ||
|
|
9437522d3c | ||
|
|
3df83ec982 | ||
|
|
a67dd3f8a3 | ||
|
|
03c1e4ce8a | ||
|
|
e6055681d4 | ||
|
|
ae8fd0902a | ||
|
|
9d32773e84 | ||
|
|
00987509f8 | ||
|
|
3f1cc004b8 | ||
|
|
a1f78ba57c |
5
.gitignore
vendored
5
.gitignore
vendored
@ -24,8 +24,9 @@ build-*-Debug
|
||||
build-*-Release
|
||||
libtool
|
||||
ptrans
|
||||
**/Makefile.in
|
||||
src/Makefile
|
||||
src/Makefile.in
|
||||
src/rclgrep/Makefile
|
||||
src/TAGS
|
||||
src/aclocal.m4
|
||||
src/autom4te.cache
|
||||
@ -77,7 +78,6 @@ src/recollq
|
||||
src/sampleconf/rclmon.sh
|
||||
src/sampleconf/recoll.conf
|
||||
src/testmains/Makefile
|
||||
src/testmains/Makefile.in
|
||||
src/xadump
|
||||
stamp-h1
|
||||
tests/casediac/aspdict.en.rws
|
||||
@ -103,6 +103,7 @@ tests/indexedmimetypes/missing
|
||||
tests/indexedmimetypes/recoll.conf
|
||||
tests/indexedmimetypes/xapiandb
|
||||
tests/xattr/mimeview
|
||||
unac/autom4te.cache
|
||||
website/faqsandhowtos/*.html
|
||||
website/idxthreads/forkingRecoll.html
|
||||
website/idxthreads/xapDocCopyCrash.html
|
||||
|
||||
@ -14,8 +14,8 @@ share/pixmaps/recoll.png
|
||||
%%DATADIR%%/filters/hotrecoll.py
|
||||
%%DATADIR%%/filters/rclabw
|
||||
%%DATADIR%%/filters/rclaptosidman
|
||||
%%DATADIR%%/filters/rclaudio
|
||||
%%DATADIR%%/filters/rclchm
|
||||
%%DATADIR%%/filters/rclaudio.py
|
||||
%%DATADIR%%/filters/rclchm.py
|
||||
%%DATADIR%%/filters/rcldjvu
|
||||
%%DATADIR%%/filters/rcldoc
|
||||
%%DATADIR%%/filters/rcldvi
|
||||
@ -23,11 +23,11 @@ share/pixmaps/recoll.png
|
||||
%%DATADIR%%/filters/rclfb2
|
||||
%%DATADIR%%/filters/rclflac
|
||||
%%DATADIR%%/filters/rclgaim
|
||||
%%DATADIR%%/filters/rclics
|
||||
%%DATADIR%%/filters/rclics.py
|
||||
%%DATADIR%%/filters/rclid3
|
||||
%%DATADIR%%/filters/rclimg
|
||||
%%DATADIR%%/filters/rclinfo
|
||||
%%DATADIR%%/filters/rclkar
|
||||
%%DATADIR%%/filters/rclinfo.py
|
||||
%%DATADIR%%/filters/rclkar.py
|
||||
%%DATADIR%%/filters/rclkwd
|
||||
%%DATADIR%%/filters/rcllatinclass.py
|
||||
%%DATADIR%%/filters/rcllatinstops.zip
|
||||
@ -41,7 +41,7 @@ share/pixmaps/recoll.png
|
||||
%%DATADIR%%/filters/rclps
|
||||
%%DATADIR%%/filters/rclpurple
|
||||
%%DATADIR%%/filters/rclpython
|
||||
%%DATADIR%%/filters/rclrar
|
||||
%%DATADIR%%/filters/rclrar.py
|
||||
%%DATADIR%%/filters/rclrtf
|
||||
%%DATADIR%%/filters/rclscribus
|
||||
%%DATADIR%%/filters/rclshowinfo
|
||||
@ -51,11 +51,11 @@ share/pixmaps/recoll.png
|
||||
%%DATADIR%%/filters/rcltex
|
||||
%%DATADIR%%/filters/rcltext
|
||||
%%DATADIR%%/filters/rcluncomp
|
||||
%%DATADIR%%/filters/rclwar
|
||||
%%DATADIR%%/filters/rclwar.py
|
||||
%%DATADIR%%/filters/rclwpd
|
||||
%%DATADIR%%/filters/rclxls
|
||||
%%DATADIR%%/filters/rclzip
|
||||
%%DATADIR%%/filters/rcl7z
|
||||
%%DATADIR%%/filters/rclzip.py
|
||||
%%DATADIR%%/filters/rcl7z.py
|
||||
%%DATADIR%%/filters/xdg-open
|
||||
%%DATADIR%%/images/aptosid-book.png
|
||||
%%DATADIR%%/images/aptosid-manual.png
|
||||
|
||||
@ -5,30 +5,30 @@
|
||||
# sudo apt-get install pkg-kde-tools cdbs
|
||||
|
||||
# Active series:
|
||||
# 16.04LTS xenial 2021-04
|
||||
# 18.04LTS bionic 2023-04
|
||||
# 20.04LTS focal 2025-04
|
||||
# 20.10 groovy 2021-07
|
||||
# 21.04 hirsute 2022-01
|
||||
# 22.04LTS jammy 2027-04
|
||||
SERIES="bionic focal jammy kinetic"
|
||||
|
||||
PPA_KEYID=7808CE96D38B9201
|
||||
|
||||
RCLVERS=1.31.0
|
||||
RCLVERS=1.33.1
|
||||
SCOPEVERS=1.20.2.4
|
||||
GSSPVERS=1.1.0
|
||||
GSSPVERS=1.1.1
|
||||
PPAVERS=1
|
||||
|
||||
#
|
||||
RCLSRC=/y/home/dockes/projets/fulltext/recoll/src
|
||||
SCOPESRC=/y/home/dockes/projets/fulltext/unity-scope-recoll
|
||||
GSSPSRC=/y/home/dockes/projets/fulltext/gssp-recoll
|
||||
RCLDOWNLOAD=/y/home/dockes/projets/lesbonscomptes/recoll
|
||||
#
|
||||
#Y=/y
|
||||
Y=
|
||||
RCLSRC=${Y}/home/dockes/projets/fulltext/recoll/src
|
||||
SCOPESRC=${Y}/home/dockes/projets/fulltext/unity-scope-recoll
|
||||
GSSPSRC=${Y}/home/dockes/projets/fulltext/gssp-recoll
|
||||
RCLDOWNLOAD=${Y}/home/dockes/projets/lesbonscomptes/recoll
|
||||
|
||||
PPANAME=recoll15-ppa
|
||||
PPANAME=recollexp1-ppa
|
||||
#PPANAME=recoll-webengine-ppa
|
||||
|
||||
case $RCLVERS in
|
||||
[23]*) PPANAME=recollexp-ppa;;
|
||||
*) PPANAME=recoll15-ppa;;
|
||||
esac
|
||||
#PPANAME=recollexp-ppa
|
||||
echo "PPA: $PPANAME. Type CR if Ok, else ^C"
|
||||
read rep
|
||||
|
||||
@ -49,8 +49,8 @@ check_recoll_orig()
|
||||
|
||||
####### QT
|
||||
debdir=debian
|
||||
series="bionic focal groovy hirsute"
|
||||
series=
|
||||
series=$SERIES
|
||||
#series=bionic
|
||||
|
||||
if test "X$series" != X ; then
|
||||
check_recoll_orig
|
||||
@ -77,7 +77,7 @@ for series in $series ; do
|
||||
-e s/PPAVERS/${PPAVERS}/g \
|
||||
< ${debdir}/changelog > recoll-${RCLVERS}/debian/changelog
|
||||
|
||||
(cd recoll-${RCLVERS};debuild -k$PPA_KEYID -S -sa) || break
|
||||
(cd recoll-${RCLVERS};debuild -d -k$PPA_KEYID -S -sa) || break
|
||||
|
||||
dput $PPANAME recoll_${RCLVERS}-1~ppa${PPAVERS}~${series}1_source.changes
|
||||
done
|
||||
@ -85,8 +85,8 @@ done
|
||||
|
||||
|
||||
### KIO.
|
||||
series="bionic focal groovy hirsute"
|
||||
#series=
|
||||
series=$SERIES
|
||||
series=
|
||||
|
||||
debdir=debiankio
|
||||
topdir=kio-recoll-${RCLVERS}
|
||||
@ -125,7 +125,7 @@ for svers in $series ; do
|
||||
done
|
||||
|
||||
### GSSP
|
||||
series="bionic focal groovy hirsute"
|
||||
series=$SERIES
|
||||
series=
|
||||
|
||||
debdir=debiangssp
|
||||
|
||||
@ -1,3 +1,169 @@
|
||||
recoll (1.33.1-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
||||
|
||||
* Small updates to the build files to accommodate the new rclgrep utility.
|
||||
* New textunknownasplain configuration variable to index all files with no known association
|
||||
but identified as text/xxx by the "file" or "xdg-mime" command.
|
||||
* Make sure that a single double-quoted word is not stem-expanded (act as if it was
|
||||
capitalized). Expanding a quoted term is unexpected.
|
||||
* Apply stemming to terms containing a single dash. These were not expanded before.
|
||||
* Linux real time: fix monitoring under topdirs members which are symbolic links.
|
||||
* Fix the GUI simple search which was broken in 1.33.0 when switching filters on/off
|
||||
* Exclude Tamil characters from unac processing (experimental for now).
|
||||
* Windows GUI directory side filters: the computed paths were wrong on Windows.
|
||||
|
||||
-- Jean-Francois Dockes <jf@dockes.org> Sun, 25 Sep 2022 19:19:00 +0200
|
||||
|
||||
recoll (1.33.0-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
||||
|
||||
* Query processing: simplify queries a bit more before sending to Xapian, allows using OP_FILTER
|
||||
for path filtering. -> Medium version bump.
|
||||
* GUI: allow specifying a fixed geometry for the results list viewport by setting
|
||||
RECOLL_RESULTS_GEOMETRY=widthxheight . For people with fixed-width result formats CSS.
|
||||
* recollq: add option to extract a result document into a file.
|
||||
* Replace application/x-flac with audio/flac for FLAC audio files.
|
||||
* Fix web queue processing for non-default configuration directories.
|
||||
* Fix encoding issue in pdf attachment extraction.
|
||||
* GUI: result list: fix issue with webengine builds not displaying Icons. Paging still not working
|
||||
right with webengine (QTBUG-105842). Main builds revert/remain to webkit.
|
||||
* Misc. small adjustments.
|
||||
|
||||
-- Jean-Francois Dockes <jf@dockes.org> Mon, 30 Aug 2022 10:59:00 +0200
|
||||
|
||||
recoll (1.32.8-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
||||
|
||||
* Add environment variable RECOLL_RESULTS_GEOMETRY for forcing a fixed geometry to the results
|
||||
list viewport.
|
||||
* Fix result list Qt Webengine icon display issue.
|
||||
* Improve result list paging behaviour. Only fully works with Qt Webkit.
|
||||
* recollq: add option to extract result document to a file.
|
||||
|
||||
-- Jean-Francois Dockes <jf@dockes.org> Sun, 21 Aug 2022 07:59:00 +0200
|
||||
|
||||
recoll (1.32.7-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
||||
|
||||
* kio_recoll: updated to work with newer kf5 versions (it seems that 5.96 broke it at least on
|
||||
arch linux).
|
||||
* rclaudio: fix extracting comment fields from flac files.
|
||||
* Python code preview: get rid of spurious encoding value output.
|
||||
* Fix glitch in Qt GUI when switching between list and table display.
|
||||
|
||||
-- Jean-Francois Dockes <jf@dockes.org> Sun, 07 Aug 2022 17:42:00 +0200
|
||||
|
||||
recoll (1.32.5-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
||||
|
||||
* GUI: switch to using Qt-Webengine instead of Qt-Webkit because of CSS support issues in Webkit.
|
||||
* GUI: result list paragraph format. Preserve unquoted % characters if there is no matching
|
||||
translation. User manual: document the need to quote % as %% anyway.
|
||||
* GUI: result list devel/debug. Add parameter to dump the HTML sent to the engine.
|
||||
|
||||
-- Jean-Francois Dockes <jf@dockes.org> Tue, 05 Jul 2022 09:56:00 +0200
|
||||
|
||||
recoll (1.32.4-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
||||
|
||||
* Try to improve font size consistency by using px sizes everywhere.
|
||||
* Fix Increase/Decrease font size menu options.
|
||||
* Allow displaying line numbers in snippets.
|
||||
|
||||
-- Jean-Francois Dockes <jf@dockes.org> Wed, 29 Jun 2022 09:36:00 +0200
|
||||
|
||||
recoll (1.32.3-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
||||
|
||||
* Fix issues when opening a file with %F (parent of subdocument): avoid creating a temporary file
|
||||
when this can be avoided.
|
||||
|
||||
-- Jean-Francois Dockes <jf@dockes.org> Tue, 21 Jun 2022 20:51:00 +0200
|
||||
|
||||
recoll (1.32.2-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
||||
|
||||
* Restore result list popup menu function when using webengine. This was broken in 1.32.1.
|
||||
* Show progress dialog when result list abstracts generation takes too long
|
||||
|
||||
-- Jean-Francois Dockes <jf@dockes.org> Tue, 14 Jun 2022 07:51:00 +0200
|
||||
|
||||
recoll (1.32.1-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
||||
|
||||
* GUI side panel filters: make sure the filter is applied even if set before
|
||||
the query.
|
||||
* GUI side panel directory filter: compute the tree from the index, not the
|
||||
file system, to allow filtering data from external indexes. Update the tree
|
||||
when an indexing completes.
|
||||
* Implement whole UI scaling factor (fonts only, no icons).
|
||||
* Orgmode: add orgmodesubdocs configuration variable to decide if we index
|
||||
whole files or create subdocuments for nodes. Also index text before the
|
||||
first heading.
|
||||
* GUI: fix path translation for importing an index from Windows.
|
||||
|
||||
-- Jean-Francois Dockes <jf@dockes.org> Fri, 20 May 2022 10:55:00 +0200
|
||||
|
||||
recoll (1.32.0-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
||||
|
||||
* GUI: added a collapsible side pane for quick filtering on dates or
|
||||
directories.
|
||||
* Showing duplicates now uses a spreadsheet like the result table for
|
||||
easy access to the duplicate files.
|
||||
* Fixed the temporary copy open dialog (again!).
|
||||
* The default mimeview and mimeconf configuration files were
|
||||
separated into generic and system-specific parts to avoid update
|
||||
errors (no consequences for users).
|
||||
* Renamed all Python input handlers with a .py extension. This is
|
||||
relied upon on Windows rather than listing an explicit python
|
||||
interpreter.
|
||||
* Added %l specification to viewer definitions for opening at a
|
||||
specific line.
|
||||
|
||||
-- Jean-Francois Dockes <jf@dockes.org> Fri, 11 Mar 2022 18:17:00 +0100
|
||||
|
||||
recoll (1.31.6-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
||||
|
||||
* Almost no change: translation files update.
|
||||
|
||||
-- Jean-Francois Dockes <jf@dockes.org> Sat, 20 Dec 2021 09:25:00 +0100
|
||||
|
||||
recoll (1.31.5-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
||||
|
||||
* Fix annoying bug in tesseract OCR temporary files cleanup.
|
||||
|
||||
-- Jean-Francois Dockes <jf@dockes.org> Sat, 04 Dec 2021 10:05:00 +0100
|
||||
|
||||
recoll (1.31.4-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
||||
|
||||
* Linux/Mac: Bug in threads management could result in index corruption or crash
|
||||
after signal interrupt.
|
||||
|
||||
-- Jean-Francois Dockes <jf@dockes.org> Thu, 25 Nov 2021 16:30:00 +0100
|
||||
|
||||
recoll (1.31.3-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
||||
|
||||
* Highlighting for group (phrase/near): eliminate some spurious matches.
|
||||
* Fix page number string detection which could sometimes prevent correct
|
||||
highlighting in snippets.
|
||||
* Avoid query completer consuming excessive resources on unstripped
|
||||
indexes.
|
||||
* Fix some cases where different instances of the indexer could use
|
||||
different pid/lock files.
|
||||
* Fix processing on some unicode dash and apos character variations.
|
||||
* PST: fix indexing in marginal cases. Extract message dates.
|
||||
|
||||
-- Jean-Francois Dockes <jf@dockes.org> Sat, 13 Nov 2021 16:30:00 +0100
|
||||
|
||||
recoll (1.31.2-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
||||
|
||||
* Add support for .ipynb iPython/Jupyter notebook format.
|
||||
* Implement Alt+/ shortcut to search the menu entries and possibly execute the result.
|
||||
* Fix configuration GUI button margins on Mac OS.
|
||||
* Add *.pyc __pycache__ .pytest_cache .tox and .direnv to the default skipped names list.
|
||||
* Add /opt/homebrew/bin to the helper search path when built under Mac Homebrew.
|
||||
* Linux: let recollindex adjust its OOM killer "badness" on startup.
|
||||
* simple search: add Ctrl+H as keyboard shortcut for "show history".
|
||||
* Renamed the fragment buttons configuration file from fragbuts.xml to fragment-buttons.xml.
|
||||
* Zip archives: set the modification date attribute for members.
|
||||
* ost/pst filter: fix not fetching the message dates.
|
||||
* Anchored searches: remove unwarranted slack increase. The anchor term should behave like a
|
||||
normal one for slack computations.
|
||||
* Fix djvu issues on Windows.
|
||||
|
||||
-- Jean-Francois Dockes <jf@dockes.org> Mon, 11 Oct 2021 10:51:00 +0200
|
||||
|
||||
recoll (1.31.0-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
||||
|
||||
* GUI: modified shortcuts were not read from the preferences !
|
||||
|
||||
@ -3,18 +3,19 @@ Section: x11
|
||||
Priority: optional
|
||||
Maintainer: Jean-Francois Dockes <jfd@recoll.org>
|
||||
Build-Depends: bison,
|
||||
debhelper (>= 9),
|
||||
debhelper (>= 10),
|
||||
dh-python,
|
||||
dpkg-dev (>= 1.16.1~),
|
||||
libaspell-dev,
|
||||
libchm-dev,
|
||||
# qtwebengine5-dev,
|
||||
libqt5webkit5-dev,
|
||||
libx11-dev,
|
||||
libxapian-dev (>= 1.2.0),
|
||||
libxslt1-dev,
|
||||
libz-dev,
|
||||
pkg-config,
|
||||
python-all-dev (>= 2.6.6-3~),
|
||||
python2-dev (>= 2.6.6-3~),
|
||||
python-setuptools,
|
||||
python3-all-dev,
|
||||
python3-setuptools,
|
||||
|
||||
115
packaging/debian/debian/control-bionic
Normal file
115
packaging/debian/debian/control-bionic
Normal file
@ -0,0 +1,115 @@
|
||||
Source: recoll
|
||||
Section: x11
|
||||
Priority: optional
|
||||
Maintainer: Jean-Francois Dockes <jfd@recoll.org>
|
||||
Build-Depends: bison,
|
||||
debhelper (>= 9),
|
||||
dh-python,
|
||||
dpkg-dev (>= 1.16.1~),
|
||||
libaspell-dev,
|
||||
libchm-dev,
|
||||
libqt5webkit5-dev,
|
||||
# qtwebengine5-dev,
|
||||
libx11-dev,
|
||||
libxapian-dev (>= 1.2.0),
|
||||
libxslt1-dev,
|
||||
libz-dev,
|
||||
pkg-config,
|
||||
python-all-dev (>= 2.6.6-3~),
|
||||
python-setuptools,
|
||||
python3-all-dev,
|
||||
python3-setuptools,
|
||||
qtbase5-dev
|
||||
X-Python3-Version: >= 3.4
|
||||
Vcs-Git: https://salsa.debian.org/debian/recoll.git
|
||||
Vcs-Browser: https://salsa.debian.org/debian/recoll
|
||||
Homepage: https://www.lesbonscomptes.com/recoll
|
||||
Standards-Version: 4.2.1
|
||||
|
||||
Package: recoll
|
||||
Architecture: all
|
||||
Depends: recollcmd, recollgui, ${misc:Depends}
|
||||
Description: Personal full text search package
|
||||
This is a personal full text search package based on a very strong
|
||||
backend (Xapian), for which it provides an easy to use and feature-rich
|
||||
interface.
|
||||
.
|
||||
Features:
|
||||
* Qt-based GUI.
|
||||
* Supports the following document types (and their compressed versions)
|
||||
- Natively: text, html, OpenOffice files, excel, ppt, maildir and
|
||||
mailbox (Mozilla and IceDove mail) with attachments, pidgin log files
|
||||
- With external helpers: pdf (pdftotext), postscript (ghostscript), msword
|
||||
(antiword), rtf (unrtf). And others...
|
||||
* Powerful query facilities, with boolean searches, phrases, filter on file
|
||||
types and directory tree.
|
||||
* Support for multiple charsets, Internal processing and storage uses Unicode
|
||||
UTF-8.
|
||||
* Stemming performed at query time (can switch stemming language after
|
||||
indexing).
|
||||
* Easy installation. No database daemon, web server or exotic language
|
||||
necessary.
|
||||
* The indexer can run either continuously or in batch.
|
||||
|
||||
Package: recollcmd
|
||||
Architecture: any
|
||||
Breaks: recoll (<< 1.23.7-2)
|
||||
Replaces: recoll (<< 1.23.7-2)
|
||||
Depends: python3, ${misc:Depends}, ${shlibs:Depends}
|
||||
Recommends: antiword,
|
||||
aspell,
|
||||
groff,
|
||||
libimage-exiftool-perl,
|
||||
poppler-utils,
|
||||
python3-lxml,
|
||||
python3-recoll,
|
||||
python3-six,
|
||||
python3-mutagen,
|
||||
python3-rarfile,
|
||||
unrtf,
|
||||
unzip,
|
||||
xdg-utils
|
||||
Suggests: ghostscript,
|
||||
libinotifytools0,
|
||||
untex,
|
||||
wv
|
||||
Description: Command line programs for recoll
|
||||
This package supports indexing and command line querying.
|
||||
|
||||
Package: recollgui
|
||||
Architecture: any
|
||||
Breaks: recoll (<< 1.23.7-2)
|
||||
Replaces: recoll (<< 1.23.7-2)
|
||||
Depends: recollcmd (= ${binary:Version}), ${misc:Depends}, ${shlibs:Depends}
|
||||
Description: GUI program and elements for recoll
|
||||
Main recoll GUI for configuring, controlling and querying recoll indexes.
|
||||
|
||||
Package: python-recoll
|
||||
Architecture: any
|
||||
Section: python
|
||||
Depends: python2,
|
||||
recollcmd (= ${binary:Version}),
|
||||
${misc:Depends},
|
||||
${python:Depends},
|
||||
${shlibs:Depends}
|
||||
Description: Python extension for recoll
|
||||
Personal full text search package which is based on a very strong backend
|
||||
(Xapian), for which it provides an easy to use and feature-rich interface.
|
||||
.
|
||||
This package provides a Python extension module for recoll which can be used to
|
||||
extend recoll such as an Ubuntu Unity Lens.
|
||||
|
||||
Package: python3-recoll
|
||||
Architecture: any
|
||||
Section: python
|
||||
Depends: python3,
|
||||
recollcmd (= ${binary:Version}),
|
||||
${misc:Depends},
|
||||
${python3:Depends},
|
||||
${shlibs:Depends}
|
||||
Description: Python extension for recoll (Python3)
|
||||
Personal full text search package which is based on a very strong backend
|
||||
(Xapian), for which it provides an easy to use and feature-rich interface.
|
||||
.
|
||||
This package provides a Python3 extension module for recoll which can be used to
|
||||
extend recoll such as an Ubuntu Unity Lens.
|
||||
@ -1,5 +1,5 @@
|
||||
diff --git a/internfile/mh_mbox.cpp b/srcinternfile/mh_mbox.cpp
|
||||
index 2a0918cf..92ad7e23 100644
|
||||
diff --git a/src/internfile/mh_mbox.cpp b/src/internfile/mh_mbox.cpp
|
||||
index c77d42c8..ccd6a613 100644
|
||||
--- a/internfile/mh_mbox.cpp
|
||||
+++ b/internfile/mh_mbox.cpp
|
||||
@@ -27,6 +27,7 @@
|
||||
@ -19,19 +19,25 @@ index 2a0918cf..92ad7e23 100644
|
||||
int msgnum{0}; // Current message number in folder. Starts at 1
|
||||
int64_t lineno{0}; // debug
|
||||
int64_t fsize{0};
|
||||
@@ -321,7 +322,6 @@ void MimeHandlerMbox::clear_impl()
|
||||
{
|
||||
@@ -322,13 +323,6 @@ void MimeHandlerMbox::clear_impl()
|
||||
m->fn.erase();
|
||||
m->ipath.erase();
|
||||
- m->instream = ifstream();
|
||||
|
||||
- // We used to use m->instream = ifstream() which fails with some compilers, as the copy
|
||||
- // constructor is marked deleted in standard c++ (works with many compilers though).
|
||||
- if (m->instream.is_open()) {
|
||||
- m->instream.close();
|
||||
- }
|
||||
- m->instream.clear();
|
||||
-
|
||||
m->msgnum = 0;
|
||||
m->lineno = 0;
|
||||
m->fsize = 0;
|
||||
@@ -339,8 +339,9 @@ bool MimeHandlerMbox::set_document_file_impl(const string&, const string &fn)
|
||||
@@ -346,8 +340,9 @@ bool MimeHandlerMbox::set_document_file_impl(const string&, const string &fn)
|
||||
LOGDEB("MimeHandlerMbox::set_document_file(" << fn << ")\n");
|
||||
clear_impl();
|
||||
m->fn = fn;
|
||||
- m->instream = ifstream(fn.c_str(), std::ifstream::binary);
|
||||
- m->instream.open(fn.c_str(), std::ifstream::binary);
|
||||
- if (!m->instream.good()) {
|
||||
+ m->instream = std::unique_ptr<ifstream>(
|
||||
+ new ifstream(fn.c_str(), std::ifstream::binary));
|
||||
@ -39,7 +45,7 @@ index 2a0918cf..92ad7e23 100644
|
||||
LOGSYSERR("MimeHandlerMail::set_document_file", "ifstream", fn);
|
||||
return false;
|
||||
}
|
||||
@@ -389,13 +390,13 @@ bool MimeHandlerMbox::Internal::tryUseCache(int mtarg)
|
||||
@@ -396,13 +391,13 @@ bool MimeHandlerMbox::Internal::tryUseCache(int mtarg)
|
||||
fsize)) < 0) {
|
||||
goto out;
|
||||
}
|
||||
@ -57,7 +63,7 @@ index 2a0918cf..92ad7e23 100644
|
||||
LOGSYSERR("tryUseCache", "getline", "");
|
||||
goto out;
|
||||
}
|
||||
@@ -404,7 +405,7 @@ bool MimeHandlerMbox::Internal::tryUseCache(int mtarg)
|
||||
@@ -411,7 +406,7 @@ bool MimeHandlerMbox::Internal::tryUseCache(int mtarg)
|
||||
if ((fromregex(line) ||
|
||||
((quirks & MBOXQUIRK_TBIRD) && minifromregex(line))) ) {
|
||||
LOGDEB0("MimeHandlerMbox: Cache: From_ Ok\n");
|
||||
@ -66,7 +72,7 @@ index 2a0918cf..92ad7e23 100644
|
||||
msgnum = mtarg -1;
|
||||
cachefound = true;
|
||||
} else {
|
||||
@@ -414,7 +415,7 @@ bool MimeHandlerMbox::Internal::tryUseCache(int mtarg)
|
||||
@@ -421,7 +416,7 @@ bool MimeHandlerMbox::Internal::tryUseCache(int mtarg)
|
||||
out:
|
||||
if (!cachefound) {
|
||||
// No cached result: scan.
|
||||
@ -75,7 +81,7 @@ index 2a0918cf..92ad7e23 100644
|
||||
msgnum = 0;
|
||||
}
|
||||
return cachefound;
|
||||
@@ -422,7 +423,7 @@ out:
|
||||
@@ -429,7 +424,7 @@ out:
|
||||
|
||||
bool MimeHandlerMbox::next_document()
|
||||
{
|
||||
@ -84,7 +90,7 @@ index 2a0918cf..92ad7e23 100644
|
||||
LOGERR("MimeHandlerMbox::next_document: not open\n");
|
||||
return false;
|
||||
}
|
||||
@@ -458,10 +459,10 @@ bool MimeHandlerMbox::next_document()
|
||||
@@ -465,10 +460,10 @@ bool MimeHandlerMbox::next_document()
|
||||
msgtxt.erase();
|
||||
string line;
|
||||
for (;;) {
|
||||
|
||||
@ -21,10 +21,11 @@ endif
|
||||
|
||||
# main packaging script based on dh7 syntax
|
||||
%:
|
||||
dh $@ --parallel --with python2 --with python3
|
||||
dh $@ --with python2 --with python3
|
||||
|
||||
override_dh_auto_configure:
|
||||
dh_auto_configure -- --enable-recollq --enable-xadump
|
||||
dh_auto_configure -- --enable-recollq --enable-xadump --enable-webkit
|
||||
# dh_auto_configure -- --enable-recollq --enable-xadump --enable-webengine
|
||||
|
||||
build3vers := $(shell py3versions -sv)
|
||||
|
||||
|
||||
@ -1,3 +1,76 @@
|
||||
kio-recoll (1.33.0-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
||||
|
||||
* Follow
|
||||
|
||||
-- Jean-Francois Dockes <jf@dockes.org> Mon, 30 Aug 2022 10:59:00 +0200
|
||||
|
||||
kio-recoll (1.32.7-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
||||
|
||||
* Updated kio_recoll to work with newer kf5 versions (it seems that 5.96 broke it at least on
|
||||
arch).
|
||||
|
||||
-- Jean-Francois Dockes <jf@dockes.org> Sun, 07 Aug 2022 17:42:00 +0200
|
||||
|
||||
kio-recoll (1.32.5-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
||||
|
||||
* Follow
|
||||
|
||||
-- Jean-Francois Dockes <jf@dockes.org> Tue, 05 Jul 2022 09:56:00 +0200
|
||||
|
||||
kio-recoll (1.32.4-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
||||
|
||||
* Follow
|
||||
|
||||
-- Jean-Francois Dockes <jf@dockes.org> Wed, 29 Jun 2022 09:36:00 +0200
|
||||
|
||||
kio-recoll (1.32.2-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
||||
|
||||
* Follow
|
||||
|
||||
-- Jean-Francois Dockes <jf@dockes.org> Tue, 14 Jun 2022 07:51:00 +0200
|
||||
|
||||
kio-recoll (1.32.1-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
||||
|
||||
* Follow
|
||||
|
||||
-- Jean-Francois Dockes <jf@dockes.org> Sun, 15 May 2022 08:07:00 +0200
|
||||
|
||||
kio-recoll (1.32.0-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
||||
|
||||
* Follow
|
||||
|
||||
-- Jean-Francois Dockes <jf@dockes.org> Fri, 11 Mar 2022 18:17:00 +0100
|
||||
|
||||
kio-recoll (1.31.6-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
||||
|
||||
* Follow
|
||||
|
||||
-- Jean-Francois Dockes <jf@dockes.org> Sat, 20 Dec 2021 09:25:00 +0100
|
||||
|
||||
kio-recoll (1.31.5-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
||||
|
||||
* Follow recoll version
|
||||
|
||||
-- Jean-Francois Dockes <jf@dockes.org> Sat, 04 Dec 2021 10:05:00 +0100
|
||||
|
||||
kio-recoll (1.31.4-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
||||
|
||||
* Follow recoll version
|
||||
|
||||
-- Jean-Francois Dockes <jf@dockes.org> Thu, 25 Nov 2021 16:30:00 +0100
|
||||
|
||||
kio-recoll (1.31.3-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
||||
|
||||
* Follow recoll version
|
||||
|
||||
-- Jean-Francois Dockes <jf@dockes.org> Sat, 13 Nov 2021 16:30:00 +0200
|
||||
|
||||
kio-recoll (1.31.2-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
||||
|
||||
* Follow recoll version
|
||||
|
||||
-- Jean-Francois Dockes <jf@dockes.org> Mon, 11 Oct 2021 10:55:00 +0200
|
||||
|
||||
kio-recoll (1.31.0-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
||||
|
||||
* Follow recoll version
|
||||
|
||||
@ -0,0 +1,13 @@
|
||||
diff --git a/src/kde/kioslave/kio_recoll/dirif.cpp b/src/kde/kioslave/kio_recoll/dirif.cpp
|
||||
index 4438a1e7..48284ece 100644
|
||||
--- a/kde/kioslave/kio_recoll/dirif.cpp
|
||||
+++ b/kde/kioslave/kio_recoll/dirif.cpp
|
||||
@@ -35,7 +35,7 @@
|
||||
#include <QDebug>
|
||||
#include <QUrl>
|
||||
#include <QStandardPaths>
|
||||
-#include <KF5/kio_version.h>
|
||||
+#include <kio_version.h>
|
||||
|
||||
#include "kio_recoll.h"
|
||||
#include "pathut.h"
|
||||
8
packaging/debian/rclgrep/debian/README.Debian
Normal file
8
packaging/debian/rclgrep/debian/README.Debian
Normal file
@ -0,0 +1,8 @@
|
||||
README for Debian
|
||||
-----------------
|
||||
|
||||
The rclgrep package is a partial installation of the recollcmd package, with no
|
||||
Xapian dependency. It conflicts with recollcmd, which also provides the rclgrep
|
||||
command.
|
||||
|
||||
-- Jean-Francois Dockes <jf@dockes.org> Tue, 20 Sep 2022 08:32:00 +0200
|
||||
7
packaging/debian/rclgrep/debian/changelog
Normal file
7
packaging/debian/rclgrep/debian/changelog
Normal file
@ -0,0 +1,7 @@
|
||||
rclgrep (1.33.1-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
||||
|
||||
* 1st version of rclgrep: a non-indexed search program using recoll
|
||||
data extraction modules to effect grep-like function.
|
||||
|
||||
-- Jean-Francois Dockes <jf@dockes.org> Sun, 11 Sep 2022 10:59:00 +0200
|
||||
|
||||
1
packaging/debian/rclgrep/debian/compat
Normal file
1
packaging/debian/rclgrep/debian/compat
Normal file
@ -0,0 +1 @@
|
||||
11
|
||||
44
packaging/debian/rclgrep/debian/control
Normal file
44
packaging/debian/rclgrep/debian/control
Normal file
@ -0,0 +1,44 @@
|
||||
Source: rclgrep
|
||||
Section: x11
|
||||
Priority: optional
|
||||
Maintainer: Jean-Francois Dockes <jfd@recoll.org>
|
||||
Build-Depends: debhelper (>= 10),
|
||||
dh-python,
|
||||
dpkg-dev (>= 1.16.1~),
|
||||
libchm-dev,
|
||||
libmagic-dev,
|
||||
libxslt1-dev,
|
||||
libz-dev,
|
||||
pkg-config,
|
||||
python3-all-dev,
|
||||
python3-setuptools
|
||||
X-Python3-Version: >= 3.6
|
||||
Homepage: https://www.lesbonscomptes.com/recoll
|
||||
Standards-Version: 4.2.1
|
||||
|
||||
Package: rclgrep
|
||||
Architecture: any
|
||||
Depends: python3, ${misc:Depends}, ${shlibs:Depends}
|
||||
Conflicts: recollcmd
|
||||
Recommends: antiword,
|
||||
groff,
|
||||
libimage-exiftool-perl,
|
||||
poppler-utils,
|
||||
python3-lxml,
|
||||
python3-six,
|
||||
python3-mutagen,
|
||||
python3-rarfile,
|
||||
unrtf,
|
||||
unzip,
|
||||
xdg-utils
|
||||
Suggests: ghostscript,
|
||||
untex,
|
||||
wv
|
||||
Description: grep-like program based on recoll data extraction modules.
|
||||
The program supports most grep options and aims at supplying a very similar
|
||||
output format. It will search all formats supported by Recoll, including
|
||||
compound documents and nested archives (mbox, zip, ....) with full
|
||||
regexp support (unlike recoll). It does not create
|
||||
an index and the package has no dependency on Xapian. In consequence,
|
||||
searching is vastly slower than when using recoll.
|
||||
|
||||
141
packaging/debian/rclgrep/debian/copyright
Normal file
141
packaging/debian/rclgrep/debian/copyright
Normal file
@ -0,0 +1,141 @@
|
||||
Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
|
||||
Upstream-Name: recoll
|
||||
Upstream-Contact: Jean-Francois Dockes <jfd@recoll.org>
|
||||
Source: https://www.lesbonscomptes.com/recoll/
|
||||
|
||||
Files: *
|
||||
Copyright: 2005-2014, Jean-Francois Dockes <jfd@recoll.org>
|
||||
License: GPL-2+
|
||||
|
||||
Files: bincimapmime/*
|
||||
Copyright: 2002-2005, Andreas Aardal Hanssen <andreas-binc@bincimap.org>
|
||||
License: GPL-2+
|
||||
|
||||
Files: filters/rcl* internfile/htmlparse.cpp
|
||||
Copyright: 2000-2004, Mikio Hirabayashi
|
||||
License: GPL-2+
|
||||
|
||||
Files: filters/rclpython
|
||||
Copyright: Jürgen Hermann, Mike Brown, Christopher Arndt
|
||||
<http://chrisarndt.de/en/software/python/colorize.html>
|
||||
License: GPL-2+
|
||||
|
||||
Files: internfile/htmlparse.cpp internfile/mh_html.cpp
|
||||
Copyright: 1999-2001, BrightStation PLC,
|
||||
2001, Ananova Ltd,
|
||||
2002-2004, Olly Betts.
|
||||
License: GPL-2+
|
||||
|
||||
Files: unac/*
|
||||
Copyright: 2000-2002, Loic Dachary <loic@senga.org>
|
||||
License: GPL-2+
|
||||
|
||||
Files: common/*
|
||||
Copyright: 2004-2005, J.F.Dockes
|
||||
License: GPL-2+
|
||||
|
||||
Files: debian/*
|
||||
Copyright: 2007-2014, Kartik Mistry <kartik@debian.org>
|
||||
License: GPL-2+
|
||||
|
||||
License: GPL-2+
|
||||
This package is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free Software
|
||||
Foundation; either version 2 of the License, or (at your option) any later
|
||||
version.
|
||||
.
|
||||
This package is distributed in the hope that it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
||||
.
|
||||
You should have received a copy of the GNU General Public License along with
|
||||
this package; if not, write to the Free Software Foundation, Inc., 51 Franklin
|
||||
St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
.
|
||||
On Debian systems, the complete text of the GNU General Public License can be
|
||||
found in `/usr/share/common-licenses/GPL-2' and
|
||||
`/usr/share/common-licenses/GPL-3'.
|
||||
|
||||
Files: aspell/*
|
||||
Copyright: 2001-2002, by Kevin Atkinson
|
||||
License: LGPL-2+
|
||||
|
||||
License: LGPL-2+
|
||||
This package is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU Lesser General Public License as published by the Free
|
||||
Software Foundation; either version 2 of the License, or (at your option) any
|
||||
later version.
|
||||
.
|
||||
This package is distributed in the hope that it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
||||
details.
|
||||
.
|
||||
You should have received a copy of the GNU Lesser General Public License along
|
||||
with this package; if not, write to the Free Software Foundation, Inc., 51
|
||||
Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
.
|
||||
On Debian systems, the complete text of the GNU Lesser General Public License
|
||||
can be found in `/usr/share/common-licenses/LGPL-2' and
|
||||
`/usr/share/common-licenses/LGPL-2.1' and `/usr/share/common-licenses/LGPL-3'.
|
||||
|
||||
Files: common/uproplist.h
|
||||
Copyright: 1991-2006, Unicode, Inc.
|
||||
License: Unicode
|
||||
|
||||
License: Unicode
|
||||
All rights reserved. Distributed under the Terms of Use in
|
||||
https://www.unicode.org/copyright.html
|
||||
.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of the Unicode data files and any associated documentation (the "Data Files")
|
||||
or Unicode software and any associated documentation (the "Software") to deal
|
||||
in the Data Files or Software without restriction, including without
|
||||
limitation the rights to use, copy, modify, merge, publish, distribute, and/or
|
||||
sell copies of the Data Files or Software, and to permit persons to whom the
|
||||
Data Files or Software are furnished to do so, provided that (a) the above
|
||||
copyright notice(s) and this permission notice appear with all copies of the
|
||||
Data Files or Software, (b) both the above copyright notice(s) and this
|
||||
permission notice appear in associated documentation, and (c) there is clear
|
||||
notice in each modified Data File or in the Software as well as in the
|
||||
documentation associated with the Data File(s) or Software that the data or
|
||||
software has been modified.
|
||||
.
|
||||
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
|
||||
KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD
|
||||
PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN
|
||||
THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
|
||||
DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
|
||||
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE
|
||||
DATA FILES OR SOFTWARE.
|
||||
.
|
||||
Except as contained in this notice, the name of a copyright holder shall not
|
||||
be used in advertising or otherwise to promote the sale, use or other dealings
|
||||
in these Data Files or Software without prior written authorization of the
|
||||
copyright holder.
|
||||
|
||||
Files: utils/md5.*
|
||||
Copyright: 1991-1992, RSA Data Security, Inc. All rights reserved.
|
||||
License: RSA
|
||||
|
||||
License: RSA
|
||||
MD5C.C - RSA Data Security, Inc., MD5 message-digest algorithm
|
||||
.
|
||||
License to copy and use this software is granted provided that it is
|
||||
identified as the "RSA Data Security, Inc. MD5 Message-Digest Algorithm" in
|
||||
all material mentioning or referencing this software or this function.
|
||||
.
|
||||
License is also granted to make and use derivative works provided that such
|
||||
works are identified as "derived from the RSA Data Security, Inc. MD5
|
||||
Message-Digest Algorithm" in all material mentioning or referencing the
|
||||
derived work.
|
||||
.
|
||||
RSA Data Security, Inc. makes no representations concerning either the
|
||||
merchantability of this software or the suitability of this software for any
|
||||
particular purpose. It is provided "as is" without express or implied warranty
|
||||
of any kind.
|
||||
.
|
||||
These notices must be retained in any copies of any part of this documentation
|
||||
and/or software.
|
||||
2
packaging/debian/rclgrep/debian/rclgrep.install
Normal file
2
packaging/debian/rclgrep/debian/rclgrep.install
Normal file
@ -0,0 +1,2 @@
|
||||
usr/lib/python*/*-packages/recollchm/*
|
||||
usr/lib/python*/*-packages/recollchm-*/*
|
||||
44
packaging/debian/rclgrep/debian/rules
Executable file
44
packaging/debian/rclgrep/debian/rules
Executable file
@ -0,0 +1,44 @@
|
||||
#!/usr/bin/make -f
|
||||
|
||||
# Uncomment this to turn on verbose mode.
|
||||
#export DH_VERBOSE=1
|
||||
|
||||
export DEB_BUILD_MAINT_OPTIONS = hardening=+all
|
||||
DPKG_EXPORT_BUILDFLAGS = 1
|
||||
include /usr/share/dpkg/buildflags.mk
|
||||
|
||||
DEB_HOST_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_HOST_GNU_TYPE)
|
||||
DEB_BUILD_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_BUILD_GNU_TYPE)
|
||||
|
||||
build3vers := $(shell py3versions -sv)
|
||||
|
||||
#build qt5 UI
|
||||
export QT_SELECT := qt5
|
||||
|
||||
ifneq (,$(filter parallel=%,$(DEB_BUILD_OPTIONS)))
|
||||
NJOBS := -j $(patsubst parallel=%,%,$(filter parallel=%,$(DEB_BUILD_OPTIONS)))
|
||||
endif
|
||||
|
||||
# main packaging script based on dh7 syntax
|
||||
%:
|
||||
dh $@ --with python3
|
||||
|
||||
override_dh_auto_configure:
|
||||
dh_auto_configure -- --enable-rclgrep --disable-python-module --disable-indexer \
|
||||
--disable-qtgui --disable-recollq --disable-testmains \
|
||||
--disable-xadump
|
||||
|
||||
build3vers := $(shell py3versions -sv)
|
||||
|
||||
override_dh_auto_install:
|
||||
dh_auto_install
|
||||
set -e && for i in $(build3vers); do \
|
||||
(cd python/pychm; python$$i ./setup.py install \
|
||||
--install-layout=deb \
|
||||
--prefix=/usr \
|
||||
--root=$(CURDIR)/debian/tmp/ ) ; \
|
||||
done
|
||||
find $(CURDIR) -type f -name '*.la' -exec rm -f '{}' \;
|
||||
find $(CURDIR) -type f -name '*.pyc' -exec rm -f '{}' \;
|
||||
rm -rf $(CURDIR)/debian/rclgrep/usr/lib/python*/*/*/__pycache__
|
||||
rm -rf $(CURDIR)/debian/tmp/usr/lib/python*/*/*/__pycache__
|
||||
1
packaging/debian/rclgrep/debian/source/format
Normal file
1
packaging/debian/rclgrep/debian/source/format
Normal file
@ -0,0 +1 @@
|
||||
3.0 (quilt)
|
||||
41
packaging/debian/rclgrep/debian/upstream/signing-key.asc
Normal file
41
packaging/debian/rclgrep/debian/upstream/signing-key.asc
Normal file
@ -0,0 +1,41 @@
|
||||
-----BEGIN PGP PUBLIC KEY BLOCK-----
|
||||
|
||||
mQINBFbJ6UABEADLsFg8qXTrNrYUnNS5UXlAWUH7/ZHNRgr/EIkhKAbdlzVAywTM
|
||||
fX6wo9crKzlqT3IcEOFe0RVJoh0FSNEQQlUhyJAFNlbcocsDYNqk7pDjxnUBUMM2
|
||||
U3ikLEPzRxWDhVepAVQPeloD1i8b4MJrSHnLb49PMmXg+6MHA+dzOS59onE5QDcz
|
||||
kw1RF0N0gl7693rOMP/ATefA2KPQyKCIweKB/3NbOcv4/T1XDyag0G7xYkT4stEl
|
||||
TN2P8c6HSyhWDxp2slZ04kdf17TuoeOqMO9gKE+eEC17lllLuhSrbBdfYTYt05pN
|
||||
Y1eRup+6oamoMc3ITD2U2GtY+65AHw5MxjGigpZ3kj5DwF/f2IgtDBSoXjm8aaRb
|
||||
iYMvt3kXnb3Ai/oVvSlkIQMlDDpdAQmzB0FO0MCzzykq5mQVbl3Uw3i2q5vg1IIL
|
||||
fGOB1USa0JOVRSq8C66ncijYO6Jafx3uYCGVdIypoLs332kGsyQaIatoJRbPkKT/
|
||||
Wu/DGE8kHOaCo5795HbRk0O/Up5wQP3N/OXGmrQPtbafRz9bkjXOKGtq660VJ67K
|
||||
ttgY9L1fD7jb+zDoUaY33K8Trfqaxm5aGkI6Pj3VvQSF2CAaJuEnh/c0r9UdGn0e
|
||||
e1L0yP1kUj80Qv99QFEoH2UtBrfLsXAiRvcr/PfyGTp/+Q7wkCHsHC84TwARAQAB
|
||||
tCRKZWFuLUZyYW5jb2lzIERvY2tlcyA8amZAZG9ja2VzLm9yZz6JAkAEEwEKACoC
|
||||
GwMFCwkIBwMFFQoJCAsFFgIDAQACHgECF4ACGQEFAlitGxQFCQWlmM4ACgkQeAjO
|
||||
ltOLkgEE4xAAqGOSt6U+CGdI333Yx7KaCA+XgJPsiaqfG2AIuv4Y0/LW8467uy4u
|
||||
DdbgJ3GQ6kWUZD0b/nrp74Ly5ZM9DCIZzOX9FQ3R9FBhbBS1fVfqFD2yZQv4lze9
|
||||
Bjj7EMRieRGUtVIb7BiUrmJOyIbiGktEOuqqTj7RehN/2sflv5jH2NW33+i3t/x7
|
||||
YWTAPHxieiOYO0Z0JtXe+ZXJ92LNaR+5DOsGItTSeJKzbh1oUtAcbt5DDDQKMJwb
|
||||
tIRg+9Mjj3IUqCsiFkKOfq34TXDu2paKWkdGuOJ8u2DqvgUYkqFfY4JOpWrax+Mt
|
||||
hsS6VSDIxL7H9UqaJpXWMMhUN2gFM+wy/y1OeNo5bKM4KiLbaugEvOb1RCQm2R6h
|
||||
HNcDO52KSFJMZSCzO/jjN2qJjDcLu2DAsQzWI+bzZgP+tpr3yWvW2OCCY+PdT4ZA
|
||||
5lwnd85P5x1wYhb/eoXi9QyWburu3vaNGdDWUljUkBB29l09hoDbAYPwWujLDGyT
|
||||
0j6+NWh27dLe8bnwe5YEBPHcwvuLnSBVVXY+UM/0toEWwpRdTvnxZUKKxtN+yiCA
|
||||
k82qRNXaUGaWpaL0xYPfanZSXi1dSNNEertS/BrF5PpmNdQsK1+sibNF1KKhR5ge
|
||||
2QSrjoNzL4kBgJq4ojJBcBd75p9HzheXCHdG1EHQBTeetDqiwEPbO/W0KUplYW4t
|
||||
RnJhbmNvaXMgRG9ja2VzIDxqZmRvY2tlc0BnbWFpbC5jb20+iQI9BBMBCgAnAhsD
|
||||
BQsJCAcDBRUKCQgLBRYCAwEAAh4BAheABQJYrRsbBQkFpZjOAAoJEHgIzpbTi5IB
|
||||
GKYP/09nsWnDCqv+3OKzmbHlMKCLvGU8IGU1q909sUelKmyjSFXmh3BsgR4DrfXu
|
||||
hGWtmu/mPYzCWzbK8TWYsU1O6em7YRY8lt/q/8gciSahl+xFT/G5GJHf7KFgtsSn
|
||||
QcbA18dzXKpxmTOTMEmWLh4zZlaUbaH2gmpXBQvH4smu/FV6rq5YYYDG9A3PDujr
|
||||
QmOyguD7wNvb6ahrgpTbMawsj6zLIT1pkC6t1Orz/gmYsuk47EJFfvaO3+YIUQ0D
|
||||
pFN9EkDjhcIa3vSsd+EBMbmweFB6y9gs7LmolqwiddUlYv5wGOLgiE1EJEI4bcvr
|
||||
vm8RWHziWytmpTPjzFpETaBVMC8xTt6tiNWNeTUkjbBX0Fek9GEvzAJIpe18LnM8
|
||||
raFREUriHuUwIGrrFrhj7rBAX51DiiJUguDi+842SjlzVE2SCwyjXVlglDItBPKO
|
||||
Y284KpI+wLhJCggtwtzZOQcAc5l8j3JpXjhm1tjSKggEONdBu2l7mWZRAJCBziMK
|
||||
mnUPL8q44l2hc/sDu4cCpsHW+pssGDQqtR+t/fPMGFuXd+WnfYskhyQVms44yAYJ
|
||||
Y/cx43tgYLHDx2TraTQZqh1qgmrXesS2DiT+5pCjQh0ChwTEBjGCz41WcQkD9nTL
|
||||
k3E6amPE6WAPS07bX9zkLHYYIOu8wd8nRoPKlVjhMpBvz8LE
|
||||
=2J/9
|
||||
-----END PGP PUBLIC KEY BLOCK-----
|
||||
4
packaging/debian/rclgrep/debian/watch
Normal file
4
packaging/debian/rclgrep/debian/watch
Normal file
@ -0,0 +1,4 @@
|
||||
version=4
|
||||
opts=pgpmode=auto \
|
||||
https://www.lesbonscomptes.com/recoll/download.html \
|
||||
(?:|.*/)recoll(?:[_\-]v?|)(\d[^\s/]*)\.(?:tar\.xz|txz|tar\.bz2|tbz2|tar\.gz|tgz)
|
||||
@ -60,40 +60,40 @@ index f41a9f39..dc3085a4 100755
|
||||
#
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
diff --git filters/rcl7z filters/rcl7z
|
||||
diff --git filters/rcl7z.py filters/rcl7z.py
|
||||
index c68c8bcb..ac50c4ec 100755
|
||||
--- filters/rcl7z
|
||||
+++ filters/rcl7z
|
||||
--- filters/rcl7z.py
|
||||
+++ filters/rcl7z.py
|
||||
@@ -1,4 +1,4 @@
|
||||
-#!/usr/bin/env python2
|
||||
+#!/usr/bin/env python2.7
|
||||
|
||||
# 7-Zip file filter for Recoll
|
||||
|
||||
diff --git filters/rclaudio filters/rclaudio
|
||||
diff --git filters/rclaudio.py filters/rclaudio.py
|
||||
index 94ca0be7..08d6375a 100755
|
||||
--- filters/rclaudio
|
||||
+++ filters/rclaudio
|
||||
--- filters/rclaudio.py
|
||||
+++ filters/rclaudio.py
|
||||
@@ -1,4 +1,4 @@
|
||||
-#!/usr/bin/env python2
|
||||
+#!/usr/bin/env python2.7
|
||||
|
||||
# Audio tag filter for Recoll, using mutagen
|
||||
|
||||
diff --git filters/rclchm filters/rclchm
|
||||
diff --git filters/rclchm.py filters/rclchm.py
|
||||
index f9811c37..3bc9b16d 100755
|
||||
--- filters/rclchm
|
||||
+++ filters/rclchm
|
||||
--- filters/rclchm.py
|
||||
+++ filters/rclchm.py
|
||||
@@ -1,4 +1,4 @@
|
||||
-#!/usr/bin/env python2
|
||||
+#!/usr/bin/env python2.7
|
||||
"""Extract Html files from a Microsoft Compiled Html Help file (.chm)
|
||||
Needs at least python 2.2 for HTMLParser (chmlib needs 2.2 too)"""
|
||||
|
||||
diff --git filters/rcldia filters/rcldia
|
||||
diff --git filters/rcldia.py filters/rcldia.py
|
||||
index 282148eb..a480294b 100755
|
||||
--- filters/rcldia
|
||||
+++ filters/rcldia
|
||||
--- filters/rcldia.py
|
||||
+++ filters/rcldia.py
|
||||
@@ -1,4 +1,4 @@
|
||||
-#!/usr/bin/env python2
|
||||
+#!/usr/bin/env python2.7
|
||||
@ -120,30 +120,30 @@ index e8fa1831..b92b185d 100755
|
||||
from __future__ import print_function
|
||||
|
||||
import rclexecm
|
||||
diff --git filters/rclepub filters/rclepub
|
||||
diff --git filters/rclepub.py filters/rclepub.py
|
||||
index 8042d7f9..51786af1 100755
|
||||
--- filters/rclepub
|
||||
+++ filters/rclepub
|
||||
--- filters/rclepub.py
|
||||
+++ filters/rclepub.py
|
||||
@@ -1,4 +1,4 @@
|
||||
-#!/usr/bin/env python2
|
||||
+#!/usr/bin/env python2.7
|
||||
"""Extract Html content from an EPUB file (.epub)"""
|
||||
from __future__ import print_function
|
||||
|
||||
diff --git filters/rclepub1 filters/rclepub1
|
||||
diff --git filters/rclepub.py1 filters/rclepub.py1
|
||||
index bd44f635..a7ea6c06 100755
|
||||
--- filters/rclepub1
|
||||
+++ filters/rclepub1
|
||||
--- filters/rclepub.py1
|
||||
+++ filters/rclepub.py1
|
||||
@@ -1,4 +1,4 @@
|
||||
-#!/usr/bin/env python2
|
||||
+#!/usr/bin/env python2.7
|
||||
"""Extract Html content from an EPUB file (.chm), concatenating all sections"""
|
||||
from __future__ import print_function
|
||||
|
||||
diff --git filters/rclics filters/rclics
|
||||
diff --git filters/rclics.py filters/rclics.py
|
||||
index 0ef04f2d..de177024 100755
|
||||
--- filters/rclics
|
||||
+++ filters/rclics
|
||||
--- filters/rclics.py
|
||||
+++ filters/rclics.py
|
||||
@@ -1,4 +1,4 @@
|
||||
-#!/usr/bin/env python2
|
||||
+#!/usr/bin/env python2.7
|
||||
@ -160,20 +160,20 @@ index 7eb1da91..4eb6c9b0 100755
|
||||
|
||||
# Python-based Image Tag extractor for Recoll. This is less thorough
|
||||
# than the Perl-based rclimg script, but useful if you don't want to
|
||||
diff --git filters/rclinfo filters/rclinfo
|
||||
diff --git filters/rclinfo.py filters/rclinfo.py
|
||||
index f353d19e..36cf34e0 100755
|
||||
--- filters/rclinfo
|
||||
+++ filters/rclinfo
|
||||
--- filters/rclinfo.py
|
||||
+++ filters/rclinfo.py
|
||||
@@ -1,4 +1,4 @@
|
||||
-#!/usr/bin/env python2
|
||||
+#!/usr/bin/env python2.7
|
||||
|
||||
# Read a file in GNU info format and output its nodes as subdocs,
|
||||
# interfacing with recoll execm
|
||||
diff --git filters/rclkar filters/rclkar
|
||||
diff --git filters/rclkar.py filters/rclkar.py
|
||||
index d6570dd5..34b8d2a2 100755
|
||||
--- filters/rclkar
|
||||
+++ filters/rclkar
|
||||
--- filters/rclkar.py
|
||||
+++ filters/rclkar.py
|
||||
@@ -1,4 +1,4 @@
|
||||
-#!/usr/bin/env python2
|
||||
+#!/usr/bin/env python2.7
|
||||
@ -230,10 +230,10 @@ index 615455b3..1e411890 100755
|
||||
# -*- coding: iso-8859-1 -*-
|
||||
"""
|
||||
MoinMoin - Python source parser and colorizer
|
||||
diff --git filters/rclrar filters/rclrar
|
||||
diff --git filters/rclrar.py filters/rclrar.py
|
||||
index 8f723fa5..5f6adfb0 100755
|
||||
--- filters/rclrar
|
||||
+++ filters/rclrar
|
||||
--- filters/rclrar.py
|
||||
+++ filters/rclrar.py
|
||||
@@ -1,4 +1,4 @@
|
||||
-#!/usr/bin/env python2
|
||||
+#!/usr/bin/env python2.7
|
||||
@ -280,10 +280,10 @@ index 8c1b8aea..cee17324 100755
|
||||
# Copyright (C) 2014 J.F.Dockes
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
diff --git filters/rcltar filters/rcltar
|
||||
diff --git filters/rcltar.py filters/rcltar.py
|
||||
index d8bf100d..ab4b306e 100755
|
||||
--- filters/rcltar
|
||||
+++ filters/rcltar
|
||||
--- filters/rcltar.py
|
||||
+++ filters/rcltar.py
|
||||
@@ -1,4 +1,4 @@
|
||||
-#!/usr/bin/env python2
|
||||
+#!/usr/bin/env python2.7
|
||||
@ -320,10 +320,10 @@ index 32a11c1a..eab3b257 100644
|
||||
from __future__ import print_function
|
||||
|
||||
import rclexecm
|
||||
diff --git filters/rclwar filters/rclwar
|
||||
diff --git filters/rclwar.py filters/rclwar.py
|
||||
index b654f3b3..301e28e9 100755
|
||||
--- filters/rclwar
|
||||
+++ filters/rclwar
|
||||
--- filters/rclwar.py
|
||||
+++ filters/rclwar.py
|
||||
@@ -1,4 +1,4 @@
|
||||
-#!/usr/bin/env python2
|
||||
+#!/usr/bin/env python2.7
|
||||
@ -360,10 +360,10 @@ index 158e1222..602769af 100755
|
||||
# Copyright (C) 2016 J.F.Dockes
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
diff --git filters/rclzip filters/rclzip
|
||||
diff --git filters/rclzip.py filters/rclzip.py
|
||||
index 35739625..0c597fbd 100755
|
||||
--- filters/rclzip
|
||||
+++ filters/rclzip
|
||||
--- filters/rclzip.py
|
||||
+++ filters/rclzip.py
|
||||
@@ -1,4 +1,4 @@
|
||||
-#!/usr/bin/env python2
|
||||
+#!/usr/bin/env python2.7
|
||||
|
||||
@ -12,27 +12,31 @@ usage()
|
||||
|
||||
# Adjustable things
|
||||
top=~/Recoll
|
||||
qtversion=5.14.2
|
||||
# Will probably need adjustment on M1
|
||||
path_clang=clang_64
|
||||
deploy=~/Qt/${qtversion}/${path_clang}/bin/macdeployqt
|
||||
# The possibly bogus version we have in paths (may be hardcoded in the .pro)
|
||||
# qcbuildloc=Desktop_Qt_5_15_2_clang_64bit
|
||||
qcbuildloc=Qt_6_2_4_for_macOS
|
||||
|
||||
# qtversion=5.15.2
|
||||
qtversion=6.2.4
|
||||
|
||||
qt_ver_sion=`echo $qtversion | sed -e 's/\./_/g'`
|
||||
#deploy=~/Qt/${qtversion}/macos/clang_64bit/macdeployqt
|
||||
deploy=~/Qt/${qtversion}/macos/bin/macdeployqt
|
||||
|
||||
toprecoll=$top/recoll/src
|
||||
appdir=$toprecoll/build-recoll-win-Desktop_Qt_${qt_ver_sion}_${path_clang}bit-Release/recoll.app
|
||||
rclindexdir=$toprecoll/windows/build-recollindex-Desktop_Qt_${qt_ver_sion}_${path_clang}bit-Release
|
||||
appdir=$toprecoll/build-recoll-win-${qcbuildloc}-Release/recoll.app
|
||||
rclindexdir=$toprecoll/windows/build-recollindex-${qcbuildloc}-Release
|
||||
rclqdir=$toprecoll/windows/build-recollq-${qcbuildloc}-Release
|
||||
bindir=$appdir/Contents/MacOS
|
||||
datadir=$appdir/Contents/Resources
|
||||
|
||||
dmg=$appdir/../recoll.dmg
|
||||
|
||||
version=`cat $toprecoll/VERSION`
|
||||
version=`cat $toprecoll/RECOLL-VERSION.txt`
|
||||
|
||||
test -d $appdir || fatal Must first have built recoll in $appdir
|
||||
|
||||
cp $rclindexdir/recollindex $bindir || exit 1
|
||||
cp $rclqdir/recollq $bindir || exit 1
|
||||
|
||||
cp $top/antiword/antiword $bindir || exit 1
|
||||
mkdir -p $datadir/antiword || exit 1
|
||||
@ -45,7 +49,7 @@ $deploy $appdir -dmg || exit 1
|
||||
|
||||
|
||||
hash=`(cd $top/recoll;git log -n 1 | head -1 | awk '{print $2}' |cut -b 1-8)`
|
||||
|
||||
mv $dmg ~/Documents/recoll-$version-$hash.dmg || exit 1
|
||||
dte=`date +%Y%m%d`
|
||||
mv $dmg ~/Documents/recoll-$version-$dte-$hash.dmg || exit 1
|
||||
ls -l ~/Documents/recoll-$version-*.dmg
|
||||
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
|
||||
Summary: Desktop full text search tool with Qt GUI
|
||||
Name: recoll
|
||||
Version: 1.29.2
|
||||
Version: 1.32.7
|
||||
Release: 2%{?dist}
|
||||
Group: Applications/Databases
|
||||
License: GPLv2+
|
||||
@ -13,15 +13,26 @@ Source10: qmake-qt5.sh
|
||||
BuildRequires: aspell-devel
|
||||
BuildRequires: bison
|
||||
BuildRequires: desktop-file-utils
|
||||
# kio
|
||||
BuildRequires: kdelibs4-devel
|
||||
|
||||
#BuildRequires: kdelibs4-devel
|
||||
|
||||
# Fedora
|
||||
BuildRequires: qt5-qtbase-devel
|
||||
BuildRequires: qt5-qtwebkit-devel
|
||||
BuildRequires: extra-cmake-modules
|
||||
BuildRequires: kf5-kio-devel
|
||||
BuildRequires: python2-devel
|
||||
#BuildRequires: qt5-qtwebengine-devel
|
||||
BuildRequires: python3-devel
|
||||
BuildRequires: xapian-core-devel
|
||||
BuildRequires: kf5-kio-devel
|
||||
|
||||
# Opensuse
|
||||
#BuildRequires: libQt5Gui-devel
|
||||
#BuildRequires: libqt5-qtwebengine-devel
|
||||
#BuildRequires: python310-devel
|
||||
#BuildRequires: libxapian-devel
|
||||
#BuildRequires: kio-devel
|
||||
|
||||
BuildRequires: extra-cmake-modules
|
||||
BuildRequires: python2-devel
|
||||
BuildRequires: zlib-devel
|
||||
BuildRequires: chmlib-devel
|
||||
BuildRequires: libxslt-devel
|
||||
@ -55,7 +66,7 @@ LDFLAGS="%{?__global_ldflags}"; export LDFLAGS
|
||||
install -m755 -D %{SOURCE10} qmake-qt5.sh
|
||||
export QMAKE=qmake-qt5
|
||||
|
||||
%configure
|
||||
%configure --enable-webengine
|
||||
make %{?_smp_mflags}
|
||||
|
||||
%install
|
||||
@ -70,7 +81,7 @@ rm -f %{buildroot}/usr/share/recoll/filters/xdg-open
|
||||
|
||||
# kio_recoll -kde5
|
||||
(
|
||||
mkdir kde/kioslave/kio_recoll/build && pushd kde/kioslave/kio_recoll/build
|
||||
#mkdir kde/kioslave/kio_recoll/build && pushd kde/kioslave/kio_recoll/build
|
||||
%cmake ..
|
||||
make %{?_smp_mflags} VERBOSE=1
|
||||
make install DESTDIR=%{buildroot}
|
||||
@ -137,12 +148,12 @@ exit 0
|
||||
%{_datadir}/icons/hicolor/48x48/apps/%{name}.png
|
||||
%{_datadir}/pixmaps/%{name}.png
|
||||
%{_libdir}/recoll
|
||||
%{python_sitearch}/recoll
|
||||
%{python_sitearch}/Recoll*.egg-info
|
||||
%{python2_sitearch}/recoll
|
||||
%{python2_sitearch}/Recoll*.egg-info
|
||||
%{python3_sitearch}/recoll
|
||||
%{python3_sitearch}/Recoll*.egg-info
|
||||
%{python_sitearch}/recollchm
|
||||
%{python_sitearch}/recollchm*.egg-info
|
||||
%{python2_sitearch}/recollchm
|
||||
%{python2_sitearch}/recollchm*.egg-info
|
||||
%{python3_sitearch}/recollchm
|
||||
%{python3_sitearch}/recollchm*.egg-info
|
||||
%{_mandir}/man1/%{name}.1*
|
||||
|
||||
111
src/Makefile.am
111
src/Makefile.am
@ -4,12 +4,16 @@
|
||||
if COND_TESTMAINS
|
||||
MAYBE_TESTMAINS = testmains
|
||||
endif
|
||||
SUBDIRS = . $(MAYBE_TESTMAINS)
|
||||
if COND_RCLGREP
|
||||
MAYBE_RCLGREP = rclgrep
|
||||
endif
|
||||
SUBDIRS = . $(MAYBE_TESTMAINS) $(MAYBE_RCLGREP)
|
||||
|
||||
DIST_SUBDIRS = .
|
||||
|
||||
CXXFLAGS ?= @CXXFLAGS@
|
||||
LIBXAPIAN=@LIBXAPIAN@
|
||||
XAPIANCXXFLAGS=@XAPIANCXXFLAGS@
|
||||
XAPIAN_LIBS=@XAPIAN_LIBS@
|
||||
XAPIAN_CFLAGS=@XAPIAN_CFLAGS@
|
||||
XSLT_CFLAGS=@XSLT_CFLAGS@
|
||||
XSLT_LIBS=@XSLT_LIBS@
|
||||
LIBICONV=@LIBICONV@
|
||||
@ -38,7 +42,7 @@ COMMONCPPFLAGS = -I. \
|
||||
AM_CPPFLAGS = -Wall -Wno-unused -std=c++11 \
|
||||
$(COMMONCPPFLAGS) \
|
||||
$(INCICONV) \
|
||||
$(XAPIANCXXFLAGS) \
|
||||
$(XAPIAN_CFLAGS) \
|
||||
$(XSLT_CFLAGS) \
|
||||
$(X_CFLAGS) \
|
||||
-DRECOLL_DATADIR=\"${pkgdatadir}\" \
|
||||
@ -55,7 +59,10 @@ else
|
||||
endif
|
||||
|
||||
librcldir = $(libdir)/recoll
|
||||
librcl_LTLIBRARIES = librecoll.la
|
||||
librcl_LTLIBRARIES =
|
||||
if MAKE_RECOLL_LIB
|
||||
librcl_LTLIBRARIES += librecoll.la
|
||||
endif
|
||||
|
||||
librecoll_la_SOURCES = \
|
||||
aspell/rclaspell.cpp \
|
||||
@ -287,7 +294,7 @@ AM_YFLAGS = -d
|
||||
# need it
|
||||
librecoll_la_LDFLAGS = -release $(VERSION) -no-undefined @NO_UNDEF_LINK_FLAG@
|
||||
|
||||
librecoll_la_LIBADD = $(XSLT_LIBS) $(LIBXAPIAN) $(LIBICONV) $(X_LIBX11) $(LIBTHREADS)
|
||||
librecoll_la_LIBADD = $(XSLT_LIBS) $(XAPIAN_LIBS) $(LIBICONV) $(X_LIBX11) $(LIBTHREADS)
|
||||
|
||||
# There is probably a better way to do this. The KIO needs to be linked
|
||||
# with librecoll, but librecoll is installed into a non-standard place
|
||||
@ -306,7 +313,10 @@ PicStatic: $(librecoll_la_OBJECTS)
|
||||
$(LIBTOOL) --tag=LD --mode=link gcc -g -O -o librecoll.la \
|
||||
$(librecoll_la_OBJECTS)
|
||||
|
||||
bin_PROGRAMS = recollindex
|
||||
bin_PROGRAMS =
|
||||
if MAKEINDEXER
|
||||
bin_PROGRAMS += recollindex
|
||||
endif
|
||||
if MAKECMDLINE
|
||||
bin_PROGRAMS += recollq
|
||||
endif
|
||||
@ -334,10 +344,8 @@ recollq_SOURCES = query/recollqmain.cpp
|
||||
recollq_LDADD = librecoll.la
|
||||
|
||||
xadump_SOURCES = query/xadump.cpp
|
||||
xadump_LDADD = librecoll.la $(LIBXAPIAN) $(LIBICONV)
|
||||
xadump_LDADD = librecoll.la $(XAPIAN_LIBS) $(LIBICONV)
|
||||
|
||||
# Note: I'd prefer the generated query parser files not to be distributed
|
||||
# at all, but failed to achieve this
|
||||
EXTRA_DIST = \
|
||||
bincimapmime/00README.recoll bincimapmime/AUTHORS bincimapmime/COPYING \
|
||||
\
|
||||
@ -357,8 +365,8 @@ doc/user/custom.xsl doc/user/usermanual.xml \
|
||||
filters/injectcommon.sh filters/recfiltcommon filters/rcltxtlines.py \
|
||||
\
|
||||
index/rclmon.sh \
|
||||
index/recollindex-system.service \
|
||||
index/recollindex-user.service \
|
||||
index/recollindex@.service \
|
||||
index/recollindex.service \
|
||||
\
|
||||
kde/kioslave/kio_recoll/00README.txt \
|
||||
kde/kioslave/kio_recoll/CMakeLists.txt \
|
||||
@ -369,9 +377,10 @@ kde/kioslave/kio_recoll/dirif.cpp \
|
||||
kde/kioslave/kio_recoll/htmlif.cpp \
|
||||
kde/kioslave/kio_recoll/kio_recoll.cpp \
|
||||
kde/kioslave/kio_recoll/kio_recoll.h \
|
||||
kde/kioslave/kio_recoll/recoll.json \
|
||||
kde/kioslave/kio_recoll/recoll.protocol \
|
||||
kde/kioslave/kio_recoll/recollf.protocol \
|
||||
kde/kioslave/kio_recoll/recollnolist.protocol \
|
||||
kde/kioslave/kio_recoll/recoll.protocol \
|
||||
\
|
||||
kde/kioslave/kio_recoll-kde4/00README.txt \
|
||||
kde/kioslave/kio_recoll-kde4/CMakeLists.txt \
|
||||
@ -388,6 +397,9 @@ kde/kioslave/kio_recoll-kde4/recoll.protocol \
|
||||
\
|
||||
query/location.hh query/position.hh query/stack.hh \
|
||||
\
|
||||
qtgui/actsearch.ui \
|
||||
qtgui/actsearch_w.cpp \
|
||||
qtgui/actsearch_w.h \
|
||||
qtgui/advsearch.ui \
|
||||
qtgui/advsearch_w.cpp \
|
||||
qtgui/advsearch_w.h \
|
||||
@ -407,6 +419,8 @@ qtgui/fragbuts.h \
|
||||
qtgui/guiutils.cpp \
|
||||
qtgui/guiutils.h \
|
||||
qtgui/i18n/*.qm qtgui/i18n/*.ts \
|
||||
qtgui/idxmodel.cpp \
|
||||
qtgui/idxmodel.h \
|
||||
qtgui/idxsched.h \
|
||||
qtgui/idxsched.ui \
|
||||
qtgui/images/asearch.png \
|
||||
@ -475,6 +489,7 @@ qtgui/rclm_idx.cpp \
|
||||
qtgui/rclm_menus.cpp \
|
||||
qtgui/rclm_preview.cpp \
|
||||
qtgui/rclm_saveload.cpp \
|
||||
qtgui/rclm_sidefilters.cpp \
|
||||
qtgui/rclm_view.cpp \
|
||||
qtgui/rclm_wins.cpp \
|
||||
qtgui/rclmain.ui \
|
||||
@ -567,16 +582,20 @@ python/samples/recollgui/rclmain.ui \
|
||||
python/samples/recollq.py \
|
||||
python/samples/recollqsd.py \
|
||||
\
|
||||
sampleconf/fields sampleconf/fragbuts.xml sampleconf/mimeconf \
|
||||
sampleconf/mimemap sampleconf/mimeview sampleconf/mimeview.mac \
|
||||
rclgrep/Makefile.am \
|
||||
rclgrep/rclgrep.cpp \
|
||||
\
|
||||
sampleconf/fields sampleconf/fragment-buttons.xml sampleconf/mimeconf \
|
||||
sampleconf/mimemap sampleconf/mimeview sampleconf/macos/mimeview \
|
||||
sampleconf/recoll.conf sampleconf/recoll.qss \
|
||||
sampleconf/recoll-common.css sampleconf/recoll-common.qss \
|
||||
sampleconf/recoll-dark.qss sampleconf/recoll-dark.css \
|
||||
\
|
||||
testmains/Makefile.am \
|
||||
\
|
||||
unac/AUTHORS unac/COPYING unac/README unac/README.recoll unac/unac.c \
|
||||
\
|
||||
VERSION
|
||||
RECOLL-VERSION.txt
|
||||
|
||||
# EXTRA_DIST: The Php Code does not build anymore. No need to ship it until
|
||||
# someone fixes it:
|
||||
@ -618,13 +637,13 @@ install-exec-local:: rclpychm-install
|
||||
clean-local:: rclpychm-clean
|
||||
rclpychm:
|
||||
(cd python/pychm; set -x; \
|
||||
for v in 2 3;do \
|
||||
for v in 3;do \
|
||||
test -n "`which python$${v}`" && python$${v} setup.py build;\
|
||||
done \
|
||||
)
|
||||
rclpychm-install:
|
||||
(cd python/pychm; set -x; \
|
||||
for v in 2 3;do test -n "`which python$${v}`" && \
|
||||
for v in 3;do test -n "`which python$${v}`" && \
|
||||
python$${v} setup.py install \
|
||||
--prefix=${prefix} --root=$${DESTDIR:-/} $(OPTSFORPYTHON); \
|
||||
done \
|
||||
@ -654,17 +673,19 @@ defconfdir = $(pkgdatadir)/examples
|
||||
defconf_DATA = \
|
||||
desktop/recollindex.desktop \
|
||||
index/rclmon.sh \
|
||||
index/recollindex-system.service \
|
||||
index/recollindex-user.service \
|
||||
sampleconf/fragbuts.xml \
|
||||
index/recollindex.service \
|
||||
index/recollindex@.service \
|
||||
sampleconf/fields \
|
||||
sampleconf/recoll.conf \
|
||||
sampleconf/fragment-buttons.xml \
|
||||
sampleconf/mimeconf \
|
||||
sampleconf/recoll.qss \
|
||||
sampleconf/recoll-dark.qss \
|
||||
sampleconf/recoll-dark.css \
|
||||
sampleconf/mimemap \
|
||||
sampleconf/mimeview
|
||||
sampleconf/mimeview \
|
||||
sampleconf/recoll-common.css \
|
||||
sampleconf/recoll-common.qss \
|
||||
sampleconf/recoll-dark.css \
|
||||
sampleconf/recoll-dark.qss \
|
||||
sampleconf/recoll.conf \
|
||||
sampleconf/recoll.qss
|
||||
|
||||
filterdir = $(pkgdatadir)/filters
|
||||
dist_filter_DATA = \
|
||||
@ -683,30 +704,31 @@ filters/openxml-xls-body.xsl \
|
||||
filters/openxml-word-body.xsl \
|
||||
filters/openxml-meta.xsl \
|
||||
filters/ppt-dump.py \
|
||||
filters/rcl7z \
|
||||
filters/rcl7z.py \
|
||||
filters/rclaptosidman \
|
||||
filters/rclaudio \
|
||||
filters/rclaudio.py \
|
||||
filters/rclbasehandler.py \
|
||||
filters/rclbibtex.sh \
|
||||
filters/rclcheckneedretry.sh \
|
||||
filters/rclchm \
|
||||
filters/rcldia \
|
||||
filters/rclchm.py \
|
||||
filters/rcldia.py \
|
||||
filters/rcldjvu.py \
|
||||
filters/rcldoc.py \
|
||||
filters/rcldvi \
|
||||
filters/rclepub \
|
||||
filters/rclepub1 \
|
||||
filters/rclepub.py \
|
||||
filters/rclepub1.py \
|
||||
filters/rclexec1.py \
|
||||
filters/rclexecm.py \
|
||||
filters/rclfb2.py \
|
||||
filters/rclgaim \
|
||||
filters/rclgenxslt.py \
|
||||
filters/rclhwp.py \
|
||||
filters/rclics \
|
||||
filters/rclics.py \
|
||||
filters/rclimg \
|
||||
filters/rclimg.py \
|
||||
filters/rclinfo \
|
||||
filters/rclkar \
|
||||
filters/rclinfo.py \
|
||||
filters/rclipynb.py \
|
||||
filters/rclkar.py \
|
||||
filters/rclkwd \
|
||||
filters/rcllatinclass.py \
|
||||
filters/rcllatinstops.zip \
|
||||
@ -725,21 +747,21 @@ filters/rclps \
|
||||
filters/rclpst.py \
|
||||
filters/rclpurple \
|
||||
filters/rclpython.py \
|
||||
filters/rclrar \
|
||||
filters/rclrar.py \
|
||||
filters/rclrtf.py \
|
||||
filters/rclscribus \
|
||||
filters/rclshowinfo \
|
||||
filters/rcltar \
|
||||
filters/rcltar.py \
|
||||
filters/rcltex \
|
||||
filters/rcltext.py \
|
||||
filters/rcluncomp \
|
||||
filters/rcluncomp.py \
|
||||
filters/rclwar \
|
||||
filters/rclwar.py \
|
||||
filters/rclxls.py \
|
||||
filters/rclxml.py \
|
||||
filters/rclxmp.py \
|
||||
filters/rclxslt.py \
|
||||
filters/rclzip \
|
||||
filters/rclzip.py \
|
||||
filters/recoll-we-move-files.py \
|
||||
filters/recollepub.zip \
|
||||
filters/svg.xsl \
|
||||
@ -749,6 +771,13 @@ filters/xml.xsl \
|
||||
python/recoll/recoll/conftree.py \
|
||||
python/recoll/recoll/rclconfig.py
|
||||
|
||||
if INSTALL_SYSTEMD_UNITS
|
||||
systemd_system_unitdir = @SYSTEMD_SYSTEM_UNIT_DIR@
|
||||
systemd_user_unitdir = @SYSTEMD_USER_UNIT_DIR@
|
||||
systemd_system_unit_DATA = index/recollindex@.service
|
||||
systemd_user_unit_DATA = index/recollindex.service
|
||||
endif
|
||||
|
||||
install-data-hook:
|
||||
(cd $(DESTDIR)/$(filterdir); \
|
||||
chmod a+x rcl* ppt-dump.py xls-dump.py xlsxmltocsv.py hotrecoll.py; \
|
||||
@ -767,14 +796,14 @@ doc/user/usermanual.html: doc/user/usermanual.xml
|
||||
endif
|
||||
|
||||
dist_man1_MANS = doc/man/recoll.1 doc/man/recollq.1 \
|
||||
doc/man/recollindex.1 doc/man/xadump.1
|
||||
doc/man/recollindex.1 doc/man/xadump.1 doc/man/rclgrep.1
|
||||
dist_man5_MANS = doc/man/recoll.conf.5
|
||||
|
||||
dist-hook:
|
||||
(cd $(top_srcdir); find . \
|
||||
\( -name '*.pyc' -o -name '#*' -o -name '*~' \) -delete)
|
||||
if test -z "$(NOTAG)";then \
|
||||
test -z "`git status -s|grep -v recoll-$(VERSION)`"||exit 1; \
|
||||
test -z "`git status -s|grep -v recoll-$(RECOLL-VERSION.txt)`"||exit 1; \
|
||||
vers=`echo $(VERSION) | sed -e 's/~/_/g'`;\
|
||||
git tag -a RECOLL-$$vers -m "Release $$vers tagged"; \
|
||||
fi
|
||||
|
||||
@ -2763,8 +2763,8 @@ Chapter 4. Programming interface
|
||||
|
||||
If you can program and want to write an execm handler, it should not be
|
||||
too difficult to make sense of one of the existing modules. For example,
|
||||
look at rclzip which uses Zip file paths as identifiers (ipath), and
|
||||
rclics, which uses an integer index. Also have a look at the comments
|
||||
look at rclzip.py which uses Zip file paths as identifiers (ipath), and
|
||||
rclics.py, which uses an integer index. Also have a look at the comments
|
||||
inside the internfile/mh_execm.h file and possibly at the corresponding
|
||||
module.
|
||||
|
||||
@ -2819,7 +2819,7 @@ Chapter 4. Programming interface
|
||||
|
||||
text/rtf = exec unrtf --nopict --html; charset=iso-8859-1; mimetype=text/html
|
||||
|
||||
application/x-chm = execm rclchm
|
||||
application/x-chm = execm rclchm.py
|
||||
|
||||
The fragment specifies that:
|
||||
|
||||
|
||||
1
src/RECOLL-VERSION.txt
Normal file
1
src/RECOLL-VERSION.txt
Normal file
@ -0,0 +1 @@
|
||||
1.33.1
|
||||
@ -1 +0,0 @@
|
||||
1.31.0
|
||||
@ -41,6 +41,8 @@ class Aspell {
|
||||
public:
|
||||
Aspell(const RclConfig *cnf);
|
||||
~Aspell();
|
||||
Aspell(const Aspell &) = delete;
|
||||
Aspell& operator=(const Aspell &) = delete;
|
||||
|
||||
/** Check health */
|
||||
bool ok() const;
|
||||
|
||||
@ -4,15 +4,18 @@ set -x
|
||||
|
||||
aclocal
|
||||
|
||||
if test X"$HOMEBREW_ENV" != X; then
|
||||
glt=`which glibtoolize`
|
||||
fi
|
||||
if test X"$glt" != X; then
|
||||
$glt --copy
|
||||
# detect libtoolize on linux or glibtoolize in some systems
|
||||
if (libtoolize --version) < /dev/null > /dev/null 2>&1; then
|
||||
LIBTOOLIZE=libtoolize
|
||||
elif (glibtoolize --version) < /dev/null > /dev/null 2>&1; then
|
||||
LIBTOOLIZE=glibtoolize
|
||||
else
|
||||
libtoolize --copy
|
||||
echo "libtoolize or glibtoolize was not found! Please install libtool." 1>&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
$LIBTOOLIZE --copy
|
||||
|
||||
automake --add-missing --force-missing --copy
|
||||
autoconf
|
||||
# Our ylwrap gets clobbered by the above.
|
||||
|
||||
@ -11,7 +11,7 @@
|
||||
/* #undef AC_APPLE_UNIVERSAL_BUILD */
|
||||
|
||||
/* Path to the aspell program */
|
||||
#define ASPELL_PROG "/opt/local/bin/aspell"
|
||||
#undef ASPELL_PROG
|
||||
|
||||
/* No X11 session monitoring support */
|
||||
#define DISABLE_X11MON 1
|
||||
@ -125,7 +125,7 @@
|
||||
#define PACKAGE_NAME "Recoll"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "Recoll 1.30.2"
|
||||
#define PACKAGE_STRING "Recoll 1.33.0"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "recoll"
|
||||
@ -134,7 +134,7 @@
|
||||
#define PACKAGE_URL ""
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "1.30.2"
|
||||
#define PACKAGE_VERSION "1.33.0"
|
||||
|
||||
/* putenv parameter is const */
|
||||
/* #undef PUTENV_ARG_CONST */
|
||||
|
||||
@ -118,7 +118,7 @@
|
||||
#define PACKAGE_NAME "Recoll"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "Recoll 1.30.2"
|
||||
#define PACKAGE_STRING "Recoll 1.33.0"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "recoll"
|
||||
@ -127,13 +127,13 @@
|
||||
#define PACKAGE_URL ""
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "1.30.2"
|
||||
#define PACKAGE_VERSION "1.33.0"
|
||||
|
||||
/* putenv parameter is const */
|
||||
/* #undef PUTENV_ARG_CONST */
|
||||
|
||||
/* Real time monitoring option */
|
||||
#undef RCL_MONITOR
|
||||
#define RCL_MONITOR 1
|
||||
|
||||
/* Split camelCase words */
|
||||
/* #undef RCL_SPLIT_CAMELCASE */
|
||||
|
||||
@ -67,4 +67,18 @@ typedef int ssize_t;
|
||||
# define PRETEND_USE(expr) ((void)(expr))
|
||||
#endif /* PRETEND_USE */
|
||||
|
||||
// It's complicated to really detect gnu gcc because other compilers define __GNUC__
|
||||
// See stackoverflow questions/38499462/how-to-tell-clang-to-stop-pretending-to-be-other-compilers
|
||||
#if defined(__GNUC__) && !defined(__llvm__) && !defined(__INTEL_COMPILER)
|
||||
#define REAL_GCC __GNUC__ // probably
|
||||
#endif
|
||||
|
||||
#ifdef REAL_GCC
|
||||
// Older gcc versions pretended to supply std::regex, but the resulting programs mostly crashed.
|
||||
#include <features.h>
|
||||
#if ! __GNUC_PREREQ(6,0)
|
||||
#define NO_STD_REGEX 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif /* INCLUDED */
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2004 J.F.Dockes
|
||||
/* Copyright (C) 2004-2022 J.F.Dockes
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
@ -33,14 +33,11 @@
|
||||
#endif
|
||||
|
||||
#include <algorithm>
|
||||
#include <list>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <unordered_map>
|
||||
#include <iterator>
|
||||
|
||||
#include "cstr.h"
|
||||
#include "pathut.h"
|
||||
@ -58,6 +55,16 @@
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Name of the sub-directory holding platform-specific default configuration files, which
// override the top-level ones. E.g. /usr/share/recoll/examples/windows
// The value is empty on platforms which have no specific overrides.
#ifdef _WIN32
static const std::string confsysdir{"windows"};
#elif defined(__APPLE__)
// Note: the compiler-predefined macro is __APPLE__ (two leading underscores).
static const std::string confsysdir{"macos"};
#else
static const std::string confsysdir;
#endif
|
||||
|
||||
// Static, logically const, RclConfig members or module static
|
||||
// variables are initialized once from the first object build during
|
||||
// process initialization.
|
||||
@ -90,9 +97,8 @@ void RclConfig::setPlusMinus(const string& sbase, const set<string>& upd,
|
||||
stringToStrings(sbase, base);
|
||||
|
||||
vector<string> diff;
|
||||
auto it =
|
||||
set_difference(base.begin(), base.end(), upd.begin(), upd.end(),
|
||||
std::inserter(diff, diff.begin()));
|
||||
auto it = set_difference(base.begin(), base.end(), upd.begin(), upd.end(),
|
||||
std::inserter(diff, diff.begin()));
|
||||
sminus = stringsToString(diff);
|
||||
|
||||
diff.clear();
|
||||
@ -139,7 +145,7 @@ bool ParamStale::needrecompute()
|
||||
string newvalue;
|
||||
conffile->get(paramnames[i], newvalue, parent->m_keydir);
|
||||
LOGDEB1("ParamStale::needrecompute: " << paramnames[i] << " -> " <<
|
||||
newvalue << " keydir " << parent->m_keydir << endl);
|
||||
newvalue << " keydir " << parent->m_keydir << "\n");
|
||||
if (newvalue.compare(savedvalues[i])) {
|
||||
savedvalues[i] = newvalue;
|
||||
needrecomp = true;
|
||||
@ -176,8 +182,7 @@ void ParamStale::init(ConfNull *cnf)
|
||||
|
||||
bool RclConfig::isDefaultConfig() const
|
||||
{
|
||||
string defaultconf = path_cat(path_homedata(),
|
||||
path_defaultrecollconfsubdir());
|
||||
string defaultconf = path_cat(path_homedata(), path_defaultrecollconfsubdir());
|
||||
path_catslash(defaultconf);
|
||||
string specifiedconf = path_canon(m_confdir);
|
||||
path_catslash(specifiedconf);
|
||||
@ -187,8 +192,7 @@ bool RclConfig::isDefaultConfig() const
|
||||
|
||||
RclConfig::RclConfig(const RclConfig &r)
|
||||
: m_oldstpsuffstate(this, "recoll_noindex"),
|
||||
m_stpsuffstate(this, {"noContentSuffixes", "noContentSuffixes+",
|
||||
"noContentSuffixes-"}),
|
||||
m_stpsuffstate(this, {"noContentSuffixes", "noContentSuffixes+", "noContentSuffixes-"}),
|
||||
m_skpnstate(this, {"skippedNames", "skippedNames+", "skippedNames-"}),
|
||||
m_onlnstate(this, "onlyNames"),
|
||||
m_rmtstate(this, "indexedmimetypes"),
|
||||
@ -200,8 +204,7 @@ RclConfig::RclConfig(const RclConfig &r)
|
||||
|
||||
RclConfig::RclConfig(const string *argcnf)
|
||||
: m_oldstpsuffstate(this, "recoll_noindex"),
|
||||
m_stpsuffstate(this, {"noContentSuffixes", "noContentSuffixes+",
|
||||
"noContentSuffixes-"}),
|
||||
m_stpsuffstate(this, {"noContentSuffixes", "noContentSuffixes+", "noContentSuffixes-"}),
|
||||
m_skpnstate(this, {"skippedNames", "skippedNames+", "skippedNames-"}),
|
||||
m_onlnstate(this, "onlyNames"),
|
||||
m_rmtstate(this, "indexedmimetypes"),
|
||||
@ -230,8 +233,7 @@ RclConfig::RclConfig(const string *argcnf)
|
||||
if (argcnf && !argcnf->empty()) {
|
||||
m_confdir = path_absolute(*argcnf);
|
||||
if (m_confdir.empty()) {
|
||||
m_reason =
|
||||
string("Cant turn [") + *argcnf + "] into absolute path";
|
||||
m_reason = string("Cant turn [") + *argcnf + "] into absolute path";
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
@ -249,9 +251,8 @@ RclConfig::RclConfig(const string *argcnf)
|
||||
// this is the default conf
|
||||
if (!autoconfdir && !isDefaultConfig()) {
|
||||
if (!path_exists(m_confdir)) {
|
||||
m_reason = "Explicitly specified configuration "
|
||||
"directory must exist"
|
||||
" (won't be automatically created). Use mkdir first";
|
||||
m_reason = std::string("Explicitly specified configuration [") + m_confdir +
|
||||
"] directory must exist (won't be automatically created). Use mkdir first";
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -291,8 +292,7 @@ RclConfig::RclConfig(const string *argcnf)
|
||||
o_localecharset = string(cstr_cp1252);
|
||||
}
|
||||
#endif
|
||||
LOGDEB1("RclConfig::getDefCharset: localecharset [" <<
|
||||
o_localecharset << "]\n");
|
||||
LOGDEB1("RclConfig::getDefCharset: localecharset [" << o_localecharset << "]\n");
|
||||
}
|
||||
|
||||
const char *cp;
|
||||
@ -310,8 +310,15 @@ RclConfig::RclConfig(const string *argcnf)
|
||||
m_cdirs.push_back(cp);
|
||||
}
|
||||
|
||||
// Base/installation config
|
||||
m_cdirs.push_back(path_cat(m_datadir, "examples"));
|
||||
// Base/installation config, and its platform-specific overrides
|
||||
std::string defaultsdir = path_cat(m_datadir, "examples");
|
||||
if (!confsysdir.empty()) {
|
||||
std::string sdir = path_cat(defaultsdir, confsysdir);
|
||||
if (path_isdir(sdir)) {
|
||||
m_cdirs.push_back(sdir);
|
||||
}
|
||||
}
|
||||
m_cdirs.push_back(defaultsdir);
|
||||
|
||||
string cnferrloc;
|
||||
for (const auto& dir : m_cdirs) {
|
||||
@ -339,17 +346,14 @@ RclConfig::RclConfig(const string *argcnf)
|
||||
// there are several. This only uses the distributed file, not any
|
||||
// local customization (too complicated).
|
||||
if (mime_suffixes.empty()) {
|
||||
ConfSimple mm(
|
||||
path_cat(path_cat(m_datadir, "examples"), "mimemap").c_str());
|
||||
ConfSimple mm(path_cat(path_cat(m_datadir, "examples"), "mimemap").c_str());
|
||||
vector<ConfLine> order = mm.getlines();
|
||||
for (const auto& entry: order) {
|
||||
if (entry.m_kind == ConfLine::CFL_VAR) {
|
||||
LOGDEB1("CONFIG: " << entry.m_data << " -> " << entry.m_value <<
|
||||
endl);
|
||||
LOGDEB1("CONFIG: " << entry.m_data << " -> " << entry.m_value << "\n");
|
||||
// Remember: insert() only does anything for new keys,
|
||||
// so we only have the first value in the map
|
||||
mime_suffixes.insert(
|
||||
pair<string,string>(entry.m_value, entry.m_data));
|
||||
mime_suffixes.insert(pair<string,string>(entry.m_value, entry.m_data));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -384,9 +388,9 @@ RclConfig::RclConfig(const string *argcnf)
|
||||
|
||||
bool RclConfig::updateMainConfig()
|
||||
{
|
||||
ConfStack<ConfTree> *newconf =
|
||||
new ConfStack<ConfTree>("recoll.conf", m_cdirs, true);
|
||||
ConfStack<ConfTree> *newconf = new ConfStack<ConfTree>("recoll.conf", m_cdirs, true);
|
||||
if (newconf == 0 || !newconf->ok()) {
|
||||
std::cerr << "updateMainConfig: new Confstack not ok\n";
|
||||
if (m_conf)
|
||||
return false;
|
||||
m_ok = false;
|
||||
@ -516,8 +520,7 @@ bool RclConfig::getConfParam(const string &name, vector<int> *vip,
|
||||
char *ep;
|
||||
vip->push_back(strtol(vs[i].c_str(), &ep, 0));
|
||||
if (ep == vs[i].c_str()) {
|
||||
LOGDEB("RclConfig::getConfParam: bad int value in [" << name <<
|
||||
"]\n");
|
||||
LOGDEB("RclConfig::getConfParam: bad int value in [" << name << "]\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@ -586,12 +589,10 @@ void RclConfig::initThrConf()
|
||||
out:
|
||||
ostringstream sconf;
|
||||
for (unsigned int i = 0; i < 3; i++) {
|
||||
sconf << "(" << m_thrConf[i].first << ", " << m_thrConf[i].second <<
|
||||
") ";
|
||||
sconf << "(" << m_thrConf[i].first << ", " << m_thrConf[i].second << ") ";
|
||||
}
|
||||
|
||||
LOGDEB("RclConfig::initThrConf: chosen config (ql,nt): " << sconf.str() <<
|
||||
"\n");
|
||||
LOGDEB("RclConfig::initThrConf: chosen config (ql,nt): " << sconf.str() << "\n");
|
||||
}
|
||||
|
||||
pair<int,int> RclConfig::getThrConf(ThrStage who) const
|
||||
@ -684,7 +685,7 @@ public:
|
||||
class SuffCmp {
|
||||
public:
|
||||
int operator()(const SfString& s1, const SfString& s2) const {
|
||||
//cout << "Comparing " << s1.m_str << " and " << s2.m_str << endl;
|
||||
//cout << "Comparing " << s1.m_str << " and " << s2.m_str << "\n";
|
||||
string::const_reverse_iterator
|
||||
r1 = s1.m_str.rbegin(), re1 = s1.m_str.rend(),
|
||||
r2 = s2.m_str.rbegin(), re2 = s2.m_str.rend();
|
||||
@ -734,8 +735,7 @@ vector<string>& RclConfig::getStopSuffixes()
|
||||
m_maxsufflen = int(entry.length());
|
||||
}
|
||||
}
|
||||
LOGDEB1("RclConfig::getStopSuffixes: ->" <<
|
||||
stringsToString(m_stopsuffvec) << endl);
|
||||
LOGDEB1("RclConfig::getStopSuffixes: ->" << stringsToString(m_stopsuffvec) << "\n");
|
||||
return m_stopsuffvec;
|
||||
}
|
||||
|
||||
@ -845,16 +845,22 @@ string RclConfig::getMimeHandlerDef(const string &mtype, bool filtertypes, const
|
||||
if (!m_excludeMTypes.empty() && m_excludeMTypes.count(stringtolower(mtype))) {
|
||||
IdxDiags::theDiags().record(IdxDiags::ExcludedMime, fn, mtype);
|
||||
LOGDEB1("RclConfig::getMimeHandlerDef: " << mtype << " in excluded mime list (fn " <<
|
||||
fn << ")\n");
|
||||
fn << ")\n");
|
||||
return hs;
|
||||
}
|
||||
}
|
||||
|
||||
if (!mimeconf->get(mtype, hs, "index")) {
|
||||
if (mtype.find("text/") == 0) {
|
||||
bool alltext{false};
|
||||
getConfParam("textunknownasplain", &alltext);
|
||||
if (alltext && mimeconf->get("text/plain", hs, "index")) {
|
||||
return hs;
|
||||
}
|
||||
}
|
||||
if (mtype != "inode/directory") {
|
||||
IdxDiags::theDiags().record(IdxDiags::NoHandler, fn, mtype);
|
||||
LOGDEB1("getMimeHandlerDef: no handler for '" << mtype << "' (fn " <<
|
||||
fn << ")\n");
|
||||
LOGDEB1("getMimeHandlerDef: no handler for '" << mtype << "' (fn " << fn << ")\n");
|
||||
}
|
||||
}
|
||||
return hs;
|
||||
@ -873,12 +879,11 @@ const vector<MDReaper>& RclConfig::getMDReapers()
|
||||
ConfSimple attrs;
|
||||
valueSplitAttributes(sreapers, value, attrs);
|
||||
vector<string> nmlst = attrs.getNames(cstr_null);
|
||||
for (vector<string>::const_iterator it = nmlst.begin();
|
||||
it != nmlst.end(); it++) {
|
||||
for (const auto& nm : nmlst) {
|
||||
MDReaper reaper;
|
||||
reaper.fieldname = fieldCanon(*it);
|
||||
reaper.fieldname = fieldCanon(nm);
|
||||
string s;
|
||||
attrs.get(*it, s);
|
||||
attrs.get(nm, s);
|
||||
stringToStrings(s, reaper.cmdv);
|
||||
m_mdreapers.push_back(reaper);
|
||||
}
|
||||
@ -904,11 +909,17 @@ bool RclConfig::getGuiFilter(const string& catfiltername, string& frag) const
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RclConfig::valueSplitAttributes(const string& whole, string& value,
|
||||
ConfSimple& attrs)
|
||||
bool RclConfig::valueSplitAttributes(const string& whole, string& value, ConfSimple& attrs)
|
||||
{
|
||||
/* There is currently no way to escape a semi-colon */
|
||||
string::size_type semicol0 = whole.find_first_of(";");
|
||||
bool inquote{false};
|
||||
string::size_type semicol0;
|
||||
for (semicol0 = 0; semicol0 < whole.size(); semicol0++) {
|
||||
if (whole[semicol0] == '"') {
|
||||
inquote = !inquote;
|
||||
} else if (whole[semicol0] == ';' && !inquote) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
value = whole.substr(0, semicol0);
|
||||
trimstring(value);
|
||||
string attrstr;
|
||||
@ -1014,15 +1025,14 @@ bool RclConfig::readFieldsConfig(const string& cnferrloc)
|
||||
valuetype = FieldTraits::INT;
|
||||
} else {
|
||||
LOGERR("readFieldsConfig: bad type for value for " <<
|
||||
fieldname << " : " << tval << endl);
|
||||
fieldname << " : " << tval << "\n");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
int valuelen = (int)attrs.getInt("len", 0);
|
||||
// Find or insert traits entry
|
||||
const auto pit =
|
||||
m_fldtotraits.insert(
|
||||
pair<string, FieldTraits>(canonic, FieldTraits())).first;
|
||||
m_fldtotraits.insert(pair<string, FieldTraits>(canonic, FieldTraits())).first;
|
||||
pit->second.valueslot = valueslot;
|
||||
pit->second.valuetype = valuetype;
|
||||
pit->second.valuelen = valuelen;
|
||||
@ -1099,8 +1109,7 @@ bool RclConfig::getFieldTraits(const string& _fld, const FieldTraits **ftpp,
|
||||
pit->second.pfx << "]\n");
|
||||
return true;
|
||||
} else {
|
||||
LOGDEB1("RclConfig::getFieldTraits: no prefix for field [" << fld <<
|
||||
"]\n");
|
||||
LOGDEB1("RclConfig::getFieldTraits: no prefix for field [" << fld << "]\n");
|
||||
*ftpp = 0;
|
||||
return false;
|
||||
}
|
||||
@ -1122,8 +1131,7 @@ string RclConfig::fieldCanon(const string& f) const
|
||||
string fld = stringtolower(f);
|
||||
const auto it = m_aliastocanon.find(fld);
|
||||
if (it != m_aliastocanon.end()) {
|
||||
LOGDEB1("RclConfig::fieldCanon: [" << f << "] -> [" << it->second <<
|
||||
"]\n");
|
||||
LOGDEB1("RclConfig::fieldCanon: [" << f << "] -> [" << it->second << "]\n");
|
||||
return it->second;
|
||||
}
|
||||
LOGDEB1("RclConfig::fieldCanon: [" << f << "] -> [" << fld << "]\n");
|
||||
@ -1134,8 +1142,7 @@ string RclConfig::fieldQCanon(const string& f) const
|
||||
{
|
||||
const auto it = m_aliastoqcanon.find(stringtolower(f));
|
||||
if (it != m_aliastoqcanon.end()) {
|
||||
LOGDEB1("RclConfig::fieldQCanon: [" << f << "] -> [" << it->second <<
|
||||
"]\n");
|
||||
LOGDEB1("RclConfig::fieldQCanon: [" << f << "] -> [" << it->second << "]\n");
|
||||
return it->second;
|
||||
}
|
||||
return fieldCanon(f);
|
||||
@ -1165,15 +1172,14 @@ set<string> RclConfig::getMimeViewerAllEx() const
|
||||
|
||||
string base, plus, minus;
|
||||
mimeview->get("xallexcepts", base, "");
|
||||
LOGDEB1("RclConfig::getMimeViewerAllEx(): base: " << base << endl);
|
||||
LOGDEB1("RclConfig::getMimeViewerAllEx(): base: " << base << "\n");
|
||||
mimeview->get("xallexcepts+", plus, "");
|
||||
LOGDEB1("RclConfig::getMimeViewerAllEx(): plus: " << plus << endl);
|
||||
LOGDEB1("RclConfig::getMimeViewerAllEx(): plus: " << plus << "\n");
|
||||
mimeview->get("xallexcepts-", minus, "");
|
||||
LOGDEB1("RclConfig::getMimeViewerAllEx(): minus: " << minus << endl);
|
||||
LOGDEB1("RclConfig::getMimeViewerAllEx(): minus: " << minus << "\n");
|
||||
|
||||
computeBasePlusMinus(res, base, plus, minus);
|
||||
LOGDEB1("RclConfig::getMimeViewerAllEx(): res: " << stringsToString(res)
|
||||
<< endl);
|
||||
LOGDEB1("RclConfig::getMimeViewerAllEx(): res: " << stringsToString(res) << "\n");
|
||||
return res;
|
||||
}
|
||||
|
||||
@ -1200,11 +1206,9 @@ bool RclConfig::setMimeViewerAllEx(const set<string>& allex)
|
||||
return true;
|
||||
}
|
||||
|
||||
string RclConfig::getMimeViewerDef(const string &mtype, const string& apptag,
|
||||
bool useall) const
|
||||
string RclConfig::getMimeViewerDef(const string &mtype, const string& apptag, bool useall) const
|
||||
{
|
||||
LOGDEB2("RclConfig::getMimeViewerDef: mtype [" << mtype << "] apptag ["
|
||||
<< apptag << "]\n");
|
||||
LOGDEB2("RclConfig::getMimeViewerDef: mtype [" << mtype << "] apptag [" << apptag << "]\n");
|
||||
string hs;
|
||||
if (mimeview == 0)
|
||||
return hs;
|
||||
@ -1231,9 +1235,18 @@ string RclConfig::getMimeViewerDef(const string &mtype, const string& apptag,
|
||||
// Fallthrough to normal case.
|
||||
}
|
||||
|
||||
if (apptag.empty() || !mimeview->get(mtype + string("|") + apptag,
|
||||
hs, "view"))
|
||||
if (apptag.empty() || !mimeview->get(mtype + string("|") + apptag, hs, "view"))
|
||||
mimeview->get(mtype, hs, "view");
|
||||
|
||||
// Last try for text/xxx if alltext is set
|
||||
if (hs.empty() && mtype.find("text/") == 0 && mtype != "text/plain") {
|
||||
bool alltext{false};
|
||||
getConfParam("textunknownasplain", &alltext);
|
||||
if (alltext) {
|
||||
return getMimeViewerDef("text/plain", apptag, useall);
|
||||
}
|
||||
}
|
||||
|
||||
return hs;
|
||||
}
|
||||
|
||||
@ -1242,9 +1255,8 @@ bool RclConfig::getMimeViewerDefs(vector<pair<string, string> >& defs) const
|
||||
if (mimeview == 0)
|
||||
return false;
|
||||
vector<string>tps = mimeview->getNames("view");
|
||||
for (vector<string>::const_iterator it = tps.begin();
|
||||
it != tps.end();it++) {
|
||||
defs.push_back(pair<string, string>(*it, getMimeViewerDef(*it, "", 0)));
|
||||
for (const auto& tp : tps) {
|
||||
defs.push_back(pair<string, string>(tp, getMimeViewerDef(tp, "", 0)));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -1398,17 +1410,39 @@ string RclConfig::getIdxStatusFile() const
|
||||
// Thanks to user Madhu for this fix.
|
||||
string RclConfig::getPidfile() const
|
||||
{
|
||||
const char *p = getenv("XDG_RUNTIME_DIR");
|
||||
if (p) {
|
||||
string base = path_canon(p);
|
||||
string digest, hex;
|
||||
string cfdir = path_canon(getConfDir());
|
||||
path_catslash(cfdir);
|
||||
MD5String(cfdir, digest);
|
||||
MD5HexPrint(digest, hex);
|
||||
return path_cat(base, "/recoll-" + hex + "-index.pid");
|
||||
}
|
||||
return path_cat(getCacheDir(), "index.pid");
|
||||
static string fn;
|
||||
if (fn.empty()) {
|
||||
#ifndef _WIN32
|
||||
const char *p = getenv("XDG_RUNTIME_DIR");
|
||||
string rundir;
|
||||
if (nullptr == p) {
|
||||
// Problem is, we may have been launched outside the desktop, maybe by cron. Basing
|
||||
// everything on XDG_RUNTIME_DIR was a mistake, sometimes resulting in different pidfiles
|
||||
// being used by recollindex instances. So explicitly test for /run/user/$uid, still
|
||||
// leaving open the remote possibility that XDG_RUNTIME_DIR would be set to something
|
||||
// else...
|
||||
rundir = path_cat("/run/user", lltodecstr(getuid()));
|
||||
if (path_isdir(rundir)) {
|
||||
p = rundir.c_str();
|
||||
}
|
||||
}
|
||||
if (p) {
|
||||
string base = path_canon(p);
|
||||
string digest, hex;
|
||||
string cfdir = path_canon(getConfDir());
|
||||
path_catslash(cfdir);
|
||||
MD5String(cfdir, digest);
|
||||
MD5HexPrint(digest, hex);
|
||||
fn = path_cat(base, "recoll-" + hex + "-index.pid");
|
||||
goto out;
|
||||
}
|
||||
#endif // ! _WIN32
|
||||
|
||||
fn = path_cat(getCacheDir(), "index.pid");
|
||||
out:
|
||||
LOGINF("RclConfig: pid/lock file: " << fn << "\n");
|
||||
}
|
||||
return fn;
|
||||
}
|
||||
|
||||
|
||||
@ -1441,7 +1475,7 @@ static string path_diffstems(const string& p1, const string& p2,
|
||||
break;
|
||||
}
|
||||
}
|
||||
//cerr << "Common length = " << cl << endl;
|
||||
//cerr << "Common length = " << cl << "\n";
|
||||
if (cl == 0) {
|
||||
reason = "Input paths are empty or have no common part";
|
||||
return reason;
|
||||
@ -1473,13 +1507,12 @@ void RclConfig::urlrewrite(const string& dbdir, string& url) const
|
||||
cur_confdir = m_confdir;
|
||||
}
|
||||
LOGDEB1("RclConfig::urlrewrite: orgidxconfdir: " << orig_confdir <<
|
||||
" cur_confdir " << cur_confdir << endl);
|
||||
string reason = path_diffstems(orig_confdir, cur_confdir,
|
||||
confstemorg, confstemrep);
|
||||
" cur_confdir " << cur_confdir << "\n");
|
||||
string reason = path_diffstems(orig_confdir, cur_confdir, confstemorg, confstemrep);
|
||||
if (!reason.empty()) {
|
||||
LOGERR("urlrewrite: path_diffstems failed: " << reason <<
|
||||
" : orig_confdir [" << orig_confdir <<
|
||||
"] cur_confdir [" << cur_confdir << endl);
|
||||
"] cur_confdir [" << cur_confdir << "\n");
|
||||
confstemorg = confstemrep = "";
|
||||
}
|
||||
}
|
||||
@ -1487,8 +1520,7 @@ void RclConfig::urlrewrite(const string& dbdir, string& url) const
|
||||
// Do path translations exist for this index ?
|
||||
bool needptrans = true;
|
||||
if (m_ptrans == 0 || !m_ptrans->hasSubKey(dbdir)) {
|
||||
LOGDEB2("RclConfig::urlrewrite: no paths translations (m_ptrans " <<
|
||||
m_ptrans << ")\n");
|
||||
LOGDEB2("RclConfig::urlrewrite: no paths translations (m_ptrans " << m_ptrans << ")\n");
|
||||
needptrans = false;
|
||||
}
|
||||
|
||||
@ -1639,6 +1671,7 @@ vector<string> RclConfig::getDaemSkippedPaths() const
|
||||
// and filtersdir from the config file to the PATH, then use execmd::which()
|
||||
string RclConfig::findFilter(const string &icmd) const
|
||||
{
|
||||
LOGDEB2("findFilter: " << icmd << "\n");
|
||||
// If the path is absolute, this is it
|
||||
if (path_isabsolute(icmd))
|
||||
return icmd;
|
||||
@ -1686,13 +1719,19 @@ bool RclConfig::processFilterCmd(std::vector<std::string>& cmd) const
|
||||
LOGDEB0("processFilterCmd: in: " << stringsToString(cmd) << "\n");
|
||||
auto it = cmd.begin();
|
||||
|
||||
// Special-case python and perl on windows: we need to also locate the
|
||||
// first argument which is the script name "python somescript.py".
|
||||
// On Unix, thanks to #!, we usually just run "somescript.py", but need
|
||||
// the same change if we ever want to use the same cmd line as windows
|
||||
bool hasinterp = !stringlowercmp("python", *it) ||
|
||||
!stringlowercmp("perl", *it);
|
||||
|
||||
#ifdef _WIN32
|
||||
// Special-case interpreters on windows: we used to have an additional 1st argument "python" in
|
||||
// mimeconf, but we now rely on the .py extension for better sharing of mimeconf.
|
||||
std::string ext = path_suffix(*it);
|
||||
if ("py" == ext) {
|
||||
it = cmd.insert(it, findFilter("python"));
|
||||
it++;
|
||||
} else if ("pl" == ext) {
|
||||
it = cmd.insert(it, findFilter("perl"));
|
||||
it++;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Note that, if the cmd vector size is 1, post-incrementing the
|
||||
// iterator in the following statement, which works on x86, leads
|
||||
// to a crash on ARM with gcc 6 and 8 (at least), which does not
|
||||
@ -1700,25 +1739,15 @@ bool RclConfig::processFilterCmd(std::vector<std::string>& cmd) const
|
||||
// whatever... We do it later then.
|
||||
*it = findFilter(*it);
|
||||
|
||||
if (hasinterp) {
|
||||
if (cmd.size() < 2) {
|
||||
LOGERR("processFilterCmd: python/perl cmd: no script?. [" <<
|
||||
stringsToString(cmd) << "]\n");
|
||||
return false;
|
||||
} else {
|
||||
++it;
|
||||
*it = findFilter(*it);
|
||||
}
|
||||
}
|
||||
LOGDEB0("processFilterCmd: out: " << stringsToString(cmd) << "\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RclConfig::pythonCmd(const std::string& scriptname,
|
||||
std::vector<std::string>& cmd) const
|
||||
// This now does nothing more than processFilterCmd (after we changed to relying on the py extension)
|
||||
bool RclConfig::pythonCmd(const std::string& scriptname, std::vector<std::string>& cmd) const
|
||||
{
|
||||
#ifdef _WIN32
|
||||
cmd = {"python", scriptname};
|
||||
cmd = {scriptname};
|
||||
#else
|
||||
cmd = {scriptname};
|
||||
#endif
|
||||
|
||||
@ -103,6 +103,14 @@ public:
|
||||
freeAll();
|
||||
}
|
||||
|
||||
RclConfig& operator=(const RclConfig &r) {
|
||||
if (this != &r) {
|
||||
freeAll();
|
||||
initFrom(r);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Return a writable clone of the main config. This belongs to the
|
||||
// caller (must delete it when done)
|
||||
ConfNull *cloneMainConfig();
|
||||
@ -251,12 +259,18 @@ public:
|
||||
string getMimeHandlerDef(const string &mimetype, bool filtertypes=false,
|
||||
const std::string& fn = std::string());
|
||||
|
||||
/** For lines like: "name = some value; attr1 = value1; attr2 = val2"
|
||||
/** For lines like: [name = some value; attr1 = value1; attr2 = val2]
|
||||
* Separate the value and store the attributes in a ConfSimple
|
||||
* @param whole the raw value. No way to escape a semi-colon in there.
|
||||
*
|
||||
* In the value part, semi-colons inside double quotes are ignored, and double quotes are
|
||||
* conserved. In the common case where the string is then processed by stringToStrings() to
|
||||
* build a command line, this allows having semi-colons inside arguments. However, no backslash
|
||||
* escaping is possible, so that, for example "bla\"1;2\"" would not work (the value part
|
||||
* would stop at the semi-colon).
|
||||
*
|
||||
* @param whole the raw value.
|
||||
*/
|
||||
static bool valueSplitAttributes(const string& whole, string& value,
|
||||
ConfSimple& attrs) ;
|
||||
static bool valueSplitAttributes(const string& whole, string& value, ConfSimple& attrs) ;
|
||||
|
||||
/** Compute difference between 'base' and 'changed', as elements to be
|
||||
* added and subtracted from base. Input and output strings are in
|
||||
@ -362,14 +376,6 @@ public:
|
||||
return o_origcwd;
|
||||
}
|
||||
|
||||
RclConfig& operator=(const RclConfig &r) {
|
||||
if (this != &r) {
|
||||
freeAll();
|
||||
initFrom(r);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
friend class ParamStale;
|
||||
|
||||
private:
|
||||
|
||||
@ -312,7 +312,7 @@ RclConfig *recollinit(int flags,
|
||||
#if defined(MACPORTS)
|
||||
PATH = string("/opt/local/bin/") + ":" + PATH;
|
||||
#elif defined(HOMEBREW)
|
||||
PATH = string("/usr/local/bin/") + ":" + PATH;
|
||||
PATH = string("/opt/homebrew/bin:/usr/local/bin/") + ":" + PATH;
|
||||
#else
|
||||
// Native qt build. Add our own directory to the path so that
|
||||
// recoll finds recollindex pkgdatadir:
|
||||
|
||||
@ -81,6 +81,8 @@ unsigned int TextSplit::o_CJKNgramLen{2};
|
||||
bool TextSplit::o_noNumbers{false};
|
||||
bool TextSplit::o_deHyphenate{false};
|
||||
int TextSplit::o_maxWordLength{40};
|
||||
int TextSplit::o_maxWordsInSpan{6};
|
||||
|
||||
static const int o_CJKMaxNgramLen{5};
|
||||
bool o_exthangultagger{false};
|
||||
|
||||
@ -90,6 +92,7 @@ static char underscoreatend = '_';
|
||||
void TextSplit::staticConfInit(RclConfig *config)
|
||||
{
|
||||
config->getConfParam("maxtermlength", &o_maxWordLength);
|
||||
config->getConfParam("maxwordsinspan", &o_maxWordsInSpan);
|
||||
|
||||
bool bvalue{false};
|
||||
if (config->getConfParam("nocjk", &bvalue) && bvalue == true) {
|
||||
@ -206,32 +209,26 @@ public:
|
||||
};
|
||||
static const CharClassInit charClassInitInstance;
|
||||
|
||||
static inline int whatcc(unsigned int c, char *asciirep = nullptr)
|
||||
// Tell whether c is found in the visiblewhite container.
// @param c the value to look up (callers in this file pass code points).
// @return true if visiblewhite.find(c) locates an entry, else false.
static inline bool isvisiblewhite(int c)
|
||||
{
|
||||
return visiblewhite.find(c) != visiblewhite.end();
|
||||
}
|
||||
|
||||
static inline int whatcc(unsigned int c)
|
||||
{
|
||||
if (c <= 127) {
|
||||
return charclasses[c];
|
||||
} else {
|
||||
if (c == 0x2010) {
|
||||
// Special treatment for hyphen: handle as ascii minus. See
|
||||
// doc/notes/minus-hyphen-dash.txt
|
||||
if (asciirep)
|
||||
*asciirep = '-';
|
||||
return c;
|
||||
} else if (c == 0x2019 || c == 0x275c || c == 0x02bc) {
|
||||
// Things sometimes replacing a single quote. Use single
|
||||
// quote so that span processing works ok
|
||||
if (asciirep)
|
||||
*asciirep = '\'';
|
||||
if (c == 0x2010 || c == 0x2019 || c == 0x275c || c == 0x02bc) {
|
||||
return c;
|
||||
} else if (sskip.find(c) != sskip.end()) {
|
||||
return SKIP;
|
||||
} else if (spunc.find(c) != spunc.end()) {
|
||||
return SPACE;
|
||||
} else {
|
||||
vector<unsigned int>::iterator it =
|
||||
lower_bound(vpuncblocks.begin(), vpuncblocks.end(), c);
|
||||
if (it == vpuncblocks.end())
|
||||
return LETTER;
|
||||
auto it = lower_bound(vpuncblocks.begin(), vpuncblocks.end(), c);
|
||||
if (it == vpuncblocks.end())
|
||||
return LETTER;
|
||||
if (c == *it)
|
||||
return SPACE;
|
||||
if ((it - vpuncblocks.begin()) % 2 == 1) {
|
||||
@ -245,16 +242,16 @@ static inline int whatcc(unsigned int c, char *asciirep = nullptr)
|
||||
|
||||
// testing whatcc...
|
||||
#if 0
|
||||
unsigned int testvalues[] = {'a', '0', 0x80, 0xbf, 0xc0, 0x05c3, 0x1000,
|
||||
0x2000, 0x2001, 0x206e, 0x206f, 0x20d0, 0x2399,
|
||||
0x2400, 0x2401, 0x243f, 0x2440, 0xff65};
|
||||
int ntest = sizeof(testvalues) / sizeof(int);
|
||||
for (int i = 0; i < ntest; i++) {
|
||||
int ret = whatcc(testvalues[i]);
|
||||
printf("Tested value 0x%x, returned value %d %s\n",
|
||||
testvalues[i], ret, ret == LETTER ? "LETTER" :
|
||||
ret == SPACE ? "SPACE" : "OTHER");
|
||||
}
|
||||
unsigned int testvalues[] = {'a', '0', 0x80, 0xbf, 0xc0, 0x05c3, 0x1000,
|
||||
0x2000, 0x2001, 0x206e, 0x206f, 0x20d0, 0x2399,
|
||||
0x2400, 0x2401, 0x243f, 0x2440, 0xff65};
|
||||
int ntest = sizeof(testvalues) / sizeof(int);
|
||||
for (int i = 0; i < ntest; i++) {
|
||||
int ret = whatcc(testvalues[i]);
|
||||
printf("Tested value 0x%x, returned value %d %s\n",
|
||||
testvalues[i], ret, ret == LETTER ? "LETTER" :
|
||||
ret == SPACE ? "SPACE" : "OTHER");
|
||||
}
|
||||
#endif
|
||||
|
||||
// CJK Unicode character detection. CJK text is indexed using an n-gram
|
||||
@ -287,16 +284,16 @@ static inline int whatcc(unsigned int c, char *asciirep = nullptr)
|
||||
// FF00..FFEF; Halfwidth and Fullwidth Forms
|
||||
// 20000..2A6DF; CJK Unified Ideographs Extension B
|
||||
// 2F800..2FA1F; CJK Compatibility Ideographs Supplement
|
||||
#define UNICODE_IS_CJK(p) \
|
||||
(((p) >= 0x1100 && (p) <= 0x11FF) || \
|
||||
((p) >= 0x2E80 && (p) <= 0x2EFF) || \
|
||||
((p) >= 0x3000 && (p) <= 0x9FFF) || \
|
||||
((p) >= 0xA700 && (p) <= 0xA71F) || \
|
||||
((p) >= 0xAC00 && (p) <= 0xD7AF) || \
|
||||
((p) >= 0xF900 && (p) <= 0xFAFF) || \
|
||||
((p) >= 0xFE30 && (p) <= 0xFE4F) || \
|
||||
((p) >= 0xFF00 && (p) <= 0xFFEF) || \
|
||||
((p) >= 0x20000 && (p) <= 0x2A6DF) || \
|
||||
#define UNICODE_IS_CJK(p) \
|
||||
(((p) >= 0x1100 && (p) <= 0x11FF) || \
|
||||
((p) >= 0x2E80 && (p) <= 0x2EFF) || \
|
||||
((p) >= 0x3000 && (p) <= 0x9FFF) || \
|
||||
((p) >= 0xA700 && (p) <= 0xA71F) || \
|
||||
((p) >= 0xAC00 && (p) <= 0xD7AF) || \
|
||||
((p) >= 0xF900 && (p) <= 0xFAFF) || \
|
||||
((p) >= 0xFE30 && (p) <= 0xFE4F) || \
|
||||
((p) >= 0xFF00 && (p) <= 0xFFEF) || \
|
||||
((p) >= 0x20000 && (p) <= 0x2A6DF) || \
|
||||
((p) >= 0x2F800 && (p) <= 0x2FA1F))
|
||||
|
||||
// We should probably map 'fullwidth ascii variants' and 'halfwidth
|
||||
@ -304,9 +301,9 @@ static inline int whatcc(unsigned int c, char *asciirep = nullptr)
|
||||
// filter, KuromojiNormalizeFilter.java
|
||||
// 309F is Hiragana.
|
||||
#ifdef KATAKANA_AS_WORDS
|
||||
#define UNICODE_IS_KATAKANA(p) \
|
||||
((p) != 0x309F && \
|
||||
(((p) >= 0x3099 && (p) <= 0x30FF) || \
|
||||
#define UNICODE_IS_KATAKANA(p) \
|
||||
((p) != 0x309F && \
|
||||
(((p) >= 0x3099 && (p) <= 0x30FF) || \
|
||||
((p) >= 0x31F0 && (p) <= 0x31FF)))
|
||||
#else
|
||||
#define UNICODE_IS_KATAKANA(p) false
|
||||
@ -315,14 +312,14 @@ static inline int whatcc(unsigned int c, char *asciirep = nullptr)
|
||||
#ifdef HANGUL_AS_WORDS
|
||||
// If no external tagger is configured, we process HANGUL as generic
|
||||
// cjk (n-grams)
|
||||
#define UNICODE_IS_HANGUL(p) ( \
|
||||
o_exthangultagger && \
|
||||
(((p) >= 0x1100 && (p) <= 0x11FF) || \
|
||||
((p) >= 0x3130 && (p) <= 0x318F) || \
|
||||
((p) >= 0x3200 && (p) <= 0x321e) || \
|
||||
((p) >= 0x3248 && (p) <= 0x327F) || \
|
||||
((p) >= 0x3281 && (p) <= 0x32BF) || \
|
||||
((p) >= 0xAC00 && (p) <= 0xD7AF)) \
|
||||
#define UNICODE_IS_HANGUL(p) ( \
|
||||
o_exthangultagger && \
|
||||
(((p) >= 0x1100 && (p) <= 0x11FF) || \
|
||||
((p) >= 0x3130 && (p) <= 0x318F) || \
|
||||
((p) >= 0x3200 && (p) <= 0x321e) || \
|
||||
((p) >= 0x3248 && (p) <= 0x327F) || \
|
||||
((p) >= 0x3281 && (p) <= 0x32BF) || \
|
||||
((p) >= 0xAC00 && (p) <= 0xD7AF)) \
|
||||
)
|
||||
#else
|
||||
#define UNICODE_IS_HANGUL(p) false
|
||||
@ -351,19 +348,16 @@ bool TextSplit::isNGRAMMED(int c)
|
||||
}
|
||||
|
||||
|
||||
// This is used to detect katakana/other transitions, which must
|
||||
// trigger a word split (there is not always a separator, and katakana
|
||||
// is otherwise treated like other, in the same routine, unless cjk
|
||||
// This is used to detect katakana/other transitions, which must trigger a word split (there is not
|
||||
// always a separator, and katakana is otherwise treated like other, in the same routine, unless cjk
|
||||
// which has its span reader causing a word break)
|
||||
enum CharSpanClass {CSC_HANGUL, CSC_CJK, CSC_KATAKANA, CSC_OTHER};
|
||||
std::vector<CharFlags> csc_names {CHARFLAGENTRY(CSC_HANGUL),
|
||||
CHARFLAGENTRY(CSC_CJK), CHARFLAGENTRY(CSC_KATAKANA),
|
||||
CHARFLAGENTRY(CSC_OTHER)};
|
||||
std::vector<CharFlags> csc_names {CHARFLAGENTRY(CSC_HANGUL), CHARFLAGENTRY(CSC_CJK),
|
||||
CHARFLAGENTRY(CSC_KATAKANA), CHARFLAGENTRY(CSC_OTHER)};
|
||||
|
||||
// Final term checkpoint: do some checking (the kind which is simpler
|
||||
// to do here than in the main loop), then send term to our client.
|
||||
inline bool TextSplit::emitterm(bool isspan, string &w, int pos,
|
||||
size_t btstart, size_t btend)
|
||||
// Final term checkpoint: do some checking (the kind which is simpler to do here than in the main
|
||||
// loop), then send term to our client.
|
||||
inline bool TextSplit::emitterm(bool isspan, string &w, int pos, size_t btstart, size_t btend)
|
||||
{
|
||||
LOGDEB2("TextSplit::emitterm: [" << w << "] pos " << pos << "\n");
|
||||
|
||||
@ -378,39 +372,38 @@ inline bool TextSplit::emitterm(bool isspan, string &w, int pos,
|
||||
PRETEND_USE(isspan);
|
||||
#endif
|
||||
|
||||
if (l > 0 && l <= o_maxWordLength) {
|
||||
// 1 byte word: we index single ascii letters and digits, but
|
||||
// nothing else. We might want to turn this into a test for a
|
||||
// single utf8 character instead ?
|
||||
if (l == 1) {
|
||||
unsigned int c = ((unsigned int)w[0]) & 0xff;
|
||||
if (charclasses[c] != A_ULETTER && charclasses[c] != A_LLETTER &&
|
||||
charclasses[c] != DIGIT &&
|
||||
(!(m_flags & TXTS_KEEPWILD) || charclasses[c] != WILD)
|
||||
) {
|
||||
//cerr << "ERASING single letter term " << c << endl;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if (pos != m_prevpos || l != m_prevlen) {
|
||||
bool ret = takeword(w, pos, int(btstart), int(btend));
|
||||
m_prevpos = pos;
|
||||
m_prevlen = int(w.length());
|
||||
return ret;
|
||||
}
|
||||
LOGDEB2("TextSplit::emitterm:dup: [" << w << "] pos " << pos << "\n");
|
||||
if (l == 0 || l > o_maxWordLength) {
|
||||
return true;
|
||||
}
|
||||
if (l == 1) {
|
||||
// 1 byte word: we index single ascii letters and digits, but nothing else. We might want to
|
||||
// turn this into a test for a single utf8 character instead ?
|
||||
unsigned int c = ((unsigned int)w[0]) & 0xff;
|
||||
if (charclasses[c] != A_ULETTER && charclasses[c] != A_LLETTER &&
|
||||
charclasses[c] != DIGIT &&
|
||||
(!(m_flags & TXTS_KEEPWILD) || charclasses[c] != WILD)
|
||||
) {
|
||||
//cerr << "ERASING single letter term " << c << endl;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if (pos != m_prevpos || l != m_prevlen) {
|
||||
bool ret = takeword(w, pos, int(btstart), int(btend));
|
||||
m_prevpos = pos;
|
||||
m_prevlen = int(w.length());
|
||||
return ret;
|
||||
}
|
||||
LOGDEB2("TextSplit::emitterm:dup: [" << w << "] pos " << pos << "\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check for an acronym/abbreviation ie I.B.M. This only works with
|
||||
// ascii (no non-ascii utf-8 acronym are possible)
|
||||
// Check for an acronym/abbreviation ie I.B.M. This only works with ascii (we do not detect
|
||||
// non-ascii utf-8 acronyms)
|
||||
bool TextSplit::span_is_acronym(string *acronym)
|
||||
{
|
||||
bool acron = false;
|
||||
|
||||
if (m_wordLen != m_span.length() &&
|
||||
m_span.length() > 2 && m_span.length() <= 20) {
|
||||
if (m_wordLen != m_span.length() && m_span.length() > 2 && m_span.length() <= 20) {
|
||||
acron = true;
|
||||
// Check odd chars are '.'
|
||||
for (unsigned int i = 1 ; i < m_span.length(); i += 2) {
|
||||
@ -439,27 +432,23 @@ bool TextSplit::span_is_acronym(string *acronym)
|
||||
}
|
||||
|
||||
|
||||
// Generate terms from span. Have to take into account the
|
||||
// flags: ONLYSPANS, NOSPANS, noNumbers
|
||||
// Generate terms from span. Have to take into account the flags: ONLYSPANS, NOSPANS, noNumbers
|
||||
bool TextSplit::words_from_span(size_t bp)
|
||||
{
|
||||
#if 0
|
||||
cerr << "Span: [" << m_span << "] " << " w_i_s size: " <<
|
||||
m_words_in_span.size() << " : ";
|
||||
cerr << "Span: [" << m_span << "] " << " bp " << bp <<
|
||||
" w_i_s size: " << m_words_in_span.size() << " : ";
|
||||
for (unsigned int i = 0; i < m_words_in_span.size(); i++) {
|
||||
cerr << " [" << m_words_in_span[i].first << " " <<
|
||||
m_words_in_span[i].second << "] ";
|
||||
cerr << " [" << m_words_in_span[i].first << " " << m_words_in_span[i].second << "] ";
|
||||
|
||||
}
|
||||
cerr << endl;
|
||||
#endif
|
||||
int spanwords = int(m_words_in_span.size());
|
||||
// It seems that something like: tv_combo-sample_util.Po@am_quote
|
||||
// can get the splitter to call doemit with a span of '@' and
|
||||
// words_in_span==0, which then causes a crash when accessing
|
||||
// words_in_span[0] if the stl assertions are active (e.g. Fedora
|
||||
// RPM build). Not too sure what the right fix would be, but for
|
||||
// now, just defend against it
|
||||
// It seems that something like: tv_combo-sample_util.Po@am_quote can get the splitter to call
|
||||
// doemit with a span of '@' and words_in_span==0, which then causes a crash when accessing
|
||||
// words_in_span[0] if the stl assertions are active (e.g. Fedora RPM build). Not too sure what
|
||||
// the right fix would be, but for now, just defend against it
|
||||
if (spanwords == 0) {
|
||||
return true;
|
||||
}
|
||||
@ -467,21 +456,17 @@ bool TextSplit::words_from_span(size_t bp)
|
||||
// Byte position of the span start
|
||||
size_t spboffs = bp - m_span.size();
|
||||
|
||||
if (o_deHyphenate && spanwords == 2 &&
|
||||
m_span[m_words_in_span[0].second] == '-') {
|
||||
if (o_deHyphenate && spanwords == 2 && m_span[m_words_in_span[0].second] == '-') {
|
||||
unsigned int s0 = m_words_in_span[0].first;
|
||||
unsigned int l0 = m_words_in_span[0].second - m_words_in_span[0].first;
|
||||
unsigned int s1 = m_words_in_span[1].first;
|
||||
unsigned int l1 = m_words_in_span[1].second - m_words_in_span[1].first;
|
||||
string word = m_span.substr(s0, l0) + m_span.substr(s1, l1);
|
||||
if (l0 && l1)
|
||||
emitterm(false, word,
|
||||
m_spanpos, spboffs, spboffs + m_words_in_span[1].second);
|
||||
emitterm(false, word, m_spanpos, spboffs, spboffs + m_words_in_span[1].second);
|
||||
}
|
||||
|
||||
for (int i = 0;
|
||||
i < ((m_flags&TXTS_ONLYSPANS) ? 1 : spanwords);
|
||||
i++) {
|
||||
for (int i = 0; i < ((m_flags&TXTS_ONLYSPANS) ? 1 : spanwords); i++) {
|
||||
|
||||
int deb = m_words_in_span[i].first;
|
||||
bool noposinc = m_words_in_span[i].second == deb;
|
||||
@ -490,8 +475,7 @@ bool TextSplit::words_from_span(size_t bp)
|
||||
j++) {
|
||||
|
||||
int fin = m_words_in_span[j].second;
|
||||
//cerr << "i " << i << " j " << j << " deb " << deb <<
|
||||
//" fin " << fin << endl;
|
||||
//cerr << "i " << i << " j " << j << " deb " << deb << " fin " << fin << endl;
|
||||
if (fin - deb > int(m_span.size()))
|
||||
break;
|
||||
string word(m_span.substr(deb, fin-deb));
|
||||
@ -519,7 +503,7 @@ bool TextSplit::words_from_span(size_t bp)
|
||||
*
|
||||
* @return true if ok, false for error. Splitting should stop in this case.
|
||||
* @param spanerase Set if the current span is at its end. Process it.
|
||||
* @param bp The current BYTE position in the stream
|
||||
* @param bp The current BYTE position in the stream (it's beyond the current span data).
|
||||
*/
|
||||
inline bool TextSplit::doemit(bool spanerase, size_t _bp)
|
||||
{
|
||||
@ -532,7 +516,7 @@ inline bool TextSplit::doemit(bool spanerase, size_t _bp)
|
||||
if (m_wordLen) {
|
||||
// We have a current word. Remember it
|
||||
|
||||
if (m_words_in_span.size() >= 6) {
|
||||
if (int(m_words_in_span.size()) >= o_maxWordsInSpan) {
|
||||
// Limit max span word count
|
||||
spanerase = true;
|
||||
}
|
||||
@ -550,38 +534,13 @@ inline bool TextSplit::doemit(bool spanerase, size_t _bp)
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
// Span is done (too long or span-terminating character). Produce
|
||||
// terms and reset it.
|
||||
// Span is done (too long or span-terminating character). Produce terms and reset it.
|
||||
string acronym;
|
||||
if (span_is_acronym(&acronym)) {
|
||||
if (!emitterm(false, acronym, m_spanpos, bp - m_span.length(), bp))
|
||||
return false;
|
||||
}
|
||||
|
||||
// Maybe trim at end. These are chars that we might keep
|
||||
// inside a span, but not at the end.
|
||||
string::size_type trimsz{0};
|
||||
while (trimsz < m_span.length()) {
|
||||
auto c = m_span[m_span.length() - 1 - trimsz];
|
||||
if (c == '.' || c == '-' || c == ',' || c == '@' || c == '\'' ||
|
||||
c == underscoreatend) {
|
||||
trimsz++;
|
||||
if (m_words_in_span.size() &&
|
||||
m_words_in_span.back().second > int(m_span.size())) {
|
||||
m_words_in_span.back().second = int(m_span.size());
|
||||
}
|
||||
if (--bp < 0) {
|
||||
bp = 0;
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (trimsz > 0) {
|
||||
m_span.resize(m_span.length() - trimsz);
|
||||
}
|
||||
|
||||
if (!words_from_span(bp)) {
|
||||
return false;
|
||||
}
|
||||
@ -640,6 +599,7 @@ bool TextSplit::text_to_words(const string &in)
|
||||
clearsplitstate();
|
||||
|
||||
bool pagepending = false;
|
||||
bool nlpending = false;
|
||||
bool softhyphenpending = false;
|
||||
|
||||
// Running count of non-alphanum chars. Reset when we see one;
|
||||
@ -713,8 +673,7 @@ bool TextSplit::text_to_words(const string &in)
|
||||
prev_csc = csc;
|
||||
#endif
|
||||
|
||||
char asciirep = 0;
|
||||
int cc = whatcc(c, &asciirep);
|
||||
int cc = whatcc(c);
|
||||
|
||||
switch (cc) {
|
||||
case SKIP:
|
||||
@ -750,6 +709,10 @@ bool TextSplit::text_to_words(const string &in)
|
||||
pagepending = false;
|
||||
newpage(m_wordpos);
|
||||
}
|
||||
if (nlpending) {
|
||||
nlpending = false;
|
||||
newline(m_wordpos);
|
||||
}
|
||||
break;
|
||||
|
||||
case WILD:
|
||||
@ -773,7 +736,7 @@ bool TextSplit::text_to_words(const string &in)
|
||||
}
|
||||
} else if (m_inNumber) {
|
||||
if ((m_span[m_span.length() - 1] == 'e' ||
|
||||
m_span[m_span.length() - 1] == 'E')) {
|
||||
m_span[m_span.length() - 1] == 'E')) {
|
||||
if (isdigit(whatcc(it[it.getCpos()+1]), m_flags)) {
|
||||
m_wordLen += it.appendchartostring(m_span);
|
||||
STATS_INC_WORDCHARS;
|
||||
@ -781,17 +744,24 @@ bool TextSplit::text_to_words(const string &in)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
int nextc = it[it.getCpos()+1];
|
||||
if (cc == '+') {
|
||||
int nextc = it[it.getCpos()+1];
|
||||
if (nextc == '+' || nextc == -1 || visiblewhite.find(nextc)
|
||||
!= visiblewhite.end()) {
|
||||
if (nextc == '+' || nextc == -1 || isvisiblewhite(nextc)) {
|
||||
// someword++[+...] !
|
||||
m_wordLen += it.appendchartostring(m_span);
|
||||
STATS_INC_WORDCHARS;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// Treat '-' inside span as glue char
|
||||
// Note about dangling hyphens: we always strip '-' found before whitespace,
|
||||
// even before a newline, then generate two terms, before and after the line
|
||||
// break. We have no way to know if '-' is there because a word was broken by
|
||||
// justification or if it was part of an actual compound word (would need a
|
||||
// dictionary to check). A soft-hyphen *should* be used instead if the '-' is not part
|
||||
// of the text.
|
||||
if (nextc == -1 || isvisiblewhite(nextc)) {
|
||||
goto SPACE;
|
||||
}
|
||||
if (!doemit(false, it.getBpos()))
|
||||
return false;
|
||||
m_inNumber = false;
|
||||
@ -827,8 +797,7 @@ bool TextSplit::text_to_words(const string &in)
|
||||
m_inNumber = true;
|
||||
m_wordLen += it.appendchartostring(m_span);
|
||||
} else {
|
||||
m_words_in_span.
|
||||
push_back(pair<int,int>(m_wordStart, m_wordStart));
|
||||
m_words_in_span.push_back(pair<int,int>(m_wordStart, m_wordStart));
|
||||
m_wordStart += it.appendchartostring(m_span);
|
||||
}
|
||||
STATS_INC_WORDCHARS;
|
||||
@ -845,38 +814,28 @@ bool TextSplit::text_to_words(const string &in)
|
||||
}
|
||||
break;
|
||||
|
||||
case 0x2010:
|
||||
case 0x2019:
|
||||
case 0x2010: // hyphen
|
||||
case 0x2019: // variations on single quote
|
||||
case 0x275c:
|
||||
case 0x02bc:
|
||||
// Unicode chars which we replace with ascii for
|
||||
// processing (2010 -> -,others -> '). It happens that
|
||||
// they all work as glue chars and use the same code, but
|
||||
// there might be cases needing different processing.
|
||||
// Hyphen is replaced with ascii minus
|
||||
if (m_wordLen) {
|
||||
// Inside span: glue char
|
||||
if (!doemit(false, it.getBpos()))
|
||||
return false;
|
||||
m_inNumber = false;
|
||||
m_span += asciirep;
|
||||
m_wordStart++;
|
||||
break;
|
||||
}
|
||||
goto SPACE;
|
||||
|
||||
case '@':
|
||||
case '_': // If underscoreasletter is set, we'll never get this
|
||||
case '\'':
|
||||
{
|
||||
// If in word, potential span: o'brien, jf@dockes.org,
|
||||
// else just ignore
|
||||
int nextc = it[it.getCpos()+1];
|
||||
if (nextc == -1 || isvisiblewhite(nextc)) {
|
||||
goto SPACE;
|
||||
}
|
||||
if (m_wordLen) {
|
||||
if (!doemit(false, it.getBpos()))
|
||||
return false;
|
||||
m_inNumber = false;
|
||||
m_wordStart += it.appendchartostring(m_span);
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case '#': {
|
||||
int w = whatcc(it[it.getCpos()+1]);
|
||||
@ -899,19 +858,10 @@ bool TextSplit::text_to_words(const string &in)
|
||||
break;
|
||||
|
||||
case '\n':
|
||||
nlpending = true;
|
||||
/* FALLTHROUGH */
|
||||
case '\r':
|
||||
if (m_span.length() && *m_span.rbegin() == '-') {
|
||||
// if '-' is the last char before end of line, we
|
||||
// strip it. We have no way to know if this is added
|
||||
// because of the line split or if it was part of an
|
||||
// actual compound word (would need a dictionary to
|
||||
// check). As soft-hyphen *should* be used if the '-'
|
||||
// is not part of the text, it is better to properly
|
||||
// process a real compound word, and produce wrong
|
||||
// output from wrong text. The word-emitting routine
|
||||
// will strip the trailing '-'.
|
||||
goto SPACE;
|
||||
} else if (softhyphenpending) {
|
||||
if (softhyphenpending) {
|
||||
// Don't reset soft-hyphen
|
||||
continue;
|
||||
} else {
|
||||
@ -1107,7 +1057,7 @@ bool TextSplit::cjk_to_words(Utf8Iter& it, unsigned int *cp)
|
||||
|
||||
// Specialization for countWords
|
||||
class TextSplitCW : public TextSplit {
|
||||
public:
|
||||
public:
|
||||
int wcnt;
|
||||
TextSplitCW(Flags flags) : TextSplit(flags), wcnt(0) {}
|
||||
bool takeword(const string &, int, int, int) {
|
||||
@ -1132,7 +1082,7 @@ bool TextSplit::hasVisibleWhite(const string &in)
|
||||
LOGERR("hasVisibleWhite: error while scanning UTF-8 string\n");
|
||||
return false;
|
||||
}
|
||||
if (visiblewhite.find(c) != visiblewhite.end())
|
||||
if (isvisiblewhite(c))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
@ -1157,7 +1107,7 @@ template <class T> bool u8stringToStrings(const string &s, T &tokens)
|
||||
}
|
||||
|
||||
switch (c) {
|
||||
case '"':
|
||||
case '"':
|
||||
switch(state) {
|
||||
case SPACE: state = INQUOTE; continue;
|
||||
case TOKEN: goto push_char;
|
||||
@ -1166,7 +1116,7 @@ template <class T> bool u8stringToStrings(const string &s, T &tokens)
|
||||
state = SPACE; continue;
|
||||
}
|
||||
break;
|
||||
case '\\':
|
||||
case '\\':
|
||||
switch(state) {
|
||||
case SPACE:
|
||||
case TOKEN: state=TOKEN; goto push_char;
|
||||
@ -1175,25 +1125,25 @@ template <class T> bool u8stringToStrings(const string &s, T &tokens)
|
||||
}
|
||||
break;
|
||||
|
||||
case ' ':
|
||||
case '\t':
|
||||
case '\n':
|
||||
case '\r':
|
||||
case ' ':
|
||||
case '\t':
|
||||
case '\n':
|
||||
case '\r':
|
||||
switch(state) {
|
||||
case SPACE: continue;
|
||||
case TOKEN: tokens.push_back(current); current.clear();
|
||||
case SPACE: continue;
|
||||
case TOKEN: tokens.push_back(current); current.clear();
|
||||
state = SPACE; continue;
|
||||
case INQUOTE:
|
||||
case ESCAPE: goto push_char;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
default:
|
||||
switch(state) {
|
||||
case ESCAPE: state = INQUOTE; break;
|
||||
case SPACE: state = TOKEN; break;
|
||||
case TOKEN:
|
||||
case INQUOTE: break;
|
||||
case ESCAPE: state = INQUOTE; break;
|
||||
case SPACE: state = TOKEN; break;
|
||||
case TOKEN:
|
||||
case INQUOTE: break;
|
||||
}
|
||||
push_char:
|
||||
it.appendchartostring(current);
|
||||
@ -1214,4 +1164,3 @@ bool TextSplit::stringToStrings(const string &s, vector<string> &tokens)
|
||||
{
|
||||
return u8stringToStrings<vector<string> >(s, tokens);
|
||||
}
|
||||
|
||||
|
||||
@ -50,6 +50,8 @@ public:
|
||||
TextSplit(Flags flags = Flags(TXTS_NONE))
|
||||
: m_flags(flags) {}
|
||||
virtual ~TextSplit() {}
|
||||
TextSplit(const TextSplit&) = delete;
|
||||
TextSplit& operator=(const TextSplit&) = delete;
|
||||
|
||||
/** Call at program initialization to read non default values from the
|
||||
configuration */
|
||||
@ -71,6 +73,9 @@ public:
|
||||
* just don't know about pages. */
|
||||
virtual void newpage(int /*pos*/) {}
|
||||
|
||||
/** Called when we encounter newline \n 0x0a. Override to use the event. */
|
||||
virtual void newline(int /*pos*/) {}
|
||||
|
||||
// Static utility functions:
|
||||
|
||||
/** Count words in string, as the splitter would generate them */
|
||||
@ -157,12 +162,16 @@ private:
|
||||
static bool o_deHyphenate; // false
|
||||
static unsigned int o_CJKNgramLen; // 2
|
||||
static int o_maxWordLength; // 40
|
||||
static int o_maxWordsInSpan; // 6
|
||||
|
||||
Flags m_flags;
|
||||
|
||||
// Current span. Might be jf.dockes@wanadoo.f
|
||||
std::string m_span;
|
||||
|
||||
// Words in span: byte positions of start and end of words in m_span. For example:
|
||||
// 0 4 9
|
||||
// bill@some.com -> (0,4) (5,9) (10,13)
|
||||
std::vector <std::pair<int, int> > m_words_in_span;
|
||||
|
||||
// Current word: no punctuation at all in there. Byte offset
|
||||
|
||||
@ -133,7 +133,7 @@ bool TextSplit::ko_to_words(Utf8Iter *itp, unsigned int *cp)
|
||||
unordered_map<string, string> args;
|
||||
|
||||
args.insert(pair<string,string>{"data", string()});
|
||||
string& inputdata{args.begin()->second};
|
||||
string& inputdata(args.begin()->second);
|
||||
|
||||
// We send the tagger name every time but it's only used the first
|
||||
// one: can't change it after init. We could avoid sending it
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2004-2019 J.F.Dockes
|
||||
/* Copyright (C) 2004-2021 J.F.Dockes
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
@ -29,7 +29,7 @@
|
||||
using namespace std;
|
||||
|
||||
bool unacmaybefold(const string &in, string &out,
|
||||
const char *encoding, UnacOp what)
|
||||
const char *encoding, UnacOp what)
|
||||
{
|
||||
char *cout = 0;
|
||||
size_t out_len;
|
||||
@ -37,16 +37,13 @@ bool unacmaybefold(const string &in, string &out,
|
||||
|
||||
switch (what) {
|
||||
case UNACOP_UNAC:
|
||||
status = unac_string(encoding, in.c_str(), in.length(),
|
||||
&cout, &out_len);
|
||||
status = unac_string(encoding, in.c_str(), in.length(), &cout, &out_len);
|
||||
break;
|
||||
case UNACOP_UNACFOLD:
|
||||
status = unacfold_string(encoding, in.c_str(), in.length(),
|
||||
&cout, &out_len);
|
||||
status = unacfold_string(encoding, in.c_str(), in.length(), &cout, &out_len);
|
||||
break;
|
||||
case UNACOP_FOLD:
|
||||
status = fold_string(encoding, in.c_str(), in.length(),
|
||||
&cout, &out_len);
|
||||
status = fold_string(encoding, in.c_str(), in.length(), &cout, &out_len);
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
@ -34,6 +34,8 @@ class WebStore {
|
||||
public:
|
||||
WebStore(RclConfig *config);
|
||||
~WebStore();
|
||||
WebStore(const WebStore&) = delete;
|
||||
WebStore& operator=(const WebStore&) = delete;
|
||||
|
||||
bool getFromCache(const std::string& udi, Rcl::Doc &doc, std::string& data,
|
||||
std::string *hittype = 0);
|
||||
|
||||
287
src/configure.ac
287
src/configure.ac
@ -1,7 +1,7 @@
|
||||
AC_INIT([Recoll], m4_esyscmd_s(cat VERSION))
|
||||
AC_INIT([Recoll],[m4_esyscmd_s(cat RECOLL-VERSION.txt)])
|
||||
AC_CONFIG_HEADERS([common/autoconfig.h])
|
||||
AH_BOTTOM([#include "conf_post.h"])
|
||||
AC_PREREQ(2.53)
|
||||
AC_PREREQ([2.69])
|
||||
AC_CONFIG_SRCDIR(index/recollindex.cpp)
|
||||
|
||||
AM_INIT_AUTOMAKE([1.10 no-define subdir-objects foreign])
|
||||
@ -21,7 +21,7 @@ if test C$CXX = C ; then
|
||||
AC_MSG_ERROR([C++ compiler needed. Please install one (ie: gnu g++)])
|
||||
fi
|
||||
AC_LANG_PUSH([C++])
|
||||
AC_TRY_LINK([],[], rcl_link_ok=yes, rcl_link_ok=no)
|
||||
AC_LINK_IFELSE([AC_LANG_PROGRAM([[]], [[]])],[rcl_link_ok=yes],[rcl_link_ok=no])
|
||||
if test "$rcl_link_ok" = "no" ; then
|
||||
AC_MSG_ERROR([No working C++ compiler was found])
|
||||
fi
|
||||
@ -42,7 +42,7 @@ esac
|
||||
|
||||
AC_PROG_YACC
|
||||
|
||||
AC_PROG_LIBTOOL
|
||||
LT_INIT
|
||||
AC_C_BIGENDIAN
|
||||
|
||||
AC_SYS_LARGEFILE
|
||||
@ -53,8 +53,7 @@ AC_CHECK_HEADERS([sys/param.h, spawn.h])
|
||||
|
||||
if test "x$ac_cv_func_posix_spawn" = xyes; then :
|
||||
AC_ARG_ENABLE(posix_spawn,
|
||||
AC_HELP_STRING([--enable-posix_spawn],
|
||||
[Enable the use of posix_spawn().]),
|
||||
AS_HELP_STRING([--enable-posix_spawn],[Enable the use of posix_spawn().]),
|
||||
posixSpawnEnabled=$enableval, posixSpawnEnabled=no)
|
||||
fi
|
||||
if test X$posixSpawnEnabled = Xyes ; then
|
||||
@ -69,11 +68,35 @@ AC_CHECK_HEADERS([sys/mount.h sys/statfs.h sys/statvfs.h sys/vfs.h malloc.h mall
|
||||
|
||||
AC_CHECK_FUNCS([posix_spawn setrlimit kqueue vsnprintf malloc_trim posix_fadvise])
|
||||
|
||||
AC_CHECK_FUNCS(mkdtemp)
|
||||
AC_CHECK_LIB([pthread], [pthread_create], [], [])
|
||||
# Look for dlopen() in the default libraries, then in -ldl. The outcome of
# the search is cached in ac_cv_search_dlopen, which holds "none required",
# the -l option that was needed, or "no" if dlopen() was not found. The
# value may contain a space, so it is quoted in the test below.
AC_SEARCH_LIBS([dlopen], [dl], [], [])
|
||||
if test "X$ac_cv_search_dlopen" != Xno ; then
|
||||
AC_DEFINE(HAVE_DLOPEN, 1, [dlopen function is available])
|
||||
fi
|
||||
AC_CHECK_LIB([z], [zlibVersion], [], [])
|
||||
|
||||
############# Putenv
# Probe, with the C++ compiler, whether putenv() accepts a const char *
# argument: the test program below only compiles if it does, in which case
# PUTENV_ARG_CONST is defined. The outcome is reported so that the
# "checking ..." line printed first is properly terminated.
|
||||
AC_MSG_CHECKING(for type of string parameter to putenv)
|
||||
AC_LANG_PUSH([C++])
|
||||
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
|
||||
#include <stdlib.h>
|
||||
]], [[
|
||||
putenv((const char *)0);
|
||||
]])],[rcl_putenv_string_const="1"],[rcl_putenv_string_const="0"])
|
||||
if test X$rcl_putenv_string_const = X1 ; then
AC_MSG_RESULT([const char *])
|
||||
AC_DEFINE(PUTENV_ARG_CONST, 1, [putenv parameter is const])
else
AC_MSG_RESULT([char *])
|
||||
fi
|
||||
AC_LANG_POP([C++])
|
||||
|
||||
|
||||
PKG_CHECK_MODULES([XSLT], [libxslt], [], AC_MSG_ERROR([libxslt]))
|
||||
|
||||
|
||||
# Use specific 'file' command ? (Useful on solaris to specify
|
||||
# /usr/local/bin/file instead of the system's, which doesn't understand '-i')
|
||||
AC_ARG_WITH(file-command,
|
||||
AC_HELP_STRING([--with-file-command],
|
||||
[Specify version of 'file' command (ie: --with-file-command=/usr/local/bin/file)]),
|
||||
AS_HELP_STRING([--with-file-command],[Specify version of 'file' command (ie: --with-file-command=/usr/local/bin/file)]),
|
||||
withFileCommand=$withval, withFileCommand=file)
|
||||
case $withFileCommand in
|
||||
file)
|
||||
@ -96,8 +119,7 @@ AC_DEFINE(USE_SYSTEM_FILE_COMMAND, 1,
|
||||
# we do compile the aspell module using an internal copy of aspell.h
|
||||
# Only --with-aspell=no will completely disable aspell support
|
||||
AC_ARG_WITH(aspell,
|
||||
AC_HELP_STRING([--without-aspell],
|
||||
[Disable use of aspell spelling package to provide term expansion to other spellings]),
|
||||
AS_HELP_STRING([--without-aspell],[Disable use of aspell spelling package to provide term expansion to other spellings]),
|
||||
withAspell=$withval, withAspell=yes)
|
||||
case $withAspell in
|
||||
no);;
|
||||
@ -126,8 +148,7 @@ fi
|
||||
|
||||
# Real time monitoring with inotify
|
||||
AC_ARG_WITH(inotify,
|
||||
AC_HELP_STRING([--with-inotify],
|
||||
[Use inotify for almost real time indexing of modified files (the default
|
||||
AS_HELP_STRING([--with-inotify],[Use inotify for almost real time indexing of modified files (the default
|
||||
is yes on Linux).]),
|
||||
withInotify=$withval, withInotify=$inot_default)
|
||||
|
||||
@ -141,8 +162,7 @@ fi
|
||||
|
||||
# Real time monitoring with FAM
|
||||
AC_ARG_WITH(fam,
|
||||
AC_HELP_STRING([--with-fam],
|
||||
[Use File Alteration Monitor for almost real time indexing of modified files. Give the fam/gamin library as argument (ie: /usr/lib/libfam.so) if configure does not find the right one.]),
|
||||
AS_HELP_STRING([--with-fam],[Use File Alteration Monitor for almost real time indexing of modified files. Give the fam/gamin library as argument (ie: /usr/lib/libfam.so) if configure does not find the right one.]),
|
||||
withFam=$withval, withFam=yes)
|
||||
|
||||
if test X$withFam != Xno -a X$withInotify != Xno ; then
|
||||
@ -206,21 +226,15 @@ if test X$idxthreadsEnabled = Xyes ; then
|
||||
AC_DEFINE(IDX_THREADS, 1, [Use multiple threads for indexing])
|
||||
fi
|
||||
|
||||
AC_ARG_ENABLE(testmains,
|
||||
AC_HELP_STRING([--enable-testmains],
|
||||
[Enable building small test drivers. These are not unit tests.]),
|
||||
buildtestmains=$enableval, buildtestmains=no)
|
||||
AM_CONDITIONAL([COND_TESTMAINS], [test "$buildtestmains" = yes])
|
||||
|
||||
# Enable CamelCase word splitting. This is optional because it causes
|
||||
# problems with phrases: with camelcase enabled, "MySQL manual"
|
||||
# will be matched by "MySQL manual" and "my sql manual" but not
|
||||
# "mysql manual" (which would need increased slack as manual is now at pos
|
||||
# 2 instead of 1)
|
||||
AC_ARG_ENABLE(camelcase,
|
||||
AC_HELP_STRING([--enable-camelcase],
|
||||
[Enable splitting camelCase words. This is not enabled by default as
|
||||
this makes phrase matches more difficult: you need to use matching
|
||||
AS_HELP_STRING([--enable-camelcase],
|
||||
[Enable splitting camelCase words. This is not enabled by default as
|
||||
it makes phrase matches more difficult: you need to use matching
|
||||
case in the phrase query to get a match. Ie querying for
|
||||
"MySQL manual" and "my sql manual" are the same, but not the same as
|
||||
"mysql manual" (in phrases only and you could raise the phrase slack to
|
||||
@ -230,109 +244,46 @@ if test X$camelcaseEnabled = Xyes ; then
|
||||
AC_DEFINE(RCL_SPLIT_CAMELCASE, 1, [Split camelCase words])
|
||||
fi
|
||||
|
||||
|
||||
AC_ARG_ENABLE(testmains,
|
||||
AS_HELP_STRING([--enable-testmains],[Enable building small test drivers. These are not unit tests.]),
|
||||
buildtestmains=$enableval, buildtestmains=no)
|
||||
AM_CONDITIONAL([COND_TESTMAINS], [test "$buildtestmains" = yes])
|
||||
|
||||
AC_ARG_ENABLE(rclgrep,
|
||||
AS_HELP_STRING([--enable-rclgrep],[Enable building the index-less search tool.]),
|
||||
buildrclgrep=$enableval, buildrclgrep=no)
|
||||
AM_CONDITIONAL([COND_RCLGREP], [test "$buildrclgrep" = yes])
|
||||
|
||||
# Disable building the python module.
|
||||
AC_ARG_ENABLE(python-module,
|
||||
AC_HELP_STRING([--disable-python-module],
|
||||
[Do not build the Python module.]),
|
||||
AS_HELP_STRING([--disable-python-module],[Do not build the Python module.]),
|
||||
pythonEnabled=$enableval, pythonEnabled=yes)
|
||||
|
||||
AM_CONDITIONAL(MAKEPYTHON, [test X$pythonEnabled = Xyes])
|
||||
|
||||
# Disable building the libchm python wrapper
|
||||
AC_ARG_ENABLE(python-chm, AC_HELP_STRING([--disable-python-chm],
|
||||
[Do not build the libchm Python wrapper.]),
|
||||
AC_ARG_ENABLE(python-chm,
|
||||
AS_HELP_STRING([--disable-python-chm], [Do not build the libchm Python wrapper.]),
|
||||
pythonChmEnabled=$enableval, pythonChmEnabled=yes)
|
||||
|
||||
if test X$pythonChmEnabled = Xyes; then
|
||||
AC_CHECK_LIB([chm], [chm_resolve_object], [],
|
||||
[AC_MSG_ERROR([--enable-python-chm is set but libchm is not found])])
|
||||
fi
|
||||
|
||||
AM_CONDITIONAL(MAKEPYTHONCHM, [test X$pythonChmEnabled = Xyes])
|
||||
|
||||
|
||||
AC_CHECK_FUNCS(mkdtemp)
|
||||
AC_CHECK_LIB([pthread], [pthread_create], [], [])
|
||||
AC_SEARCH_LIBS([dlopen], [dl], [], [])
|
||||
if test X$ac_cv_search_function != Xno ; then
|
||||
AC_DEFINE(HAVE_DLOPEN, 1, [dlopen function is available])
|
||||
fi
|
||||
AC_CHECK_LIB([z], [zlibVersion], [], [])
|
||||
|
||||
############# Putenv
|
||||
AC_MSG_CHECKING(for type of string parameter to putenv)
|
||||
AC_LANG_PUSH([C++])
|
||||
AC_TRY_COMPILE([
|
||||
#include <stdlib.h>
|
||||
],[
|
||||
putenv((const char *)0);
|
||||
], rcl_putenv_string_const="1", rcl_putenv_string_const="0")
|
||||
if test X$rcl_putenv_string_const = X1 ; then
|
||||
AC_DEFINE(PUTENV_ARG_CONST, 1, [putenv parameter is const])
|
||||
fi
|
||||
AC_LANG_POP([C++])
|
||||
|
||||
|
||||
#### Look for Xapian. Done in a strange way to work around autoconf
|
||||
# cache
|
||||
XAPIAN_CONFIG=${XAPIAN_CONFIG:-no}
|
||||
if test "$XAPIAN_CONFIG" = "no"; then
|
||||
AC_PATH_PROG(XAPIAN_CONFIG0, [xapian-config], no)
|
||||
XAPIAN_CONFIG=$XAPIAN_CONFIG0
|
||||
fi
|
||||
if test "$XAPIAN_CONFIG" = "no"; then
|
||||
AC_PATH_PROG(XAPIAN_CONFIG1, [xapian-config-1.3], no)
|
||||
XAPIAN_CONFIG=$XAPIAN_CONFIG1
|
||||
fi
|
||||
if test "$XAPIAN_CONFIG" = "no"; then
|
||||
AC_PATH_PROG(XAPIAN_CONFIG2, [xapian-config-1.1], no)
|
||||
XAPIAN_CONFIG=$XAPIAN_CONFIG2
|
||||
fi
|
||||
|
||||
if test "$XAPIAN_CONFIG" = "no" ; then
|
||||
AC_MSG_ERROR([Cannot find xapian-config command in $PATH. Is
|
||||
xapian-core installed ?])
|
||||
exit 1
|
||||
fi
|
||||
LIBXAPIAN=`$XAPIAN_CONFIG --libs`
|
||||
# The --static thing fails with older Xapians. Happily enough they don't
|
||||
# need it either (because there are no needed libraries (no uuid and we
|
||||
# deal explicitly with libz)
|
||||
LIBXAPIANSTATICEXTRA=`$XAPIAN_CONFIG --static --libs 2> /dev/null`
|
||||
# Workaround for problem in xapian-config in some versions: wrongly lists
|
||||
# libstdc++.la in the lib list
|
||||
for i in $LIBXAPIAN ; do
|
||||
case $i in
|
||||
*stdc++*|-lm|-lgcc_s|-lc);;
|
||||
*) tmpxaplib="$tmpxaplib $i";;
|
||||
esac
|
||||
done
|
||||
LIBXAPIAN=$tmpxaplib
|
||||
LIBXAPIANDIR=`$XAPIAN_CONFIG --libs | awk '{print $1}'`
|
||||
case A"$LIBXAPIANDIR" in
|
||||
A-L*) LIBXAPIANDIR=`echo $LIBXAPIANDIR | sed -e 's/-L//'`;;
|
||||
*) LIBXAPIANDIR="";;
|
||||
esac
|
||||
XAPIANCXXFLAGS=`$XAPIAN_CONFIG --cxxflags`
|
||||
|
||||
#echo XAPIAN_CONFIG: $XAPIAN_CONFIG
|
||||
#echo LIBXAPIAN: $LIBXAPIAN
|
||||
#echo LIBXAPIANDIR: $LIBXAPIANDIR
|
||||
#echo LIBXAPIANSTATICEXTRA: $LIBXAPIANSTATICEXTRA
|
||||
#echo XAPIANCXXFLAGS: $XAPIANCXXFLAGS
|
||||
|
||||
|
||||
PKG_CHECK_MODULES([XSLT], [libxslt], [], AC_MSG_ERROR([libxslt]))
|
||||
AC_ARG_ENABLE(indexer,
|
||||
AS_HELP_STRING([--disable-indexer],[Disable building the recollindex indexer.]),
|
||||
enableINDEXER=$enableval, enableINDEXER="yes")
|
||||
AM_CONDITIONAL(MAKEINDEXER, [test X$enableINDEXER = Xyes])
|
||||
|
||||
AC_ARG_ENABLE(xadump,
|
||||
AC_HELP_STRING([--enable-xadump],
|
||||
[Enable building the xadump low level Xapian access program.]),
|
||||
AS_HELP_STRING([--enable-xadump],[Enable building the xadump low level Xapian access program.]),
|
||||
enableXADUMP=$enableval, enableXADUMP="no")
|
||||
AM_CONDITIONAL(MAKEXADUMP, [test X$enableXADUMP = Xyes])
|
||||
|
||||
AC_ARG_ENABLE(userdoc,
|
||||
AC_HELP_STRING([--disable-userdoc],
|
||||
[Disable building the user manual. (Avoids the need for docbook xml/xsl files and TeX tools.]),
|
||||
AS_HELP_STRING([--disable-userdoc],[Disable building the user manual. (Avoids the need for docbook xml/xsl files and TeX tools.]),
|
||||
enableUserdoc=$enableval, enableUserdoc="yes")
|
||||
AM_CONDITIONAL(MAKEUSERDOC, [test X$enableUserdoc = Xyes])
|
||||
|
||||
@ -360,14 +311,12 @@ AM_CONDITIONAL(MAKEUSERDOC, [test X$enableUserdoc = Xyes])
|
||||
# will have failed, and we tell the user to check his environment.
|
||||
#
|
||||
AC_ARG_ENABLE(qtgui,
|
||||
AC_HELP_STRING([--disable-qtgui],
|
||||
[Disable the QT-based graphical user interface.]),
|
||||
AS_HELP_STRING([--disable-qtgui],[Disable the QT-based graphical user interface.]),
|
||||
enableQT=$enableval, enableQT="yes")
|
||||
AM_CONDITIONAL(MAKEQT, [test X$enableQT = Xyes])
|
||||
|
||||
AC_ARG_ENABLE(recollq,
|
||||
AC_HELP_STRING([--enable-recollq],
|
||||
[Enable building the recollq command line query tool (recoll -t without
|
||||
AS_HELP_STRING([--enable-recollq],[Enable building the recollq command line query tool (recoll -t without
|
||||
need for Qt). This is done by default if --disable-qtgui is set but this
|
||||
option enables forcing it.]),
|
||||
enableRECOLLQ=$enableval, enableRECOLLQ="no")
|
||||
@ -398,28 +347,11 @@ if test X$enableQT = Xyes ; then
|
||||
qt development files and tools and/or set the QTDIR environment variable?])
|
||||
fi
|
||||
QMAKE=$QMAKEPATH
|
||||
|
||||
# Check Qt version
|
||||
qmakevers="`${QMAKE} --version 2>&1`"
|
||||
#echo "qmake version: $qmakevers"
|
||||
v4=`expr "$qmakevers" : '.*Qt[ ][ ]*version[ ][ ]*4.*'`
|
||||
v5=`expr "$qmakevers" : '.*Qt[ ][ ]*version[ ][ ]*5.*'`
|
||||
if test X$v4 = X0 -a X$v5 = X0; then
|
||||
AC_MSG_ERROR([Bad qt/qmake version string (not 4 or 5?): $qmakevers])
|
||||
else
|
||||
if test X$v4 != X0 ; then
|
||||
AC_MSG_ERROR([Qt version (from qmake found with QMAKE/QTDIR/PATH) is 4 but Recoll now needs version 5])
|
||||
else
|
||||
AC_MSG_NOTICE([using qt version 5 user interface])
|
||||
fi
|
||||
QTGUI=qtgui
|
||||
fi
|
||||
|
||||
QTGUI=qtgui
|
||||
|
||||
##### Using Qt webkit for reslist display? Else Qt textbrowser
|
||||
AC_ARG_ENABLE(webkit,
|
||||
AC_HELP_STRING([--disable-webkit],
|
||||
[Disable use of qt-webkit (only meaningful if qtgui is enabled).]),
|
||||
AS_HELP_STRING([--disable-webkit],[Disable use of qt-webkit (only meaningful if qtgui is enabled).]),
|
||||
enableWebkit=$enableval, enableWebkit="yes")
|
||||
|
||||
if test "$enableWebkit" = "yes" ; then
|
||||
@ -431,8 +363,7 @@ if test X$enableQT = Xyes ; then
|
||||
fi
|
||||
|
||||
AC_ARG_ENABLE(webengine,
|
||||
AC_HELP_STRING([--enable-webengine],
|
||||
[Enable use of qt-webengine (only meaningful if qtgui is enabled), in
|
||||
AS_HELP_STRING([--enable-webengine],[Enable use of qt-webengine (only meaningful if qtgui is enabled), in
|
||||
place or qt-webkit.]),
|
||||
enableWebengine=$enableval, enableWebengine="no")
|
||||
|
||||
@ -448,8 +379,7 @@ if test X$enableQT = Xyes ; then
|
||||
|
||||
##### Using QZeitGeist lib ? Default no for now
|
||||
AC_ARG_WITH(qzeitgeist,
|
||||
AC_HELP_STRING([--with-qzeitgeist],
|
||||
[Enable the use of the qzeitgeist library to send zeitgeist events.]),
|
||||
AS_HELP_STRING([--with-qzeitgeist],[Enable the use of the qzeitgeist library to send zeitgeist events.]),
|
||||
withQZeitgeist=$withval, withQZeitgeist="no")
|
||||
|
||||
case "$withQZeitgeist" in
|
||||
@ -466,22 +396,73 @@ if test X$enableQT = Xyes ; then
|
||||
QMAKE_DISABLE_ZEITGEIST=""
|
||||
fi
|
||||
|
||||
# Retain debugging symbols in GUI recoll ? This makes it enormous (~50MB)
|
||||
AC_ARG_ENABLE(guidebug,
|
||||
AS_HELP_STRING([--enable-guidebug],[Generate and retain debug symbols in GUI program (makes the file very big).]),
|
||||
enableGuiDebug=$enableval, enableGuiDebug="no")
|
||||
|
||||
if test "$enableGuiDebug" = "yes" ; then
|
||||
QMAKE_ENABLE_GUIDEBUG=""
|
||||
else
|
||||
QMAKE_ENABLE_GUIDEBUG="#"
|
||||
fi
|
||||
|
||||
AC_CONFIG_FILES($QTGUI/recoll.pro)
|
||||
|
||||
##################### End QT stuff
|
||||
fi
|
||||
|
||||
### X11: this is needed for the session monitoring code (in recollindex -m)
|
||||
AC_ARG_ENABLE(x11mon,
|
||||
AC_HELP_STRING([--disable-x11mon],
|
||||
[Disable recollindex support for X11 session monitoring.]),
|
||||
enableX11mon=$enableval, enableX11mon="yes")
|
||||
dnl Borrow a macro definition from pkg.config,
|
||||
dnl for older installs that lack it.
|
||||
m4_ifndef([PKG_CHECK_VAR], [
|
||||
dnl PKG_CHECK_VAR(VARIABLE, MODULE, CONFIG-VARIABLE,
|
||||
dnl [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
|
||||
dnl -------------------------------------------
|
||||
dnl Retrieves the value of the pkg-config variable for the given module.
|
||||
AC_DEFUN([PKG_CHECK_VAR],
|
||||
[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
|
||||
AC_ARG_VAR([$1], [value of $3 for $2, overriding pkg-config])dnl
|
||||
|
||||
if test X$withInotify = Xno -a X$withFam = Xno ; then
|
||||
enableX11mon=no
|
||||
_PKG_CONFIG([$1], [variable="][$3]["], [$2])
|
||||
AS_VAR_COPY([$1], [pkg_cv_][$1])
|
||||
|
||||
AS_VAR_IF([$1], [""], [$5], [$4])dnl
|
||||
])dnl PKG_CHECK_VAR
|
||||
])
|
||||
|
||||
### Systemd
|
||||
AC_ARG_WITH([systemd],
|
||||
AS_HELP_STRING([--without-systemd],[Disable installation of the systemd unit files.]))
|
||||
AC_ARG_WITH([system-unit-dir],
|
||||
AS_HELP_STRING([--with-system-unit-dir=DIR],[Install location for systemd system unit files]),
|
||||
[SYSTEMD_SYSTEM_UNIT_DIR="$withval"],
|
||||
[PKG_CHECK_VAR([SYSTEMD_SYSTEM_UNIT_DIR], [systemd], [systemdsystemunitdir])])
|
||||
AC_ARG_WITH([user-unit-dir],
|
||||
AS_HELP_STRING([--with-user-unit-dir=DIR],[Install location for systemd user unit files]),
|
||||
[SYSTEMD_USER_UNIT_DIR="$withval"],
|
||||
[PKG_CHECK_VAR([SYSTEMD_USER_UNIT_DIR], [systemd], [systemduserunitdir])])
|
||||
|
||||
if test X$enableINDEXER = Xno -o "x$SYSTEMD_SYSTEM_UNIT_DIR" = "x" -o \
|
||||
"x$SYSTEMD_USER_UNIT_DIR" = "x"; then
|
||||
with_systemd="no"
|
||||
fi
|
||||
|
||||
if test "$enableX11mon" = "yes" ; then
|
||||
AM_CONDITIONAL([INSTALL_SYSTEMD_UNITS], [test "X$with_systemd" != "Xno"])
|
||||
|
||||
### X11: this is needed for the session monitoring code (in recollindex -m)
|
||||
AC_ARG_ENABLE(x11mon,
|
||||
AS_HELP_STRING([--disable-x11mon],[Disable recollindex support for X11 session monitoring.]),
|
||||
enableX11mon=$enableval, enableX11mon="yes")
|
||||
|
||||
if test X$enableINDEXER = Xno ; then
|
||||
enableX11mon=no
|
||||
else
|
||||
if test X$withInotify = Xno -a X$withFam = Xno ; then
|
||||
enableX11mon=no
|
||||
fi
|
||||
fi
|
||||
|
||||
if test "$enableX11mon" = yes ; then
|
||||
AC_PATH_XTRA
|
||||
X_LIBX11=-lX11
|
||||
else
|
||||
@ -491,6 +472,17 @@ fi
|
||||
#echo X_CFLAGS "'$X_CFLAGS'" X_PRE_LIBS "'$X_PRE_LIBS'" X_LIBS \
|
||||
# "'$X_LIBS'" X_LIBX11 "'$X_LIBX11'" X_EXTRA_LIBS "'$X_EXTRA_LIBS'"
|
||||
|
||||
# Check if anything needs Xapian. We also need to build the shared lib if this is the case.
|
||||
xapian_needed=yes
|
||||
if test X$buildtestmains = Xno -a X$pythonEnabled = Xno -a X$enableINDEXER = Xno \
|
||||
-a X$enableXADUMP = Xno -a X$enableQT = Xno -a X$enableRECOLLQ = Xno ; then
|
||||
xapian_needed=no
|
||||
fi
|
||||
if test X$xapian_needed = Xyes; then
|
||||
PKG_CHECK_MODULES([XAPIAN], xapian-core, [], AC_MSG_ERROR([libxapian]))
|
||||
fi
|
||||
AM_CONDITIONAL([MAKE_RECOLL_LIB], [test X$xapian_needed = Xyes])
|
||||
|
||||
# For communicating the value of RECOLL_DATADIR to non-make-based
|
||||
# subpackages like python-recoll, we have to expand prefix in here, because
|
||||
# things like "datadir = ${prefix}/share" (which is what we'd get by
|
||||
@ -514,17 +506,17 @@ AC_SUBST(X_LIBX11)
|
||||
AC_SUBST(X_EXTRA_LIBS)
|
||||
AC_SUBST(INCICONV)
|
||||
AC_SUBST(LIBICONV)
|
||||
AC_SUBST(LIBXAPIAN)
|
||||
AC_SUBST(LIBXAPIANDIR)
|
||||
AC_SUBST(LIBXAPIANSTATICEXTRA)
|
||||
AC_SUBST(XAPIAN_LIBS)
|
||||
AC_SUBST(XAPIAN_CFLAGS)
|
||||
AC_SUBST(LIBFAM)
|
||||
AC_SUBST(QMAKE)
|
||||
AC_SUBST(QTGUI)
|
||||
AC_SUBST(XAPIANCXXFLAGS)
|
||||
AC_SUBST(QMAKE_ENABLE_WEBKIT)
|
||||
AC_SUBST(QMAKE_DISABLE_WEBKIT)
|
||||
AC_SUBST(QMAKE_ENABLE_WEBENGINE)
|
||||
AC_SUBST(QMAKE_DISABLE_WEBENGINE)
|
||||
AC_SUBST(QMAKE_ENABLE_GUIDEBUG)
|
||||
AC_SUBST(QMAKE_DISABLE_GUIDEBUG)
|
||||
AC_SUBST(QMAKE_ENABLE_ZEITGEIST)
|
||||
AC_SUBST(QMAKE_DISABLE_ZEITGEIST)
|
||||
AC_SUBST(LIBQZEITGEIST)
|
||||
@ -532,6 +524,8 @@ AC_SUBST(RCLVERSION)
|
||||
AC_SUBST(RCLLIBVERSION)
|
||||
AC_SUBST(XSLT_CFLAGS)
|
||||
AC_SUBST(XSLT_LIBS)
|
||||
AC_SUBST([SYSTEMD_SYSTEM_UNIT_DIR])
|
||||
AC_SUBST([SYSTEMD_USER_UNIT_DIR])
|
||||
|
||||
AC_CONFIG_FILES([Makefile python/recoll/setup.py
|
||||
python/pychm/setup.py])
|
||||
@ -539,5 +533,8 @@ AC_CONFIG_FILES([Makefile python/recoll/setup.py
|
||||
if test X$buildtestmains = Xyes ; then
|
||||
AC_CONFIG_FILES([testmains/Makefile])
|
||||
fi
|
||||
if test X$buildrclgrep = Xyes ; then
|
||||
AC_CONFIG_FILES([rclgrep/Makefile])
|
||||
fi
|
||||
|
||||
AC_OUTPUT
|
||||
|
||||
12
src/doc/man/rclgrep.1
Normal file
12
src/doc/man/rclgrep.1
Normal file
@ -0,0 +1,12 @@
|
||||
.TH RCLGREP 1 "20 September 2022"
|
||||
.SH NAME
|
||||
rclgrep \- grep-like program based on the recoll data extraction functions
|
||||
.SH SYNOPSIS
|
||||
.B rclgrep
|
||||
[
|
||||
.B \--config
|
||||
<configdir>
|
||||
]
|
||||
|
||||
.SH DESCRIPTION
|
||||
rclgrep is a grep-like, index-less search tool based on the recoll data extraction functions.
|
||||
@ -148,7 +148,7 @@ not set, the daemon uses skippedPaths.
|
||||
.TP
|
||||
.BI "zipUseSkippedNames = "bool
|
||||
Use skippedNames inside Zip archives. Fetched
|
||||
directly by the rclzip handler. Skip the patterns defined by skippedNames
|
||||
directly by the rclzip.py handler. Skip the patterns defined by skippedNames
|
||||
inside Zip archives. Can be redefined for subdirectories.
|
||||
See https://www.lesbonscomptes.com/recoll/faqsandhowtos/FilteringOutZipArchiveMembers.html
|
||||
|
||||
@ -195,7 +195,7 @@ lets you turn off md5 computation for selected types. It is global (no
|
||||
redefinition for subtrees). At the moment, it only has an effect for
|
||||
external handlers (exec and execm). The file types can be specified by
|
||||
listing either MIME types (e.g. audio/mpeg) or handler names
|
||||
(e.g. rclaudio).
|
||||
(e.g. rclaudio.py).
|
||||
.TP
|
||||
.BI "compressedfilemaxkbs = "int
|
||||
Size limit for compressed
|
||||
@ -613,8 +613,7 @@ location before copy, to allow path translation computations. For
|
||||
example if a dataset originally indexed as '/home/me/mydata/config' has
|
||||
been mounted to '/media/me/mydata', and the GUI is running from a copied
|
||||
configuration, orgidxconfdir would be '/home/me/mydata/config', and
|
||||
curidxconfdir (as set in the copied configuration) would be
|
||||
'/media/me/mydata/config'.
|
||||
curidxconfdir (as set in the copied configuration) would be '/media/me/mydata/config'.
|
||||
.TP
|
||||
.BI "idxrundir = "dfn
|
||||
Indexing process current directory. The input
|
||||
|
||||
@ -59,6 +59,10 @@ recollq \- command line / standard output Recoll query command.
|
||||
.B \-F
|
||||
<quoted space separated field name list>
|
||||
]
|
||||
[
|
||||
.B \--extract-to
|
||||
<file path>
|
||||
]
|
||||
<query string>
|
||||
|
||||
.B recollq \-P
|
||||
@ -120,9 +124,10 @@ sorts the results according to the specified field. Use
|
||||
for descending order.
|
||||
.PP
|
||||
.B \-n
|
||||
<cnt>
|
||||
<[first-]cnt>
|
||||
can be used to set the maximum number of results that should be
|
||||
printed. The default is 2000. Use a value of 0 for no limit.
|
||||
printed. The default is 2000. Use a value of 0 for no limit. If the argument is of the form
|
||||
first-cnt, it also defines the first result to output (from 0).
|
||||
.PP
|
||||
.B \-s
|
||||
<language>
|
||||
@ -144,6 +149,11 @@ base64 and separated by one space character. Empty fields are indicated by
|
||||
consecutive space characters. There is one additional space character at
|
||||
the end of each line.
|
||||
.PP
|
||||
.B \--extract-to
|
||||
<file path>
|
||||
Will extract the first result document of the query to the argument path, which must not exist. Use
|
||||
-n first-cnt to select the document.
|
||||
.PP
|
||||
.B recollq \-P
|
||||
(Period) will print the minimum and maximum modification years for
|
||||
documents in the index.
|
||||
|
||||
@ -13,6 +13,7 @@
|
||||
#XSLDIR="/opt/local/share/xsl/docbook-xsl/"
|
||||
#Linux
|
||||
XSLDIR="/usr/share/xml/docbook/stylesheet/docbook-xsl/"
|
||||
UTILBUILDS=/home/dockes/tmp/builds/medocutils/
|
||||
|
||||
|
||||
# Options common to the single-file and chunked versions
|
||||
@ -48,11 +49,10 @@ index.html: usermanual.xml recoll.conf.xml
|
||||
usermanual.pdf: usermanual.xml recoll.conf.xml
|
||||
dblatex --xslt-opts="--xinclude" -tpdf $<
|
||||
|
||||
UTILBUILDS=/home/dockes/tmp/builds/medocutils/
|
||||
recoll-conf-xml:
|
||||
$(UTILBUILDS)/confxml --docbook \
|
||||
recoll.conf.xml: ../../sampleconf/recoll.conf
|
||||
test -x $(UTILBUILDS)/confxml && $(UTILBUILDS)/confxml --docbook \
|
||||
--idprefix=RCL.INSTALL.CONFIG.RECOLLCONF \
|
||||
../../sampleconf/recoll.conf > recoll.conf.xml
|
||||
../../sampleconf/recoll.conf > recoll.conf.xml || touch recoll.conf.xml
|
||||
|
||||
# Generating a restructured text version, for uploading to readthedocs.
|
||||
# Does not really work, the internal links are botched. pandoc
|
||||
@ -65,7 +65,7 @@ recoll-conf-xml:
|
||||
# script.
|
||||
# Also could not get readthedocs to generate the left pane TOC? could
|
||||
# probably be fixed...
|
||||
#usermanual-rst: recoll-conf-xml
|
||||
#usermanual-rst: recoll.conf.xml
|
||||
# tail -n +2 recoll.conf.xml > rcl-conf-tail.xml
|
||||
# sed -e '/xi:include/r rcl-conf-tail.xml' \
|
||||
# < usermanual.xml > full-man.xml
|
||||
|
||||
@ -8,28 +8,34 @@
|
||||
<listitem><para>Space-separated list of files or
|
||||
directories to recursively index. Defaults to ~ (indexes
|
||||
$HOME). You can use symbolic links in the list, they will be followed,
|
||||
independently of the value of the followLinks variable.</para></listitem></varlistentry>
|
||||
independently of the value of the followLinks variable.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MONITORDIRS">
|
||||
<term><varname>monitordirs</varname></term>
|
||||
<listitem><para>Space-separated list of files or directories to monitor for
|
||||
updates. When running the real-time indexer, this allows monitoring only a
|
||||
subset of the whole indexed area. The elements must be included in the
|
||||
tree defined by the 'topdirs' members.</para></listitem></varlistentry>
|
||||
tree defined by the 'topdirs' members.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.SKIPPEDNAMES">
|
||||
<term><varname>skippedNames</varname></term>
|
||||
<listitem><para>Files and directories which should be ignored.
|
||||
White space separated list of wildcard patterns (simple ones, not paths,
|
||||
must contain no / ), which will be tested against file and directory
|
||||
names. The list in the default configuration does not exclude hidden
|
||||
directories (names beginning with a dot), which means that it may index
|
||||
quite a few things that you do not want. On the other hand, email user
|
||||
agents like Thunderbird usually store messages in hidden directories, and
|
||||
you probably want this indexed. One possible solution is to have ".*" in
|
||||
"skippedNames", and add things like "~/.thunderbird" "~/.evolution" to
|
||||
"topdirs". Not even the file names are indexed for patterns in this
|
||||
list, see the "noContentSuffixes" variable for an alternative approach
|
||||
which indexes the file names. Can be redefined for any
|
||||
subtree.</para></listitem></varlistentry>
|
||||
<listitem><para>Files and directories which should be ignored. White space separated list of wildcard patterns (simple ones, not paths, must contain no
|
||||
'/' characters), which will be tested against file and directory names.
|
||||
</para><para>
|
||||
Have a look at the default configuration for the initial value, some entries may not suit your
|
||||
situation. The easiest way to see it is through the GUI Index configuration "local parameters"
|
||||
panel.
|
||||
</para><para>
|
||||
The list in the default configuration does not exclude hidden directories (names beginning with a
|
||||
dot), which means that it may index quite a few things that you do not want. On the other hand,
|
||||
email user agents like Thunderbird usually store messages in hidden directories, and you probably
|
||||
want this indexed. One possible solution is to have ".*" in "skippedNames", and add things like
|
||||
"~/.thunderbird" "~/.evolution" to "topdirs".
|
||||
</para><para>
|
||||
Not even the file names are indexed for patterns in this list, see the "noContentSuffixes"
|
||||
variable for an alternative approach which indexes the file names. Can be redefined for any
|
||||
subtree.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.SKIPPEDNAMES-">
|
||||
<term><varname>skippedNames-</varname></term>
|
||||
<listitem><para>List of name endings to remove from the default skippedNames
|
||||
@ -42,7 +48,8 @@ list. </para></listitem></varlistentry>
|
||||
<term><varname>onlyNames</varname></term>
|
||||
<listitem><para>Regular file name filter patterns. If this is set, only the file names not in skippedNames and
|
||||
matching one of the patterns will be considered for indexing. Can be
|
||||
redefined per subtree. Does not apply to directories.</para></listitem></varlistentry>
|
||||
redefined per subtree. Does not apply to directories.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.NOCONTENTSUFFIXES">
|
||||
<term><varname>noContentSuffixes</varname></term>
|
||||
<listitem><para>List of name endings (not necessarily dot-separated suffixes) for
|
||||
@ -53,7 +60,8 @@ which will go away in a future release (the move from mimemap to
|
||||
recoll.conf allows editing the list through the GUI). This is different
|
||||
from skippedNames because these are name ending matches only (not
|
||||
wildcard patterns), and the file name itself gets indexed normally. This
|
||||
can be redefined for subdirectories.</para></listitem></varlistentry>
|
||||
can be redefined for subdirectories.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.NOCONTENTSUFFIXES-">
|
||||
<term><varname>noContentSuffixes-</varname></term>
|
||||
<listitem><para>List of name endings to remove from the default noContentSuffixes
|
||||
@ -64,19 +72,26 @@ list. </para></listitem></varlistentry>
|
||||
list. </para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.SKIPPEDPATHS">
|
||||
<term><varname>skippedPaths</varname></term>
|
||||
<listitem><para>Absolute paths we should not go into. Space-separated list of wildcard expressions for absolute
|
||||
filesystem paths. Must be defined at the top level of the configuration
|
||||
file, not in a subsection. Can contain files and directories. The database and
|
||||
configuration directories will automatically be added. The expressions
|
||||
are matched using 'fnmatch(3)' with the FNM_PATHNAME flag set by
|
||||
default. This means that '/' characters must be matched explicitly. You
|
||||
can set 'skippedPathsFnmPathname' to 0 to disable the use of FNM_PATHNAME
|
||||
(meaning that '/*/dir3' will match '/dir1/dir2/dir3'). The default value
|
||||
contains the usual mount point for removable media to remind you that it
|
||||
is a bad idea to have Recoll work on these (esp. with the monitor: media
|
||||
gets indexed on mount, all data gets erased on unmount). Explicitly
|
||||
adding '/media/xxx' to the 'topdirs' variable will override
|
||||
this.</para></listitem></varlistentry>
|
||||
<listitem><para>Absolute paths we should not go into. Space-separated list of wildcard expressions for absolute filesystem paths (for files or
|
||||
directories). The variable must be defined at the top level of the configuration file, not in a
|
||||
subsection.
|
||||
</para><para>
|
||||
Any value in the list must be textually consistent with the values in topdirs; no attempts are
|
||||
made to resolve symbolic links. In practice, if, as is frequently the case, /home is a link to
|
||||
/usr/home, your default topdirs will have a single entry '~' which will be translated to
|
||||
'/home/yourlogin'. In this case, any skippedPaths entry should start with '/home/yourlogin' *not*
|
||||
with '/usr/home/yourlogin'.
|
||||
</para><para>
|
||||
The index and configuration directories will automatically be added to the list.
|
||||
</para><para>
|
||||
The expressions are matched using 'fnmatch(3)' with the FNM_PATHNAME flag set by default. This
|
||||
means that '/' characters must be matched explicitly. You can set 'skippedPathsFnmPathname' to 0
|
||||
to disable the use of FNM_PATHNAME (meaning that '/*/dir3' will match '/dir1/dir2/dir3').
|
||||
</para><para>
|
||||
The default value contains the usual mount point for removable media to remind you that it is in
|
||||
most cases a bad idea to have Recoll work on these. Explicitly adding '/media/xxx' to the 'topdirs'
|
||||
variable will override this.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.SKIPPEDPATHSFNMPATHNAME">
|
||||
<term><varname>skippedPathsFnmPathname</varname></term>
|
||||
<listitem><para>Set to 0 to
|
||||
@ -85,17 +100,19 @@ paths. </para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.NOWALKFN">
|
||||
<term><varname>nowalkfn</varname></term>
|
||||
<listitem><para>File name which will cause its parent directory to be skipped. Any directory containing a file with this name will be skipped as
|
||||
if it was part of the skippedPaths list. Ex: .recoll-noindex</para></listitem></varlistentry>
|
||||
if it was part of the skippedPaths list. Ex: .recoll-noindex
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.DAEMSKIPPEDPATHS">
|
||||
<term><varname>daemSkippedPaths</varname></term>
|
||||
<listitem><para>skippedPaths equivalent specific to
|
||||
real time indexing. This enables having parts of the tree
|
||||
which are initially indexed but not monitored. If daemSkippedPaths is
|
||||
not set, the daemon uses skippedPaths.</para></listitem></varlistentry>
|
||||
not set, the daemon uses skippedPaths.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.ZIPUSESKIPPEDNAMES">
|
||||
<term><varname>zipUseSkippedNames</varname></term>
|
||||
<listitem><para>Use skippedNames inside Zip archives. Fetched
|
||||
directly by the rclzip handler. Skip the patterns defined by skippedNames
|
||||
directly by the rclzip.py handler. Skip the patterns defined by skippedNames
|
||||
inside Zip archives. Can be redefined for subdirectories.
|
||||
See https://www.lesbonscomptes.com/recoll/faqsandhowtos/FilteringOutZipArchiveMembers.html
|
||||
</para></listitem></varlistentry>
|
||||
@ -117,7 +134,8 @@ multiple indexing of linked files. No effort is made to avoid duplication
|
||||
when this option is set to true. This option can be set individually for
|
||||
each of the 'topdirs' members by using sections. It can not be changed
|
||||
below the 'topdirs' level. Links in the 'topdirs' list itself are always
|
||||
followed.</para></listitem></varlistentry>
|
||||
followed.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.INDEXEDMIMETYPES">
|
||||
<term><varname>indexedmimetypes</varname></term>
|
||||
<listitem><para>Restrictive list of
|
||||
@ -126,14 +144,16 @@ supported types are indexed). If it is set, only the types from the list
|
||||
will have their contents indexed. The names will be indexed anyway if
|
||||
indexallfilenames is set (default). MIME type names should be taken from
|
||||
the mimemap file (the values may be different from xdg-mime or file -i
|
||||
output in some cases). Can be redefined for subtrees.</para></listitem></varlistentry>
|
||||
output in some cases). Can be redefined for subtrees.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.EXCLUDEDMIMETYPES">
|
||||
<term><varname>excludedmimetypes</varname></term>
|
||||
<listitem><para>List of excluded MIME
|
||||
types. Lets you exclude some types from indexing. MIME type
|
||||
names should be taken from the mimemap file (the values may be different
|
||||
from xdg-mime or file -i output in some cases). Can be redefined for
|
||||
subtrees.</para></listitem></varlistentry>
|
||||
subtrees.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.NOMD5TYPES">
|
||||
<term><varname>nomd5types</varname></term>
|
||||
<listitem><para>Don't compute md5 for these types. md5 checksums are used only for deduplicating results, and can be
|
||||
@ -142,32 +162,43 @@ lets you turn off md5 computation for selected types. It is global (no
|
||||
redefinition for subtrees). At the moment, it only has an effect for
|
||||
external handlers (exec and execm). The file types can be specified by
|
||||
listing either MIME types (e.g. audio/mpeg) or handler names
|
||||
(e.g. rclaudio).</para></listitem></varlistentry>
|
||||
(e.g. rclaudio.py).
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.COMPRESSEDFILEMAXKBS">
|
||||
<term><varname>compressedfilemaxkbs</varname></term>
|
||||
<listitem><para>Size limit for compressed
|
||||
files. We need to decompress these in a
|
||||
temporary directory for identification, which can be wasteful in some
|
||||
cases. Limit the waste. Negative means no limit. 0 results in no
|
||||
processing of any compressed file. Default 50 MB.</para></listitem></varlistentry>
|
||||
processing of any compressed file. Default 100 MB.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.TEXTFILEMAXMBS">
|
||||
<term><varname>textfilemaxmbs</varname></term>
|
||||
<listitem><para>Size limit for text
|
||||
files. Mostly for skipping monster
|
||||
logs. Default 20 MB.</para></listitem></varlistentry>
|
||||
<listitem><para>Size limit for text files. Mostly for skipping monster logs. Default 20 MB. Use a value of -1 to
|
||||
disable.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.TEXTUNKNOWNASPLAIN">
|
||||
<term><varname>textunknownasplain</varname></term>
|
||||
<listitem><para>Process unknown text/xxx files as text/plain. Allows indexing misc. text files identified as text/whatever by 'file' or 'xdg-mime'
|
||||
without having to explicitly set config entries for them. This works fine for indexing (but will
|
||||
cause processing of a lot of garbage though), but the documents indexed this way will be opened by
|
||||
the desktop viewer, even if text/plain has a specific editor.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.INDEXALLFILENAMES">
|
||||
<term><varname>indexallfilenames</varname></term>
|
||||
<listitem><para>Index the file names of
|
||||
unprocessed files. Index the names of files the contents of
|
||||
which we don't index because of an excluded or unsupported MIME
|
||||
type.</para></listitem></varlistentry>
|
||||
type.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.USESYSTEMFILECOMMAND">
|
||||
<term><varname>usesystemfilecommand</varname></term>
|
||||
<listitem><para>Use a system command
|
||||
for file MIME type guessing as a final step in file type
|
||||
identification. This is generally useful, but will usually
|
||||
cause the indexing of many bogus 'text' files. See 'systemfilecommand'
|
||||
for the command used.</para></listitem></varlistentry>
|
||||
for the command used.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.SYSTEMFILECOMMAND">
|
||||
<term><varname>systemfilecommand</varname></term>
|
||||
<listitem><para>Command used to guess
|
||||
@ -175,12 +206,14 @@ MIME types if the internal methods fails This should be a
|
||||
"file -i" workalike. The file path will be added as a last parameter to
|
||||
the command line. "xdg-mime" works better than the traditional "file"
|
||||
command, and is now the configured default (with a hard-coded fallback to
|
||||
"file")</para></listitem></varlistentry>
|
||||
"file")
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.PROCESSWEBQUEUE">
|
||||
<term><varname>processwebqueue</varname></term>
|
||||
<listitem><para>Decide if we process the
|
||||
Web queue. The queue is a directory where the Recoll Web
|
||||
browser plugins create the copies of visited pages.</para></listitem></varlistentry>
|
||||
browser plugins create the copies of visited pages.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.TEXTFILEPAGEKBS">
|
||||
<term><varname>textfilepagekbs</varname></term>
|
||||
<listitem><para>Page size for text
|
||||
@ -189,12 +222,14 @@ into documents of approximately this size. Will reduce memory usage at
|
||||
index time and help with loading data in the preview window at query
|
||||
time. Particularly useful with very big files, such as application or
|
||||
system logs. Also see textfilemaxmbs and
|
||||
compressedfilemaxkbs.</para></listitem></varlistentry>
|
||||
compressedfilemaxkbs.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MEMBERMAXKBS">
|
||||
<term><varname>membermaxkbs</varname></term>
|
||||
<listitem><para>Size limit for archive
|
||||
members. This is passed to the filters in the environment
|
||||
as RECOLL_FILTER_MAXMEMBERKB.</para></listitem></varlistentry>
|
||||
as RECOLL_FILTER_MAXMEMBERKB.
|
||||
</para></listitem></varlistentry>
|
||||
</variablelist></sect3>
|
||||
<sect3 id="RCL.INSTALL.CONFIG.RECOLLCONF.TERMS">
|
||||
<title>Parameters affecting how we generate terms and organize the index </title><variablelist>
|
||||
@ -206,28 +241,34 @@ searches sensitive to case and diacritics can be performed, but the index
|
||||
will be bigger, and some marginal weirdness may sometimes occur. The
|
||||
default is a stripped index. When using multiple indexes for a search,
|
||||
this parameter must be defined identically for all. Changing the value
|
||||
implies an index reset.</para></listitem></varlistentry>
|
||||
implies an index reset.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.INDEXSTOREDOCTEXT">
|
||||
<term><varname>indexStoreDocText</varname></term>
|
||||
<listitem><para>Decide if we store the
|
||||
documents' text content in the index. Storing the text
|
||||
allows extracting snippets from it at query time, instead of building
|
||||
them from index position data.
|
||||
</para><para>
|
||||
Newer Xapian index formats have rendered our use of positions list
|
||||
unacceptably slow in some cases. The last Xapian index format with good
|
||||
performance for the old method is Chert, which is default for 1.2, still
|
||||
supported but not default in 1.4 and will be dropped in 1.6.
|
||||
</para><para>
|
||||
The stored document text is translated from its original format to UTF-8
|
||||
plain text, but not stripped of upper-case, diacritics, or punctuation
|
||||
signs. Storing it increases the index size by 10-20% typically, but also
|
||||
allows for nicer snippets, so it may be worth enabling it even if not
|
||||
strictly needed for performance if you can afford the space.
|
||||
</para><para>
|
||||
The variable only has an effect when creating an index, meaning that the
|
||||
xapiandb directory must not exist yet. Its exact effect depends on the
|
||||
Xapian version.
|
||||
</para><para>
|
||||
For Xapian 1.4, if the variable is set to 0, the Chert format will be
|
||||
used, and the text will not be stored. If the variable is 1, Glass will
|
||||
be used, and the text stored.
|
||||
</para><para>
|
||||
For Xapian 1.2, and for versions 1.5 and newer, the index format is
|
||||
always the default, but the variable controls if the text is stored or
|
||||
not, and the abstract generation method. With Xapian 1.5 and later, and
|
||||
@ -244,26 +285,31 @@ still be). Numbers are often quite interesting to search for, and this
|
||||
should probably not be set except for special situations, ie, scientific
|
||||
documents with huge amounts of numbers in them, where setting nonumbers
|
||||
will reduce the index size. This can only be set for a whole index, not
|
||||
for a subtree.</para></listitem></varlistentry>
|
||||
for a subtree.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.DEHYPHENATE">
|
||||
<term><varname>dehyphenate</varname></term>
|
||||
<listitem><para>Determines if we index 'coworker'
|
||||
also when the input is 'co-worker'. This is new
|
||||
in version 1.22, and on by default. Setting the variable to off allows
|
||||
restoring the previous behaviour.</para></listitem></varlistentry>
|
||||
restoring the previous behaviour.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.BACKSLASHASLETTER">
|
||||
<term><varname>backslashasletter</varname></term>
|
||||
<listitem><para>Process backslash as normal letter. This may make sense for people wanting to index TeX commands as
|
||||
such but is not of much general use.</para></listitem></varlistentry>
|
||||
such but is not of much general use.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.UNDERSCOREASLETTER">
|
||||
<term><varname>underscoreasletter</varname></term>
|
||||
<listitem><para>Process underscore as normal letter. This makes sense in so many cases that one wonders if it should
|
||||
not be the default.</para></listitem></varlistentry>
|
||||
not be the default.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MAXTERMLENGTH">
|
||||
<term><varname>maxtermlength</varname></term>
|
||||
<listitem><para>Maximum term length. Words longer than this will be discarded.
|
||||
The default is 40 and used to be hard-coded, but it can now be
|
||||
adjusted. You need an index reset if you change the value.</para></listitem></varlistentry>
|
||||
adjusted. You need an index reset if you change the value.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.NOCJK">
|
||||
<term><varname>nocjk</varname></term>
|
||||
<listitem><para>Decides if specific East Asian
|
||||
@ -271,20 +317,23 @@ adjusted. You need an index reset if you change the value.</para></listitem></va
|
||||
off. This will save a small amount of CPU if you have no CJK
|
||||
documents. If your document base does include such text but you are not
|
||||
interested in searching it, setting nocjk may be a
|
||||
significant time and space saver.</para></listitem></varlistentry>
|
||||
significant time and space saver.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.CJKNGRAMLEN">
|
||||
<term><varname>cjkngramlen</varname></term>
|
||||
<listitem><para>This lets you adjust the size of
|
||||
n-grams used for indexing CJK text. The default value of 2 is
|
||||
probably appropriate in most cases. A value of 3 would allow more precision
|
||||
and efficiency on longer words, but the index will be approximately twice
|
||||
as large.</para></listitem></varlistentry>
|
||||
as large.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.INDEXSTEMMINGLANGUAGES">
|
||||
<term><varname>indexstemminglanguages</varname></term>
|
||||
<listitem><para>Languages for which to create stemming expansion
|
||||
data. Stemmer names can be found by executing 'recollindex
|
||||
-l', or this can also be set from a list in the GUI. The values are full
|
||||
language names, e.g. english, french...</para></listitem></varlistentry>
|
||||
language names, e.g. english, french...
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.DEFAULTCHARSET">
|
||||
<term><varname>defaultcharset</varname></term>
|
||||
<listitem><para>Default character
|
||||
@ -295,37 +344,39 @@ set, the default character set is the one defined by the NLS environment
|
||||
($LC_ALL, $LC_CTYPE, $LANG), or ultimately iso-8859-1 (cp-1252 in fact).
|
||||
If for some reason you want a general default which does not match your
|
||||
LANG and is not 8859-1, use this variable. This can be redefined for any
|
||||
sub-directory.</para></listitem></varlistentry>
|
||||
sub-directory.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.UNAC_EXCEPT_TRANS">
|
||||
<term><varname>unac_except_trans</varname></term>
|
||||
<listitem><para>A list of characters,
|
||||
encoded in UTF-8, which should be handled specially
|
||||
when converting text to unaccented lowercase. For
|
||||
example, in Swedish, the letter a with diaeresis has full alphabet
|
||||
citizenship and should not be turned into an a.
|
||||
Each element in the space-separated list has the special character as
|
||||
first element and the translation following. The handling of both the
|
||||
lowercase and upper-case versions of a character should be specified, as
|
||||
membership in the list will turn off both standard accent and case
|
||||
processing. The value is global and affects both indexing and querying.
|
||||
<listitem><para>A list of characters, encoded in UTF-8, which should be handled specially when converting
|
||||
text to unaccented lowercase. For example, in Swedish, the letter a with diaeresis has full alphabet citizenship and
|
||||
should not be turned into an a. Each element in the space-separated list has the special
|
||||
character as first element and the translation following. The handling of both the lowercase and
|
||||
upper-case versions of a character should be specified, as membership in the list will turn off
|
||||
both standard accent and case processing. The value is global and affects both indexing and
|
||||
querying. We also convert a few confusing Unicode characters (quotes, hyphen) to their ASCII
|
||||
equivalent to avoid "invisible" search failures.
|
||||
</para><para>
|
||||
Examples:
|
||||
Swedish:
|
||||
unac_except_trans = ää Ää öö Öö üü Üü ßss œoe Œoe æae Æae ffff fifi flfl åå Åå
|
||||
unac_except_trans = ää Ää öö Öö üü Üü ßss œoe Œoe æae Æae ffff fifi flfl åå Åå ’' ❜' ʼ' ‐-
|
||||
. German:
|
||||
unac_except_trans = ää Ää öö Öö üü Üü ßss œoe Œoe æae Æae ffff fifi flfl
|
||||
In French, you probably want to decompose oe and ae and nobody would type
|
||||
unac_except_trans = ää Ää öö Öö üü Üü ßss œoe Œoe æae Æae ffff fifi flfl ’' ❜' ʼ' ‐-
|
||||
. French: you probably want to decompose oe and ae and nobody would type
|
||||
a German ß
|
||||
unac_except_trans = ßss œoe Œoe æae Æae ffff fifi flfl
|
||||
unac_except_trans = ßss œoe Œoe æae Æae ffff fifi flfl ’' ❜' ʼ' ‐-
|
||||
. The default for all until someone protests follows. These decompositions
|
||||
are not performed by unac, but it is unlikely that someone would type the
|
||||
composed forms in a search.
|
||||
unac_except_trans = ßss œoe Œoe æae Æae ffff fifi flfl</para></listitem></varlistentry>
|
||||
unac_except_trans = ßss œoe Œoe æae Æae ffff fifi flfl ’' ❜' ʼ' ‐-
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MAILDEFCHARSET">
|
||||
<term><varname>maildefcharset</varname></term>
|
||||
<listitem><para>Overrides the default
|
||||
character set for email messages which don't specify
|
||||
one. This is mainly useful for readpst (libpst) dumps,
|
||||
which are utf-8 but do not say so.</para></listitem></varlistentry>
|
||||
which are utf-8 but do not say so.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.LOCALFIELDS">
|
||||
<term><varname>localfields</varname></term>
|
||||
<listitem><para>Set fields on all files
|
||||
@ -333,7 +384,8 @@ which are utf-8 but do not say so.</para></listitem></varlistentry>
|
||||
name = value ; attr1 = val1 ; [...]
|
||||
value is empty so this needs an initial semi-colon. This is useful, e.g.,
|
||||
for setting the rclaptg field for application selection inside
|
||||
mimeview.</para></listitem></varlistentry>
|
||||
mimeview.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.TESTMODIFUSEMTIME">
|
||||
<term><varname>testmodifusemtime</varname></term>
|
||||
<listitem><para>Use mtime instead of
|
||||
@ -355,12 +407,12 @@ undetected). Perform a full index reset after changing this.
|
||||
<term><varname>noxattrfields</varname></term>
|
||||
<listitem><para>Disable extended attributes
|
||||
conversion to metadata fields. This probably needs to be
|
||||
set if testmodifusemtime is set.</para></listitem></varlistentry>
|
||||
set if testmodifusemtime is set.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.METADATACMDS">
|
||||
<term><varname>metadatacmds</varname></term>
|
||||
<listitem><para>Define commands to
|
||||
gather external metadata, e.g. tmsu tags.
|
||||
There can be several entries, separated by semi-colons, each defining
|
||||
gather external metadata, e.g. tmsu tags. There can be several entries, separated by semi-colons, each defining
|
||||
which field name the data goes into and the command to use. Don't forget the
|
||||
initial semi-colon. All the field names must be different. You can use
|
||||
aliases in the "field" file if necessary.
|
||||
@ -385,13 +437,15 @@ cachedir is ~/.cache/recoll, the default dbdir would be
|
||||
mboxcachedir, aspellDicDir, which can still be individually specified to
|
||||
override cachedir. Note that if you have multiple configurations, each
|
||||
must have a different cachedir, there is no automatic computation of a
|
||||
subpath under cachedir.</para></listitem></varlistentry>
|
||||
subpath under cachedir.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MAXFSOCCUPPC">
|
||||
<term><varname>maxfsoccuppc</varname></term>
|
||||
<listitem><para>Maximum file system occupation
|
||||
over which we stop indexing. The value is a percentage,
|
||||
corresponding to what the "Capacity" df output column shows. The default
|
||||
value is 0, meaning no checking.</para></listitem></varlistentry>
|
||||
value is 0, meaning no checking.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.DBDIR">
|
||||
<term><varname>dbdir</varname></term>
|
||||
<listitem><para>Xapian database directory
|
||||
@ -399,36 +453,43 @@ location. This will be created on first indexing. If the
|
||||
value is not an absolute path, it will be interpreted as relative to
|
||||
cachedir if set, or the configuration directory (-c argument or
|
||||
$RECOLL_CONFDIR). If nothing is specified, the default is then
|
||||
~/.recoll/xapiandb/</para></listitem></varlistentry>
|
||||
~/.recoll/xapiandb/
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.IDXSTATUSFILE">
|
||||
<term><varname>idxstatusfile</varname></term>
|
||||
<listitem><para>Name of the scratch file where the indexer process updates its
|
||||
status. Default: idxstatus.txt inside the configuration
|
||||
directory.</para></listitem></varlistentry>
|
||||
directory.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MBOXCACHEDIR">
|
||||
<term><varname>mboxcachedir</varname></term>
|
||||
<listitem><para>Directory location for storing mbox message offsets cache
|
||||
files. This is normally 'mboxcache' under cachedir if set,
|
||||
or else under the configuration directory, but it may be useful to share
|
||||
a directory between different configurations.</para></listitem></varlistentry>
|
||||
a directory between different configurations.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MBOXCACHEMINMBS">
|
||||
<term><varname>mboxcacheminmbs</varname></term>
|
||||
<listitem><para>Minimum mbox file size over which we cache the offsets. There is really no sense in caching offsets for small files. The
|
||||
default is 5 MB.</para></listitem></varlistentry>
|
||||
default is 5 MB.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MBOXMAXMSGMBS">
|
||||
<term><varname>mboxmaxmsgmbs</varname></term>
|
||||
<listitem><para>Maximum mbox member message size in megabytes. Size over which we assume that the mbox format is bad or we
|
||||
misinterpreted it, at which point we just stop processing the file.</para></listitem></varlistentry>
|
||||
misinterpreted it, at which point we just stop processing the file.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.WEBCACHEDIR">
|
||||
<term><varname>webcachedir</varname></term>
|
||||
<listitem><para>Directory where we store the archived web pages. This is only used by the web history indexing code.
|
||||
Default: cachedir/webcache if cachedir is set, else
|
||||
$RECOLL_CONFDIR/webcache</para></listitem></varlistentry>
|
||||
$RECOLL_CONFDIR/webcache
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.WEBCACHEMAXMBS">
|
||||
<term><varname>webcachemaxmbs</varname></term>
|
||||
<listitem><para>Maximum size in MB of the Web archive. This is only used by the web history indexing code.
|
||||
Default: 40 MB.
|
||||
Reducing the size will not physically truncate the file.</para></listitem></varlistentry>
|
||||
Reducing the size will not physically truncate the file.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.WEBQUEUEDIR">
|
||||
<term><varname>webqueuedir</varname></term>
|
||||
<listitem><para>The path to the Web indexing queue. This used to be
|
||||
@ -436,29 +497,42 @@ hard-coded in the old plugin as ~/.recollweb/ToIndex so there would be no
|
||||
need or possibility to change it, but the WebExtensions plugin now downloads
|
||||
the files to the user Downloads directory, and a script moves them to
|
||||
webqueuedir. The script reads this value from the config so it has become
|
||||
possible to change it.</para></listitem></varlistentry>
|
||||
possible to change it.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.WEBDOWNLOADSDIR">
|
||||
<term><varname>webdownloadsdir</varname></term>
|
||||
<listitem><para>The path to browser downloads directory. This is
|
||||
where the new browser add-on extension has to create the files. They are
|
||||
then moved by a script to webqueuedir.</para></listitem></varlistentry>
|
||||
then moved by a script to webqueuedir.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.WEBCACHEKEEPINTERVAL">
|
||||
<term><varname>webcachekeepinterval</varname></term>
|
||||
<listitem><para>Page recycle interval. By default, only one instance of a URL is kept in the cache. This
|
||||
can be changed by setting this to a value determining at what frequency
|
||||
we keep multiple instances ('day', 'week', 'month',
|
||||
'year'). Note that increasing the interval will not erase existing
|
||||
entries.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.ASPELLDICDIR">
|
||||
<term><varname>aspellDicDir</varname></term>
|
||||
<listitem><para>Aspell dictionary storage directory location. The
|
||||
aspell dictionary (aspdict.(lang).rws) is normally stored in the
|
||||
directory specified by cachedir if set, or under the configuration
|
||||
directory.</para></listitem></varlistentry>
|
||||
directory.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.FILTERSDIR">
|
||||
<term><varname>filtersdir</varname></term>
|
||||
<listitem><para>Directory location for executable input handlers. If
|
||||
RECOLL_FILTERSDIR is set in the environment, we use it instead. Defaults
|
||||
to $prefix/share/recoll/filters. Can be redefined for
|
||||
subdirectories.</para></listitem></varlistentry>
|
||||
subdirectories.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.ICONSDIR">
|
||||
<term><varname>iconsdir</varname></term>
|
||||
<listitem><para>Directory location for icons. The only reason to
|
||||
change this would be if you want to change the icons displayed in the
|
||||
result list. Defaults to $prefix/share/recoll/images</para></listitem></varlistentry>
|
||||
result list. Defaults to $prefix/share/recoll/images
|
||||
</para></listitem></varlistentry>
|
||||
</variablelist></sect3>
|
||||
<sect3 id="RCL.INSTALL.CONFIG.RECOLLCONF.PERFS">
|
||||
<title>Parameters affecting indexing performance and resource usage </title><variablelist>
|
||||
@ -476,20 +550,24 @@ value (from this file) is now 50 MB, and should be ok in many cases.
|
||||
You can set it as low as 10 to conserve memory, but if you are looking
|
||||
for maximum speed, you may want to experiment with values between 20 and
|
||||
200. In my experience, values beyond this are always counterproductive. If
|
||||
you find otherwise, please drop me a note.</para></listitem></varlistentry>
|
||||
you find otherwise, please drop me a note.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.FILTERMAXSECONDS">
|
||||
<term><varname>filtermaxseconds</varname></term>
|
||||
<listitem><para>Maximum external filter execution time in
|
||||
seconds. Default 1200 (20mn). Set to 0 for no limit. This
|
||||
is mainly to avoid infinite loops in postscript files
|
||||
(loop.ps)</para></listitem></varlistentry>
|
||||
(loop.ps)
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.FILTERMAXMBYTES">
|
||||
<term><varname>filtermaxmbytes</varname></term>
|
||||
<listitem><para>Maximum virtual memory space for filter processes
|
||||
(setrlimit(RLIMIT_AS)), in megabytes. Note that this
|
||||
includes any mapped libs (there is no reliable Linux way to limit the
|
||||
data space only), so we need to be a bit generous here. Anything over
|
||||
2000 will be ignored on 32-bit machines.</para></listitem></varlistentry>
|
||||
(setrlimit(RLIMIT_AS)), in megabytes. Note that this includes any mapped libs (there is no reliable
|
||||
Linux way to limit the data space only), so we need to be a bit generous
|
||||
here. Anything over 2000 will be ignored on 32-bit machines. The
|
||||
previous default value of 2000 would prevent java pdftk from working when
|
||||
executed from Python rclpdf.py.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.THRQSIZES">
|
||||
<term><varname>thrQSizes</varname></term>
|
||||
<listitem><para>Stage input queues configuration. There are three
|
||||
@ -501,7 +579,8 @@ next stage. In practise, deep queues have not been shown to increase
|
||||
performance. Default: a value of 0 for the first queue tells Recoll to
|
||||
perform autoconfiguration based on the detected number of CPUs (no need
|
||||
for the two other values in this case). Use thrQSizes = -1 -1 -1 to
|
||||
disable multithreading entirely.</para></listitem></varlistentry>
|
||||
disable multithreading entirely.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.THRTCOUNTS">
|
||||
<term><varname>thrTCounts</varname></term>
|
||||
<listitem><para>Number of threads used for each indexing stage. The
|
||||
@ -511,7 +590,8 @@ in thrQSizes: if the first queue depth is 0, all counts are ignored
|
||||
(autoconfigured); if a value of -1 is used for a queue depth, the
|
||||
corresponding thread count is ignored. It makes no sense to use a value
|
||||
other than 1 for the last stage because updating the Xapian index is
|
||||
necessarily single-threaded (and protected by a mutex).</para></listitem></varlistentry>
|
||||
necessarily single-threaded (and protected by a mutex).
|
||||
</para></listitem></varlistentry>
|
||||
</variablelist></sect3>
|
||||
<sect3 id="RCL.INSTALL.CONFIG.RECOLLCONF.MISC">
|
||||
<title>Miscellaneous parameters </title><variablelist>
|
||||
@ -519,7 +599,8 @@ necessarily single-threaded (and protected by a mutex).</para></listitem></varli
|
||||
<term><varname>loglevel</varname></term>
|
||||
<listitem><para>Log file verbosity 1-6. A value of 2 will print
|
||||
only errors and warnings. 3 will print information like document updates,
|
||||
4 is quite verbose and 6 very verbose.</para></listitem></varlistentry>
|
||||
4 is quite verbose and 6 very verbose.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.LOGFILENAME">
|
||||
<term><varname>logfilename</varname></term>
|
||||
<listitem><para>Log file destination. Use 'stderr' (default) to write to the
|
||||
@ -530,16 +611,25 @@ console. </para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.IDXLOGFILENAME">
|
||||
<term><varname>idxlogfilename</varname></term>
|
||||
<listitem><para>Override logfilename for the indexer. </para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.HELPERLOGFILENAME">
|
||||
<term><varname>helperlogfilename</varname></term>
|
||||
<listitem><para>Destination file for external helpers standard error output. The external program error output is left alone by default,
|
||||
e.g. going to the terminal when the recoll[index] program is executed
|
||||
from the command line. Use /dev/null or a file inside a non-existent
|
||||
directory to completely suppress the output.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.DAEMLOGLEVEL">
|
||||
<term><varname>daemloglevel</varname></term>
|
||||
<listitem><para>Override loglevel for the indexer in real time
|
||||
mode. The default is to use the idx... values if set, else
|
||||
the log... values.</para></listitem></varlistentry>
|
||||
the log... values.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.DAEMLOGFILENAME">
|
||||
<term><varname>daemlogfilename</varname></term>
|
||||
<listitem><para>Override logfilename for the indexer in real time
|
||||
mode. The default is to use the idx... values if set, else
|
||||
the log... values.</para></listitem></varlistentry>
|
||||
the log... values.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.PYLOGLEVEL">
|
||||
<term><varname>pyloglevel</varname></term>
|
||||
<listitem><para>Override loglevel for the python module. </para></listitem></varlistentry>
|
||||
@ -552,7 +642,8 @@ the log... values.</para></listitem></varlistentry>
|
||||
configuration directory inside the directory tree makes it possible to
|
||||
provide automatic query time path translations once the data set has
|
||||
moved (for example, because it has been mounted on another
|
||||
location).</para></listitem></varlistentry>
|
||||
location).
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.CURIDXCONFDIR">
|
||||
<term><varname>curidxconfdir</varname></term>
|
||||
<listitem><para>Current location of the configuration directory. Complement orgidxconfdir for movable datasets. This should be used
|
||||
@ -564,7 +655,8 @@ example if a dataset originally indexed as '/home/me/mydata/config' has
|
||||
been mounted to '/media/me/mydata', and the GUI is running from a copied
|
||||
configuration, orgidxconfdir would be '/home/me/mydata/config', and
|
||||
curidxconfdir (as set in the copied configuration) would be
|
||||
'/media/me/mydata/config'.</para></listitem></varlistentry>
|
||||
'/media/me/mydata/config'.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.IDXRUNDIR">
|
||||
<term><varname>idxrundir</varname></term>
|
||||
<listitem><para>Indexing process current directory. The input
|
||||
@ -573,17 +665,22 @@ makes sense to have recollindex chdir to some temporary directory. If the
|
||||
value is empty, the current directory is not changed. If the
|
||||
value is (literal) tmp, we use the temporary directory as set by the
|
||||
environment (RECOLL_TMPDIR else TMPDIR else /tmp). If the value is an
|
||||
absolute path to a directory, we go there.</para></listitem></varlistentry>
|
||||
absolute path to a directory, we go there.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.CHECKNEEDRETRYINDEXSCRIPT">
|
||||
<term><varname>checkneedretryindexscript</varname></term>
|
||||
<listitem><para>Script used to heuristically check if we need to retry indexing
|
||||
files which previously failed. The default script checks
|
||||
the modified dates on /usr/bin and /usr/local/bin. A relative path will
|
||||
be looked up in the filters dirs, then in the path. Use an absolute path
|
||||
to do otherwise.</para></listitem></varlistentry>
|
||||
to do otherwise.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.RECOLLHELPERPATH">
|
||||
<term><varname>recollhelperpath</varname></term>
|
||||
<listitem><para>Additional places to search for helper executables. This is only used on Windows for now.</para></listitem></varlistentry>
|
||||
<listitem><para>Additional places to search for helper executables. This is used, e.g., on Windows by the Python code, and on Mac OS by the bundled recoll.app
|
||||
(because I could find no reliable way to tell launchd to set the PATH). The example below is for
|
||||
Windows. Use ':' as entry separator for Mac and Ux-like systems, ';' is for Windows only.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.IDXABSMLEN">
|
||||
<term><varname>idxabsmlen</varname></term>
|
||||
<listitem><para>Length of abstracts we store while indexing. Recoll stores an abstract for each indexed file.
|
||||
@ -595,57 +692,72 @@ defines the size of the stored abstract. The default value is 250
|
||||
bytes. The search interface gives you the choice to display this stored
|
||||
text or a synthetic abstract built by extracting text around the search
|
||||
terms. If you always prefer the synthetic abstract, you can reduce this
|
||||
value and save a little space.</para></listitem></varlistentry>
|
||||
value and save a little space.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.IDXMETASTOREDLEN">
|
||||
<term><varname>idxmetastoredlen</varname></term>
|
||||
<listitem><para>Truncation length of stored metadata fields. This
|
||||
does not affect indexing (the whole field is processed anyway), just the
|
||||
amount of data stored in the index for the purpose of displaying fields
|
||||
inside result lists or previews. The default value is 150 bytes which
|
||||
may be too low if you have custom fields.</para></listitem></varlistentry>
|
||||
may be too low if you have custom fields.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.IDXTEXTTRUNCATELEN">
|
||||
<term><varname>idxtexttruncatelen</varname></term>
|
||||
<listitem><para>Truncation length for all document texts. Only index
|
||||
the beginning of documents. This is not recommended except if you are
|
||||
sure that the interesting keywords are at the top and have severe disk
|
||||
space issues.</para></listitem></varlistentry>
|
||||
space issues.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.IDXSYNONYMS">
|
||||
<term><varname>idxsynonyms</varname></term>
|
||||
<listitem><para>Name of the index-time synonyms file. This is used for indexing multiword synonyms as single terms,
|
||||
which in turn is only useful if you want to perform proximity searches
|
||||
with such terms.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.ASPELLLANGUAGE">
|
||||
<term><varname>aspellLanguage</varname></term>
|
||||
<listitem><para>Language definitions to use when creating the aspell
|
||||
dictionary. The value must match a set of aspell language
|
||||
definition files. You can type "aspell dicts" to see a list. The default
|
||||
if this is not set is to use the NLS environment to guess the value. The
|
||||
values are the 2-letter language codes (e.g. 'en', 'fr'...)</para></listitem></varlistentry>
|
||||
values are the 2-letter language codes (e.g. 'en', 'fr'...)
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.ASPELLADDCREATEPARAM">
|
||||
<term><varname>aspellAddCreateParam</varname></term>
|
||||
<listitem><para>Additional option and parameter to aspell dictionary creation
|
||||
command. Some aspell packages may need an additional option
|
||||
(e.g. on Debian Jessie: --local-data-dir=/usr/lib/aspell). See Debian bug
|
||||
772415.</para></listitem></varlistentry>
|
||||
772415.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.ASPELLKEEPSTDERR">
|
||||
<term><varname>aspellKeepStderr</varname></term>
|
||||
<listitem><para>Set this to have a look at aspell dictionary creation
|
||||
errors. There are always many, so this is mostly for
|
||||
debugging.</para></listitem></varlistentry>
|
||||
debugging.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.NOASPELL">
|
||||
<term><varname>noaspell</varname></term>
|
||||
<listitem><para>Disable aspell use. The aspell dictionary generation
|
||||
takes time, and some combinations of aspell version, language, and local
|
||||
terms, result in aspell crashing, so it sometimes makes sense to just
|
||||
disable the thing.</para></listitem></varlistentry>
|
||||
disable the thing.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MONAUXINTERVAL">
|
||||
<term><varname>monauxinterval</varname></term>
|
||||
<listitem><para>Auxiliary database update interval. The real time
|
||||
indexer only updates the auxiliary databases (stemdb, aspell)
|
||||
periodically, because it would be too costly to do it for every document
|
||||
change. The default period is one hour.</para></listitem></varlistentry>
|
||||
change. The default period is one hour.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MONIXINTERVAL">
|
||||
<term><varname>monixinterval</varname></term>
|
||||
<listitem><para>Minimum interval (seconds) between processings of the indexing
|
||||
queue. The real time indexer does not process each event
|
||||
when it comes in, but lets the queue accumulate, to diminish overhead and
|
||||
to aggregate multiple events affecting the same file. Default 30
|
||||
S.</para></listitem></varlistentry>
|
||||
S.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MONDELAYPATTERNS">
|
||||
<term><varname>mondelaypatterns</varname></term>
|
||||
<listitem><para>Timing parameters for the real time indexing. Definitions for files which get a longer delay before reindexing
|
||||
@ -654,21 +766,25 @@ reindexed once in a while. A list of wildcardPattern:seconds pairs. The
|
||||
patterns are matched with fnmatch(pattern, path, 0). You can quote entries
|
||||
containing white space with double quotes (quote the whole entry, not the
|
||||
pattern). The default is empty.
|
||||
Example: mondelaypatterns = *.log:20 "*with spaces.*:30"</para></listitem></varlistentry>
|
||||
Example: mondelaypatterns = *.log:20 "*with spaces.*:30"
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.IDXNICEPRIO">
|
||||
<term><varname>idxniceprio</varname></term>
|
||||
<listitem><para>"nice" process priority for the indexing processes. Default: 19
|
||||
(lowest) Appeared with 1.26.5. Prior versions were fixed at 19.</para></listitem></varlistentry>
|
||||
(lowest). Appeared with 1.26.5. Prior versions were fixed at 19.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MONIONICECLASS">
|
||||
<term><varname>monioniceclass</varname></term>
|
||||
<listitem><para>ionice class for the indexing process. Despite the misleading name, and on platforms where this is
|
||||
supported, this affects all indexing processes,
|
||||
not only the real time/monitoring ones. The default value is 3 (use
|
||||
lowest "Idle" priority).</para></listitem></varlistentry>
|
||||
lowest "Idle" priority).
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MONIONICECLASSDATA">
|
||||
<term><varname>monioniceclassdata</varname></term>
|
||||
<listitem><para>ionice class level parameter if the class supports it. The default is empty, as the default "Idle" class has no
|
||||
levels.</para></listitem></varlistentry>
|
||||
levels.
|
||||
</para></listitem></varlistentry>
|
||||
</variablelist></sect3>
|
||||
<sect3 id="RCL.INSTALL.CONFIG.RECOLLCONF.QUERY">
|
||||
<title>Query-time parameters (no impact on the index) </title><variablelist>
|
||||
@ -677,7 +793,8 @@ levels.</para></listitem></varlistentry>
|
||||
<listitem><para>auto-trigger diacritics sensitivity (raw index only). IF the index is not stripped, decide if we automatically trigger
|
||||
diacritics sensitivity if the search term has accented characters (not in
|
||||
unac_except_trans). Else you need to use the query language and the "D"
|
||||
modifier to specify diacritics sensitivity. Default is no.</para></listitem></varlistentry>
|
||||
modifier to specify diacritics sensitivity. Default is no.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.AUTOCASESENS">
|
||||
<term><varname>autocasesens</varname></term>
|
||||
<listitem><para>auto-trigger case sensitivity (raw index only). IF
|
||||
@ -685,40 +802,46 @@ the index is not stripped (see indexStripChars), decide if we
|
||||
automatically trigger character case sensitivity if the search term has
|
||||
upper-case characters in any but the first position. Else you need to use
|
||||
the query language and the "C" modifier to specify character-case
|
||||
sensitivity. Default is yes.</para></listitem></varlistentry>
|
||||
sensitivity. Default is yes.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MAXTERMEXPAND">
|
||||
<term><varname>maxTermExpand</varname></term>
|
||||
<listitem><para>Maximum query expansion count
|
||||
for a single term (e.g.: when using wildcards). This only
|
||||
affects queries, not indexing. We used to not limit this at all (except
|
||||
for filenames where the limit was too low at 1000), but it is
|
||||
unreasonable with a big index. Default 10000.</para></listitem></varlistentry>
|
||||
unreasonable with a big index. Default 10000.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MAXXAPIANCLAUSES">
|
||||
<term><varname>maxXapianClauses</varname></term>
|
||||
<listitem><para>Maximum number of clauses
|
||||
we add to a single Xapian query. This only affects queries,
|
||||
not indexing. In some cases, the result of term expansion can be
|
||||
multiplicative, and we want to avoid eating all the memory. Default
|
||||
50000.</para></listitem></varlistentry>
|
||||
50000.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.SNIPPETMAXPOSWALK">
|
||||
<term><varname>snippetMaxPosWalk</varname></term>
|
||||
<listitem><para>Maximum number of positions we walk while populating a snippet for
|
||||
the result list. The default of 1,000,000 may be
|
||||
insufficient for very big documents, the consequence would be snippets
|
||||
with possibly meaning-altering missing words.</para></listitem></varlistentry>
|
||||
with possibly meaning-altering missing words.
|
||||
</para></listitem></varlistentry>
|
||||
</variablelist></sect3>
|
||||
<sect3 id="RCL.INSTALL.CONFIG.RECOLLCONF.PDF">
|
||||
<title>Parameters for the PDF input script </title><variablelist>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.PDFOCR">
|
||||
<term><varname>pdfocr</varname></term>
|
||||
<listitem><para>Attempt OCR of PDF files with no text content. This can be defined in subdirectories. The default is off because
|
||||
OCR is so very slow.</para></listitem></varlistentry>
|
||||
OCR is so very slow.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.PDFATTACH">
|
||||
<term><varname>pdfattach</varname></term>
|
||||
<listitem><para>Enable PDF attachment extraction by executing pdftk (if
|
||||
available). This is
|
||||
normally disabled, because it does slow down PDF indexing a bit even if
|
||||
not one attachment is ever found.</para></listitem></varlistentry>
|
||||
not one attachment is ever found.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.PDFEXTRAMETA">
|
||||
<term><varname>pdfextrameta</varname></term>
|
||||
<listitem><para>Extract text from selected XMP metadata tags. This
|
||||
@ -726,7 +849,8 @@ is a space-separated list of qualified XMP tag names. Each element can also
|
||||
include a translation to a Recoll field name, separated by a '|'
|
||||
character. If the second element is absent, the tag name is used as the
|
||||
Recoll field names. You will also need to add specifications to the
|
||||
"fields" file to direct processing of the extracted data.</para></listitem></varlistentry>
|
||||
"fields" file to direct processing of the extracted data.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.PDFEXTRAMETAFIX">
|
||||
<term><varname>pdfextrametafix</varname></term>
|
||||
<listitem><para>Define name of XMP field editing script. This
|
||||
@ -735,7 +859,8 @@ values. The script should define a 'MetaFixer' class with a metafix()
|
||||
method which will be called with the qualified tag name and value of each
|
||||
selected field, for editing or erasing. A new instance is created for
|
||||
each document, so that the object can keep state for, e.g. eliminating
|
||||
duplicate values.</para></listitem></varlistentry>
|
||||
duplicate values.
|
||||
</para></listitem></varlistentry>
|
||||
</variablelist></sect3>
|
||||
<sect3 id="RCL.INSTALL.CONFIG.RECOLLCONF.OCR">
|
||||
<title>Parameters for OCR processing </title><variablelist>
|
||||
@ -747,17 +872,20 @@ the input file. Modules for tesseract (tesseract) and ABBYY FineReader
|
||||
(abbyy) are present in the standard distribution. For compatibility with
|
||||
the previous version, if this is not defined at all, the default value is
|
||||
"tesseract". Use an explicit empty value if needed. A value of "abbyy
|
||||
tesseract" will try everything.</para></listitem></varlistentry>
|
||||
tesseract" will try everything.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.OCRCACHEDIR">
|
||||
<term><varname>ocrcachedir</varname></term>
|
||||
<listitem><para>Location for caching OCR data. The default if this is empty or undefined is to store the cached
|
||||
OCR data under $RECOLL_CONFDIR/ocrcache.</para></listitem></varlistentry>
|
||||
OCR data under $RECOLL_CONFDIR/ocrcache.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.TESSERACTLANG">
|
||||
<term><varname>tesseractlang</varname></term>
|
||||
<listitem><para>Language to assume for tesseract OCR. Important for improving the OCR accuracy. This can also be set
|
||||
through the contents of a file in
|
||||
the currently processed directory. See the rclocrtesseract.py
|
||||
script. Example values: eng, fra... See the tesseract documentation.</para></listitem></varlistentry>
|
||||
script. Example values: eng, fra... See the tesseract documentation.
|
||||
</para></listitem></varlistentry>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.TESSERACTCMD">
|
||||
<term><varname>tesseractcmd</varname></term>
|
||||
<listitem><para>Path for the tesseract command. Do not quote. This is mostly useful on Windows, or for specifying a non-default
|
||||
@ -776,11 +904,19 @@ script. Typical values: English, French... See the ABBYY documentation.
|
||||
<listitem><para>Path for the abbyy command. The ABBYY directory is usually not in the path, so you should set this.
|
||||
</para></listitem></varlistentry>
|
||||
</variablelist></sect3>
|
||||
<sect3 id="RCL.INSTALL.CONFIG.RECOLLCONF.MISCHANDLERPARAMS">
|
||||
<title>Parameters for specific handlers </title><variablelist>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.ORGMODESUBDOCS">
|
||||
<term><varname>orgmodesubdocs</varname></term>
|
||||
<listitem><para>Index org-mode level 1 sections as separate sub-documents. This is the default. If set to false, org-mode files will be indexed as plain text.
|
||||
</para></listitem></varlistentry>
|
||||
</variablelist></sect3>
|
||||
<sect3 id="RCL.INSTALL.CONFIG.RECOLLCONF.SPECLOCATIONS">
|
||||
<title>Parameters set for specific locations </title><variablelist>
|
||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MHMBOXQUIRKS">
|
||||
<term><varname>mhmboxquirks</varname></term>
|
||||
<listitem><para>Enable thunderbird/mozilla-seamonkey mbox format quirks. Set this for the directory where the email mbox files are
|
||||
stored.</para></listitem></varlistentry>
|
||||
stored.
|
||||
</para></listitem></varlistentry>
|
||||
</variablelist></sect3>
|
||||
</sect2>
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -20,8 +20,6 @@
|
||||
# All data is binary. This is important for Python3
|
||||
# All parameter names are converted to and processed as str/unicode
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import sys
|
||||
import os
|
||||
import tempfile
|
||||
@ -29,25 +27,13 @@ import shutil
|
||||
import getopt
|
||||
import traceback
|
||||
|
||||
PY3 = sys.version > '3'
|
||||
|
||||
if PY3:
|
||||
def makebytes(data):
|
||||
if data is None:
|
||||
return b""
|
||||
if isinstance(data, bytes):
|
||||
return data
|
||||
else:
|
||||
return data.encode("UTF-8")
|
||||
else:
|
||||
def makebytes(data):
|
||||
if data is None:
|
||||
return ""
|
||||
if isinstance(data, unicode):
|
||||
return data.encode("UTF-8")
|
||||
else:
|
||||
return data
|
||||
|
||||
def makebytes(data):
|
||||
if data is None:
|
||||
return b""
|
||||
if isinstance(data, bytes):
|
||||
return data
|
||||
else:
|
||||
return data.encode("UTF-8")
|
||||
|
||||
############################################
|
||||
# CmdTalk implements the communication protocol with the master
|
||||
@ -116,10 +102,7 @@ class CmdTalk(object):
|
||||
# followed by data. The param name is returned as str/unicode, the data
|
||||
# as bytes
|
||||
def readparam(self):
|
||||
if PY3:
|
||||
inf = self.infile.buffer
|
||||
else:
|
||||
inf = self.infile
|
||||
inf = self.infile.buffer
|
||||
s = inf.readline()
|
||||
if s == b'':
|
||||
if self.exitfunc:
|
||||
@ -143,7 +126,7 @@ class CmdTalk(object):
|
||||
(paramsize, len(paramdata)), 1, 1)
|
||||
else:
|
||||
paramdata = b''
|
||||
if PY3 and not self.nodecodeinput:
|
||||
if not self.nodecodeinput:
|
||||
try:
|
||||
paramdata = paramdata.decode('utf-8')
|
||||
except Exception as ex:
|
||||
@ -154,18 +137,11 @@ class CmdTalk(object):
|
||||
# (paramname, paramsize, paramdata))
|
||||
return (paramname, paramdata)
|
||||
|
||||
if PY3:
|
||||
def senditem(self, nm, data):
|
||||
data = makebytes(data)
|
||||
l = len(data)
|
||||
self.outfile.buffer.write(makebytes("%s: %d\n" % (nm, l)))
|
||||
self.breakwrite(self.outfile.buffer, data)
|
||||
else:
|
||||
def senditem(self, nm, data):
|
||||
data = makebytes(data)
|
||||
l = len(data)
|
||||
self.outfile.write(makebytes("%s: %d\n" % (nm, l)))
|
||||
self.breakwrite(self.outfile, data)
|
||||
def senditem(self, nm, data):
|
||||
data = makebytes(data)
|
||||
l = len(data)
|
||||
self.outfile.buffer.write(makebytes("%s: %d\n" % (nm, l)))
|
||||
self.breakwrite(self.outfile.buffer, data)
|
||||
|
||||
# Send answer: document, ipath, possible eof.
|
||||
def answer(self, outfields):
|
||||
@ -242,7 +218,7 @@ def main(proto, processor):
|
||||
params[args[2*i]] = args[2*i+1]
|
||||
res = processor.process(params)
|
||||
|
||||
ioout = sys.stdout.buffer if PY3 else sys.stdout
|
||||
ioout = sys.stdout.buffer
|
||||
|
||||
for nm,value in res.items():
|
||||
#self.log("Senditem: [%s] -> [%s]" % (nm, value))
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
#!/usr/bin/python3
|
||||
#!/usr/bin/env python3
|
||||
#################################
|
||||
# Copyright (C) 2020 J.F.Dockes
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
# 7-Zip file filter for Recoll
|
||||
|
||||
# Thanks to Recoll user Martin Ziegler
|
||||
# This is a modified version of rclzip, with some help from rcltar
|
||||
# This is a modified version of rclzip.py, with some help from rcltar.py
|
||||
#
|
||||
# Normally using py7zr https://github.com/miurahr/py7zr
|
||||
#
|
||||
@ -238,7 +238,7 @@ class AudioTagExtractor(RclBaseHandler):
|
||||
if tagname.startswith('APIC:'):
|
||||
#self.em.rclog("mp3 img: %s" % mutf[tagname].mime)
|
||||
return 'jpg' if mutf[tagname].mime == 'image/jpeg' else 'png'
|
||||
elif 'audio/x-flac' in mime:
|
||||
elif 'audio/flac' in mime:
|
||||
if mutf.pictures:
|
||||
return 'jpg' if mutf.pictures[0].mime == 'image/jpeg' else 'png'
|
||||
elif 'audio/mp4' in mime:
|
||||
@ -351,6 +351,11 @@ class AudioTagExtractor(RclBaseHandler):
|
||||
# Metadata tags. The names vary depending on the file type. We
|
||||
# just have a big translation dictionary for all
|
||||
for tag,val in mutf.items():
|
||||
#print(f"TAG {tag} VAL {val}", file=sys.stderr)
|
||||
# Mutagen sends out COMM==eng= with tag COMM::eng. We don't know what to do with the
|
||||
# language (or possible other attributes), so get rid of it for now:
|
||||
if tag.find("COMM::") == 0:
|
||||
tag = "COMM"
|
||||
if tag.find('TXXX:') == 0:
|
||||
tag = tag[5:].upper()
|
||||
elif tag.find('TXX:') == 0:
|
||||
@ -18,8 +18,6 @@
|
||||
# Base for extractor classes. With some common generic implementations
|
||||
# for the boilerplate functions.
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import sys
|
||||
import rclexecm
|
||||
|
||||
@ -17,9 +17,15 @@
|
||||
# with retry set).
|
||||
#
|
||||
|
||||
# If $HOME does not exist, there is nothing we can do (happens, for example when run as upmpdcli)
|
||||
if test ! -d "$HOME" ; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Bin dirs to be tested:
|
||||
bindirs="/usr/bin /usr/local/bin $HOME/bin /opt/*/bin"
|
||||
|
||||
|
||||
rfiledir=$HOME/.config/Recoll.org
|
||||
rfile=$rfiledir/needidxretrydate
|
||||
nrfile=$rfiledir/tneedidxretrydate
|
||||
|
||||
@ -1,12 +1,11 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import print_function
|
||||
|
||||
# dia (http://live.gnome.org/Dia) file filter for recoll
|
||||
# stefan.friedel@iwr.uni-heidelberg.de 2012
|
||||
#
|
||||
# add the following to ~/.recoll/mimeconf into the [index] section:
|
||||
# application/x-dia-diagram = execm rcldia;mimetype=text/plain;charset=utf-8
|
||||
# application/x-dia-diagram = execm rcldia.py;mimetype=text/plain;charset=utf-8
|
||||
# and into the [icons] section:
|
||||
# application/x-dia-diagram = drawing
|
||||
# and finally under [categories]:
|
||||
@ -17,8 +17,6 @@
|
||||
|
||||
# Recoll DJVU extractor
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import sys
|
||||
import re
|
||||
|
||||
@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import print_function
|
||||
|
||||
import rclexecm
|
||||
import rclexec1
|
||||
|
||||
@ -1,6 +1,5 @@
|
||||
#!/usr/bin/python3
|
||||
#!/usr/bin/env python3
|
||||
"""Extract Html content from an EPUB file (.epub)"""
|
||||
from __future__ import print_function
|
||||
|
||||
rclepub_html_mtype = "text/html"
|
||||
|
||||
@ -1,6 +1,5 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Extract Html content from an EPUB file (.chm), concatenating all sections"""
|
||||
from __future__ import print_function
|
||||
|
||||
import sys
|
||||
import os
|
||||
@ -26,8 +26,6 @@
|
||||
# this would be too slow. So this helps implementing a permanent script
|
||||
# to repeatedly execute single commands.
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import subprocess
|
||||
import rclexecm
|
||||
from rclbasehandler import RclBaseHandler
|
||||
|
||||
@ -20,8 +20,6 @@
|
||||
# All data is binary. This is important for Python3
|
||||
# All parameter names are converted to and processed as str/unicode
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import sys
|
||||
import os
|
||||
import tempfile
|
||||
@ -30,7 +28,6 @@ import getopt
|
||||
import rclconfig
|
||||
import cmdtalk
|
||||
|
||||
PY3 = (sys.version > '3')
|
||||
_g_mswindows = (sys.platform == "win32")
|
||||
_g_execdir = os.path.dirname(sys.argv[0])
|
||||
|
||||
@ -62,12 +59,11 @@ def makebytes(data):
|
||||
# Possibly decode binary file name for use as subprocess argument,
|
||||
# depending on platform.
|
||||
def subprocfile(fn):
|
||||
# On Windows PY3 the list2cmdline() method in subprocess assumes that
|
||||
# all args are str, and we receive file names as UTF-8. So we need
|
||||
# to convert.
|
||||
# On Unix all list elements get converted to bytes in the C
|
||||
# _posixsubprocess module, nothing to do.
|
||||
if PY3 and _g_mswindows and type(fn) != type(''):
|
||||
# On Windows Python 3 the list2cmdline() method in subprocess assumes that all args are str, and
|
||||
# we receive file names as UTF-8. So we need to convert.
|
||||
# On Unix all list elements get converted to bytes in the C _posixsubprocess module, nothing to
|
||||
# do.
|
||||
if _g_mswindows and type(fn) != type(''):
|
||||
return fn.decode('UTF-8')
|
||||
else:
|
||||
return fn
|
||||
@ -265,19 +261,30 @@ def execPythonScript(icmd):
|
||||
|
||||
# Temp dir helper
|
||||
class SafeTmpDir:
|
||||
def __init__(self, em):
|
||||
def __init__(self, tag, em=None):
|
||||
self.tag = tag
|
||||
self.em = em
|
||||
self.toptmp = ""
|
||||
self.tmpdir = ""
|
||||
self.toptmp = None
|
||||
self.tmpdir = None
|
||||
|
||||
def __del__(self):
|
||||
try:
|
||||
if self.toptmp:
|
||||
shutil.rmtree(self.tmpdir, True)
|
||||
if self.toptmp:
|
||||
try:
|
||||
if self.tmpdir:
|
||||
shutil.rmtree(self.tmpdir, True)
|
||||
os.rmdir(self.toptmp)
|
||||
except Exception as err:
|
||||
self.em.rclog("delete dir failed for " + self.toptmp)
|
||||
except Exception as err:
|
||||
if self.em:
|
||||
self.em.rclog("delete dir failed for " + self.toptmp)
|
||||
|
||||
def vacuumdir(self):
|
||||
if self.tmpdir:
|
||||
for fn in os.listdir(self.tmpdir):
|
||||
path = os.path.join(self.tmpdir, fn)
|
||||
if os.path.isfile(path):
|
||||
os.unlink(path)
|
||||
return True
|
||||
|
||||
def getpath(self):
|
||||
if not self.tmpdir:
|
||||
envrcltmp = os.getenv('RECOLL_TMPDIR')
|
||||
@ -286,7 +293,7 @@ class SafeTmpDir:
|
||||
else:
|
||||
self.toptmp = tempfile.mkdtemp(prefix='rcltmp')
|
||||
|
||||
self.tmpdir = os.path.join(self.toptmp, 'rclsofftmp')
|
||||
self.tmpdir = os.path.join(self.toptmp, self.tag)
|
||||
os.makedirs(self.tmpdir)
|
||||
|
||||
return self.tmpdir
|
||||
@ -305,8 +312,7 @@ def main(proto, extract):
|
||||
# Not running the main loop: either acting as single filter (when called
|
||||
# from other filter for example), or debugging
|
||||
def usage():
|
||||
print("Usage: rclexecm.py [-d] [-s] [-i ipath] <filename>",
|
||||
file=sys.stderr)
|
||||
print("Usage: rclexecm.py [-d] [-f] [-h] [-i ipath] [-s] <filename>", file=sys.stderr)
|
||||
print(" rclexecm.py -w <prog>", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
@ -361,7 +367,7 @@ def main(proto, extract):
|
||||
|
||||
params = {'filename' : makebytes(path)}
|
||||
|
||||
# Some filters (e.g. rclaudio) need/get a MIME type from the indexer.
|
||||
# Some filters (e.g. rclaudio.py) need/get a MIME type from the indexer.
|
||||
# We make a half-assed attempt to emulate:
|
||||
mimetype = _g_config.mimeType(path)
|
||||
if not mimetype and not _g_mswindows:
|
||||
@ -373,10 +379,7 @@ def main(proto, extract):
|
||||
print("Open error", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
if PY3:
|
||||
ioout = sys.stdout.buffer
|
||||
else:
|
||||
ioout = sys.stdout
|
||||
ioout = sys.stdout.buffer
|
||||
if ipath != b"" or actAsSingle:
|
||||
params['ipath'] = ipath
|
||||
ok, data, ipath, eof = extract.getipath(params)
|
||||
|
||||
@ -16,8 +16,6 @@
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
######################################
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import sys
|
||||
import rclexecm
|
||||
import rclxslt
|
||||
|
||||
@ -18,8 +18,6 @@
|
||||
|
||||
# Base class for simple (one stylesheet) xslt-based handlers
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import sys
|
||||
import rclxslt
|
||||
import gzip
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
#!/usr/bin/python3
|
||||
#!/usr/bin/env python3
|
||||
# Copyright (C) 2020 J.F.Dockes
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
||||
@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import print_function
|
||||
|
||||
# Read an ICS file, break it into "documents" which are events, todos,
|
||||
# or journal entries, and interface with recoll execm
|
||||
@ -6,7 +6,6 @@
|
||||
#
|
||||
# Uses pyexiv2. Also tried Pillow, found it useless for tags.
|
||||
#
|
||||
from __future__ import print_function
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
||||
@ -3,8 +3,6 @@
|
||||
# Read a file in GNU info format and output its nodes as subdocs,
|
||||
# interfacing with recoll execm
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import rclexecm
|
||||
import sys
|
||||
import os
|
||||
@ -141,7 +139,7 @@ class InfoSimpleSplitter:
|
||||
if name == b'File':
|
||||
infofile = value
|
||||
except Exception as err:
|
||||
print("rclinfo: bad line in %s: [%s] %s\n" % \
|
||||
print("rclinfo.py: bad line in %s: [%s] %s\n" % \
|
||||
(infofile, line, err), file = sys.stderr)
|
||||
nodename = prevnodename
|
||||
node += line
|
||||
59
src/filters/rclipynb.py
Executable file
59
src/filters/rclipynb.py
Executable file
@ -0,0 +1,59 @@
|
||||
#!/usr/bin/env python3
|
||||
# Copyright (C) 2021 J.F.Dockes
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the
|
||||
# Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
# Recoll handler for iPython / Jupyter notebook files.
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
|
||||
import rclexecm
|
||||
from rclbasehandler import RclBaseHandler
|
||||
|
||||
class IPYNBextractor(RclBaseHandler):
    """Recoll handler extracting the text of iPython / Jupyter notebook files.

    Markdown cells are output with every line prefixed by "# ", code cells
    are output verbatim, and the result is declared as text/plain.
    """

    def __init__(self, em):
        super(IPYNBextractor, self).__init__(em)

    @staticmethod
    def _celllines(value):
        """Return the text of a cell as a list of lines.

        The notebook format stores multi-line text either as a list of
        strings or as one single string. Iterating directly over a single
        string would yield characters instead of lines, so it is split here,
        keeping the line terminators.
        """
        if isinstance(value, str):
            return value.splitlines(True)
        return value

    def html_text(self, fn):
        """Read the notebook file fn and return its cells as plain text.

        Sets self.outputmimetype to 'text/plain'. Errors raised while
        reading or parsing the file, or by missing keys, are not caught here.
        """
        # Close the file as soon as it has been read instead of leaving the
        # handle to the garbage collector.
        with open(fn, 'rb') as f:
            data = json.loads(f.read())
        # Notebooks with a "worksheets" entry keep their cells inside it
        # (only the first worksheet is used), others have a top-level list.
        if "worksheets" in data:
            cells = data["worksheets"][0]["cells"]
        else:
            cells = data["cells"]
        parts = []
        for cell in cells:
            ctype = cell["cell_type"]
            if ctype == "markdown":
                parts.append("\n")
                for line in self._celllines(cell["source"]):
                    parts.append("# " + line + "\n")
            elif ctype == "code":
                parts.append("\n\n")
                # The code is under "source", or "input" when "source" is absent.
                key = "source" if "source" in cell else "input"
                parts.extend(self._celllines(cell[key]))
                parts.append("\n")
        mdtext = "".join(parts)
        #print("%s"%mdtext, file=sys.stderr)
        self.outputmimetype = 'text/plain'
        return mdtext
|
||||
|
||||
|
||||
# Main program: create protocol handler and extractor and run them
|
||||
proto = rclexecm.RclExecM()
|
||||
extract = IPYNBextractor(proto)
|
||||
rclexecm.main(proto, extract)
|
||||
@ -1,8 +1,6 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# Read a .kar midi karaoke file and translate to recoll indexable format
|
||||
# This does not work with Python3 yet because python:midi doesn't
|
||||
from __future__ import print_function
|
||||
|
||||
import rclexecm
|
||||
import sys
|
||||
@ -46,11 +44,7 @@ htmltemplate = '''
|
||||
|
||||
nlbytes = b'\n'
|
||||
bsbytes = b'\\'
|
||||
PY3 = sys.version > '3'
|
||||
if PY3:
|
||||
nullchar = 0
|
||||
else:
|
||||
nullchar = chr(0)
|
||||
nullchar = 0
|
||||
|
||||
class KarTextExtractor(RclBaseHandler):
|
||||
# Afaik, the only charset encodings with null bytes are variations on
|
||||
@ -13,12 +13,7 @@ epsilon with dasia (in unicode but not iso). Can this be replaced by either epsi
|
||||
with acute accent ?
|
||||
"""
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import sys
|
||||
PY3 = sys.version > '3'
|
||||
if not PY3:
|
||||
import string
|
||||
import glob
|
||||
import os
|
||||
import os.path
|
||||
@ -38,10 +33,7 @@ class European8859TextClassifier:
|
||||
# Table to translate from punctuation to spaces
|
||||
self.punct = b'''0123456789<>/*?[].@+-,#_$%&={};.,:!"''' + b"'\n\r"
|
||||
spaces = len(self.punct) * b' '
|
||||
if PY3:
|
||||
self.spacetable = bytes.maketrans(self.punct, spaces)
|
||||
else:
|
||||
self.spacetable = string.maketrans(self.punct, spaces)
|
||||
self.spacetable = bytes.maketrans(self.punct, spaces)
|
||||
|
||||
def readlanguages(self, langzip):
|
||||
"""Extract the stop words lists from the zip file.
|
||||
|
||||
@ -23,24 +23,15 @@
|
||||
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
#
|
||||
from __future__ import print_function
|
||||
|
||||
import sys
|
||||
from struct import unpack, pack
|
||||
import six
|
||||
|
||||
PY3 = sys.version > '3'
|
||||
|
||||
if PY3:
|
||||
def next_byte_as_int(data):
|
||||
return next(data)
|
||||
def next_byte_as_char(data):
|
||||
return bytes([next(data)])
|
||||
else:
|
||||
def next_byte_as_int(data):
|
||||
return ord(data.next())
|
||||
def next_byte_as_char(data):
|
||||
return next(data)
|
||||
def next_byte_as_int(data):
|
||||
return next(data)
|
||||
def next_byte_as_char(data):
|
||||
return bytes([next(data)])
|
||||
|
||||
##
|
||||
## Constants
|
||||
@ -270,12 +261,8 @@ class NoteEvent(Event):
|
||||
self.velocity)
|
||||
|
||||
def decode_data(self):
|
||||
if PY3:
|
||||
self.pitch = self.data[0]
|
||||
self.velocity = self.data[1]
|
||||
else:
|
||||
self.pitch = ord(self.data[0])
|
||||
self.velocity = ord(self.data[1])
|
||||
self.pitch = self.data[0]
|
||||
self.velocity = self.data[1]
|
||||
|
||||
|
||||
class NoteOnEvent(NoteEvent):
|
||||
@ -309,12 +296,8 @@ class ControlChangeEvent(Event):
|
||||
hex(ord(self.data[1])))
|
||||
|
||||
def decode_data(self):
|
||||
if PY3:
|
||||
self.control = self.data[0]
|
||||
self.value = self.data[1]
|
||||
else:
|
||||
self.control = ord(self.data[0])
|
||||
self.value = ord(self.data[1])
|
||||
self.control = self.data[0]
|
||||
self.value = self.data[1]
|
||||
|
||||
|
||||
class ProgramChangeEvent(Event):
|
||||
@ -328,10 +311,7 @@ class ProgramChangeEvent(Event):
|
||||
hex(ord(self.data[0])))
|
||||
|
||||
def decode_data(self):
|
||||
if PY3:
|
||||
self.value = self.data[0]
|
||||
else:
|
||||
self.value = ord(self.data[0])
|
||||
self.value = self.data[0]
|
||||
|
||||
|
||||
class ChannelAfterTouchEvent(Event):
|
||||
@ -356,12 +336,8 @@ class PitchWheelEvent(Event):
|
||||
hex(ord(self.data[1])))
|
||||
|
||||
def decode_data(self):
|
||||
if PY3:
|
||||
first = self.data[0]
|
||||
second = self.data[1]
|
||||
else:
|
||||
first = ord(self.data[0])
|
||||
second = ord(self.data[1])
|
||||
first = self.data[0]
|
||||
second = self.data[1]
|
||||
self.value = ((second << 7) | first) - 0x2000
|
||||
|
||||
|
||||
@ -461,10 +437,7 @@ class PortEvent(MetaEvent):
|
||||
|
||||
def decode_data(self):
|
||||
assert(len(self.data) == 1)
|
||||
if PY3:
|
||||
self.port = self.data[0]
|
||||
else:
|
||||
self.port = ord(self.data[0])
|
||||
self.port = self.data[0]
|
||||
|
||||
class TrackLoopEvent(MetaEvent):
|
||||
name = 'Track Loop'
|
||||
@ -498,13 +471,7 @@ class SetTempoEvent(MetaEvent):
|
||||
|
||||
def decode_data(self):
|
||||
assert(len(self.data) == 3)
|
||||
if PY3:
|
||||
self.mpqn = (self.data[0] << 16) + (self.data[1] << 8) \
|
||||
+ self.data[2]
|
||||
else:
|
||||
self.mpqn = (ord(self.data[0]) << 16) + (ord(self.data[1]) << 8) \
|
||||
+ ord(self.data[2])
|
||||
|
||||
self.mpqn = (self.data[0] << 16) + (self.data[1] << 8) + self.data[2]
|
||||
self.tempo = float(6e7) / self.mpqn
|
||||
|
||||
|
||||
@ -523,22 +490,13 @@ class TimeSignatureEvent(MetaEvent):
|
||||
(super(TimeSignatureEvent, self).__str__(),
|
||||
self.numerator, self.denominator,
|
||||
self.metronome, self.thirtyseconds)
|
||||
if PY3:
|
||||
def decode_data(self):
|
||||
assert(len(self.data) == 4)
|
||||
self.numerator = self.data[0]
|
||||
# Weird: the denominator is two to the power of the data variable
|
||||
self.denominator = 2 ** self.data[1]
|
||||
self.metronome = self.data[2]
|
||||
self.thirtyseconds = self.data[3]
|
||||
else:
|
||||
def decode_data(self):
|
||||
assert(len(self.data) == 4)
|
||||
self.numerator = ord(self.data[0])
|
||||
# Weird: the denominator is two to the power of the data variable
|
||||
self.denominator = 2 ** ord(self.data[1])
|
||||
self.metronome = ord(self.data[2])
|
||||
self.thirtyseconds = ord(self.data[3])
|
||||
def decode_data(self):
|
||||
assert(len(self.data) == 4)
|
||||
self.numerator = self.data[0]
|
||||
# Weird: the denominator is two to the power of the data variable
|
||||
self.denominator = 2 ** self.data[1]
|
||||
self.metronome = self.data[2]
|
||||
self.thirtyseconds = self.data[3]
|
||||
|
||||
|
||||
class KeySignatureEvent(MetaEvent):
|
||||
|
||||
@ -26,6 +26,8 @@
|
||||
|
||||
import os
|
||||
import sys
|
||||
import atexit
|
||||
import signal
|
||||
import importlib.util
|
||||
|
||||
import rclconfig
|
||||
@ -33,7 +35,27 @@ import rclocrcache
|
||||
import rclexecm
|
||||
|
||||
def _deb(s):
|
||||
rclexecm.logmsg(s)
|
||||
rclexecm.logmsg("rclocr: %s" % s)
|
||||
|
||||
ocrcleanupmodule = None
|
||||
@atexit.register
def finalcleanup():
    """Exit hook: let the selected OCR module clean up after itself.

    Does nothing while ocrcleanupmodule is still unset (None).
    """
    if not ocrcleanupmodule:
        return
    ocrcleanupmodule.cleanocr()
|
||||
|
||||
def signal_handler(sig, frame):
    """Signal handler: terminate the process with exit status 1.

    Both arguments are ignored. Exiting through SystemExit (rather than
    being killed by the signal) lets the atexit cleanup run.
    """
    raise SystemExit(1)
|
||||
|
||||
# Not all signals necessarily exist on all systems, so guard each registration
|
||||
for _signame in ("SIGHUP", "SIGINT", "SIGQUIT", "SIGTERM"):
    # Look the signal up by name: a missing attribute means that the
    # platform does not define it, and it is simply skipped.
    _signum = getattr(signal, _signame, None)
    if _signum is None:
        continue
    try:
        signal.signal(_signum, signal_handler)
    except (ValueError, OSError):
        # The signal can't be set here (e.g. not valid on this platform or
        # not called from the main thread): registration is best-effort.
        pass
|
||||
|
||||
|
||||
def Usage():
|
||||
_deb("Usage: rclocr.py <imagefilename>")
|
||||
@ -72,7 +94,7 @@ if incache:
|
||||
try:
|
||||
breakwrite(sys.stdout.buffer, data)
|
||||
except Exception as e:
|
||||
_deb("RCLOCR error writing: %s" % e)
|
||||
_deb("error writing: %s" % e)
|
||||
sys.exit(1)
|
||||
sys.exit(0)
|
||||
|
||||
@ -112,6 +134,7 @@ if not ok:
|
||||
|
||||
# The OCR module will retrieve its specific parameters from the
|
||||
# configuration
|
||||
ocrcleanupmodule = ocr
|
||||
status, data = ocr.runocr(config, path)
|
||||
|
||||
if not status:
|
||||
|
||||
@ -42,6 +42,9 @@ abbyocrdir = ""
|
||||
def _deb(s):
|
||||
rclexecm.logmsg(s)
|
||||
|
||||
def cleanocr():
|
||||
pass
|
||||
|
||||
# Return true if abbyy appears to be available
|
||||
def ocrpossible(config, path):
|
||||
global abbyyocrcmd
|
||||
|
||||
@ -22,37 +22,63 @@
|
||||
# OCR is extremely slow, caching the results is necessary.
|
||||
#
|
||||
# The cache stores 2 kinds of objects:
|
||||
# - Path files are named from the hash of the image file path and
|
||||
# contain the image data hash, the modification time and size of the
|
||||
# image file at the time the OCR'd data was stored in the cache, and
|
||||
# the image path itself (the last is for purging only).
|
||||
# - Data files are named with the hash of the image data and contain
|
||||
# the zlib-compressed OCR'd data.
|
||||
# - Path files are named from the hash of the image file path and contain the
|
||||
# image data hash, the modification time and size of the image file at the
|
||||
# time the OCR'd data was stored in the cache, and the image path itself (the
|
||||
# last is for purging only).
|
||||
# - Data files are named with the hash of the image data and contain the
|
||||
# zlib-compressed OCR'd data.
|
||||
# - The cache Path and Data files are stored under top subdirectories: objects/
|
||||
# and paths/.
|
||||
#
|
||||
# When retrieving data from the cache:
|
||||
# - We first use the image file size and modification time: if an
|
||||
# entry exists for the imagepath/mtime/size triplet, and is up to
|
||||
# date, the corresponding data is obtained from the data file and
|
||||
# returned.
|
||||
# - Else we then use the image data: if an entry exists for the
|
||||
# computed hashed value of the data, it is returned. This allows
|
||||
# moving files around without needing to run OCR again, but of
|
||||
# course, it is more expensive than the first step
|
||||
# - We first use the image file size and modification time: if an entry exists
|
||||
# for the imagepath/mtime/size triplet, and is up to date, the corresponding
|
||||
# data is obtained from the data file and returned.
|
||||
# - Else we then use the image data: if an entry exists for the computed hashed
|
||||
# value of the data, it is returned. This allows moving files around without
|
||||
# needing to run OCR again, but of course, it is more expensive than the
|
||||
# first step
|
||||
#
|
||||
# If we need to use the second step, as a side effect, a path file is
|
||||
# created or updated so that the data will be found with the first
|
||||
# step next time around.
|
||||
# In both cases, the paths are hashed with sha1, and the first two characters of
|
||||
# the hash are used as a top level directory, the rest as a file name. E.g. for:
|
||||
# pd,pf = self._hashpath(path), the result would be stored under pd/pf
|
||||
#
|
||||
# Purging the cache of obsolete data.
|
||||
# If we need to use the second step, as a side effect, a path file is created or
|
||||
# updated so that the data will be found with the first step next time around.
|
||||
#
|
||||
# - The cache path and data files are stored under 2 different
|
||||
# directories (objects, paths) to make purging easier.
|
||||
# - Purging the paths tree just involves walking it, reading the
|
||||
# files, and checking the existence of the recorded paths.
|
||||
# - There is no easy way to purge the data tree. The only possibility
|
||||
# is to input a list of possible source files (e.g. result of a
|
||||
# find in the image files area), and compute all the hashes. Data
|
||||
# files which do not match one of the hashes are deleted.
|
||||
# When processing embedded documents like email attachments, recoll uses
|
||||
# temporary copies in TMPDIR (which defaults to /tmp) or RECOLL_TMPDIR. Of
|
||||
# course the paths for the temporary files change when re-processing a given
|
||||
# document. We do not store the Path file for data stored in TMPDIR or
|
||||
# RECOLL_TMPDIR, because doing so would cause an indefinite accumulation of
|
||||
# unusable Path files. This means that access to the OCR data for these
|
||||
# documents always causes the computation of the data hash, and is slower. With
|
||||
# recent Recoll versions which cache the text content in the index, this only
|
||||
# occurs when reindexing (with older versions, this could also occur for
|
||||
# Preview).
|
||||
#
|
||||
# Purging the cache of obsolete data:
|
||||
#
|
||||
# This can be done by running this file as a top level script with a --purge
|
||||
# option (possibly completed by a --purgedata option but see below)
|
||||
# - Purging the paths tree just involves walking it, reading the files, and
|
||||
# checking the existence of the recorded paths. Path files for non-existent
|
||||
# files are deleted.
|
||||
# - Purging the data tree: we make a list of all Data files referenced by at
|
||||
# least one Path file, then walk the data tree, deleting unreferenced
|
||||
# files. This means that Data files from temporary document copies (see
|
||||
# above) will be deleted, which is quite unsatisfying. This would be
|
||||
# difficult to change:
|
||||
# - There is no way to detect the affected files because the Data files store
|
||||
# no origin information
|
||||
# - Even if we wanted to store an indication that the data file comes from a
|
||||
# temporary document, we'd have no way to access the original document
|
||||
# because the full ipath is not available. Changing this would be close to
|
||||
# impossible because internfile...
|
||||
# In consequence the --purgedata option must be explicitly added for a data
|
||||
# purge to be performed. Only set it if re-OCRing all embedded documents is reasonable.
|
||||
|
||||
|
||||
import sys
|
||||
import os
|
||||
@ -61,11 +87,19 @@ import urllib.parse
|
||||
import zlib
|
||||
import glob
|
||||
|
||||
import rclexecm
|
||||
from rclexecm import logmsg as _deb
|
||||
|
||||
def _catslash(p):
|
||||
if p and p[-1] != "/":
|
||||
p += "/"
|
||||
return p
|
||||
|
||||
|
||||
_tmpdir = os.environ["TMPDIR"] if "TMPDIR" in os.environ else "/tmp"
|
||||
_tmpdir = _catslash(_tmpdir)
|
||||
_recoll_tmpdir = os.environ["RECOLL_TMPDIR"] if "RECOLL_TMPDIR" in os.environ else None
|
||||
_recoll_tmpdir = _catslash(_recoll_tmpdir)
|
||||
|
||||
def _deb(s):
|
||||
rclexecm.logmsg(s)
|
||||
|
||||
|
||||
class OCRCache(object):
|
||||
def __init__(self, conf):
|
||||
@ -90,7 +124,7 @@ class OCRCache(object):
|
||||
|
||||
# Compute sha1 of path data contents, as two parts of 2 and 38 chars
|
||||
def _hashdata(self, path):
|
||||
#_deb("Hashing DATA")
|
||||
# _deb("Hashing DATA")
|
||||
m = hashlib.sha1()
|
||||
with open(path, "rb") as f:
|
||||
while True:
|
||||
@ -101,39 +135,39 @@ class OCRCache(object):
|
||||
h = m.hexdigest()
|
||||
return h[0:2], h[2:]
|
||||
|
||||
|
||||
def _readpathfile(self, ppf):
|
||||
'''Read path file and return values. We do not decode the image path
|
||||
as this is only used for purging'''
|
||||
with open(ppf, 'r') as f:
|
||||
line = f.read()
|
||||
dd,df,tm,sz,pth = line.split()
|
||||
dd, df, tm, sz, pth = line.split()
|
||||
tm = int(tm)
|
||||
sz = int(sz)
|
||||
return dd,df,tm,sz,pth
|
||||
|
||||
return dd, df, tm, sz, pth
|
||||
|
||||
# Try to read the stored attributes for a given path: data hash,
|
||||
# modification time and size. If this fails, the path itself is
|
||||
# not cached (but the data still might be, maybe the file was moved)
|
||||
def _cachedpathattrs(self, path):
|
||||
pd,pf = self._hashpath(path)
|
||||
pd, pf = self._hashpath(path)
|
||||
pathfilepath = os.path.join(self.pathdir, pd, pf)
|
||||
if not os.path.exists(pathfilepath):
|
||||
return False, None, None, None, None
|
||||
try:
|
||||
dd, df, tm, sz, pth = self._readpathfile(pathfilepath)
|
||||
return True, dd, df, tm, sz
|
||||
except:
|
||||
except Exception as ex:
|
||||
_deb(f"Error while trying to access pathfile {pathfilepath}: {ex}")
|
||||
return False, None, None, None, None
|
||||
|
||||
# Compute the path hash, and get the mtime and size for given
|
||||
# path, for updating the cache path file
|
||||
def _newpathattrs(self, path):
|
||||
pd,pf = self._hashpath(path)
|
||||
pd, pf = self._hashpath(path)
|
||||
tm = int(os.path.getmtime(path))
|
||||
sz = int(os.path.getsize(path))
|
||||
return pd, pf, tm, sz
|
||||
|
||||
|
||||
# Check if the cache appears up to date for a given path, only
|
||||
# using the modification time and size. Return the data file path
|
||||
# elements if we get a hit.
|
||||
@ -142,31 +176,25 @@ class OCRCache(object):
|
||||
if not ret:
|
||||
return False, None, None
|
||||
pd, pf, ntm, nsz = self._newpathattrs(path)
|
||||
#_deb(" tm %d sz %d" % (ntm, nsz))
|
||||
#_deb("otm %d osz %d" % (otm, osz))
|
||||
# _deb(" tm %d sz %d" % (ntm, nsz))
|
||||
# _deb("otm %d osz %d" % (otm, osz))
|
||||
if otm != ntm or osz != nsz:
|
||||
return False, None, None
|
||||
return True, od, of
|
||||
|
||||
# Check if cache appears up to date for path (no data check),
|
||||
# return True/False
|
||||
def pathincache(self, path):
|
||||
ret, dd, df = self._pathincache(path)
|
||||
return ret
|
||||
|
||||
# Compute the data file name for path. Expensive: we compute the data hash.
|
||||
# Return both the data file path and path elements (for storage in path file)
|
||||
def _datafilename(self, path):
|
||||
d, f = self._hashdata(path)
|
||||
return os.path.join(self.objdir, d, f), d, f
|
||||
|
||||
# Check if the data for path is in cache: expensive, needs to
|
||||
# compute the hash for the path's data contents. Returns True/False
|
||||
def dataincache(self, path):
|
||||
return os.path.exists(self._datafilename(path)[0])
|
||||
|
||||
# Create path file with given elements.
|
||||
def _updatepathfile(self, pd, pf, dd, df, tm, sz, path):
|
||||
global _tmpdir, _recoll_tmpdir
|
||||
if (_tmpdir and path.startswith(_tmpdir)) or \
|
||||
(_recoll_tmpdir and path.startswith(_recoll_tmpdir)):
|
||||
_deb(f"ocrcache: not storing path data for temporary file {path}")
|
||||
return
|
||||
dir = os.path.join(self.pathdir, pd)
|
||||
if not os.path.exists(dir):
|
||||
os.makedirs(dir)
|
||||
@ -178,7 +206,7 @@ class OCRCache(object):
|
||||
# Store data for path. Only rewrite an existing data file if told
|
||||
# to do so: this is only useful if we are forcing an OCR re-run.
|
||||
def store(self, path, datatostore, force=False):
|
||||
dd,df = self._hashdata(path)
|
||||
dd, df = self._hashdata(path)
|
||||
pd, pf, tm, sz = self._newpathattrs(path)
|
||||
self._updatepathfile(pd, pf, dd, df, tm, sz, path)
|
||||
dir = os.path.join(self.objdir, dd)
|
||||
@ -186,7 +214,7 @@ class OCRCache(object):
|
||||
os.makedirs(dir)
|
||||
dfile = os.path.join(dir, df)
|
||||
if force or not os.path.exists(dfile):
|
||||
#_deb("Storing data")
|
||||
# _deb("Storing data")
|
||||
cpressed = zlib.compress(datatostore)
|
||||
with open(dfile, "wb") as f:
|
||||
f.write(cpressed)
|
||||
@ -203,11 +231,12 @@ class OCRCache(object):
|
||||
dfn, dd, df = self._datafilename(path)
|
||||
|
||||
if not os.path.exists(dfn):
|
||||
_deb(f"ocrcache: no existing OCR data file for {path}")
|
||||
return False, b""
|
||||
|
||||
if not pincache:
|
||||
# File has moved. create/Update path file for next time
|
||||
_deb("ocrcache::get file %s was moved, updating path data" % path)
|
||||
# File may have moved. Create/Update path file for next time
|
||||
_deb(f"ocrcache::get: data ok but path file for {path} does not exist: creating it")
|
||||
pd, pf, tm, sz = self._newpathattrs(path)
|
||||
self._updatepathfile(pd, pf, dd, df, tm, sz, path)
|
||||
|
||||
@ -223,10 +252,10 @@ class OCRCache(object):
|
||||
ntm = int(os.path.getmtime(origpath))
|
||||
nsz = int(os.path.getsize(origpath))
|
||||
if ntm != otm or nsz != osz:
|
||||
#_deb("Purgepaths otm %d ntm %d osz %d nsz %d"%(otm, ntm, osz, nsz))
|
||||
# _deb("Purgepaths otm %d ntm %d osz %d nsz %d"%(otm, ntm, osz, nsz))
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def purgepaths(self):
|
||||
'''Remove all stale pathfiles: source image does not exist or has
|
||||
been changed. Mostly useful for removed files, modified ones would be
|
||||
@ -251,15 +280,15 @@ class OCRCache(object):
|
||||
def _pgdt_pathcb(self, f):
|
||||
'''Get a pathfile name, read it, and record datafile identifier
|
||||
(concatenate data file subdir and file name)'''
|
||||
#_deb("_pgdt_pathcb: %s" % f)
|
||||
# _deb("_pgdt_pathcb: %s" % f)
|
||||
dd, df, tm, sz, orgpath = self._readpathfile(f)
|
||||
self._pgdt_alldatafns.add(dd+df)
|
||||
|
||||
def _pgdt_datacb(self, datafn):
|
||||
'''Get a datafile name and check that it is referenced by a previously
|
||||
seen pathfile'''
|
||||
p1,fn = os.path.split(datafn)
|
||||
p2,dn = os.path.split(p1)
|
||||
p1, fn = os.path.split(datafn)
|
||||
p2, dn = os.path.split(p1)
|
||||
tst = dn+fn
|
||||
if tst in self._pgdt_alldatafns:
|
||||
_deb("purgedata: ok : %s" % datafn)
|
||||
@ -267,7 +296,7 @@ class OCRCache(object):
|
||||
else:
|
||||
_deb("purgedata: removing : %s" % datafn)
|
||||
os.remove(datafn)
|
||||
|
||||
|
||||
def purgedata(self):
|
||||
'''Remove all data files which do not match any from the input list,
|
||||
based on data contents hash. We make a list of all data files
|
||||
@ -280,50 +309,61 @@ class OCRCache(object):
|
||||
self._pgdt_alldatafns = set()
|
||||
self._walk(self.pathdir, self._pgdt_pathcb)
|
||||
self._walk(self.objdir, self._pgdt_datacb)
|
||||
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
import rclconfig
|
||||
def _Usage():
|
||||
_deb("Usage: rclocrcache.py --purge")
|
||||
import getopt
|
||||
|
||||
def Usage(f=sys.stderr):
|
||||
print("Usage: rclocrcache.py --purge [--purgedata]", file=f)
|
||||
print("Usage: rclocrcache.py --store <imgdatapath> <ocrdatapath>", file=f)
|
||||
print("Usage: rclocrcache.py --get <imgdatapath>", file=f)
|
||||
sys.exit(1)
|
||||
if len(sys.argv) != 2:
|
||||
_Usage()
|
||||
if sys.argv[1] != "--purge":
|
||||
_Usage()
|
||||
|
||||
|
||||
conf = rclconfig.RclConfig()
|
||||
cache = OCRCache(conf)
|
||||
cache.purgepaths()
|
||||
cache.purgedata()
|
||||
sys.exit(0)
|
||||
|
||||
# def trycache(p):
|
||||
# _deb("== CACHE tests for %s"%p)
|
||||
# ret = cache.pathincache(p)
|
||||
# s = "" if ret else " not"
|
||||
# _deb("path for %s%s in cache" % (p, s))
|
||||
# if not ret:
|
||||
# return False
|
||||
# ret = cache.dataincache(p)
|
||||
# s = "" if ret else " not"
|
||||
# _deb("data for %s%s in cache" % (p, s))
|
||||
# return ret
|
||||
# def trystore(p):
|
||||
# _deb("== STORE test for %s" % p)
|
||||
# cache.store(p, b"my OCR'd text is one line\n", force=False)
|
||||
# def tryget(p):
|
||||
# _deb("== GET test for %s" % p)
|
||||
# incache, data = cache.get(p)
|
||||
# if incache:
|
||||
# _deb("Data from cache [%s]" % data)
|
||||
# else:
|
||||
# _deb("Data was not found in cache")
|
||||
# return incache, data
|
||||
# if False:
|
||||
# path = sys.argv[1]
|
||||
# incache, data = tryget(path)
|
||||
# if not incache:
|
||||
# trystore(path)
|
||||
#
|
||||
opts, args = getopt.getopt(sys.argv[1:], "h", ["help", "purge", "purgedata", "store", "get"])
|
||||
purgedata = False
|
||||
purge = False
|
||||
|
||||
for opt, arg in opts:
|
||||
if opt in ['-h', '--help']:
|
||||
Usage(sys.stdout)
|
||||
elif opt in ['--purgedata']:
|
||||
purgedata = True
|
||||
elif opt in ['--purge']:
|
||||
if len(args) != 0:
|
||||
Usage()
|
||||
purge = True
|
||||
elif opt in ['--store']:
|
||||
if len(args) != 2:
|
||||
Usage()
|
||||
imgdatapath = args[0]
|
||||
ocrdatapath = args[1]
|
||||
ocrdata = open(ocrdatapath, "rb").read()
|
||||
cache.store(imgdatapath, ocrdata, force=False)
|
||||
sys.exit(0)
|
||||
elif opt in ['--get']:
|
||||
if len(args) != 1:
|
||||
Usage()
|
||||
imgdatapath = args[0]
|
||||
incache, data = cache.get(imgdatapath)
|
||||
if incache:
|
||||
print(f"OCR data from cache {data}")
|
||||
sys.exit(0)
|
||||
else:
|
||||
print("OCR Data was not found in cache", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
else:
|
||||
print(f"Unknown option {opt}", file=sys.stderr)
|
||||
Usage()
|
||||
|
||||
# End options. Need purging ?
|
||||
if purge:
|
||||
cache.purgepaths()
|
||||
if purgedata:
|
||||
cache.purgedata()
|
||||
|
||||
Usage()
|
||||
|
||||
|
||||
@ -21,7 +21,6 @@
|
||||
|
||||
import os
|
||||
import sys
|
||||
import atexit
|
||||
import tempfile
|
||||
import subprocess
|
||||
import glob
|
||||
@ -38,39 +37,28 @@ _okexts = ('.tif', '.tiff', '.jpg', '.png', '.jpeg')
|
||||
|
||||
tesseractcmd = None
|
||||
pdftoppmcmd = None
|
||||
|
||||
pdftocairocmd = None
|
||||
|
||||
def _deb(s):
|
||||
rclexecm.logmsg(s)
|
||||
|
||||
|
||||
def vacuumdir(dir):
|
||||
if dir:
|
||||
for fn in os.listdir(dir):
|
||||
path = os.path.join(dir, fn)
|
||||
if os.path.isfile(path):
|
||||
os.unlink(path)
|
||||
return True
|
||||
|
||||
rclexecm.logmsg("rclocrtesseract: %s" % s)
|
||||
|
||||
tmpdir = None
|
||||
|
||||
def _maybemaketmpdir():
|
||||
global tmpdir
|
||||
if tmpdir:
|
||||
if not vacuumdir(tmpdir):
|
||||
_deb("openfile: vacuumdir %s failed" % tmpdir)
|
||||
if not tmpdir.vacuumdir():
|
||||
_deb("openfile: vacuumdir %s failed" % tmpdir.getpath())
|
||||
return False
|
||||
else:
|
||||
tmpdir = tempfile.mkdtemp(prefix='rclmpdf')
|
||||
tmpdir = rclexecm.SafeTmpDir("rclocrtesseract")
|
||||
|
||||
|
||||
def finalcleanup():
|
||||
def cleanocr():
|
||||
global tmpdir
|
||||
if tmpdir:
|
||||
vacuumdir(tmpdir)
|
||||
os.rmdir(tmpdir)
|
||||
|
||||
|
||||
atexit.register(finalcleanup)
|
||||
del tmpdir
|
||||
tmpdir = None
|
||||
|
||||
|
||||
# Return true if tesseract and the appropriate conversion program for
|
||||
@ -107,12 +95,16 @@ def ocrpossible(config, path):
|
||||
# legacy code used pdftoppm for some reason, and it appears
|
||||
# that the newest builds from conda-forge do not include
|
||||
# pdftocairo. So stay with pdftoppm.
|
||||
global pdftoppmcmd
|
||||
if not pdftoppmcmd:
|
||||
pdftoppmcmd = rclexecm.which("pdftoppm")
|
||||
if not pdftoppmcmd:
|
||||
pdftoppmcmd = rclexecm.which("poppler/pdftoppm")
|
||||
if pdftoppmcmd:
|
||||
global pdftoppmcmd, pdftocairocmd
|
||||
if not pdftoppmcmd and not pdftocairocmd:
|
||||
pdftocairocmd = rclexecm.which("pdftocairo")
|
||||
if not pdftocairocmd:
|
||||
pdftocairocmd = rclexecm.which("poppler/pdftocairo")
|
||||
if not pdftocairocmd:
|
||||
pdftoppmcmd = rclexecm.which("pdftoppm")
|
||||
if not pdftoppmcmd:
|
||||
pdftoppmcmd = rclexecm.which("poppler/pdftoppm")
|
||||
if pdftoppmcmd or pdftocairocmd:
|
||||
return True
|
||||
|
||||
return False
|
||||
@ -169,14 +161,17 @@ def _pdftesseract(config, path):
|
||||
|
||||
tesseractlang = _guesstesseractlang(config, path)
|
||||
|
||||
#tesserrorfile = os.path.join(tmpdir, "tesserrorfile")
|
||||
tmpfile = os.path.join(tmpdir, "ocrXXXXXX")
|
||||
#tesserrorfile = os.path.join(tmpdir.getpath(), "tesserrorfile")
|
||||
tmpfile = os.path.join(tmpdir.getpath(), "ocrXXXXXX")
|
||||
|
||||
# Split pdf pages
|
||||
try:
|
||||
vacuumdir(tmpdir)
|
||||
cmd = [pdftoppmcmd, "-r", "300", path, tmpfile]
|
||||
#_deb("Executing %s" % cmd)
|
||||
tmpdir.vacuumdir()
|
||||
if pdftocairocmd:
|
||||
cmd = [pdftocairocmd, "-tiff", "-tiffcompression", "lzw", "-r", "300", path, tmpfile]
|
||||
else:
|
||||
cmd = [pdftoppmcmd, "-r", "300", path, tmpfile]
|
||||
#_deb("Executing %s" % cmd)
|
||||
subprocess.check_call(cmd)
|
||||
except Exception as e:
|
||||
_deb("%s failed: %s" % (pdftoppmcmd,e))
|
||||
@ -186,8 +181,8 @@ def _pdftesseract(config, path):
|
||||
# system is full. There is no really good way to check for
|
||||
# this. We consider any empty file to signal an error
|
||||
|
||||
ppmfiles = glob.glob(tmpfile + "*")
|
||||
for f in ppmfiles:
|
||||
pages = glob.glob(tmpfile + "*")
|
||||
for f in pages:
|
||||
size = os.path.getsize(f)
|
||||
if os.path.getsize(f) == 0:
|
||||
_deb("pdftoppm created empty files. "
|
||||
@ -203,7 +198,7 @@ def _pdftesseract(config, path):
|
||||
except:
|
||||
pass
|
||||
|
||||
for f in sorted(ppmfiles):
|
||||
for f in sorted(pages):
|
||||
out = b''
|
||||
try:
|
||||
out = subprocess.check_output(
|
||||
|
||||
@ -1,19 +1,38 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import print_function
|
||||
# Copyright (C) 2020-2022 J.F.Dockes
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the
|
||||
# Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
# Read an org-mode file, break it into "documents" along the separator lines
|
||||
# and interface with recoll execm
|
||||
'''Read an org-mode file, optionally break it into "subdocs" along level 1 headings'''
|
||||
|
||||
import rclexecm
|
||||
import sys
|
||||
import re
|
||||
|
||||
import rclexecm
|
||||
import rclconfig
|
||||
import conftree
|
||||
|
||||
class OrgModeExtractor:
|
||||
def __init__(self, em):
|
||||
self.file = ""
|
||||
self.contents = []
|
||||
self.em = em
|
||||
|
||||
self.selftext = ""
|
||||
self.docs = []
|
||||
config = rclconfig.RclConfig()
|
||||
self.createsubdocs = conftree.valToBool(config.getConfParam("orgmodesubdocs"))
|
||||
|
||||
def extractone(self, index):
|
||||
if index >= len(self.docs):
|
||||
return(False, "", "", True)
|
||||
@ -23,7 +42,7 @@ class OrgModeExtractor:
|
||||
iseof = rclexecm.RclExecM.noteof
|
||||
if self.currentindex >= len(self.docs) -1:
|
||||
iseof = rclexecm.RclExecM.eofnext
|
||||
self.em.setmimetype("text/plain")
|
||||
self.em.setmimetype("text/x-orgmode-sub")
|
||||
try:
|
||||
self.em.setfield("title", docdata.splitlines()[0])
|
||||
except:
|
||||
@ -33,7 +52,6 @@ class OrgModeExtractor:
|
||||
###### File type handler api, used by rclexecm ---------->
|
||||
def openfile(self, params):
|
||||
self.file = params["filename"]
|
||||
|
||||
try:
|
||||
data = open(self.file, "rb").read()
|
||||
except Exception as e:
|
||||
@ -41,9 +59,15 @@ class OrgModeExtractor:
|
||||
return False
|
||||
|
||||
self.currentindex = -1
|
||||
if not self.createsubdocs:
|
||||
self.selftext = data
|
||||
return True
|
||||
|
||||
res = rb'''^\* '''
|
||||
self.docs = re.compile(res, flags=re.MULTILINE).split(data)
|
||||
# Note that there can be text before the first heading. This goes into the self doc,
|
||||
# because it's not a proper entry.
|
||||
self.selftext = self.docs[0]
|
||||
self.docs = self.docs[1:]
|
||||
#self.em.rclog("openfile: Entry count: %d" % len(self.docs))
|
||||
return True
|
||||
@ -59,6 +83,8 @@ class OrgModeExtractor:
|
||||
return self.extractone(index)
|
||||
|
||||
def getnext(self, params):
|
||||
if not self.createsubdocs:
|
||||
return (True, self.selftext, "", rclexecm.RclExecM.eofnext)
|
||||
|
||||
if self.currentindex == -1:
|
||||
# Return "self" doc
|
||||
@ -68,7 +94,7 @@ class OrgModeExtractor:
|
||||
eof = rclexecm.RclExecM.eofnext
|
||||
else:
|
||||
eof = rclexecm.RclExecM.noteof
|
||||
return (True, "", "", eof)
|
||||
return (True, self.selftext, "", eof)
|
||||
|
||||
if self.currentindex >= len(self.docs):
|
||||
self.em.rclog("getnext: EOF hit")
|
||||
|
||||
@ -33,6 +33,7 @@ import glob
|
||||
import traceback
|
||||
import atexit
|
||||
import signal
|
||||
import time
|
||||
|
||||
import rclexecm
|
||||
import rclconfig
|
||||
@ -66,11 +67,17 @@ _htmlprefix =b'''<html><head>
|
||||
_htmlsuffix = b'''</pre></body></html>'''
|
||||
|
||||
def finalcleanup():
|
||||
global tmpdir
|
||||
if tmpdir:
|
||||
vacuumdir(tmpdir)
|
||||
os.rmdir(tmpdir)
|
||||
del tmpdir
|
||||
tmpdir = None
|
||||
|
||||
ocrproc = None
|
||||
def signal_handler(signal, frame):
|
||||
global ocrproc
|
||||
if ocrproc:
|
||||
ocrproc.wait()
|
||||
ocrproc = None
|
||||
sys.exit(1)
|
||||
|
||||
atexit.register(finalcleanup)
|
||||
@ -85,14 +92,6 @@ except: pass
|
||||
try: signal.signal(signal.SIGTERM, signal_handler)
|
||||
except: pass
|
||||
|
||||
def vacuumdir(dir):
|
||||
if dir:
|
||||
for fn in os.listdir(dir):
|
||||
path = os.path.join(dir, fn)
|
||||
if os.path.isfile(path):
|
||||
os.unlink(path)
|
||||
return True
|
||||
|
||||
class PDFExtractor:
|
||||
def __init__(self, em):
|
||||
self.currentindex = 0
|
||||
@ -213,7 +212,7 @@ class PDFExtractor:
|
||||
# no big deal
|
||||
return True
|
||||
try:
|
||||
vacuumdir(tmpdir)
|
||||
tmpdir.vacuumdir()
|
||||
# Note: the java version of pdftk sometimes/often fails
|
||||
# here with writing to stdout:
|
||||
# Error occurred during initialization of VM
|
||||
@ -223,9 +222,9 @@ class PDFExtractor:
|
||||
# output, until we fix the error or preferably find a way
|
||||
# to do it with poppler...
|
||||
subprocess.check_call(
|
||||
[self.pdftk, self.filename, "unpack_files", "output",
|
||||
tmpdir], stdout=sys.stderr)
|
||||
self.attachlist = sorted(os.listdir(tmpdir))
|
||||
[self.pdftk, self.filename, "unpack_files", "output", tmpdir.getpath()],
|
||||
stdout=sys.stderr)
|
||||
self.attachlist = sorted(os.listdir(tmpdir.getpath()))
|
||||
return True
|
||||
except Exception as e:
|
||||
self.em.rclog("extractAttach: failed: %s" % e)
|
||||
@ -399,11 +398,12 @@ class PDFExtractor:
|
||||
def maybemaketmpdir(self):
|
||||
global tmpdir
|
||||
if tmpdir:
|
||||
if not vacuumdir(tmpdir):
|
||||
self.em.rclog("openfile: vacuumdir %s failed" % tmpdir)
|
||||
if not tmpdir.vacuumdir():
|
||||
self.em.rclog("openfile: vacuumdir %s failed" % tmpdir.getpath())
|
||||
return False
|
||||
else:
|
||||
tmpdir = tempfile.mkdtemp(prefix='rclmpdf')
|
||||
tmpdir = rclexecm.SafeTmpDir("rclpdf", self.em)
|
||||
#self.em.rclog("Using temporary directory %s" % tmpdir.getpath())
|
||||
if self.pdftk and re.match("/snap/", self.pdftk):
|
||||
# We know this is Unix (Ubuntu actually). Check that tmpdir
|
||||
# belongs to the user as snap commands can't use /tmp to share
|
||||
@ -415,9 +415,7 @@ class PDFExtractor:
|
||||
if st.st_uid == os.getuid():
|
||||
ok = True
|
||||
if not ok:
|
||||
self.em.rclog(
|
||||
"pdftk is a snap command and needs TMPDIR to be "
|
||||
"a directory you own")
|
||||
self.em.rclog("pdftk is a snap command and needs TMPDIR to be owned by you")
|
||||
|
||||
def _process_annotations(self, html):
|
||||
doc = Poppler.Document.new_from_file(
|
||||
@ -491,9 +489,11 @@ class PDFExtractor:
|
||||
s = self.config.getConfParam("pdfocr")
|
||||
if rclexecm.configparamtrue(s):
|
||||
try:
|
||||
cmd = [sys.executable, os.path.join(_execdir, "rclocr.py"),
|
||||
self.filename]
|
||||
data = subprocess.check_output(cmd)
|
||||
cmd = [sys.executable, os.path.join(_execdir, "rclocr.py"), self.filename]
|
||||
global ocrproc
|
||||
ocrproc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
|
||||
data, stderr = ocrproc.communicate()
|
||||
ocrproc = None
|
||||
html = _htmlprefix + rclexecm.htmlescape(data) + _htmlsuffix
|
||||
except Exception as e:
|
||||
self.em.rclog("%s failed: %s" % (cmd, e))
|
||||
@ -520,7 +520,9 @@ class PDFExtractor:
|
||||
if not self.attextractdone:
|
||||
if not self.extractAttach():
|
||||
return (False, "", "", rclexecm.RclExecM.eofnow)
|
||||
path = os.path.join(tmpdir, ipath)
|
||||
if type(ipath) != type(""):
|
||||
ipath = ipath.decode('utf-8')
|
||||
path = os.path.join(tmpdir.getpath(), ipath)
|
||||
if os.path.isfile(path):
|
||||
f = open(path, "rb")
|
||||
docdata = f.read();
|
||||
|
||||
@ -2,8 +2,6 @@
|
||||
|
||||
# Recoll PPT text extractor
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import rclexecm
|
||||
import rclexec1
|
||||
import re
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
#!/usr/bin/python3
|
||||
#!/usr/bin/env python3
|
||||
#################################
|
||||
# Copyright (C) 2019 J.F.Dockes
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
@ -28,12 +28,14 @@ import os
|
||||
import pathlib
|
||||
import email.parser
|
||||
import email.policy
|
||||
import email.message
|
||||
import mailbox
|
||||
import subprocess
|
||||
import rclexecm
|
||||
import rclconfig
|
||||
import conftree
|
||||
import base64
|
||||
import traceback
|
||||
|
||||
_mswindows = (sys.platform == "win32" or sys.platform == "msys")
|
||||
if _mswindows:
|
||||
@ -95,14 +97,26 @@ class EmailBuilder(object):
|
||||
newmsg = email.message.EmailMessage(policy=email.policy.default)
|
||||
headerstr = self.headers.decode("UTF-8", errors='replace')
|
||||
# print("%s" % headerstr)
|
||||
headers = self.parser.parsestr(headerstr, headersonly=True)
|
||||
try:
|
||||
headers = self.parser.parsestr(headerstr, headersonly=True)
|
||||
except:
|
||||
# This sometimes fails, for example with 'day is out of range for month'. Try to go on
|
||||
# without headers
|
||||
headers = email.message.EmailMessage()
|
||||
|
||||
#self.log("EmailBuilder: content-type %s" % headers['content-type'])
|
||||
for nm in ('from', 'subject'):
|
||||
for nm in ('from', 'subject', 'date'):
|
||||
if nm in headers:
|
||||
newmsg.add_header(nm, headers[nm])
|
||||
try:
|
||||
newmsg.add_header(nm, headers[nm])
|
||||
except:
|
||||
pass
|
||||
|
||||
for h in ('to', 'cc'):
|
||||
tolist = headers.get_all(h)
|
||||
try:
|
||||
tolist = headers.get_all(h)
|
||||
except:
|
||||
tolist = []
|
||||
if not tolist:
|
||||
continue
|
||||
alldests = ""
|
||||
@ -113,7 +127,10 @@ class EmailBuilder(object):
|
||||
alldests += sd + ", "
|
||||
if alldests:
|
||||
alldests = alldests.rstrip(", ")
|
||||
newmsg.add_header(h, alldests)
|
||||
try:
|
||||
newmsg.add_header(h, alldests)
|
||||
except:
|
||||
pass
|
||||
|
||||
# Decoding the body: the .pst contains the text value decoded from qp
|
||||
# or base64 (at least that's what libpff sends). Unfortunately, it
|
||||
@ -135,8 +152,13 @@ class EmailBuilder(object):
|
||||
charset = headers.get_content_charset()
|
||||
body = ''
|
||||
if charset:
|
||||
body = self.body.decode(charset, errors='replace')
|
||||
#self.log("DECODE FROM HEADER CHARSET %s SUCCEEDED"% charset)
|
||||
if charset == 'unicode':
|
||||
charset = 'utf-16'
|
||||
try:
|
||||
body = self.body.decode(charset, errors='replace')
|
||||
#self.log("DECODE FROM HEADER CHARSET %s SUCCEEDED"% charset)
|
||||
except:
|
||||
pass
|
||||
else:
|
||||
try:
|
||||
body = self.body.decode('utf-8')
|
||||
@ -377,6 +399,7 @@ class PstExtractor(object):
|
||||
return(False, "", "", rclexecm.RclExecM.eofnow)
|
||||
except Exception as ex:
|
||||
self.em.rclog("getnext: exception: %s" % ex)
|
||||
traceback.print_exc()
|
||||
return(False, "", "", rclexecm.RclExecM.eofnow)
|
||||
|
||||
return (True, doc, ipath, rclexecm.RclExecM.noteof)
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
#!/usr/bin/python3
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# Rclpython is based on "colorize.py" from:
|
||||
# http://chrisarndt.de/en/software/python/colorize.html
|
||||
@ -51,6 +51,12 @@ _css_classes = {
|
||||
_TEXT: 'text',
|
||||
}
|
||||
|
||||
# python3.8 token.py sends an ENCODING token which we ignore
|
||||
try:
|
||||
token_encoding_type = token.ENCODING
|
||||
except:
|
||||
token_encoding_type = 62
|
||||
|
||||
_HTML_HEADER = """\
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
|
||||
"http://www.w3.org/TR/html4/loose.dtd">
|
||||
@ -146,17 +152,21 @@ class Parser:
|
||||
def __call__(self, toktype, toktext, startpos, endpos, line):
|
||||
""" Token handler.
|
||||
"""
|
||||
srow, scol = startpos
|
||||
erow, ecol = endpos
|
||||
if 0:
|
||||
print("type %s %s text %s start %s %s end %s %s<br>\n" % \
|
||||
(toktype, token.tok_name[toktype], toktext, \
|
||||
srow, scol,erow,ecol))
|
||||
srow, scol = startpos
|
||||
erow, ecol = endpos
|
||||
srow, scol,erow,ecol), file=sys.stderr)
|
||||
|
||||
# calculate new positions
|
||||
oldpos = self.pos
|
||||
newpos = self.lines[srow] + scol
|
||||
self.pos = newpos + len(toktext)
|
||||
|
||||
if toktype == token_encoding_type:
|
||||
return
|
||||
|
||||
# handle newlines
|
||||
if toktype in [token.NEWLINE, tokenize.NL]:
|
||||
self.out.write(b'\n')
|
||||
|
||||
@ -18,8 +18,6 @@
|
||||
# Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import sys
|
||||
import rclexecm
|
||||
import os
|
||||
@ -60,7 +58,7 @@ except Exception as ex:
|
||||
# (https://www.rarlab.com/rar_add.htm). The unrar-free version fails
|
||||
# with the message "Failed the read enough data"
|
||||
#
|
||||
# This is identical to rclzip except I did a search/replace from zip
|
||||
# This is identical to rclzip.py except I did a search/replace from zip
|
||||
# to rar, and changed this comment.
|
||||
class RarExtractor:
|
||||
def __init__(self, em):
|
||||
@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import print_function
|
||||
|
||||
import rclexecm
|
||||
import rclexec1
|
||||
|
||||
@ -2,12 +2,10 @@
|
||||
|
||||
# Tar-file filter for Recoll
|
||||
# Thanks to Recoll user Martin Ziegler
|
||||
# This is a modified version of /usr/share/recoll/filters/rclzip
|
||||
# This is a modified version of /usr/share/recoll/filters/rclzip.py
|
||||
# It works not only for tar-files, but automatically for gzipped and
|
||||
# bzipped tar-files at well.
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import rclexecm
|
||||
|
||||
try:
|
||||
@ -18,8 +18,6 @@
|
||||
# Wrapping a text file. Recoll does it internally in most cases, but
|
||||
# this is for use by another filter.
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import rclexecm
|
||||
import sys
|
||||
from rclbasehandler import RclBaseHandler
|
||||
|
||||
@ -2,7 +2,6 @@
|
||||
"""Index text lines as document (execm handler sample). This exists
|
||||
to demonstrate the execm interface and is not meant to be useful or
|
||||
efficient"""
|
||||
from __future__ import print_function
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
||||
@ -1,5 +1,4 @@
|
||||
# No shebang: this is only used on Windows. We use a shell script on Linux
|
||||
from __future__ import print_function
|
||||
|
||||
import rclexecm
|
||||
import sys
|
||||
|
||||
@ -2,8 +2,6 @@
|
||||
|
||||
# WAR web archive filter for recoll. War file are gzipped tar files
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import rclexecm
|
||||
import tarfile
|
||||
|
||||
@ -16,7 +16,6 @@
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
# Code to extract XMP tags using libexempi and python-xmp
|
||||
from __future__ import print_function
|
||||
|
||||
can_xmp = True
|
||||
try:
|
||||
|
||||
@ -18,11 +18,11 @@
|
||||
|
||||
# Zip file extractor for Recoll
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import posixpath
|
||||
import fnmatch
|
||||
import datetime
|
||||
|
||||
import rclexecm
|
||||
from zipfile import ZipFile
|
||||
|
||||
@ -49,7 +49,7 @@ if not hasrclconfig:
|
||||
# and stores it in the catalog as an unicode object. Else it uses the
|
||||
# binary string, which it decodes as CP437 (zip standard).
|
||||
#
|
||||
# When reading the file, the input file name is used by rclzip
|
||||
# When reading the file, the input file name is used by rclzip.py
|
||||
# directly as an index into the catalog.
|
||||
#
|
||||
# When we send the file name data to the indexer, we have to serialize
|
||||
@ -119,6 +119,8 @@ class ZipExtractor:
|
||||
# element).
|
||||
filename = posixpath.basename(ipath)
|
||||
self.em.setfield("filename", filename)
|
||||
dt = datetime.datetime(*info.date_time)
|
||||
self.em.setfield("modificationdate", str(int(dt.timestamp())))
|
||||
except:
|
||||
pass
|
||||
ok = True
|
||||
@ -151,14 +153,11 @@ class ZipExtractor:
|
||||
if skipped is not None:
|
||||
self.skiplist += conftree.stringToStrings(skipped)
|
||||
try:
|
||||
if rclexecm.PY3:
|
||||
# Note: py3 ZipFile wants an str file name, which
|
||||
# is wrong: file names are binary. But it accepts an
|
||||
# open file, and open() has no such restriction
|
||||
self.f = open(filename, 'rb')
|
||||
self.zip = ZipFile(self.f)
|
||||
else:
|
||||
self.zip = ZipFile(filename)
|
||||
# Note: py3 ZipFile wants an str file name, which
|
||||
# is wrong: file names are binary. But it accepts an
|
||||
# open file, and open() has no such restriction
|
||||
self.f = open(filename, 'rb')
|
||||
self.zip = ZipFile(self.f)
|
||||
return True
|
||||
except Exception as err:
|
||||
self.em.rclog("openfile: failed: [%s]" % err)
|
||||
@ -1,5 +1,5 @@
|
||||
#!/usr/bin/env python3
|
||||
# Copyright (C) 2017 J.F.Dockes
|
||||
#!/usr/bin/python3
|
||||
# Copyright (C) 2017-2022 J.F.Dockes
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
@ -31,6 +31,7 @@ but it can also be run by hand.
|
||||
import sys
|
||||
import os
|
||||
import re
|
||||
import getopt
|
||||
try:
|
||||
from hashlib import md5 as md5
|
||||
except:
|
||||
@ -94,28 +95,44 @@ def list_all_files(dir):
|
||||
return mfiles,cfiles
|
||||
|
||||
#######################
|
||||
def msg(s):
|
||||
print(f"{s}", file=sys.stderr)
|
||||
def usage():
|
||||
print("Usage: recoll-we-move-files.py [<downloaddir>]", file=sys.stderr)
|
||||
msg("Usage: recoll-we-move-files.py [-c <recollconfigdir>]")
|
||||
msg(" The script needs the recoll configuration directory. This can be set either through")
|
||||
msg(" the RECOLL_CONFDIR environment variable or the '-c' command line option (which takes")
|
||||
msg(" precedence). If none is set, the default configuration directory will be used.")
|
||||
sys.exit(1)
|
||||
|
||||
config = rclconfig.RclConfig()
|
||||
|
||||
# Source dir is parameter, else from config else default Downloads directory
|
||||
opts, args = getopt.getopt(sys.argv[1:], "c:")
|
||||
if not len(args) == 0:
|
||||
usage()
|
||||
|
||||
configdir = None
|
||||
for opt,val in opts:
|
||||
#logdeb(f"opt {opt} val {val}")
|
||||
if opt == "-c":
|
||||
configdir = val
|
||||
if not os.path.isdir(val):
|
||||
msg(f"{val} is not a directory")
|
||||
usage()
|
||||
else:
|
||||
usage()
|
||||
|
||||
config = rclconfig.RclConfig(argcnf=configdir)
|
||||
|
||||
# Get the directory where the browser extension creates the page files. Our user can set it as a
|
||||
# subdirectory of the default Downloads directory, for tidyness
|
||||
downloadsdir = config.getConfParam("webdownloadsdir")
|
||||
if not downloadsdir:
|
||||
downloadsdir = "~/Downloads"
|
||||
downloadsdir = os.path.expanduser(downloadsdir)
|
||||
if not os.path.isdir(downloadsdir):
|
||||
msg(f"Downloads directory {downloadsdir} does not exist")
|
||||
sys.exit(1)
|
||||
|
||||
if len(sys.argv) == 2:
|
||||
mydir = sys.argv[1]
|
||||
elif len(sys.argv) == 1:
|
||||
mydir = downloadsdir
|
||||
else:
|
||||
usage()
|
||||
if not os.path.isdir(mydir):
|
||||
usage()
|
||||
|
||||
# Get target webqueue recoll directory from recoll configuration
|
||||
# Get the target recoll webqueue directory, into which we are going to move the downloaded files.
|
||||
webqueuedir = config.getConfParam("webqueuedir")
|
||||
if not webqueuedir:
|
||||
if _mswindows:
|
||||
@ -125,10 +142,11 @@ if not webqueuedir:
|
||||
webqueuedir = os.path.expanduser(webqueuedir)
|
||||
os.makedirs(webqueuedir, exist_ok = True)
|
||||
|
||||
# logdeb("webqueuedir is %s" % webqueuedir)
|
||||
|
||||
#logdeb(f"recoll confdir [{configdir}] downloadsdir [{downloadsdir}] webqueuedir [{webqueuedir}]")
|
||||
|
||||
# Get the lists of all files created by the browser addon
|
||||
mfiles, cfiles = list_all_files(mydir)
|
||||
mfiles, cfiles = list_all_files(downloadsdir)
|
||||
|
||||
# Only keep the last version
|
||||
mfiles = delete_previous_instances(mfiles, downloadsdir)
|
||||
@ -143,7 +161,7 @@ cfiles = delete_previous_instances(cfiles, downloadsdir)
|
||||
# The old plugin created the data first, so we move data then meta
|
||||
for hash in cfiles.keys():
|
||||
if hash in mfiles.keys():
|
||||
newname = "firefox-recoll-web-"+hash
|
||||
newname = "firefox-recoll-web-" + hash
|
||||
shutil.move(os.path.join(downloadsdir, cfiles[hash]),
|
||||
os.path.join(webqueuedir, newname))
|
||||
shutil.move(os.path.join(downloadsdir, mfiles[hash]),
|
||||
|
||||
@ -23,8 +23,6 @@
|
||||
# the minimum version supported.
|
||||
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import sys
|
||||
import xml.sax
|
||||
|
||||
|
||||
@ -61,8 +61,7 @@ public:
|
||||
EXEDocFetcher::EXEDocFetcher(const EXEDocFetcher::Internal& _m)
|
||||
{
|
||||
m = new Internal(_m);
|
||||
LOGDEB("EXEDocFetcher::EXEDocFetcher: fetch is " <<
|
||||
stringsToString(m->sfetch) << "\n");
|
||||
LOGDEB("EXEDocFetcher::EXEDocFetcher: fetch is " << stringsToString(m->sfetch) << "\n");
|
||||
}
|
||||
|
||||
bool EXEDocFetcher::fetch(RclConfig*, const Rcl::Doc& idoc, RawDoc& out)
|
||||
@ -77,8 +76,7 @@ bool EXEDocFetcher::makesig(RclConfig*, const Rcl::Doc& idoc, string& sig)
|
||||
}
|
||||
|
||||
// Lookup bckid in the config and create an appropriate fetcher.
|
||||
std::unique_ptr<EXEDocFetcher> exeDocFetcherMake(RclConfig *config,
|
||||
const string& bckid)
|
||||
std::unique_ptr<EXEDocFetcher> exeDocFetcherMake(RclConfig *config, const string& bckid)
|
||||
{
|
||||
// The config we only read once, not gonna change.
|
||||
static ConfSimple *bconf;
|
||||
|
||||
@ -40,6 +40,8 @@ public:
|
||||
class Internal;
|
||||
EXEDocFetcher(const Internal&);
|
||||
virtual ~EXEDocFetcher() {}
|
||||
EXEDocFetcher(const EXEDocFetcher&) = delete;
|
||||
EXEDocFetcher& operator=(const EXEDocFetcher&) = delete;
|
||||
|
||||
virtual bool fetch(RclConfig* cnf, const Rcl::Doc& idoc, RawDoc& out);
|
||||
/** Calls stat to retrieve file signature data */
|
||||
@ -51,7 +53,6 @@ private:
|
||||
};
|
||||
|
||||
// Lookup bckid in the config and create an appropriate fetcher.
|
||||
std::unique_ptr<EXEDocFetcher> exeDocFetcherMake(RclConfig *config,
|
||||
const std::string& bckid);
|
||||
std::unique_ptr<EXEDocFetcher> exeDocFetcherMake(RclConfig *config, const std::string& bckid);
|
||||
|
||||
#endif /* _EXEFETCHER_H_INCLUDED_ */
|
||||
|
||||
@ -72,18 +72,18 @@ public:
|
||||
* @param idoc the data gathered from the index for this doc (udi/ipath)
|
||||
* @param sig output.
|
||||
*/
|
||||
virtual bool makesig(RclConfig* cnf, const Rcl::Doc& idoc,
|
||||
std::string& sig) = 0;
|
||||
virtual bool makesig(RclConfig* cnf, const Rcl::Doc& idoc, std::string& sig) = 0;
|
||||
enum Reason{FetchOk, FetchNotExist, FetchNoPerm, FetchOther};
|
||||
virtual Reason testAccess(RclConfig*, const Rcl::Doc&) {
|
||||
return FetchOther;
|
||||
}
|
||||
DocFetcher() {}
|
||||
virtual ~DocFetcher() {}
|
||||
DocFetcher(const DocFetcher&) = delete;
|
||||
DocFetcher& operator=(const DocFetcher&) = delete;
|
||||
};
|
||||
|
||||
/** Return an appropriate fetcher object given the backend string
|
||||
* identifier inside idoc*/
|
||||
std::unique_ptr<DocFetcher> docFetcherMake(RclConfig *config,
|
||||
const Rcl::Doc& idoc);
|
||||
/** Return an appropriate fetcher object given the backend string identifier inside idoc*/
|
||||
std::unique_ptr<DocFetcher> docFetcherMake(RclConfig *config, const Rcl::Doc& idoc);
|
||||
|
||||
#endif /* _FETCHER_H_INCLUDED_ */
|
||||
|
||||
@ -23,14 +23,18 @@
|
||||
/**
|
||||
* The file-system fetcher:
|
||||
*/
|
||||
class FSDocFetcher : public DocFetcher{
|
||||
class FSDocFetcher : public DocFetcher {
|
||||
public:
|
||||
/** FSDocFetcher::fetch always returns a file name */
|
||||
virtual bool fetch(RclConfig* cnf, const Rcl::Doc& idoc, RawDoc& out);
|
||||
|
||||
/** Calls stat to retrieve file signature data */
|
||||
virtual bool makesig(RclConfig* cnf,const Rcl::Doc& idoc, std::string& sig);
|
||||
virtual DocFetcher::Reason testAccess(RclConfig* cnf, const Rcl::Doc& idoc);
|
||||
FSDocFetcher() {}
|
||||
virtual ~FSDocFetcher() {}
|
||||
FSDocFetcher(const FSDocFetcher&) = delete;
|
||||
FSDocFetcher& operator=(const FSDocFetcher&) = delete;
|
||||
};
|
||||
|
||||
extern void fsmakesig(const struct PathStat *stp, std::string& out);
|
||||
|
||||
@ -195,6 +195,7 @@ bool FsIndexer::index(int flags)
|
||||
m_walker.setMaxDepth(2);
|
||||
}
|
||||
|
||||
bool walkok(true);
|
||||
for (const auto& topdir : m_tdl) {
|
||||
LOGDEB("FsIndexer::index: Indexing " << topdir << " into " <<
|
||||
getDbDir() << "\n");
|
||||
@ -229,29 +230,46 @@ bool FsIndexer::index(int flags)
|
||||
if (m_walker.walk(topdir, *this) != FsTreeWalker::FtwOk) {
|
||||
LOGERR("FsIndexer::index: error while indexing " << topdir <<
|
||||
": " << m_walker.getReason() << "\n");
|
||||
return false;
|
||||
// DO NOT return: we need to flush the queues before the Db can be closed !
|
||||
walkok = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef IDX_THREADS
|
||||
if (m_haveInternQ)
|
||||
m_iwqueue.waitIdle();
|
||||
if (m_haveSplitQ)
|
||||
m_dwqueue.waitIdle();
|
||||
m_db->waitUpdIdle();
|
||||
#endif // IDX_THREADS
|
||||
|
||||
shutdownQueues(walkok);
|
||||
if (m_missing) {
|
||||
string missing;
|
||||
m_missing->getMissingDescription(missing);
|
||||
if (!missing.empty()) {
|
||||
LOGINFO("FsIndexer::index missing helper program(s):\n" <<
|
||||
missing << "\n");
|
||||
LOGINFO("FsIndexer::index missing helper program(s):\n" << missing << "\n");
|
||||
}
|
||||
m_config->storeMissingHelperDesc(missing);
|
||||
}
|
||||
LOGINFO("fsindexer index time: " << chron.millis() << " mS\n");
|
||||
return true;
|
||||
LOGINFO("fsindexer: status: " << walkok << " index time: " << chron.millis() << " mS\n");
|
||||
return walkok;
|
||||
}
|
||||
|
||||
void FsIndexer::shutdownQueues(bool ok)
|
||||
{
|
||||
#ifdef IDX_THREADS
|
||||
if (!ok) {
|
||||
// Error or more probably interrupt. Discard everything for fast shutdown
|
||||
if (m_haveInternQ) {
|
||||
m_iwqueue.closeShop();
|
||||
}
|
||||
if (m_haveSplitQ) {
|
||||
m_dwqueue.closeShop();
|
||||
}
|
||||
m_db->closeQueue();
|
||||
}
|
||||
if (m_haveInternQ) {
|
||||
m_iwqueue.waitIdle();
|
||||
}
|
||||
if (m_haveSplitQ) {
|
||||
m_dwqueue.waitIdle();
|
||||
}
|
||||
m_db->waitUpdIdle();
|
||||
#endif // IDX_THREADS
|
||||
}
|
||||
|
||||
static bool matchesSkipped(
|
||||
@ -359,7 +377,7 @@ bool FsIndexer::indexFiles(list<string>& files, int flags)
|
||||
FsTreeWalker walker;
|
||||
walker.setSkippedPaths(m_config->getSkippedPaths());
|
||||
|
||||
for (list<string>::iterator it = files.begin(); it != files.end(); ) {
|
||||
for (auto it = files.begin(); it != files.end(); ) {
|
||||
LOGDEB2("FsIndexer::indexFiles: [" << *it << "]\n");
|
||||
|
||||
m_config->setKeyDir(path_getfather(*it));
|
||||
@ -403,22 +421,14 @@ bool FsIndexer::indexFiles(list<string>& files, int flags)
|
||||
|
||||
ret = true;
|
||||
out:
|
||||
#ifdef IDX_THREADS
|
||||
if (m_haveInternQ)
|
||||
m_iwqueue.waitIdle();
|
||||
if (m_haveSplitQ)
|
||||
m_dwqueue.waitIdle();
|
||||
m_db->waitUpdIdle();
|
||||
#endif // IDX_THREADS
|
||||
shutdownQueues(ret);
|
||||
|
||||
// Purge possible orphan documents
|
||||
if (ret == true) {
|
||||
LOGDEB("Indexfiles: purging orphans\n");
|
||||
const vector<string>& purgecandidates = m_purgeCandidates.getCandidates();
|
||||
for (vector<string>::const_iterator it = purgecandidates.begin();
|
||||
it != purgecandidates.end(); it++) {
|
||||
LOGDEB("Indexfiles: purging orphans for " << *it << "\n");
|
||||
m_db->purgeOrphans(*it);
|
||||
for (const auto& udi : m_purgeCandidates.getCandidates()) {
|
||||
LOGDEB("Indexfiles: purging orphans for " << udi << "\n");
|
||||
m_db->purgeOrphans(udi);
|
||||
}
|
||||
#ifdef IDX_THREADS
|
||||
m_db->waitUpdIdle();
|
||||
@ -458,13 +468,7 @@ bool FsIndexer::purgeFiles(list<string>& files)
|
||||
|
||||
ret = true;
|
||||
out:
|
||||
#ifdef IDX_THREADS
|
||||
if (m_haveInternQ)
|
||||
m_iwqueue.waitIdle();
|
||||
if (m_haveSplitQ)
|
||||
m_dwqueue.waitIdle();
|
||||
m_db->waitUpdIdle();
|
||||
#endif // IDX_THREADS
|
||||
shutdownQueues(ret);
|
||||
LOGDEB("FsIndexer::purgeFiles: done\n");
|
||||
return ret;
|
||||
}
|
||||
@ -488,10 +492,9 @@ void FsIndexer::localfieldsfromconf()
|
||||
ConfSimple attrs;
|
||||
m_config->valueSplitAttributes(sfields, value, attrs);
|
||||
vector<string> nmlst = attrs.getNames(cstr_null);
|
||||
for (vector<string>::const_iterator it = nmlst.begin();
|
||||
it != nmlst.end(); it++) {
|
||||
string nm = m_config->fieldCanon(*it);
|
||||
attrs.get(*it, m_localfields[nm]);
|
||||
for (const auto& anm : nmlst) {
|
||||
string nm = m_config->fieldCanon(anm);
|
||||
attrs.get(anm, m_localfields[nm]);
|
||||
LOGDEB2("FsIndexer::localfieldsfromconf: [" << nm << "]->[" <<
|
||||
m_localfields[nm] << "]\n");
|
||||
}
|
||||
@ -499,12 +502,11 @@ void FsIndexer::localfieldsfromconf()
|
||||
|
||||
void FsIndexer::setlocalfields(const map<string, string>& fields, Rcl::Doc& doc)
|
||||
{
|
||||
for (map<string, string>::const_iterator it = fields.begin();
|
||||
it != fields.end(); it++) {
|
||||
for (const auto& field : fields) {
|
||||
// Being chosen by the user, localfields override values from
|
||||
// the filter. The key is already canonic (see
|
||||
// localfieldsfromconf())
|
||||
doc.meta[it->first] = it->second;
|
||||
doc.meta[field.first] = field.second;
|
||||
}
|
||||
}
|
||||
|
||||
@ -840,9 +842,7 @@ FsTreeWalker::Status FsIndexer::processonefile(
|
||||
}
|
||||
}
|
||||
#if defined(HAVE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
|
||||
// See framagit issue 26. If this appears to be a good idea
|
||||
// after all (not sure), we'll need a command line switch to
|
||||
// control it. For now it's compile-time only.
|
||||
// See framagit issue 26. This is off by default and controlled by a command line switch.
|
||||
if (m_cleancache) {
|
||||
int fd = open(fn.c_str(), O_RDONLY);
|
||||
if (fd >= 0) {
|
||||
|
||||
@ -27,7 +27,6 @@
|
||||
#endif // IDX_THREADS
|
||||
|
||||
class FIMissingStore;
|
||||
struct PathStat;
|
||||
|
||||
class DbUpdTask;
|
||||
class InternfileTask;
|
||||
@ -55,6 +54,8 @@ public:
|
||||
*/
|
||||
FsIndexer(RclConfig *cnf, Rcl::Db *db);
|
||||
virtual ~FsIndexer();
|
||||
FsIndexer(const FsIndexer&) = delete;
|
||||
FsIndexer& operator=(const FsIndexer&) = delete;
|
||||
|
||||
/**
|
||||
* Top level file system tree index method for updating a given database.
|
||||
@ -157,6 +158,7 @@ private:
|
||||
processonefile(RclConfig *config, const string &fn,
|
||||
const struct PathStat *,
|
||||
const map<string,string>& localfields);
|
||||
void shutdownQueues(bool);
|
||||
};
|
||||
|
||||
#endif /* _fsindexer_h_included_ */
|
||||
|
||||
@ -57,6 +57,8 @@ class DbIxStatusUpdater {
|
||||
public:
|
||||
DbIxStatusUpdater(const RclConfig *config, bool nox11monitor);
|
||||
virtual ~DbIxStatusUpdater(){}
|
||||
DbIxStatusUpdater(const DbIxStatusUpdater&) = delete;
|
||||
DbIxStatusUpdater& operator=(const DbIxStatusUpdater&) = delete;
|
||||
|
||||
enum Incr {IncrNone, IncrDocsDone = 0x1, IncrFilesDone = 0x2, IncrFileErrors = 0x4};
|
||||
// Change phase/fn and update
|
||||
|
||||
@ -59,12 +59,10 @@ bool runWebFilesMoverScript(RclConfig *config)
|
||||
static string downloadsdir;
|
||||
if (downloadsdir.empty()) {
|
||||
if (!config->getConfParam("webdownloadsdir", downloadsdir)) {
|
||||
downloadsdir = path_tildexpand("~/Downloads");
|
||||
downloadsdir = "~/Downloads";
|
||||
}
|
||||
downloadsdir = path_tildexpand(downloadsdir);
|
||||
}
|
||||
vector<string> cmdvec;
|
||||
config->pythonCmd("recoll-we-move-files.py", cmdvec);
|
||||
|
||||
/* Arrange to not actually run the script if the directory did not change */
|
||||
static time_t dirmtime;
|
||||
time_t ndirmtime = 0;
|
||||
@ -72,17 +70,17 @@ bool runWebFilesMoverScript(RclConfig *config)
|
||||
if (path_fileprops(downloadsdir.c_str(), &st) == 0) {
|
||||
ndirmtime = st.pst_mtime;
|
||||
}
|
||||
/* If stat fails, presumably Downloads does not exist or is not
|
||||
accessible, dirmtime and mdirmtime stay at 0, and we never
|
||||
execute the script, which is the right thing. */
|
||||
// If stat fails, presumably Downloads does not exist or is not accessible, dirmtime and
|
||||
// mdirmtime stay at 0, and we never execute the script, which is the right thing.
|
||||
if (dirmtime != ndirmtime) {
|
||||
/* The script is going to change the directory, so updating
|
||||
dirmtime before it runs means that we are going to execute
|
||||
it one time too many (it will run without doing anything),
|
||||
but we can't set the mtime to after the run in case files
|
||||
are created during the run. */
|
||||
// The script is going to change the directory, so updating dirmtime before it runs means
|
||||
// that we are going to execute it one time too many (it will run without doing anything),
|
||||
// but we can't set the mtime to after the run in case files are created during the run.
|
||||
dirmtime = ndirmtime;
|
||||
vector<string> cmdvec;
|
||||
config->pythonCmd("recoll-we-move-files.py", cmdvec);
|
||||
ExecCmd cmd;
|
||||
cmd.putenv("RECOLL_CONFDIR", config->getConfDir());
|
||||
int status = cmd.doexec1(cmdvec);
|
||||
return status == 0;
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user