Compare commits
No commits in common. "87256b6a69292c0e51d2831902c6c2c933ba5714" and "2a92200012ad750b2492089174631e1364074a47" have entirely different histories.
87256b6a69
...
2a92200012
5
.gitignore
vendored
5
.gitignore
vendored
@ -24,9 +24,8 @@ build-*-Debug
|
|||||||
build-*-Release
|
build-*-Release
|
||||||
libtool
|
libtool
|
||||||
ptrans
|
ptrans
|
||||||
**/Makefile.in
|
|
||||||
src/Makefile
|
src/Makefile
|
||||||
src/rclgrep/Makefile
|
src/Makefile.in
|
||||||
src/TAGS
|
src/TAGS
|
||||||
src/aclocal.m4
|
src/aclocal.m4
|
||||||
src/autom4te.cache
|
src/autom4te.cache
|
||||||
@ -78,6 +77,7 @@ src/recollq
|
|||||||
src/sampleconf/rclmon.sh
|
src/sampleconf/rclmon.sh
|
||||||
src/sampleconf/recoll.conf
|
src/sampleconf/recoll.conf
|
||||||
src/testmains/Makefile
|
src/testmains/Makefile
|
||||||
|
src/testmains/Makefile.in
|
||||||
src/xadump
|
src/xadump
|
||||||
stamp-h1
|
stamp-h1
|
||||||
tests/casediac/aspdict.en.rws
|
tests/casediac/aspdict.en.rws
|
||||||
@ -103,7 +103,6 @@ tests/indexedmimetypes/missing
|
|||||||
tests/indexedmimetypes/recoll.conf
|
tests/indexedmimetypes/recoll.conf
|
||||||
tests/indexedmimetypes/xapiandb
|
tests/indexedmimetypes/xapiandb
|
||||||
tests/xattr/mimeview
|
tests/xattr/mimeview
|
||||||
unac/autom4te.cache
|
|
||||||
website/faqsandhowtos/*.html
|
website/faqsandhowtos/*.html
|
||||||
website/idxthreads/forkingRecoll.html
|
website/idxthreads/forkingRecoll.html
|
||||||
website/idxthreads/xapDocCopyCrash.html
|
website/idxthreads/xapDocCopyCrash.html
|
||||||
|
|||||||
@ -14,8 +14,8 @@ share/pixmaps/recoll.png
|
|||||||
%%DATADIR%%/filters/hotrecoll.py
|
%%DATADIR%%/filters/hotrecoll.py
|
||||||
%%DATADIR%%/filters/rclabw
|
%%DATADIR%%/filters/rclabw
|
||||||
%%DATADIR%%/filters/rclaptosidman
|
%%DATADIR%%/filters/rclaptosidman
|
||||||
%%DATADIR%%/filters/rclaudio.py
|
%%DATADIR%%/filters/rclaudio
|
||||||
%%DATADIR%%/filters/rclchm.py
|
%%DATADIR%%/filters/rclchm
|
||||||
%%DATADIR%%/filters/rcldjvu
|
%%DATADIR%%/filters/rcldjvu
|
||||||
%%DATADIR%%/filters/rcldoc
|
%%DATADIR%%/filters/rcldoc
|
||||||
%%DATADIR%%/filters/rcldvi
|
%%DATADIR%%/filters/rcldvi
|
||||||
@ -23,11 +23,11 @@ share/pixmaps/recoll.png
|
|||||||
%%DATADIR%%/filters/rclfb2
|
%%DATADIR%%/filters/rclfb2
|
||||||
%%DATADIR%%/filters/rclflac
|
%%DATADIR%%/filters/rclflac
|
||||||
%%DATADIR%%/filters/rclgaim
|
%%DATADIR%%/filters/rclgaim
|
||||||
%%DATADIR%%/filters/rclics.py
|
%%DATADIR%%/filters/rclics
|
||||||
%%DATADIR%%/filters/rclid3
|
%%DATADIR%%/filters/rclid3
|
||||||
%%DATADIR%%/filters/rclimg
|
%%DATADIR%%/filters/rclimg
|
||||||
%%DATADIR%%/filters/rclinfo.py
|
%%DATADIR%%/filters/rclinfo
|
||||||
%%DATADIR%%/filters/rclkar.py
|
%%DATADIR%%/filters/rclkar
|
||||||
%%DATADIR%%/filters/rclkwd
|
%%DATADIR%%/filters/rclkwd
|
||||||
%%DATADIR%%/filters/rcllatinclass.py
|
%%DATADIR%%/filters/rcllatinclass.py
|
||||||
%%DATADIR%%/filters/rcllatinstops.zip
|
%%DATADIR%%/filters/rcllatinstops.zip
|
||||||
@ -41,7 +41,7 @@ share/pixmaps/recoll.png
|
|||||||
%%DATADIR%%/filters/rclps
|
%%DATADIR%%/filters/rclps
|
||||||
%%DATADIR%%/filters/rclpurple
|
%%DATADIR%%/filters/rclpurple
|
||||||
%%DATADIR%%/filters/rclpython
|
%%DATADIR%%/filters/rclpython
|
||||||
%%DATADIR%%/filters/rclrar.py
|
%%DATADIR%%/filters/rclrar
|
||||||
%%DATADIR%%/filters/rclrtf
|
%%DATADIR%%/filters/rclrtf
|
||||||
%%DATADIR%%/filters/rclscribus
|
%%DATADIR%%/filters/rclscribus
|
||||||
%%DATADIR%%/filters/rclshowinfo
|
%%DATADIR%%/filters/rclshowinfo
|
||||||
@ -51,11 +51,11 @@ share/pixmaps/recoll.png
|
|||||||
%%DATADIR%%/filters/rcltex
|
%%DATADIR%%/filters/rcltex
|
||||||
%%DATADIR%%/filters/rcltext
|
%%DATADIR%%/filters/rcltext
|
||||||
%%DATADIR%%/filters/rcluncomp
|
%%DATADIR%%/filters/rcluncomp
|
||||||
%%DATADIR%%/filters/rclwar.py
|
%%DATADIR%%/filters/rclwar
|
||||||
%%DATADIR%%/filters/rclwpd
|
%%DATADIR%%/filters/rclwpd
|
||||||
%%DATADIR%%/filters/rclxls
|
%%DATADIR%%/filters/rclxls
|
||||||
%%DATADIR%%/filters/rclzip.py
|
%%DATADIR%%/filters/rclzip
|
||||||
%%DATADIR%%/filters/rcl7z.py
|
%%DATADIR%%/filters/rcl7z
|
||||||
%%DATADIR%%/filters/xdg-open
|
%%DATADIR%%/filters/xdg-open
|
||||||
%%DATADIR%%/images/aptosid-book.png
|
%%DATADIR%%/images/aptosid-book.png
|
||||||
%%DATADIR%%/images/aptosid-manual.png
|
%%DATADIR%%/images/aptosid-manual.png
|
||||||
|
|||||||
@ -5,30 +5,30 @@
|
|||||||
# sudo apt-get install pkg-kde-tools cdbs
|
# sudo apt-get install pkg-kde-tools cdbs
|
||||||
|
|
||||||
# Active series:
|
# Active series:
|
||||||
|
# 16.04LTS xenial 2021-04
|
||||||
# 18.04LTS bionic 2023-04
|
# 18.04LTS bionic 2023-04
|
||||||
# 20.04LTS focal 2025-04
|
# 20.04LTS focal 2025-04
|
||||||
# 22.04LTS jammy 2027-04
|
# 20.10 groovy 2021-07
|
||||||
SERIES="bionic focal jammy kinetic"
|
# 21.04 hirsute 2022-01
|
||||||
|
|
||||||
PPA_KEYID=7808CE96D38B9201
|
PPA_KEYID=7808CE96D38B9201
|
||||||
|
|
||||||
RCLVERS=1.33.1
|
RCLVERS=1.31.0
|
||||||
SCOPEVERS=1.20.2.4
|
SCOPEVERS=1.20.2.4
|
||||||
GSSPVERS=1.1.1
|
GSSPVERS=1.1.0
|
||||||
PPAVERS=1
|
PPAVERS=1
|
||||||
|
|
||||||
#
|
#
|
||||||
#Y=/y
|
RCLSRC=/y/home/dockes/projets/fulltext/recoll/src
|
||||||
Y=
|
SCOPESRC=/y/home/dockes/projets/fulltext/unity-scope-recoll
|
||||||
RCLSRC=${Y}/home/dockes/projets/fulltext/recoll/src
|
GSSPSRC=/y/home/dockes/projets/fulltext/gssp-recoll
|
||||||
SCOPESRC=${Y}/home/dockes/projets/fulltext/unity-scope-recoll
|
RCLDOWNLOAD=/y/home/dockes/projets/lesbonscomptes/recoll
|
||||||
GSSPSRC=${Y}/home/dockes/projets/fulltext/gssp-recoll
|
|
||||||
RCLDOWNLOAD=${Y}/home/dockes/projets/lesbonscomptes/recoll
|
|
||||||
|
|
||||||
PPANAME=recoll15-ppa
|
|
||||||
PPANAME=recollexp1-ppa
|
|
||||||
#PPANAME=recoll-webengine-ppa
|
|
||||||
|
|
||||||
|
case $RCLVERS in
|
||||||
|
[23]*) PPANAME=recollexp-ppa;;
|
||||||
|
*) PPANAME=recoll15-ppa;;
|
||||||
|
esac
|
||||||
|
#PPANAME=recollexp-ppa
|
||||||
echo "PPA: $PPANAME. Type CR if Ok, else ^C"
|
echo "PPA: $PPANAME. Type CR if Ok, else ^C"
|
||||||
read rep
|
read rep
|
||||||
|
|
||||||
@ -49,8 +49,8 @@ check_recoll_orig()
|
|||||||
|
|
||||||
####### QT
|
####### QT
|
||||||
debdir=debian
|
debdir=debian
|
||||||
series=$SERIES
|
series="bionic focal groovy hirsute"
|
||||||
#series=bionic
|
series=
|
||||||
|
|
||||||
if test "X$series" != X ; then
|
if test "X$series" != X ; then
|
||||||
check_recoll_orig
|
check_recoll_orig
|
||||||
@ -77,7 +77,7 @@ for series in $series ; do
|
|||||||
-e s/PPAVERS/${PPAVERS}/g \
|
-e s/PPAVERS/${PPAVERS}/g \
|
||||||
< ${debdir}/changelog > recoll-${RCLVERS}/debian/changelog
|
< ${debdir}/changelog > recoll-${RCLVERS}/debian/changelog
|
||||||
|
|
||||||
(cd recoll-${RCLVERS};debuild -d -k$PPA_KEYID -S -sa) || break
|
(cd recoll-${RCLVERS};debuild -k$PPA_KEYID -S -sa) || break
|
||||||
|
|
||||||
dput $PPANAME recoll_${RCLVERS}-1~ppa${PPAVERS}~${series}1_source.changes
|
dput $PPANAME recoll_${RCLVERS}-1~ppa${PPAVERS}~${series}1_source.changes
|
||||||
done
|
done
|
||||||
@ -85,8 +85,8 @@ done
|
|||||||
|
|
||||||
|
|
||||||
### KIO.
|
### KIO.
|
||||||
series=$SERIES
|
series="bionic focal groovy hirsute"
|
||||||
series=
|
#series=
|
||||||
|
|
||||||
debdir=debiankio
|
debdir=debiankio
|
||||||
topdir=kio-recoll-${RCLVERS}
|
topdir=kio-recoll-${RCLVERS}
|
||||||
@ -125,7 +125,7 @@ for svers in $series ; do
|
|||||||
done
|
done
|
||||||
|
|
||||||
### GSSP
|
### GSSP
|
||||||
series=$SERIES
|
series="bionic focal groovy hirsute"
|
||||||
series=
|
series=
|
||||||
|
|
||||||
debdir=debiangssp
|
debdir=debiangssp
|
||||||
|
|||||||
@ -1,169 +1,3 @@
|
|||||||
recoll (1.33.1-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
|
||||||
|
|
||||||
* Small updates to the build files to accomodate the new rclgrep utility.
|
|
||||||
* New textunknownasplain configuration variable to index all files with no known association
|
|
||||||
but identified as text/xxx by the "file" or "xdg-mime" command.
|
|
||||||
* Make sure that a single double-quoted word is not stem-expanded (act as if it was
|
|
||||||
capitalized). Expanding a quoted term is unexpected.
|
|
||||||
* Apply stemming to terms containing a single dash. These were not expanded before.
|
|
||||||
* Linux real time: fix monitoring under topdirs members which are symbolic links.
|
|
||||||
* Fix the GUI simple search which was broken in 1.33.0 when switching filters on/off
|
|
||||||
* Exclude Tamil characters from unac processing (experimental for now).
|
|
||||||
* Windows GUI directory side filters: the computed paths were wrong on Windows.
|
|
||||||
|
|
||||||
-- Jean-Francois Dockes <jf@dockes.org> Sun, 25 Sep 2022 19:19:00 +0200
|
|
||||||
|
|
||||||
recoll (1.33.0-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
|
||||||
|
|
||||||
* Query processing: simplify queries a bit more before sending to Xapian, allows using OP_FILTER
|
|
||||||
for path filtering. -> Medium version bump.
|
|
||||||
* GUI: allow specifying a fixed geometry for the results list viewport by setting
|
|
||||||
RECOLL_RESULTS_GEOMETRY=widthxheight . For people with fixed-width result formats CSS.
|
|
||||||
* recollq: add option to extract a result document into a file.
|
|
||||||
* Replace application/x-flac with audio/flac for FLAC audio files.
|
|
||||||
* Fix web queue processing for non-default configuration directories.
|
|
||||||
* Fix encoding issue in pdf attachment extraction.
|
|
||||||
* GUI: result list: fix issue with webengine builds not displaying Icons. Paging still not working
|
|
||||||
right with webengine (QTBUG-105842). Main builds revert/remain to webkit.
|
|
||||||
* Misc. small adjustments.
|
|
||||||
|
|
||||||
-- Jean-Francois Dockes <jf@dockes.org> Mon, 30 Aug 2022 10:59:00 +0200
|
|
||||||
|
|
||||||
recoll (1.32.8-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
|
||||||
|
|
||||||
* Add environment variable RECOLL_RESULTS_GEOMETRY for forcing a fixed geometry to the results
|
|
||||||
list viewport.
|
|
||||||
* Fix result list Qt Webengine icon display issue.
|
|
||||||
* Improve result list paging behaviour. Only fully works with Qt Webkit.
|
|
||||||
* recollq: add option to extract result document to a file.
|
|
||||||
|
|
||||||
-- Jean-Francois Dockes <jf@dockes.org> Sun, 21 Aug 2022 07:59:00 +0200
|
|
||||||
|
|
||||||
recoll (1.32.7-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
|
||||||
|
|
||||||
* kio_recoll: updated to work with newer kf5 versions (it seems that 5.96 broke it at least on
|
|
||||||
arch linux).
|
|
||||||
* rclaudio: fix extracting comment fields from flac files.
|
|
||||||
* Python code preview: get rid of spurious encoding value output.
|
|
||||||
* Fix glitch in Qt GUI when between list and table display.
|
|
||||||
|
|
||||||
-- Jean-Francois Dockes <jf@dockes.org> Sun, 07 Aug 2022 17:42:00 +0200
|
|
||||||
|
|
||||||
recoll (1.32.5-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
|
||||||
|
|
||||||
* GUI: switch to using Qt-Webengine instead of Qt-Webkit because of CSS support issues in Webkit.
|
|
||||||
* GUI: result list paragraph format. Preserve unquoted % characters if there is no matching
|
|
||||||
translation. USer manual: document the need to quote % as %% anyway.
|
|
||||||
* GUI: result list devel/debug. Add parameter to dump the HTML sent to the engine.
|
|
||||||
|
|
||||||
-- Jean-Francois Dockes <jf@dockes.org> Tue, 05 Jul 2022 09:56:00 +0200
|
|
||||||
|
|
||||||
recoll (1.32.4-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
|
||||||
|
|
||||||
* Try to improve font size consistency by using px sizes everywhere.
|
|
||||||
* Fix Increase/Decrease font size menu options.
|
|
||||||
* Allow displaying line numbers in snippets.
|
|
||||||
|
|
||||||
-- Jean-Francois Dockes <jf@dockes.org> Wed, 29 Jun 2022 09:36:00 +0200
|
|
||||||
|
|
||||||
recoll (1.32.3-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
|
||||||
|
|
||||||
* Fix issues when opening a file with %F (parent of subdocument): avoid creating a temporary file
|
|
||||||
when this can be avoided.
|
|
||||||
|
|
||||||
-- Jean-Francois Dockes <jf@dockes.org> Tue, 21 Jun 2022 20:51:00 +0200
|
|
||||||
|
|
||||||
recoll (1.32.2-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
|
||||||
|
|
||||||
* Restore result list popup menu function when using webengine. This was broken in 1.32.1.
|
|
||||||
* Show progress dialog when result list abstracts generation takes too long
|
|
||||||
|
|
||||||
-- Jean-Francois Dockes <jf@dockes.org> Tue, 14 Jun 2022 07:51:00 +0200
|
|
||||||
|
|
||||||
recoll (1.32.1-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
|
||||||
|
|
||||||
* GUI side panel filters: make sure the filter is applied even if set before
|
|
||||||
the query.
|
|
||||||
* GUI side panel directory filter: compute the tree from the index, not the
|
|
||||||
file system, to allow filtering data from external indexes. Update the tree
|
|
||||||
when an indexing completes.
|
|
||||||
* Implement whole UI scaling factor (fonts only, no icons).
|
|
||||||
* Orgmode: add orgmodesubdocs configuration variable to decide if we index
|
|
||||||
whole files or create subdocuments for nodes. Also index text before the
|
|
||||||
first heading.
|
|
||||||
* GUI: fix path translation for importing an index from Windows.
|
|
||||||
|
|
||||||
-- Jean-Francois Dockes <jf@dockes.org> Fri, 20 May 2022 10:55:00 +0200
|
|
||||||
|
|
||||||
recoll (1.32.0-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
|
||||||
|
|
||||||
* GUI: added a collapsible side pane for quick filtering on dates or
|
|
||||||
directories.
|
|
||||||
* Showing duplicates now uses a spreadsheet like the result table for
|
|
||||||
easy access to the duplicate files.
|
|
||||||
* Fixed the temporary copy open dialog (again!).
|
|
||||||
* The default mimeview and mimeconf configuration files were
|
|
||||||
separated into generic and system-specific parts to avoid update
|
|
||||||
errors (no consequences for users).
|
|
||||||
* Renamed all Python input handler with a .py extension. This is
|
|
||||||
relied on Windows rather than listing an explicit python
|
|
||||||
interpreter.
|
|
||||||
* Added %l specification to viewer definitions for opening at a
|
|
||||||
specific line.
|
|
||||||
|
|
||||||
-- Jean-Francois Dockes <jf@dockes.org> Fri, 11 Mar 2022 18:17:00 +0100
|
|
||||||
|
|
||||||
recoll (1.31.6-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
|
||||||
|
|
||||||
* Almost no change: translation files update.
|
|
||||||
|
|
||||||
-- Jean-Francois Dockes <jf@dockes.org> Sat, 20 Dec 2021 09:25:00 +0100
|
|
||||||
|
|
||||||
recoll (1.31.5-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
|
||||||
|
|
||||||
* Fix ennoying bug in tesseract OCR temporary files cleanup.
|
|
||||||
|
|
||||||
-- Jean-Francois Dockes <jf@dockes.org> Sat, 04 Dec 2021 10:05:00 +0100
|
|
||||||
|
|
||||||
recoll (1.31.4-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
|
||||||
|
|
||||||
* Linux/Mac: Bug in threads management could result in index corruption or crash
|
|
||||||
after signal interrupt.
|
|
||||||
|
|
||||||
-- Jean-Francois Dockes <jf@dockes.org> Thu, 25 Nov 2021 16:30:00 +0100
|
|
||||||
|
|
||||||
recoll (1.31.3-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
|
||||||
|
|
||||||
* Highligthing for group (phrase/near): eliminate some spurious matches.
|
|
||||||
* Fix page number string detection which could sometimes prevent correct
|
|
||||||
highlighting in snippets.
|
|
||||||
* Avoid query completer consuming excessive resources on unstripped
|
|
||||||
indexes.
|
|
||||||
* Fix some cases where different instances of the indexer could use
|
|
||||||
different pid/lock files.
|
|
||||||
* Fix processing on some unicode dash and apos character variations.
|
|
||||||
* PST: fix indexing in marginal cases. Extract message dates.
|
|
||||||
|
|
||||||
-- Jean-Francois Dockes <jf@dockes.org> Sat, 13 Nov 2021 16:30:00 +0100
|
|
||||||
|
|
||||||
recoll (1.31.2-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
|
||||||
|
|
||||||
* Add support for .ipynb iPython/Jupyter notebook format.
|
|
||||||
* Implement Alt+/ shortcut to search the menu entries and possibly execute the result.
|
|
||||||
* Fix configuration GUI button margins on Mac OS.
|
|
||||||
* Add *.pyc __pycache__ .pytest_cache .tox and .direnv to the default skipped names list.
|
|
||||||
* Add /opt/homebrew/bin to the helper search path when built under Mac Homebrew.
|
|
||||||
* Linux: let recollindex adjust its OOM killer "badness" on startup.
|
|
||||||
* simple search: add Ctrl+H as keyboard shortcut for "show history".
|
|
||||||
* Renamed the fragment buttons configuration file from fragbuts.xml to fragment-buttons.xml.
|
|
||||||
* Zip archives: set the modification date attribute for members.
|
|
||||||
* ost/pst filter: fix not fetching the message dates.
|
|
||||||
* Anchored searches: remove unwarranted slack increase. The anchor term should behave like a
|
|
||||||
normal one for slack computations.
|
|
||||||
* Fix djvu issues on Windows.
|
|
||||||
|
|
||||||
-- Jean-Francois Dockes <jf@dockes.org> Mon, 11 Oct 2021 10:51:00 +0200
|
|
||||||
|
|
||||||
recoll (1.31.0-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
recoll (1.31.0-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
||||||
|
|
||||||
* GUI: modified shortcuts were not read from the preferences !
|
* GUI: modified shortcuts were not read from the preferences !
|
||||||
|
|||||||
@ -3,19 +3,18 @@ Section: x11
|
|||||||
Priority: optional
|
Priority: optional
|
||||||
Maintainer: Jean-Francois Dockes <jfd@recoll.org>
|
Maintainer: Jean-Francois Dockes <jfd@recoll.org>
|
||||||
Build-Depends: bison,
|
Build-Depends: bison,
|
||||||
debhelper (>= 10),
|
debhelper (>= 9),
|
||||||
dh-python,
|
dh-python,
|
||||||
dpkg-dev (>= 1.16.1~),
|
dpkg-dev (>= 1.16.1~),
|
||||||
libaspell-dev,
|
libaspell-dev,
|
||||||
libchm-dev,
|
libchm-dev,
|
||||||
# qtwebengine5-dev,
|
|
||||||
libqt5webkit5-dev,
|
libqt5webkit5-dev,
|
||||||
libx11-dev,
|
libx11-dev,
|
||||||
libxapian-dev (>= 1.2.0),
|
libxapian-dev (>= 1.2.0),
|
||||||
libxslt1-dev,
|
libxslt1-dev,
|
||||||
libz-dev,
|
libz-dev,
|
||||||
pkg-config,
|
pkg-config,
|
||||||
python2-dev (>= 2.6.6-3~),
|
python-all-dev (>= 2.6.6-3~),
|
||||||
python-setuptools,
|
python-setuptools,
|
||||||
python3-all-dev,
|
python3-all-dev,
|
||||||
python3-setuptools,
|
python3-setuptools,
|
||||||
|
|||||||
@ -1,115 +0,0 @@
|
|||||||
Source: recoll
|
|
||||||
Section: x11
|
|
||||||
Priority: optional
|
|
||||||
Maintainer: Jean-Francois Dockes <jfd@recoll.org>
|
|
||||||
Build-Depends: bison,
|
|
||||||
debhelper (>= 9),
|
|
||||||
dh-python,
|
|
||||||
dpkg-dev (>= 1.16.1~),
|
|
||||||
libaspell-dev,
|
|
||||||
libchm-dev,
|
|
||||||
libqt5webkit5-dev,
|
|
||||||
# qtwebengine5-dev,
|
|
||||||
libx11-dev,
|
|
||||||
libxapian-dev (>= 1.2.0),
|
|
||||||
libxslt1-dev,
|
|
||||||
libz-dev,
|
|
||||||
pkg-config,
|
|
||||||
python-all-dev (>= 2.6.6-3~),
|
|
||||||
python-setuptools,
|
|
||||||
python3-all-dev,
|
|
||||||
python3-setuptools,
|
|
||||||
qtbase5-dev
|
|
||||||
X-Python3-Version: >= 3.4
|
|
||||||
Vcs-Git: https://salsa.debian.org/debian/recoll.git
|
|
||||||
Vcs-Browser: https://salsa.debian.org/debian/recoll
|
|
||||||
Homepage: https://www.lesbonscomptes.com/recoll
|
|
||||||
Standards-Version: 4.2.1
|
|
||||||
|
|
||||||
Package: recoll
|
|
||||||
Architecture: all
|
|
||||||
Depends: recollcmd, recollgui, ${misc:Depends}
|
|
||||||
Description: Personal full text search package
|
|
||||||
This package is a personal full text search package is based on a very strong
|
|
||||||
backend (Xapian), for which it provides an easy to use and feature-rich
|
|
||||||
interface.
|
|
||||||
.
|
|
||||||
Features:
|
|
||||||
* Qt-based GUI.
|
|
||||||
* Supports the following document types (and their compressed versions)
|
|
||||||
- Natively: text, html, OpenOffice files, excel, ppt, maildir and
|
|
||||||
mailbox (Mozilla and IceDove mail) with attachments, pidgin log files
|
|
||||||
- With external helpers: pdf (pdftotext), postscript (ghostscript), msword
|
|
||||||
(antiword), rtf (unrtf). And others...
|
|
||||||
* Powerful query facilities, with boolean searches, phrases, filter on file
|
|
||||||
types and directory tree.
|
|
||||||
* Support for multiple charsets, Internal processing and storage uses Unicode
|
|
||||||
UTF-8.
|
|
||||||
* Stemming performed at query time (can switch stemming language after
|
|
||||||
indexing).
|
|
||||||
* Easy installation. No database daemon, web server or exotic language
|
|
||||||
necessary.
|
|
||||||
* The indexer can run either continuously or in batch.
|
|
||||||
|
|
||||||
Package: recollcmd
|
|
||||||
Architecture: any
|
|
||||||
Breaks: recoll (<< 1.23.7-2)
|
|
||||||
Replaces: recoll (<< 1.23.7-2)
|
|
||||||
Depends: python3, ${misc:Depends}, ${shlibs:Depends}
|
|
||||||
Recommends: antiword,
|
|
||||||
aspell,
|
|
||||||
groff,
|
|
||||||
libimage-exiftool-perl,
|
|
||||||
poppler-utils,
|
|
||||||
python3-lxml,
|
|
||||||
python3-recoll,
|
|
||||||
python3-six,
|
|
||||||
python3-mutagen,
|
|
||||||
python3-rarfile,
|
|
||||||
unrtf,
|
|
||||||
unzip,
|
|
||||||
xdg-utils
|
|
||||||
Suggests: ghostscript,
|
|
||||||
libinotifytools0,
|
|
||||||
untex,
|
|
||||||
wv
|
|
||||||
Description: Command line programs for recoll
|
|
||||||
This package supports indexing and command line querying.
|
|
||||||
|
|
||||||
Package: recollgui
|
|
||||||
Architecture: any
|
|
||||||
Breaks: recoll (<< 1.23.7-2)
|
|
||||||
Replaces: recoll (<< 1.23.7-2)
|
|
||||||
Depends: recollcmd (= ${binary:Version}), ${misc:Depends}, ${shlibs:Depends}
|
|
||||||
Description: GUI program and elements for recoll
|
|
||||||
Main recoll GUI for configuring, controlling and querying recoll indexes.
|
|
||||||
|
|
||||||
Package: python-recoll
|
|
||||||
Architecture: any
|
|
||||||
Section: python
|
|
||||||
Depends: python2,
|
|
||||||
recollcmd (= ${binary:Version}),
|
|
||||||
${misc:Depends},
|
|
||||||
${python:Depends},
|
|
||||||
${shlibs:Depends}
|
|
||||||
Description: Python extension for recoll
|
|
||||||
Personal full text search package which is based on a very strong backend
|
|
||||||
(Xapian), for which it provides an easy to use and feature-rich interface.
|
|
||||||
.
|
|
||||||
This package provides Python extension module for recoll which can be use to
|
|
||||||
extend recoll such as an Ubuntu Unity Lens.
|
|
||||||
|
|
||||||
Package: python3-recoll
|
|
||||||
Architecture: any
|
|
||||||
Section: python
|
|
||||||
Depends: python3,
|
|
||||||
recollcmd (= ${binary:Version}),
|
|
||||||
${misc:Depends},
|
|
||||||
${python3:Depends},
|
|
||||||
${shlibs:Depends}
|
|
||||||
Description: Python extension for recoll (Python3)
|
|
||||||
Personal full text search package which is based on a very strong backend
|
|
||||||
(Xapian), for which it provides an easy to use and feature-rich interface.
|
|
||||||
.
|
|
||||||
This package provides Python3 extension module for recoll which can be use to
|
|
||||||
extend recoll such as an Ubuntu Unity Lens.
|
|
||||||
@ -1,5 +1,5 @@
|
|||||||
diff --git a/src/internfile/mh_mbox.cpp b/src/internfile/mh_mbox.cpp
|
diff --git a/internfile/mh_mbox.cpp b/srcinternfile/mh_mbox.cpp
|
||||||
index c77d42c8..ccd6a613 100644
|
index 2a0918cf..92ad7e23 100644
|
||||||
--- a/internfile/mh_mbox.cpp
|
--- a/internfile/mh_mbox.cpp
|
||||||
+++ b/internfile/mh_mbox.cpp
|
+++ b/internfile/mh_mbox.cpp
|
||||||
@@ -27,6 +27,7 @@
|
@@ -27,6 +27,7 @@
|
||||||
@ -19,25 +19,19 @@ index c77d42c8..ccd6a613 100644
|
|||||||
int msgnum{0}; // Current message number in folder. Starts at 1
|
int msgnum{0}; // Current message number in folder. Starts at 1
|
||||||
int64_t lineno{0}; // debug
|
int64_t lineno{0}; // debug
|
||||||
int64_t fsize{0};
|
int64_t fsize{0};
|
||||||
@@ -322,13 +323,6 @@ void MimeHandlerMbox::clear_impl()
|
@@ -321,7 +322,6 @@ void MimeHandlerMbox::clear_impl()
|
||||||
|
{
|
||||||
m->fn.erase();
|
m->fn.erase();
|
||||||
m->ipath.erase();
|
m->ipath.erase();
|
||||||
|
- m->instream = ifstream();
|
||||||
- // We used to use m->instream = ifstream() which fails with some compilers, as the copy
|
|
||||||
- // constructor is marked deleted in standard c++ (works with many compilers though).
|
|
||||||
- if (m->instream.is_open()) {
|
|
||||||
- m->instream.close();
|
|
||||||
- }
|
|
||||||
- m->instream.clear();
|
|
||||||
-
|
|
||||||
m->msgnum = 0;
|
m->msgnum = 0;
|
||||||
m->lineno = 0;
|
m->lineno = 0;
|
||||||
m->fsize = 0;
|
m->fsize = 0;
|
||||||
@@ -346,8 +340,9 @@ bool MimeHandlerMbox::set_document_file_impl(const string&, const string &fn)
|
@@ -339,8 +339,9 @@ bool MimeHandlerMbox::set_document_file_impl(const string&, const string &fn)
|
||||||
LOGDEB("MimeHandlerMbox::set_document_file(" << fn << ")\n");
|
LOGDEB("MimeHandlerMbox::set_document_file(" << fn << ")\n");
|
||||||
clear_impl();
|
clear_impl();
|
||||||
m->fn = fn;
|
m->fn = fn;
|
||||||
- m->instream.open(fn.c_str(), std::ifstream::binary);
|
- m->instream = ifstream(fn.c_str(), std::ifstream::binary);
|
||||||
- if (!m->instream.good()) {
|
- if (!m->instream.good()) {
|
||||||
+ m->instream = std::unique_ptr<ifstream>(
|
+ m->instream = std::unique_ptr<ifstream>(
|
||||||
+ new ifstream(fn.c_str(), std::ifstream::binary));
|
+ new ifstream(fn.c_str(), std::ifstream::binary));
|
||||||
@ -45,7 +39,7 @@ index c77d42c8..ccd6a613 100644
|
|||||||
LOGSYSERR("MimeHandlerMail::set_document_file", "ifstream", fn);
|
LOGSYSERR("MimeHandlerMail::set_document_file", "ifstream", fn);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@@ -396,13 +391,13 @@ bool MimeHandlerMbox::Internal::tryUseCache(int mtarg)
|
@@ -389,13 +390,13 @@ bool MimeHandlerMbox::Internal::tryUseCache(int mtarg)
|
||||||
fsize)) < 0) {
|
fsize)) < 0) {
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
@ -63,7 +57,7 @@ index c77d42c8..ccd6a613 100644
|
|||||||
LOGSYSERR("tryUseCache", "getline", "");
|
LOGSYSERR("tryUseCache", "getline", "");
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
@@ -411,7 +406,7 @@ bool MimeHandlerMbox::Internal::tryUseCache(int mtarg)
|
@@ -404,7 +405,7 @@ bool MimeHandlerMbox::Internal::tryUseCache(int mtarg)
|
||||||
if ((fromregex(line) ||
|
if ((fromregex(line) ||
|
||||||
((quirks & MBOXQUIRK_TBIRD) && minifromregex(line))) ) {
|
((quirks & MBOXQUIRK_TBIRD) && minifromregex(line))) ) {
|
||||||
LOGDEB0("MimeHandlerMbox: Cache: From_ Ok\n");
|
LOGDEB0("MimeHandlerMbox: Cache: From_ Ok\n");
|
||||||
@ -72,7 +66,7 @@ index c77d42c8..ccd6a613 100644
|
|||||||
msgnum = mtarg -1;
|
msgnum = mtarg -1;
|
||||||
cachefound = true;
|
cachefound = true;
|
||||||
} else {
|
} else {
|
||||||
@@ -421,7 +416,7 @@ bool MimeHandlerMbox::Internal::tryUseCache(int mtarg)
|
@@ -414,7 +415,7 @@ bool MimeHandlerMbox::Internal::tryUseCache(int mtarg)
|
||||||
out:
|
out:
|
||||||
if (!cachefound) {
|
if (!cachefound) {
|
||||||
// No cached result: scan.
|
// No cached result: scan.
|
||||||
@ -81,7 +75,7 @@ index c77d42c8..ccd6a613 100644
|
|||||||
msgnum = 0;
|
msgnum = 0;
|
||||||
}
|
}
|
||||||
return cachefound;
|
return cachefound;
|
||||||
@@ -429,7 +424,7 @@ out:
|
@@ -422,7 +423,7 @@ out:
|
||||||
|
|
||||||
bool MimeHandlerMbox::next_document()
|
bool MimeHandlerMbox::next_document()
|
||||||
{
|
{
|
||||||
@ -90,7 +84,7 @@ index c77d42c8..ccd6a613 100644
|
|||||||
LOGERR("MimeHandlerMbox::next_document: not open\n");
|
LOGERR("MimeHandlerMbox::next_document: not open\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@@ -465,10 +460,10 @@ bool MimeHandlerMbox::next_document()
|
@@ -458,10 +459,10 @@ bool MimeHandlerMbox::next_document()
|
||||||
msgtxt.erase();
|
msgtxt.erase();
|
||||||
string line;
|
string line;
|
||||||
for (;;) {
|
for (;;) {
|
||||||
|
|||||||
@ -21,11 +21,10 @@ endif
|
|||||||
|
|
||||||
# main packaging script based on dh7 syntax
|
# main packaging script based on dh7 syntax
|
||||||
%:
|
%:
|
||||||
dh $@ --with python2 --with python3
|
dh $@ --parallel --with python2 --with python3
|
||||||
|
|
||||||
override_dh_auto_configure:
|
override_dh_auto_configure:
|
||||||
dh_auto_configure -- --enable-recollq --enable-xadump --enable-webkit
|
dh_auto_configure -- --enable-recollq --enable-xadump
|
||||||
# dh_auto_configure -- --enable-recollq --enable-xadump --enable-webengine
|
|
||||||
|
|
||||||
build3vers := $(shell py3versions -sv)
|
build3vers := $(shell py3versions -sv)
|
||||||
|
|
||||||
|
|||||||
@ -1,76 +1,3 @@
|
|||||||
kio-recoll (1.33.0-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
|
||||||
|
|
||||||
* Follow
|
|
||||||
|
|
||||||
-- Jean-Francois Dockes <jf@dockes.org> Mon, 30 Aug 2022 10:59:00 +0200
|
|
||||||
|
|
||||||
kio-recoll (1.32.7-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
|
||||||
|
|
||||||
* Updated kio_recoll to work with newer kf5 versions (it seems that 5.96 broke it at least on
|
|
||||||
arch).
|
|
||||||
|
|
||||||
-- Jean-Francois Dockes <jf@dockes.org> Sun, 07 Aug 2022 17:42:00 +0200
|
|
||||||
|
|
||||||
kio-recoll (1.32.5-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
|
||||||
|
|
||||||
* Follow
|
|
||||||
|
|
||||||
-- Jean-Francois Dockes <jf@dockes.org> Tue, 05 Jul 2022 09:56:00 +0200
|
|
||||||
|
|
||||||
kio-recoll (1.32.4-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
|
||||||
|
|
||||||
* Follow
|
|
||||||
|
|
||||||
-- Jean-Francois Dockes <jf@dockes.org> Wed, 29 Jun 2022 09:36:00 +0200
|
|
||||||
|
|
||||||
kio-recoll (1.32.2-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
|
||||||
|
|
||||||
* Follow
|
|
||||||
|
|
||||||
-- Jean-Francois Dockes <jf@dockes.org> Tue, 14 Jun 2022 07:51:00 +0200
|
|
||||||
|
|
||||||
kio-recoll (1.32.1-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
|
||||||
|
|
||||||
* Follow
|
|
||||||
|
|
||||||
-- Jean-Francois Dockes <jf@dockes.org> Sun, 15 May 2022 08:07:00 +0200
|
|
||||||
|
|
||||||
kio-recoll (1.32.0-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
|
||||||
|
|
||||||
* Follow
|
|
||||||
|
|
||||||
-- Jean-Francois Dockes <jf@dockes.org> Fri, 11 Mar 2022 18:17:00 +0100
|
|
||||||
|
|
||||||
kio-recoll (1.31.6-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
|
||||||
|
|
||||||
* Follow
|
|
||||||
|
|
||||||
-- Jean-Francois Dockes <jf@dockes.org> Sat, 20 Dec 2021 09:25:00 +0100
|
|
||||||
|
|
||||||
kio-recoll (1.31.5-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
|
||||||
|
|
||||||
* Follow recoll version
|
|
||||||
|
|
||||||
-- Jean-Francois Dockes <jf@dockes.org> Sat, 04 Dec 2021 10:05:00 +0100
|
|
||||||
|
|
||||||
kio-recoll (1.31.4-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
|
||||||
|
|
||||||
* Follow recoll version
|
|
||||||
|
|
||||||
-- Jean-Francois Dockes <jf@dockes.org> Thu, 25 Nov 2021 16:30:00 +0100
|
|
||||||
|
|
||||||
kio-recoll (1.31.3-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
|
||||||
|
|
||||||
* Follow recoll version
|
|
||||||
|
|
||||||
-- Jean-Francois Dockes <jf@dockes.org> Sat, 13 Nov 2021 16:30:00 +0200
|
|
||||||
|
|
||||||
kio-recoll (1.31.2-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
|
||||||
|
|
||||||
* Follow recoll version
|
|
||||||
|
|
||||||
-- Jean-Francois Dockes <jf@dockes.org> Mon, 11 Oct 2021 10:55:00 +0200
|
|
||||||
|
|
||||||
kio-recoll (1.31.0-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
kio-recoll (1.31.0-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
||||||
|
|
||||||
* Follow recoll version
|
* Follow recoll version
|
||||||
|
|||||||
@ -1,13 +0,0 @@
|
|||||||
diff --git a/src/kde/kioslave/kio_recoll/dirif.cpp b/src/kde/kioslave/kio_recoll/dirif.cpp
|
|
||||||
index 4438a1e7..48284ece 100644
|
|
||||||
--- a/kde/kioslave/kio_recoll/dirif.cpp
|
|
||||||
+++ b/kde/kioslave/kio_recoll/dirif.cpp
|
|
||||||
@@ -35,7 +35,7 @@
|
|
||||||
#include <QDebug>
|
|
||||||
#include <QUrl>
|
|
||||||
#include <QStandardPaths>
|
|
||||||
-#include <KF5/kio_version.h>
|
|
||||||
+#include <kio_version.h>
|
|
||||||
|
|
||||||
#include "kio_recoll.h"
|
|
||||||
#include "pathut.h"
|
|
||||||
@ -1,8 +0,0 @@
|
|||||||
README for Debian
|
|
||||||
-----------------
|
|
||||||
|
|
||||||
The rclgrep package is a partial installation of the recollcmd package, with no
|
|
||||||
Xapian dependency. It conflicts with recollcmd, which also provides the rclgrep
|
|
||||||
command.
|
|
||||||
|
|
||||||
-- Jean-Francois Dockes <jf@dockes.org> Tue, 20 Sep 2022 08:32:00 +0200
|
|
||||||
@ -1,7 +0,0 @@
|
|||||||
rclgrep (1.33.1-1~ppaPPAVERS~SERIES1) SERIES; urgency=low
|
|
||||||
|
|
||||||
* 1st version of rclgrep: a non-indexed search program using recoll
|
|
||||||
data extraction modules to effect grep-like function.
|
|
||||||
|
|
||||||
-- Jean-Francois Dockes <jf@dockes.org> Sun, 11 Sep 2022 10:59:00 +0200
|
|
||||||
|
|
||||||
@ -1 +0,0 @@
|
|||||||
11
|
|
||||||
@ -1,44 +0,0 @@
|
|||||||
Source: rclgrep
|
|
||||||
Section: x11
|
|
||||||
Priority: optional
|
|
||||||
Maintainer: Jean-Francois Dockes <jfd@recoll.org>
|
|
||||||
Build-Depends: debhelper (>= 10),
|
|
||||||
dh-python,
|
|
||||||
dpkg-dev (>= 1.16.1~),
|
|
||||||
libchm-dev,
|
|
||||||
libmagic-dev,
|
|
||||||
libxslt1-dev,
|
|
||||||
libz-dev,
|
|
||||||
pkg-config,
|
|
||||||
python3-all-dev,
|
|
||||||
python3-setuptools
|
|
||||||
X-Python3-Version: >= 3.6
|
|
||||||
Homepage: https://www.lesbonscomptes.com/recoll
|
|
||||||
Standards-Version: 4.2.1
|
|
||||||
|
|
||||||
Package: rclgrep
|
|
||||||
Architecture: any
|
|
||||||
Depends: python3, ${misc:Depends}, ${shlibs:Depends}
|
|
||||||
Conflicts: recollcmd
|
|
||||||
Recommends: antiword,
|
|
||||||
groff,
|
|
||||||
libimage-exiftool-perl,
|
|
||||||
poppler-utils,
|
|
||||||
python3-lxml,
|
|
||||||
python3-six,
|
|
||||||
python3-mutagen,
|
|
||||||
python3-rarfile,
|
|
||||||
unrtf,
|
|
||||||
unzip,
|
|
||||||
xdg-utils
|
|
||||||
Suggests: ghostscript,
|
|
||||||
untex,
|
|
||||||
wv
|
|
||||||
Description: grep-like program based on recoll data extraction modules.
|
|
||||||
The program supports most grep options and aims at supplying a very similar
|
|
||||||
output format. It will search all formats supported by Recoll, including
|
|
||||||
compound documents and nested archives (mbox, zip, ....) with full
|
|
||||||
regexp support (unlike recoll). It does not create
|
|
||||||
an index and the package has no dependency on Xapian. In consequence,
|
|
||||||
searching is vastly slower than when using recoll.
|
|
||||||
|
|
||||||
@ -1,141 +0,0 @@
|
|||||||
Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
|
|
||||||
Upstream-Name: recoll
|
|
||||||
Upstream-Contact: Jean-Francois Dockes <jfd@recoll.org>
|
|
||||||
Source: https://www.lesbonscomptes.com/recoll/
|
|
||||||
|
|
||||||
Files: *
|
|
||||||
Copyright: 2005-2014, Jean-Francois Dockes <jfd@recoll.org>
|
|
||||||
License: GPL-2+
|
|
||||||
|
|
||||||
Files: bincimapmime/*
|
|
||||||
Copyright: 2002-2005, Andreas Aardal Hanssen <andreas-binc@bincimap.org>
|
|
||||||
License: GPL-2+
|
|
||||||
|
|
||||||
Files: filters/rcl* internfile/htmlparse.cpp
|
|
||||||
Copyright: 2000-2004, Mikio Hirabayashi
|
|
||||||
License: GPL-2+
|
|
||||||
|
|
||||||
Files: filters/rclpython
|
|
||||||
Copyright: Jürgen Hermann, Mike Brown, Christopher Arndt
|
|
||||||
<http://chrisarndt.de/en/software/python/colorize.html>
|
|
||||||
License: GPL-2+
|
|
||||||
|
|
||||||
Files: internfile/htmlparse.cpp internfile/mh_html.cpp
|
|
||||||
Copyright: 1999-2001, BrightStation PLC,
|
|
||||||
2001, Ananova Ltd,
|
|
||||||
2002-2004, Olly Betts.
|
|
||||||
License: GPL-2+
|
|
||||||
|
|
||||||
Files: unac/*
|
|
||||||
Copyright: 2000-2002, Loic Dachary <loic@senga.org>
|
|
||||||
License: GPL-2+
|
|
||||||
|
|
||||||
Files: common/*
|
|
||||||
Copyright: 2004-2005, J.F.Dockes
|
|
||||||
License: GPL-2+
|
|
||||||
|
|
||||||
Files: debian/*
|
|
||||||
Copyright: 2007-2014, Kartik Mistry <kartik@debian.org>
|
|
||||||
License: GPL-2+
|
|
||||||
|
|
||||||
License: GPL-2+
|
|
||||||
This package is free software; you can redistribute it and/or modify it under
|
|
||||||
the terms of the GNU General Public License as published by the Free Software
|
|
||||||
Foundation; either version 2 of the License, or (at your option) any later
|
|
||||||
version.
|
|
||||||
.
|
|
||||||
This package is distributed in the hope that it will be useful, but WITHOUT
|
|
||||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
||||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
|
||||||
.
|
|
||||||
You should have received a copy of the GNU General Public License along with
|
|
||||||
this package; if not, write to the Free Software Foundation, Inc., 51 Franklin
|
|
||||||
St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
||||||
.
|
|
||||||
On Debian systems, the complete text of the GNU General Public License can be
|
|
||||||
found in `/usr/share/common-licenses/GPL-2' and
|
|
||||||
`/usr/share/common-licenses/GPL-3'.
|
|
||||||
|
|
||||||
Files: aspell/*
|
|
||||||
Copyright: 2001-2002, by Kevin Atkinson
|
|
||||||
License: LGPL-2+
|
|
||||||
|
|
||||||
License: LGPL-2+
|
|
||||||
This package is free software; you can redistribute it and/or modify it under
|
|
||||||
the terms of the GNU Lesser General Public License as published by the Free
|
|
||||||
Software Foundation; either version 2 of the License, or (at your option) any
|
|
||||||
later version.
|
|
||||||
.
|
|
||||||
This package is distributed in the hope that it will be useful, but WITHOUT
|
|
||||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
||||||
FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
|
||||||
details.
|
|
||||||
.
|
|
||||||
You should have received a copy of the GNU Lesser General Public License along
|
|
||||||
with this package; if not, write to the Free Software Foundation, Inc., 51
|
|
||||||
Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
||||||
.
|
|
||||||
On Debian systems, the complete text of the GNU Lesser General Public License
|
|
||||||
can be found in `/usr/share/common-licenses/LGPL-2' and
|
|
||||||
`/usr/share/common-licenses/LGPL-2.1' and `/usr/share/common-licenses/LGPL-3'.
|
|
||||||
|
|
||||||
Files: common/uproplist.h
|
|
||||||
Copyright: 1991-2006, Unicode, Inc.
|
|
||||||
License: Unicode
|
|
||||||
|
|
||||||
License: Unicode
|
|
||||||
All rights reserved. Distributed under the Terms of Use in
|
|
||||||
https://www.unicode.org/copyright.html
|
|
||||||
.
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
||||||
of the Unicode data files and any associated documentation (the "Data Files")
|
|
||||||
or Unicode software and any associated documentation (the "Software") to deal
|
|
||||||
in the Data Files or Software without restriction, including without
|
|
||||||
limitation the rights to use, copy, modify, merge, publish, distribute, and/or
|
|
||||||
sell copies of the Data Files or Software, and to permit persons to whom the
|
|
||||||
Data Files or Software are furnished to do so, provided that (a) the above
|
|
||||||
copyright notice(s) and this permission notice appear with all copies of the
|
|
||||||
Data Files or Software, (b) both the above copyright notice(s) and this
|
|
||||||
permission notice appear in associated documentation, and (c) there is clear
|
|
||||||
notice in each modified Data File or in the Software as well as in the
|
|
||||||
documentation associated with the Data File(s) or Software that the data or
|
|
||||||
software has been modified.
|
|
||||||
.
|
|
||||||
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
|
|
||||||
KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
||||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD
|
|
||||||
PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN
|
|
||||||
THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
|
|
||||||
DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
|
|
||||||
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
|
||||||
ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE
|
|
||||||
DATA FILES OR SOFTWARE.
|
|
||||||
.
|
|
||||||
Except as contained in this notice, the name of a copyright holder shall not
|
|
||||||
be used in advertising or otherwise to promote the sale, use or other dealings
|
|
||||||
in these Data Files or Software without prior written authorization of the
|
|
||||||
copyright holder.
|
|
||||||
|
|
||||||
Files: utils/md5.*
|
|
||||||
Copyright: 1991-1992, RSA Data Security, Inc. All rights reserved.
|
|
||||||
License: RSA
|
|
||||||
|
|
||||||
License: RSA
|
|
||||||
MD5C.C - RSA Data Security, Inc., MD5 message-digest algorithm
|
|
||||||
.
|
|
||||||
License to copy and use this software is granted provided that it is
|
|
||||||
identified as the "RSA Data Security, Inc. MD5 Message-Digest Algorithm" in
|
|
||||||
all material mentioning or referencing this software or this function.
|
|
||||||
.
|
|
||||||
License is also granted to make and use derivative works provided that such
|
|
||||||
works are identified as "derived from the RSA Data Security, Inc. MD5
|
|
||||||
Message-Digest Algorithm" in all material mentioning or referencing the
|
|
||||||
derived work.
|
|
||||||
.
|
|
||||||
RSA Data Security, Inc. makes no representations concerning either the
|
|
||||||
merchantability of this software or the suitability of this software for any
|
|
||||||
particular purpose. It is provided "as is" without express or implied warranty
|
|
||||||
of any kind.
|
|
||||||
.
|
|
||||||
These notices must be retained in any copies of any part of this documentation
|
|
||||||
and/or software.
|
|
||||||
@ -1,2 +0,0 @@
|
|||||||
usr/lib/python*/*-packages/recollchm/*
|
|
||||||
usr/lib/python*/*-packages/recollchm-*/*
|
|
||||||
@ -1,44 +0,0 @@
|
|||||||
#!/usr/bin/make -f
|
|
||||||
|
|
||||||
# Uncomment this to turn on verbose mode.
|
|
||||||
#export DH_VERBOSE=1
|
|
||||||
|
|
||||||
export DEB_BUILD_MAINT_OPTIONS = hardening=+all
|
|
||||||
DPKG_EXPORT_BUILDFLAGS = 1
|
|
||||||
include /usr/share/dpkg/buildflags.mk
|
|
||||||
|
|
||||||
DEB_HOST_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_HOST_GNU_TYPE)
|
|
||||||
DEB_BUILD_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_BUILD_GNU_TYPE)
|
|
||||||
|
|
||||||
build3vers := $(shell py3versions -sv)
|
|
||||||
|
|
||||||
#build qt5 UI
|
|
||||||
export QT_SELECT := qt5
|
|
||||||
|
|
||||||
ifneq (,$(filter parallel=%,$(DEB_BUILD_OPTIONS)))
|
|
||||||
NJOBS := -j $(patsubst parallel=%,%,$(filter parallel=%,$(DEB_BUILD_OPTIONS)))
|
|
||||||
endif
|
|
||||||
|
|
||||||
# main packaging script based on dh7 syntax
|
|
||||||
%:
|
|
||||||
dh $@ --with python3
|
|
||||||
|
|
||||||
override_dh_auto_configure:
|
|
||||||
dh_auto_configure -- --enable-rclgrep --disable-python-module --disable-indexer \
|
|
||||||
--disable-qtgui --disable-recollq --disable-testmains \
|
|
||||||
--disable-xadump
|
|
||||||
|
|
||||||
build3vers := $(shell py3versions -sv)
|
|
||||||
|
|
||||||
override_dh_auto_install:
|
|
||||||
dh_auto_install
|
|
||||||
set -e && for i in $(build3vers); do \
|
|
||||||
(cd python/pychm; python$$i ./setup.py install \
|
|
||||||
--install-layout=deb \
|
|
||||||
--prefix=/usr \
|
|
||||||
--root=$(CURDIR)/debian/tmp/ ) ; \
|
|
||||||
done
|
|
||||||
find $(CURDIR) -type f -name '*.la' -exec rm -f '{}' \;
|
|
||||||
find $(CURDIR) -type f -name '*.pyc' -exec rm -f '{}' \;
|
|
||||||
rm -rf $(CURDIR)/debian/rclgrep/usr/lib/python*/*/*/__pycache__
|
|
||||||
rm -rf $(CURDIR)/debian/tmp/usr/lib/python*/*/*/__pycache__
|
|
||||||
@ -1 +0,0 @@
|
|||||||
3.0 (quilt)
|
|
||||||
@ -1,41 +0,0 @@
|
|||||||
-----BEGIN PGP PUBLIC KEY BLOCK-----
|
|
||||||
|
|
||||||
mQINBFbJ6UABEADLsFg8qXTrNrYUnNS5UXlAWUH7/ZHNRgr/EIkhKAbdlzVAywTM
|
|
||||||
fX6wo9crKzlqT3IcEOFe0RVJoh0FSNEQQlUhyJAFNlbcocsDYNqk7pDjxnUBUMM2
|
|
||||||
U3ikLEPzRxWDhVepAVQPeloD1i8b4MJrSHnLb49PMmXg+6MHA+dzOS59onE5QDcz
|
|
||||||
kw1RF0N0gl7693rOMP/ATefA2KPQyKCIweKB/3NbOcv4/T1XDyag0G7xYkT4stEl
|
|
||||||
TN2P8c6HSyhWDxp2slZ04kdf17TuoeOqMO9gKE+eEC17lllLuhSrbBdfYTYt05pN
|
|
||||||
Y1eRup+6oamoMc3ITD2U2GtY+65AHw5MxjGigpZ3kj5DwF/f2IgtDBSoXjm8aaRb
|
|
||||||
iYMvt3kXnb3Ai/oVvSlkIQMlDDpdAQmzB0FO0MCzzykq5mQVbl3Uw3i2q5vg1IIL
|
|
||||||
fGOB1USa0JOVRSq8C66ncijYO6Jafx3uYCGVdIypoLs332kGsyQaIatoJRbPkKT/
|
|
||||||
Wu/DGE8kHOaCo5795HbRk0O/Up5wQP3N/OXGmrQPtbafRz9bkjXOKGtq660VJ67K
|
|
||||||
ttgY9L1fD7jb+zDoUaY33K8Trfqaxm5aGkI6Pj3VvQSF2CAaJuEnh/c0r9UdGn0e
|
|
||||||
e1L0yP1kUj80Qv99QFEoH2UtBrfLsXAiRvcr/PfyGTp/+Q7wkCHsHC84TwARAQAB
|
|
||||||
tCRKZWFuLUZyYW5jb2lzIERvY2tlcyA8amZAZG9ja2VzLm9yZz6JAkAEEwEKACoC
|
|
||||||
GwMFCwkIBwMFFQoJCAsFFgIDAQACHgECF4ACGQEFAlitGxQFCQWlmM4ACgkQeAjO
|
|
||||||
ltOLkgEE4xAAqGOSt6U+CGdI333Yx7KaCA+XgJPsiaqfG2AIuv4Y0/LW8467uy4u
|
|
||||||
DdbgJ3GQ6kWUZD0b/nrp74Ly5ZM9DCIZzOX9FQ3R9FBhbBS1fVfqFD2yZQv4lze9
|
|
||||||
Bjj7EMRieRGUtVIb7BiUrmJOyIbiGktEOuqqTj7RehN/2sflv5jH2NW33+i3t/x7
|
|
||||||
YWTAPHxieiOYO0Z0JtXe+ZXJ92LNaR+5DOsGItTSeJKzbh1oUtAcbt5DDDQKMJwb
|
|
||||||
tIRg+9Mjj3IUqCsiFkKOfq34TXDu2paKWkdGuOJ8u2DqvgUYkqFfY4JOpWrax+Mt
|
|
||||||
hsS6VSDIxL7H9UqaJpXWMMhUN2gFM+wy/y1OeNo5bKM4KiLbaugEvOb1RCQm2R6h
|
|
||||||
HNcDO52KSFJMZSCzO/jjN2qJjDcLu2DAsQzWI+bzZgP+tpr3yWvW2OCCY+PdT4ZA
|
|
||||||
5lwnd85P5x1wYhb/eoXi9QyWburu3vaNGdDWUljUkBB29l09hoDbAYPwWujLDGyT
|
|
||||||
0j6+NWh27dLe8bnwe5YEBPHcwvuLnSBVVXY+UM/0toEWwpRdTvnxZUKKxtN+yiCA
|
|
||||||
k82qRNXaUGaWpaL0xYPfanZSXi1dSNNEertS/BrF5PpmNdQsK1+sibNF1KKhR5ge
|
|
||||||
2QSrjoNzL4kBgJq4ojJBcBd75p9HzheXCHdG1EHQBTeetDqiwEPbO/W0KUplYW4t
|
|
||||||
RnJhbmNvaXMgRG9ja2VzIDxqZmRvY2tlc0BnbWFpbC5jb20+iQI9BBMBCgAnAhsD
|
|
||||||
BQsJCAcDBRUKCQgLBRYCAwEAAh4BAheABQJYrRsbBQkFpZjOAAoJEHgIzpbTi5IB
|
|
||||||
GKYP/09nsWnDCqv+3OKzmbHlMKCLvGU8IGU1q909sUelKmyjSFXmh3BsgR4DrfXu
|
|
||||||
hGWtmu/mPYzCWzbK8TWYsU1O6em7YRY8lt/q/8gciSahl+xFT/G5GJHf7KFgtsSn
|
|
||||||
QcbA18dzXKpxmTOTMEmWLh4zZlaUbaH2gmpXBQvH4smu/FV6rq5YYYDG9A3PDujr
|
|
||||||
QmOyguD7wNvb6ahrgpTbMawsj6zLIT1pkC6t1Orz/gmYsuk47EJFfvaO3+YIUQ0D
|
|
||||||
pFN9EkDjhcIa3vSsd+EBMbmweFB6y9gs7LmolqwiddUlYv5wGOLgiE1EJEI4bcvr
|
|
||||||
vm8RWHziWytmpTPjzFpETaBVMC8xTt6tiNWNeTUkjbBX0Fek9GEvzAJIpe18LnM8
|
|
||||||
raFREUriHuUwIGrrFrhj7rBAX51DiiJUguDi+842SjlzVE2SCwyjXVlglDItBPKO
|
|
||||||
Y284KpI+wLhJCggtwtzZOQcAc5l8j3JpXjhm1tjSKggEONdBu2l7mWZRAJCBziMK
|
|
||||||
mnUPL8q44l2hc/sDu4cCpsHW+pssGDQqtR+t/fPMGFuXd+WnfYskhyQVms44yAYJ
|
|
||||||
Y/cx43tgYLHDx2TraTQZqh1qgmrXesS2DiT+5pCjQh0ChwTEBjGCz41WcQkD9nTL
|
|
||||||
k3E6amPE6WAPS07bX9zkLHYYIOu8wd8nRoPKlVjhMpBvz8LE
|
|
||||||
=2J/9
|
|
||||||
-----END PGP PUBLIC KEY BLOCK-----
|
|
||||||
@ -1,4 +0,0 @@
|
|||||||
version=4
|
|
||||||
opts=pgpmode=auto \
|
|
||||||
https://www.lesbonscomptes.com/recoll/download.html \
|
|
||||||
(?:|.*/)recoll(?:[_\-]v?|)(\d[^\s/]*)\.(?:tar\.xz|txz|tar\.bz2|tbz2|tar\.gz|tgz)
|
|
||||||
@ -60,40 +60,40 @@ index f41a9f39..dc3085a4 100755
|
|||||||
#
|
#
|
||||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||||
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||||
diff --git filters/rcl7z.py filters/rcl7z.py
|
diff --git filters/rcl7z filters/rcl7z
|
||||||
index c68c8bcb..ac50c4ec 100755
|
index c68c8bcb..ac50c4ec 100755
|
||||||
--- filters/rcl7z.py
|
--- filters/rcl7z
|
||||||
+++ filters/rcl7z.py
|
+++ filters/rcl7z
|
||||||
@@ -1,4 +1,4 @@
|
@@ -1,4 +1,4 @@
|
||||||
-#!/usr/bin/env python2
|
-#!/usr/bin/env python2
|
||||||
+#!/usr/bin/env python2.7
|
+#!/usr/bin/env python2.7
|
||||||
|
|
||||||
# 7-Zip file filter for Recoll
|
# 7-Zip file filter for Recoll
|
||||||
|
|
||||||
diff --git filters/rclaudio.py filters/rclaudio.py
|
diff --git filters/rclaudio filters/rclaudio
|
||||||
index 94ca0be7..08d6375a 100755
|
index 94ca0be7..08d6375a 100755
|
||||||
--- filters/rclaudio.py
|
--- filters/rclaudio
|
||||||
+++ filters/rclaudio.py
|
+++ filters/rclaudio
|
||||||
@@ -1,4 +1,4 @@
|
@@ -1,4 +1,4 @@
|
||||||
-#!/usr/bin/env python2
|
-#!/usr/bin/env python2
|
||||||
+#!/usr/bin/env python2.7
|
+#!/usr/bin/env python2.7
|
||||||
|
|
||||||
# Audio tag filter for Recoll, using mutagen
|
# Audio tag filter for Recoll, using mutagen
|
||||||
|
|
||||||
diff --git filters/rclchm.py filters/rclchm.py
|
diff --git filters/rclchm filters/rclchm
|
||||||
index f9811c37..3bc9b16d 100755
|
index f9811c37..3bc9b16d 100755
|
||||||
--- filters/rclchm.py
|
--- filters/rclchm
|
||||||
+++ filters/rclchm.py
|
+++ filters/rclchm
|
||||||
@@ -1,4 +1,4 @@
|
@@ -1,4 +1,4 @@
|
||||||
-#!/usr/bin/env python2
|
-#!/usr/bin/env python2
|
||||||
+#!/usr/bin/env python2.7
|
+#!/usr/bin/env python2.7
|
||||||
"""Extract Html files from a Microsoft Compiled Html Help file (.chm)
|
"""Extract Html files from a Microsoft Compiled Html Help file (.chm)
|
||||||
Needs at least python 2.2 for HTMLParser (chmlib needs 2.2 too)"""
|
Needs at least python 2.2 for HTMLParser (chmlib needs 2.2 too)"""
|
||||||
|
|
||||||
diff --git filters/rcldia.py filters/rcldia.py
|
diff --git filters/rcldia filters/rcldia
|
||||||
index 282148eb..a480294b 100755
|
index 282148eb..a480294b 100755
|
||||||
--- filters/rcldia.py
|
--- filters/rcldia
|
||||||
+++ filters/rcldia.py
|
+++ filters/rcldia
|
||||||
@@ -1,4 +1,4 @@
|
@@ -1,4 +1,4 @@
|
||||||
-#!/usr/bin/env python2
|
-#!/usr/bin/env python2
|
||||||
+#!/usr/bin/env python2.7
|
+#!/usr/bin/env python2.7
|
||||||
@ -120,30 +120,30 @@ index e8fa1831..b92b185d 100755
|
|||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
|
|
||||||
import rclexecm
|
import rclexecm
|
||||||
diff --git filters/rclepub.py filters/rclepub.py
|
diff --git filters/rclepub filters/rclepub
|
||||||
index 8042d7f9..51786af1 100755
|
index 8042d7f9..51786af1 100755
|
||||||
--- filters/rclepub.py
|
--- filters/rclepub
|
||||||
+++ filters/rclepub.py
|
+++ filters/rclepub
|
||||||
@@ -1,4 +1,4 @@
|
@@ -1,4 +1,4 @@
|
||||||
-#!/usr/bin/env python2
|
-#!/usr/bin/env python2
|
||||||
+#!/usr/bin/env python2.7
|
+#!/usr/bin/env python2.7
|
||||||
"""Extract Html content from an EPUB file (.epub)"""
|
"""Extract Html content from an EPUB file (.epub)"""
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
|
|
||||||
diff --git filters/rclepub.py1 filters/rclepub.py1
|
diff --git filters/rclepub1 filters/rclepub1
|
||||||
index bd44f635..a7ea6c06 100755
|
index bd44f635..a7ea6c06 100755
|
||||||
--- filters/rclepub.py1
|
--- filters/rclepub1
|
||||||
+++ filters/rclepub.py1
|
+++ filters/rclepub1
|
||||||
@@ -1,4 +1,4 @@
|
@@ -1,4 +1,4 @@
|
||||||
-#!/usr/bin/env python2
|
-#!/usr/bin/env python2
|
||||||
+#!/usr/bin/env python2.7
|
+#!/usr/bin/env python2.7
|
||||||
"""Extract Html content from an EPUB file (.chm), concatenating all sections"""
|
"""Extract Html content from an EPUB file (.chm), concatenating all sections"""
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
|
|
||||||
diff --git filters/rclics.py filters/rclics.py
|
diff --git filters/rclics filters/rclics
|
||||||
index 0ef04f2d..de177024 100755
|
index 0ef04f2d..de177024 100755
|
||||||
--- filters/rclics.py
|
--- filters/rclics
|
||||||
+++ filters/rclics.py
|
+++ filters/rclics
|
||||||
@@ -1,4 +1,4 @@
|
@@ -1,4 +1,4 @@
|
||||||
-#!/usr/bin/env python2
|
-#!/usr/bin/env python2
|
||||||
+#!/usr/bin/env python2.7
|
+#!/usr/bin/env python2.7
|
||||||
@ -160,20 +160,20 @@ index 7eb1da91..4eb6c9b0 100755
|
|||||||
|
|
||||||
# Python-based Image Tag extractor for Recoll. This is less thorough
|
# Python-based Image Tag extractor for Recoll. This is less thorough
|
||||||
# than the Perl-based rclimg script, but useful if you don't want to
|
# than the Perl-based rclimg script, but useful if you don't want to
|
||||||
diff --git filters/rclinfo.py filters/rclinfo.py
|
diff --git filters/rclinfo filters/rclinfo
|
||||||
index f353d19e..36cf34e0 100755
|
index f353d19e..36cf34e0 100755
|
||||||
--- filters/rclinfo.py
|
--- filters/rclinfo
|
||||||
+++ filters/rclinfo.py
|
+++ filters/rclinfo
|
||||||
@@ -1,4 +1,4 @@
|
@@ -1,4 +1,4 @@
|
||||||
-#!/usr/bin/env python2
|
-#!/usr/bin/env python2
|
||||||
+#!/usr/bin/env python2.7
|
+#!/usr/bin/env python2.7
|
||||||
|
|
||||||
# Read a file in GNU info format and output its nodes as subdocs,
|
# Read a file in GNU info format and output its nodes as subdocs,
|
||||||
# interfacing with recoll execm
|
# interfacing with recoll execm
|
||||||
diff --git filters/rclkar.py filters/rclkar.py
|
diff --git filters/rclkar filters/rclkar
|
||||||
index d6570dd5..34b8d2a2 100755
|
index d6570dd5..34b8d2a2 100755
|
||||||
--- filters/rclkar.py
|
--- filters/rclkar
|
||||||
+++ filters/rclkar.py
|
+++ filters/rclkar
|
||||||
@@ -1,4 +1,4 @@
|
@@ -1,4 +1,4 @@
|
||||||
-#!/usr/bin/env python2
|
-#!/usr/bin/env python2
|
||||||
+#!/usr/bin/env python2.7
|
+#!/usr/bin/env python2.7
|
||||||
@ -230,10 +230,10 @@ index 615455b3..1e411890 100755
|
|||||||
# -*- coding: iso-8859-1 -*-
|
# -*- coding: iso-8859-1 -*-
|
||||||
"""
|
"""
|
||||||
MoinMoin - Python source parser and colorizer
|
MoinMoin - Python source parser and colorizer
|
||||||
diff --git filters/rclrar.py filters/rclrar.py
|
diff --git filters/rclrar filters/rclrar
|
||||||
index 8f723fa5..5f6adfb0 100755
|
index 8f723fa5..5f6adfb0 100755
|
||||||
--- filters/rclrar.py
|
--- filters/rclrar
|
||||||
+++ filters/rclrar.py
|
+++ filters/rclrar
|
||||||
@@ -1,4 +1,4 @@
|
@@ -1,4 +1,4 @@
|
||||||
-#!/usr/bin/env python2
|
-#!/usr/bin/env python2
|
||||||
+#!/usr/bin/env python2.7
|
+#!/usr/bin/env python2.7
|
||||||
@ -280,10 +280,10 @@ index 8c1b8aea..cee17324 100755
|
|||||||
# Copyright (C) 2014 J.F.Dockes
|
# Copyright (C) 2014 J.F.Dockes
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License as published by
|
# it under the terms of the GNU General Public License as published by
|
||||||
diff --git filters/rcltar.py filters/rcltar.py
|
diff --git filters/rcltar filters/rcltar
|
||||||
index d8bf100d..ab4b306e 100755
|
index d8bf100d..ab4b306e 100755
|
||||||
--- filters/rcltar.py
|
--- filters/rcltar
|
||||||
+++ filters/rcltar.py
|
+++ filters/rcltar
|
||||||
@@ -1,4 +1,4 @@
|
@@ -1,4 +1,4 @@
|
||||||
-#!/usr/bin/env python2
|
-#!/usr/bin/env python2
|
||||||
+#!/usr/bin/env python2.7
|
+#!/usr/bin/env python2.7
|
||||||
@ -320,10 +320,10 @@ index 32a11c1a..eab3b257 100644
|
|||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
|
|
||||||
import rclexecm
|
import rclexecm
|
||||||
diff --git filters/rclwar.py filters/rclwar.py
|
diff --git filters/rclwar filters/rclwar
|
||||||
index b654f3b3..301e28e9 100755
|
index b654f3b3..301e28e9 100755
|
||||||
--- filters/rclwar.py
|
--- filters/rclwar
|
||||||
+++ filters/rclwar.py
|
+++ filters/rclwar
|
||||||
@@ -1,4 +1,4 @@
|
@@ -1,4 +1,4 @@
|
||||||
-#!/usr/bin/env python2
|
-#!/usr/bin/env python2
|
||||||
+#!/usr/bin/env python2.7
|
+#!/usr/bin/env python2.7
|
||||||
@ -360,10 +360,10 @@ index 158e1222..602769af 100755
|
|||||||
# Copyright (C) 2016 J.F.Dockes
|
# Copyright (C) 2016 J.F.Dockes
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License as published by
|
# it under the terms of the GNU General Public License as published by
|
||||||
diff --git filters/rclzip.py filters/rclzip.py
|
diff --git filters/rclzip filters/rclzip
|
||||||
index 35739625..0c597fbd 100755
|
index 35739625..0c597fbd 100755
|
||||||
--- filters/rclzip.py
|
--- filters/rclzip
|
||||||
+++ filters/rclzip.py
|
+++ filters/rclzip
|
||||||
@@ -1,4 +1,4 @@
|
@@ -1,4 +1,4 @@
|
||||||
-#!/usr/bin/env python2
|
-#!/usr/bin/env python2
|
||||||
+#!/usr/bin/env python2.7
|
+#!/usr/bin/env python2.7
|
||||||
|
|||||||
@ -12,31 +12,27 @@ usage()
|
|||||||
|
|
||||||
# Adjustable things
|
# Adjustable things
|
||||||
top=~/Recoll
|
top=~/Recoll
|
||||||
# The possibly bogus version we have in paths (may be hardcoded in the .pro)
|
qtversion=5.14.2
|
||||||
# qcbuildloc=Desktop_Qt_5_15_2_clang_64bit
|
# Will probably need adjustment on M1
|
||||||
qcbuildloc=Qt_6_2_4_for_macOS
|
path_clang=clang_64
|
||||||
|
deploy=~/Qt/${qtversion}/${path_clang}/bin/macdeployqt
|
||||||
|
|
||||||
# qtversion=5.15.2
|
|
||||||
qtversion=6.2.4
|
|
||||||
|
|
||||||
#deploy=~/Qt/${qtversion}/macos/clang_64bit/macdeployqt
|
qt_ver_sion=`echo $qtversion | sed -e 's/\./_/g'`
|
||||||
deploy=~/Qt/${qtversion}/macos/bin/macdeployqt
|
|
||||||
|
|
||||||
toprecoll=$top/recoll/src
|
toprecoll=$top/recoll/src
|
||||||
appdir=$toprecoll/build-recoll-win-${qcbuildloc}-Release/recoll.app
|
appdir=$toprecoll/build-recoll-win-Desktop_Qt_${qt_ver_sion}_${path_clang}bit-Release/recoll.app
|
||||||
rclindexdir=$toprecoll/windows/build-recollindex-${qcbuildloc}-Release
|
rclindexdir=$toprecoll/windows/build-recollindex-Desktop_Qt_${qt_ver_sion}_${path_clang}bit-Release
|
||||||
rclqdir=$toprecoll/windows/build-recollq-${qcbuildloc}-Release
|
|
||||||
bindir=$appdir/Contents/MacOS
|
bindir=$appdir/Contents/MacOS
|
||||||
datadir=$appdir/Contents/Resources
|
datadir=$appdir/Contents/Resources
|
||||||
|
|
||||||
dmg=$appdir/../recoll.dmg
|
dmg=$appdir/../recoll.dmg
|
||||||
|
|
||||||
version=`cat $toprecoll/RECOLL-VERSION.txt`
|
version=`cat $toprecoll/VERSION`
|
||||||
|
|
||||||
test -d $appdir || fatal Must first have built recoll in $appdir
|
test -d $appdir || fatal Must first have built recoll in $appdir
|
||||||
|
|
||||||
cp $rclindexdir/recollindex $bindir || exit 1
|
cp $rclindexdir/recollindex $bindir || exit 1
|
||||||
cp $rclqdir/recollq $bindir || exit 1
|
|
||||||
|
|
||||||
cp $top/antiword/antiword $bindir || exit 1
|
cp $top/antiword/antiword $bindir || exit 1
|
||||||
mkdir -p $datadir/antiword || exit 1
|
mkdir -p $datadir/antiword || exit 1
|
||||||
@ -49,7 +45,7 @@ $deploy $appdir -dmg || exit 1
|
|||||||
|
|
||||||
|
|
||||||
hash=`(cd $top/recoll;git log -n 1 | head -1 | awk '{print $2}' |cut -b 1-8)`
|
hash=`(cd $top/recoll;git log -n 1 | head -1 | awk '{print $2}' |cut -b 1-8)`
|
||||||
dte=`date +%Y%m%d`
|
|
||||||
mv $dmg ~/Documents/recoll-$version-$dte-$hash.dmg || exit 1
|
mv $dmg ~/Documents/recoll-$version-$hash.dmg || exit 1
|
||||||
ls -l ~/Documents/recoll-$version-*.dmg
|
ls -l ~/Documents/recoll-$version-*.dmg
|
||||||
|
|
||||||
|
|||||||
@ -3,7 +3,7 @@
|
|||||||
|
|
||||||
Summary: Desktop full text search tool with Qt GUI
|
Summary: Desktop full text search tool with Qt GUI
|
||||||
Name: recoll
|
Name: recoll
|
||||||
Version: 1.32.7
|
Version: 1.29.2
|
||||||
Release: 2%{?dist}
|
Release: 2%{?dist}
|
||||||
Group: Applications/Databases
|
Group: Applications/Databases
|
||||||
License: GPLv2+
|
License: GPLv2+
|
||||||
@ -13,26 +13,15 @@ Source10: qmake-qt5.sh
|
|||||||
BuildRequires: aspell-devel
|
BuildRequires: aspell-devel
|
||||||
BuildRequires: bison
|
BuildRequires: bison
|
||||||
BuildRequires: desktop-file-utils
|
BuildRequires: desktop-file-utils
|
||||||
|
# kio
|
||||||
#BuildRequires: kdelibs4-devel
|
BuildRequires: kdelibs4-devel
|
||||||
|
|
||||||
# Fedora
|
|
||||||
BuildRequires: qt5-qtbase-devel
|
BuildRequires: qt5-qtbase-devel
|
||||||
BuildRequires: qt5-qtwebkit-devel
|
BuildRequires: qt5-qtwebkit-devel
|
||||||
#BuildRequires: qt5-qtwebengine-devel
|
BuildRequires: extra-cmake-modules
|
||||||
|
BuildRequires: kf5-kio-devel
|
||||||
|
BuildRequires: python2-devel
|
||||||
BuildRequires: python3-devel
|
BuildRequires: python3-devel
|
||||||
BuildRequires: xapian-core-devel
|
BuildRequires: xapian-core-devel
|
||||||
BuildRequires: kf5-kio-devel
|
|
||||||
|
|
||||||
# Opensuse
|
|
||||||
#BuildRequires: libQt5Gui-devel
|
|
||||||
#BuildRequires: libqt5-qtwebengine-devel
|
|
||||||
#BuildRequires: python310-devel
|
|
||||||
#BuildRequires: libxapian-devel
|
|
||||||
#BuildRequires: kio-devel
|
|
||||||
|
|
||||||
BuildRequires: extra-cmake-modules
|
|
||||||
BuildRequires: python2-devel
|
|
||||||
BuildRequires: zlib-devel
|
BuildRequires: zlib-devel
|
||||||
BuildRequires: chmlib-devel
|
BuildRequires: chmlib-devel
|
||||||
BuildRequires: libxslt-devel
|
BuildRequires: libxslt-devel
|
||||||
@ -66,7 +55,7 @@ LDFLAGS="%{?__global_ldflags}"; export LDFLAGS
|
|||||||
install -m755 -D %{SOURCE10} qmake-qt5.sh
|
install -m755 -D %{SOURCE10} qmake-qt5.sh
|
||||||
export QMAKE=qmake-qt5
|
export QMAKE=qmake-qt5
|
||||||
|
|
||||||
%configure --enable-webengine
|
%configure
|
||||||
make %{?_smp_mflags}
|
make %{?_smp_mflags}
|
||||||
|
|
||||||
%install
|
%install
|
||||||
@ -81,7 +70,7 @@ rm -f %{buildroot}/usr/share/recoll/filters/xdg-open
|
|||||||
|
|
||||||
# kio_recoll -kde5
|
# kio_recoll -kde5
|
||||||
(
|
(
|
||||||
#mkdir kde/kioslave/kio_recoll/build && pushd kde/kioslave/kio_recoll/build
|
mkdir kde/kioslave/kio_recoll/build && pushd kde/kioslave/kio_recoll/build
|
||||||
%cmake ..
|
%cmake ..
|
||||||
make %{?_smp_mflags} VERBOSE=1
|
make %{?_smp_mflags} VERBOSE=1
|
||||||
make install DESTDIR=%{buildroot}
|
make install DESTDIR=%{buildroot}
|
||||||
@ -148,12 +137,12 @@ exit 0
|
|||||||
%{_datadir}/icons/hicolor/48x48/apps/%{name}.png
|
%{_datadir}/icons/hicolor/48x48/apps/%{name}.png
|
||||||
%{_datadir}/pixmaps/%{name}.png
|
%{_datadir}/pixmaps/%{name}.png
|
||||||
%{_libdir}/recoll
|
%{_libdir}/recoll
|
||||||
%{python2_sitearch}/recoll
|
%{python_sitearch}/recoll
|
||||||
%{python2_sitearch}/Recoll*.egg-info
|
%{python_sitearch}/Recoll*.egg-info
|
||||||
%{python3_sitearch}/recoll
|
%{python3_sitearch}/recoll
|
||||||
%{python3_sitearch}/Recoll*.egg-info
|
%{python3_sitearch}/Recoll*.egg-info
|
||||||
%{python2_sitearch}/recollchm
|
%{python_sitearch}/recollchm
|
||||||
%{python2_sitearch}/recollchm*.egg-info
|
%{python_sitearch}/recollchm*.egg-info
|
||||||
%{python3_sitearch}/recollchm
|
%{python3_sitearch}/recollchm
|
||||||
%{python3_sitearch}/recollchm*.egg-info
|
%{python3_sitearch}/recollchm*.egg-info
|
||||||
%{_mandir}/man1/%{name}.1*
|
%{_mandir}/man1/%{name}.1*
|
||||||
|
|||||||
113
src/Makefile.am
113
src/Makefile.am
@ -4,16 +4,12 @@
|
|||||||
if COND_TESTMAINS
|
if COND_TESTMAINS
|
||||||
MAYBE_TESTMAINS = testmains
|
MAYBE_TESTMAINS = testmains
|
||||||
endif
|
endif
|
||||||
if COND_RCLGREP
|
SUBDIRS = . $(MAYBE_TESTMAINS)
|
||||||
MAYBE_RCLGREP = rclgrep
|
|
||||||
endif
|
|
||||||
SUBDIRS = . $(MAYBE_TESTMAINS) $(MAYBE_RCLGREP)
|
|
||||||
|
|
||||||
DIST_SUBDIRS = .
|
DIST_SUBDIRS = .
|
||||||
|
|
||||||
CXXFLAGS ?= @CXXFLAGS@
|
CXXFLAGS ?= @CXXFLAGS@
|
||||||
XAPIAN_LIBS=@XAPIAN_LIBS@
|
LIBXAPIAN=@LIBXAPIAN@
|
||||||
XAPIAN_CFLAGS=@XAPIAN_CFLAGS@
|
XAPIANCXXFLAGS=@XAPIANCXXFLAGS@
|
||||||
XSLT_CFLAGS=@XSLT_CFLAGS@
|
XSLT_CFLAGS=@XSLT_CFLAGS@
|
||||||
XSLT_LIBS=@XSLT_LIBS@
|
XSLT_LIBS=@XSLT_LIBS@
|
||||||
LIBICONV=@LIBICONV@
|
LIBICONV=@LIBICONV@
|
||||||
@ -42,7 +38,7 @@ COMMONCPPFLAGS = -I. \
|
|||||||
AM_CPPFLAGS = -Wall -Wno-unused -std=c++11 \
|
AM_CPPFLAGS = -Wall -Wno-unused -std=c++11 \
|
||||||
$(COMMONCPPFLAGS) \
|
$(COMMONCPPFLAGS) \
|
||||||
$(INCICONV) \
|
$(INCICONV) \
|
||||||
$(XAPIAN_CFLAGS) \
|
$(XAPIANCXXFLAGS) \
|
||||||
$(XSLT_CFLAGS) \
|
$(XSLT_CFLAGS) \
|
||||||
$(X_CFLAGS) \
|
$(X_CFLAGS) \
|
||||||
-DRECOLL_DATADIR=\"${pkgdatadir}\" \
|
-DRECOLL_DATADIR=\"${pkgdatadir}\" \
|
||||||
@ -59,10 +55,7 @@ else
|
|||||||
endif
|
endif
|
||||||
|
|
||||||
librcldir = $(libdir)/recoll
|
librcldir = $(libdir)/recoll
|
||||||
librcl_LTLIBRARIES =
|
librcl_LTLIBRARIES = librecoll.la
|
||||||
if MAKE_RECOLL_LIB
|
|
||||||
librcl_LTLIBRARIES += librecoll.la
|
|
||||||
endif
|
|
||||||
|
|
||||||
librecoll_la_SOURCES = \
|
librecoll_la_SOURCES = \
|
||||||
aspell/rclaspell.cpp \
|
aspell/rclaspell.cpp \
|
||||||
@ -294,7 +287,7 @@ AM_YFLAGS = -d
|
|||||||
# need it
|
# need it
|
||||||
librecoll_la_LDFLAGS = -release $(VERSION) -no-undefined @NO_UNDEF_LINK_FLAG@
|
librecoll_la_LDFLAGS = -release $(VERSION) -no-undefined @NO_UNDEF_LINK_FLAG@
|
||||||
|
|
||||||
librecoll_la_LIBADD = $(XSLT_LIBS) $(XAPIAN_LIBS) $(LIBICONV) $(X_LIBX11) $(LIBTHREADS)
|
librecoll_la_LIBADD = $(XSLT_LIBS) $(LIBXAPIAN) $(LIBICONV) $(X_LIBX11) $(LIBTHREADS)
|
||||||
|
|
||||||
# There is probably a better way to do this. The KIO needs to be linked
|
# There is probably a better way to do this. The KIO needs to be linked
|
||||||
# with librecoll, but librecoll is installed into a non-standard place
|
# with librecoll, but librecoll is installed into a non-standard place
|
||||||
@ -313,10 +306,7 @@ PicStatic: $(librecoll_la_OBJECTS)
|
|||||||
$(LIBTOOL) --tag=LD --mode=link gcc -g -O -o librecoll.la \
|
$(LIBTOOL) --tag=LD --mode=link gcc -g -O -o librecoll.la \
|
||||||
$(librecoll_la_OBJECTS)
|
$(librecoll_la_OBJECTS)
|
||||||
|
|
||||||
bin_PROGRAMS =
|
bin_PROGRAMS = recollindex
|
||||||
if MAKEINDEXER
|
|
||||||
bin_PROGRAMS += recollindex
|
|
||||||
endif
|
|
||||||
if MAKECMDLINE
|
if MAKECMDLINE
|
||||||
bin_PROGRAMS += recollq
|
bin_PROGRAMS += recollq
|
||||||
endif
|
endif
|
||||||
@ -344,8 +334,10 @@ recollq_SOURCES = query/recollqmain.cpp
|
|||||||
recollq_LDADD = librecoll.la
|
recollq_LDADD = librecoll.la
|
||||||
|
|
||||||
xadump_SOURCES = query/xadump.cpp
|
xadump_SOURCES = query/xadump.cpp
|
||||||
xadump_LDADD = librecoll.la $(XAPIAN_LIBS) $(LIBICONV)
|
xadump_LDADD = librecoll.la $(LIBXAPIAN) $(LIBICONV)
|
||||||
|
|
||||||
|
# Note: I'd prefer the generated query parser files not to be distributed
|
||||||
|
# at all, but failed to achieve this
|
||||||
EXTRA_DIST = \
|
EXTRA_DIST = \
|
||||||
bincimapmime/00README.recoll bincimapmime/AUTHORS bincimapmime/COPYING \
|
bincimapmime/00README.recoll bincimapmime/AUTHORS bincimapmime/COPYING \
|
||||||
\
|
\
|
||||||
@ -365,8 +357,8 @@ doc/user/custom.xsl doc/user/usermanual.xml \
|
|||||||
filters/injectcommon.sh filters/recfiltcommon filters/rcltxtlines.py \
|
filters/injectcommon.sh filters/recfiltcommon filters/rcltxtlines.py \
|
||||||
\
|
\
|
||||||
index/rclmon.sh \
|
index/rclmon.sh \
|
||||||
index/recollindex@.service \
|
index/recollindex-system.service \
|
||||||
index/recollindex.service \
|
index/recollindex-user.service \
|
||||||
\
|
\
|
||||||
kde/kioslave/kio_recoll/00README.txt \
|
kde/kioslave/kio_recoll/00README.txt \
|
||||||
kde/kioslave/kio_recoll/CMakeLists.txt \
|
kde/kioslave/kio_recoll/CMakeLists.txt \
|
||||||
@ -377,10 +369,9 @@ kde/kioslave/kio_recoll/dirif.cpp \
|
|||||||
kde/kioslave/kio_recoll/htmlif.cpp \
|
kde/kioslave/kio_recoll/htmlif.cpp \
|
||||||
kde/kioslave/kio_recoll/kio_recoll.cpp \
|
kde/kioslave/kio_recoll/kio_recoll.cpp \
|
||||||
kde/kioslave/kio_recoll/kio_recoll.h \
|
kde/kioslave/kio_recoll/kio_recoll.h \
|
||||||
kde/kioslave/kio_recoll/recoll.json \
|
|
||||||
kde/kioslave/kio_recoll/recoll.protocol \
|
|
||||||
kde/kioslave/kio_recoll/recollf.protocol \
|
kde/kioslave/kio_recoll/recollf.protocol \
|
||||||
kde/kioslave/kio_recoll/recollnolist.protocol \
|
kde/kioslave/kio_recoll/recollnolist.protocol \
|
||||||
|
kde/kioslave/kio_recoll/recoll.protocol \
|
||||||
\
|
\
|
||||||
kde/kioslave/kio_recoll-kde4/00README.txt \
|
kde/kioslave/kio_recoll-kde4/00README.txt \
|
||||||
kde/kioslave/kio_recoll-kde4/CMakeLists.txt \
|
kde/kioslave/kio_recoll-kde4/CMakeLists.txt \
|
||||||
@ -397,9 +388,6 @@ kde/kioslave/kio_recoll-kde4/recoll.protocol \
|
|||||||
\
|
\
|
||||||
query/location.hh query/position.hh query/stack.hh \
|
query/location.hh query/position.hh query/stack.hh \
|
||||||
\
|
\
|
||||||
qtgui/actsearch.ui \
|
|
||||||
qtgui/actsearch_w.cpp \
|
|
||||||
qtgui/actsearch_w.h \
|
|
||||||
qtgui/advsearch.ui \
|
qtgui/advsearch.ui \
|
||||||
qtgui/advsearch_w.cpp \
|
qtgui/advsearch_w.cpp \
|
||||||
qtgui/advsearch_w.h \
|
qtgui/advsearch_w.h \
|
||||||
@ -419,8 +407,6 @@ qtgui/fragbuts.h \
|
|||||||
qtgui/guiutils.cpp \
|
qtgui/guiutils.cpp \
|
||||||
qtgui/guiutils.h \
|
qtgui/guiutils.h \
|
||||||
qtgui/i18n/*.qm qtgui/i18n/*.ts \
|
qtgui/i18n/*.qm qtgui/i18n/*.ts \
|
||||||
qtgui/idxmodel.cpp \
|
|
||||||
qtgui/idxmodel.h \
|
|
||||||
qtgui/idxsched.h \
|
qtgui/idxsched.h \
|
||||||
qtgui/idxsched.ui \
|
qtgui/idxsched.ui \
|
||||||
qtgui/images/asearch.png \
|
qtgui/images/asearch.png \
|
||||||
@ -489,7 +475,6 @@ qtgui/rclm_idx.cpp \
|
|||||||
qtgui/rclm_menus.cpp \
|
qtgui/rclm_menus.cpp \
|
||||||
qtgui/rclm_preview.cpp \
|
qtgui/rclm_preview.cpp \
|
||||||
qtgui/rclm_saveload.cpp \
|
qtgui/rclm_saveload.cpp \
|
||||||
qtgui/rclm_sidefilters.cpp \
|
|
||||||
qtgui/rclm_view.cpp \
|
qtgui/rclm_view.cpp \
|
||||||
qtgui/rclm_wins.cpp \
|
qtgui/rclm_wins.cpp \
|
||||||
qtgui/rclmain.ui \
|
qtgui/rclmain.ui \
|
||||||
@ -582,20 +567,16 @@ python/samples/recollgui/rclmain.ui \
|
|||||||
python/samples/recollq.py \
|
python/samples/recollq.py \
|
||||||
python/samples/recollqsd.py \
|
python/samples/recollqsd.py \
|
||||||
\
|
\
|
||||||
rclgrep/Makefile.am \
|
sampleconf/fields sampleconf/fragbuts.xml sampleconf/mimeconf \
|
||||||
rclgrep/rclgrep.cpp \
|
sampleconf/mimemap sampleconf/mimeview sampleconf/mimeview.mac \
|
||||||
\
|
|
||||||
sampleconf/fields sampleconf/fragment-buttons.xml sampleconf/mimeconf \
|
|
||||||
sampleconf/mimemap sampleconf/mimeview sampleconf/macos/mimeview \
|
|
||||||
sampleconf/recoll.conf sampleconf/recoll.qss \
|
sampleconf/recoll.conf sampleconf/recoll.qss \
|
||||||
sampleconf/recoll-common.css sampleconf/recoll-common.qss \
|
|
||||||
sampleconf/recoll-dark.qss sampleconf/recoll-dark.css \
|
sampleconf/recoll-dark.qss sampleconf/recoll-dark.css \
|
||||||
\
|
\
|
||||||
testmains/Makefile.am \
|
testmains/Makefile.am \
|
||||||
\
|
\
|
||||||
unac/AUTHORS unac/COPYING unac/README unac/README.recoll unac/unac.c \
|
unac/AUTHORS unac/COPYING unac/README unac/README.recoll unac/unac.c \
|
||||||
\
|
\
|
||||||
RECOLL-VERSION.txt
|
VERSION
|
||||||
|
|
||||||
# EXTRA_DIST: The Php Code does not build anymore. No need to ship it until
|
# EXTRA_DIST: The Php Code does not build anymore. No need to ship it until
|
||||||
# someone fixes it:
|
# someone fixes it:
|
||||||
@ -637,13 +618,13 @@ install-exec-local:: rclpychm-install
|
|||||||
clean-local:: rclpychm-clean
|
clean-local:: rclpychm-clean
|
||||||
rclpychm:
|
rclpychm:
|
||||||
(cd python/pychm; set -x; \
|
(cd python/pychm; set -x; \
|
||||||
for v in 3;do \
|
for v in 2 3;do \
|
||||||
test -n "`which python$${v}`" && python$${v} setup.py build;\
|
test -n "`which python$${v}`" && python$${v} setup.py build;\
|
||||||
done \
|
done \
|
||||||
)
|
)
|
||||||
rclpychm-install:
|
rclpychm-install:
|
||||||
(cd python/pychm; set -x; \
|
(cd python/pychm; set -x; \
|
||||||
for v in 3;do test -n "`which python$${v}`" && \
|
for v in 2 3;do test -n "`which python$${v}`" && \
|
||||||
python$${v} setup.py install \
|
python$${v} setup.py install \
|
||||||
--prefix=${prefix} --root=$${DESTDIR:-/} $(OPTSFORPYTHON); \
|
--prefix=${prefix} --root=$${DESTDIR:-/} $(OPTSFORPYTHON); \
|
||||||
done \
|
done \
|
||||||
@ -673,19 +654,17 @@ defconfdir = $(pkgdatadir)/examples
|
|||||||
defconf_DATA = \
|
defconf_DATA = \
|
||||||
desktop/recollindex.desktop \
|
desktop/recollindex.desktop \
|
||||||
index/rclmon.sh \
|
index/rclmon.sh \
|
||||||
index/recollindex.service \
|
index/recollindex-system.service \
|
||||||
index/recollindex@.service \
|
index/recollindex-user.service \
|
||||||
|
sampleconf/fragbuts.xml \
|
||||||
sampleconf/fields \
|
sampleconf/fields \
|
||||||
sampleconf/fragment-buttons.xml \
|
|
||||||
sampleconf/mimeconf \
|
|
||||||
sampleconf/mimemap \
|
|
||||||
sampleconf/mimeview \
|
|
||||||
sampleconf/recoll-common.css \
|
|
||||||
sampleconf/recoll-common.qss \
|
|
||||||
sampleconf/recoll-dark.css \
|
|
||||||
sampleconf/recoll-dark.qss \
|
|
||||||
sampleconf/recoll.conf \
|
sampleconf/recoll.conf \
|
||||||
sampleconf/recoll.qss
|
sampleconf/mimeconf \
|
||||||
|
sampleconf/recoll.qss \
|
||||||
|
sampleconf/recoll-dark.qss \
|
||||||
|
sampleconf/recoll-dark.css \
|
||||||
|
sampleconf/mimemap \
|
||||||
|
sampleconf/mimeview
|
||||||
|
|
||||||
filterdir = $(pkgdatadir)/filters
|
filterdir = $(pkgdatadir)/filters
|
||||||
dist_filter_DATA = \
|
dist_filter_DATA = \
|
||||||
@ -704,31 +683,30 @@ filters/openxml-xls-body.xsl \
|
|||||||
filters/openxml-word-body.xsl \
|
filters/openxml-word-body.xsl \
|
||||||
filters/openxml-meta.xsl \
|
filters/openxml-meta.xsl \
|
||||||
filters/ppt-dump.py \
|
filters/ppt-dump.py \
|
||||||
filters/rcl7z.py \
|
filters/rcl7z \
|
||||||
filters/rclaptosidman \
|
filters/rclaptosidman \
|
||||||
filters/rclaudio.py \
|
filters/rclaudio \
|
||||||
filters/rclbasehandler.py \
|
filters/rclbasehandler.py \
|
||||||
filters/rclbibtex.sh \
|
filters/rclbibtex.sh \
|
||||||
filters/rclcheckneedretry.sh \
|
filters/rclcheckneedretry.sh \
|
||||||
filters/rclchm.py \
|
filters/rclchm \
|
||||||
filters/rcldia.py \
|
filters/rcldia \
|
||||||
filters/rcldjvu.py \
|
filters/rcldjvu.py \
|
||||||
filters/rcldoc.py \
|
filters/rcldoc.py \
|
||||||
filters/rcldvi \
|
filters/rcldvi \
|
||||||
filters/rclepub.py \
|
filters/rclepub \
|
||||||
filters/rclepub1.py \
|
filters/rclepub1 \
|
||||||
filters/rclexec1.py \
|
filters/rclexec1.py \
|
||||||
filters/rclexecm.py \
|
filters/rclexecm.py \
|
||||||
filters/rclfb2.py \
|
filters/rclfb2.py \
|
||||||
filters/rclgaim \
|
filters/rclgaim \
|
||||||
filters/rclgenxslt.py \
|
filters/rclgenxslt.py \
|
||||||
filters/rclhwp.py \
|
filters/rclhwp.py \
|
||||||
filters/rclics.py \
|
filters/rclics \
|
||||||
filters/rclimg \
|
filters/rclimg \
|
||||||
filters/rclimg.py \
|
filters/rclimg.py \
|
||||||
filters/rclinfo.py \
|
filters/rclinfo \
|
||||||
filters/rclipynb.py \
|
filters/rclkar \
|
||||||
filters/rclkar.py \
|
|
||||||
filters/rclkwd \
|
filters/rclkwd \
|
||||||
filters/rcllatinclass.py \
|
filters/rcllatinclass.py \
|
||||||
filters/rcllatinstops.zip \
|
filters/rcllatinstops.zip \
|
||||||
@ -747,21 +725,21 @@ filters/rclps \
|
|||||||
filters/rclpst.py \
|
filters/rclpst.py \
|
||||||
filters/rclpurple \
|
filters/rclpurple \
|
||||||
filters/rclpython.py \
|
filters/rclpython.py \
|
||||||
filters/rclrar.py \
|
filters/rclrar \
|
||||||
filters/rclrtf.py \
|
filters/rclrtf.py \
|
||||||
filters/rclscribus \
|
filters/rclscribus \
|
||||||
filters/rclshowinfo \
|
filters/rclshowinfo \
|
||||||
filters/rcltar.py \
|
filters/rcltar \
|
||||||
filters/rcltex \
|
filters/rcltex \
|
||||||
filters/rcltext.py \
|
filters/rcltext.py \
|
||||||
filters/rcluncomp \
|
filters/rcluncomp \
|
||||||
filters/rcluncomp.py \
|
filters/rcluncomp.py \
|
||||||
filters/rclwar.py \
|
filters/rclwar \
|
||||||
filters/rclxls.py \
|
filters/rclxls.py \
|
||||||
filters/rclxml.py \
|
filters/rclxml.py \
|
||||||
filters/rclxmp.py \
|
filters/rclxmp.py \
|
||||||
filters/rclxslt.py \
|
filters/rclxslt.py \
|
||||||
filters/rclzip.py \
|
filters/rclzip \
|
||||||
filters/recoll-we-move-files.py \
|
filters/recoll-we-move-files.py \
|
||||||
filters/recollepub.zip \
|
filters/recollepub.zip \
|
||||||
filters/svg.xsl \
|
filters/svg.xsl \
|
||||||
@ -771,13 +749,6 @@ filters/xml.xsl \
|
|||||||
python/recoll/recoll/conftree.py \
|
python/recoll/recoll/conftree.py \
|
||||||
python/recoll/recoll/rclconfig.py
|
python/recoll/recoll/rclconfig.py
|
||||||
|
|
||||||
if INSTALL_SYSTEMD_UNITS
|
|
||||||
systemd_system_unitdir = @SYSTEMD_SYSTEM_UNIT_DIR@
|
|
||||||
systemd_user_unitdir = @SYSTEMD_USER_UNIT_DIR@
|
|
||||||
systemd_system_unit_DATA = index/recollindex@.service
|
|
||||||
systemd_user_unit_DATA = index/recollindex.service
|
|
||||||
endif
|
|
||||||
|
|
||||||
install-data-hook:
|
install-data-hook:
|
||||||
(cd $(DESTDIR)/$(filterdir); \
|
(cd $(DESTDIR)/$(filterdir); \
|
||||||
chmod a+x rcl* ppt-dump.py xls-dump.py xlsxmltocsv.py hotrecoll.py; \
|
chmod a+x rcl* ppt-dump.py xls-dump.py xlsxmltocsv.py hotrecoll.py; \
|
||||||
@ -796,14 +767,14 @@ doc/user/usermanual.html: doc/user/usermanual.xml
|
|||||||
endif
|
endif
|
||||||
|
|
||||||
dist_man1_MANS = doc/man/recoll.1 doc/man/recollq.1 \
|
dist_man1_MANS = doc/man/recoll.1 doc/man/recollq.1 \
|
||||||
doc/man/recollindex.1 doc/man/xadump.1 doc/man/rclgrep.1
|
doc/man/recollindex.1 doc/man/xadump.1
|
||||||
dist_man5_MANS = doc/man/recoll.conf.5
|
dist_man5_MANS = doc/man/recoll.conf.5
|
||||||
|
|
||||||
dist-hook:
|
dist-hook:
|
||||||
(cd $(top_srcdir); find . \
|
(cd $(top_srcdir); find . \
|
||||||
\( -name '*.pyc' -o -name '#*' -o -name '*~' \) -delete)
|
\( -name '*.pyc' -o -name '#*' -o -name '*~' \) -delete)
|
||||||
if test -z "$(NOTAG)";then \
|
if test -z "$(NOTAG)";then \
|
||||||
test -z "`git status -s|grep -v recoll-$(RECOLL-VERSION.txt)`"||exit 1; \
|
test -z "`git status -s|grep -v recoll-$(VERSION)`"||exit 1; \
|
||||||
vers=`echo $(VERSION) | sed -e 's/~/_/g'`;\
|
vers=`echo $(VERSION) | sed -e 's/~/_/g'`;\
|
||||||
git tag -a RECOLL-$$vers -m "Release $$vers tagged"; \
|
git tag -a RECOLL-$$vers -m "Release $$vers tagged"; \
|
||||||
fi
|
fi
|
||||||
|
|||||||
@ -2763,8 +2763,8 @@ Chapter 4. Programming interface
|
|||||||
|
|
||||||
If you can program and want to write an execm handler, it should not be
|
If you can program and want to write an execm handler, it should not be
|
||||||
too difficult to make sense of one of the existing modules. For example,
|
too difficult to make sense of one of the existing modules. For example,
|
||||||
look at rclzip.py which uses Zip file paths as identifiers (ipath), and
|
look at rclzip which uses Zip file paths as identifiers (ipath), and
|
||||||
rclics.py, which uses an integer index. Also have a look at the comments
|
rclics, which uses an integer index. Also have a look at the comments
|
||||||
inside the internfile/mh_execm.h file and possibly at the corresponding
|
inside the internfile/mh_execm.h file and possibly at the corresponding
|
||||||
module.
|
module.
|
||||||
|
|
||||||
@ -2819,7 +2819,7 @@ Chapter 4. Programming interface
|
|||||||
|
|
||||||
text/rtf = exec unrtf --nopict --html; charset=iso-8859-1; mimetype=text/html
|
text/rtf = exec unrtf --nopict --html; charset=iso-8859-1; mimetype=text/html
|
||||||
|
|
||||||
application/x-chm = execm rclchm.py
|
application/x-chm = execm rclchm
|
||||||
|
|
||||||
The fragment specifies that:
|
The fragment specifies that:
|
||||||
|
|
||||||
|
|||||||
@ -1 +0,0 @@
|
|||||||
1.33.1
|
|
||||||
1
src/VERSION
Normal file
1
src/VERSION
Normal file
@ -0,0 +1 @@
|
|||||||
|
1.31.0
|
||||||
@ -41,8 +41,6 @@ class Aspell {
|
|||||||
public:
|
public:
|
||||||
Aspell(const RclConfig *cnf);
|
Aspell(const RclConfig *cnf);
|
||||||
~Aspell();
|
~Aspell();
|
||||||
Aspell(const Aspell &) = delete;
|
|
||||||
Aspell& operator=(const Aspell &) = delete;
|
|
||||||
|
|
||||||
/** Check health */
|
/** Check health */
|
||||||
bool ok() const;
|
bool ok() const;
|
||||||
|
|||||||
@ -4,17 +4,14 @@ set -x
|
|||||||
|
|
||||||
aclocal
|
aclocal
|
||||||
|
|
||||||
# detect libtoolize on linux or glibtoolize in some systems
|
if test X"$HOMEBREW_ENV" != X; then
|
||||||
if (libtoolize --version) < /dev/null > /dev/null 2>&1; then
|
glt=`which glibtoolize`
|
||||||
LIBTOOLIZE=libtoolize
|
fi
|
||||||
elif (glibtoolize --version) < /dev/null > /dev/null 2>&1; then
|
if test X"$glt" != X; then
|
||||||
LIBTOOLIZE=glibtoolize
|
$glt --copy
|
||||||
else
|
else
|
||||||
echo "libtoolize or glibtoolize was not found! Please install libtool." 1>&2
|
libtoolize --copy
|
||||||
exit 1
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
$LIBTOOLIZE --copy
|
|
||||||
|
|
||||||
automake --add-missing --force-missing --copy
|
automake --add-missing --force-missing --copy
|
||||||
autoconf
|
autoconf
|
||||||
|
|||||||
@ -11,7 +11,7 @@
|
|||||||
/* #undef AC_APPLE_UNIVERSAL_BUILD */
|
/* #undef AC_APPLE_UNIVERSAL_BUILD */
|
||||||
|
|
||||||
/* Path to the aspell program */
|
/* Path to the aspell program */
|
||||||
#undef ASPELL_PROG
|
#define ASPELL_PROG "/opt/local/bin/aspell"
|
||||||
|
|
||||||
/* No X11 session monitoring support */
|
/* No X11 session monitoring support */
|
||||||
#define DISABLE_X11MON 1
|
#define DISABLE_X11MON 1
|
||||||
@ -125,7 +125,7 @@
|
|||||||
#define PACKAGE_NAME "Recoll"
|
#define PACKAGE_NAME "Recoll"
|
||||||
|
|
||||||
/* Define to the full name and version of this package. */
|
/* Define to the full name and version of this package. */
|
||||||
#define PACKAGE_STRING "Recoll 1.33.0"
|
#define PACKAGE_STRING "Recoll 1.30.2"
|
||||||
|
|
||||||
/* Define to the one symbol short name of this package. */
|
/* Define to the one symbol short name of this package. */
|
||||||
#define PACKAGE_TARNAME "recoll"
|
#define PACKAGE_TARNAME "recoll"
|
||||||
@ -134,7 +134,7 @@
|
|||||||
#define PACKAGE_URL ""
|
#define PACKAGE_URL ""
|
||||||
|
|
||||||
/* Define to the version of this package. */
|
/* Define to the version of this package. */
|
||||||
#define PACKAGE_VERSION "1.33.0"
|
#define PACKAGE_VERSION "1.30.2"
|
||||||
|
|
||||||
/* putenv parameter is const */
|
/* putenv parameter is const */
|
||||||
/* #undef PUTENV_ARG_CONST */
|
/* #undef PUTENV_ARG_CONST */
|
||||||
|
|||||||
@ -118,7 +118,7 @@
|
|||||||
#define PACKAGE_NAME "Recoll"
|
#define PACKAGE_NAME "Recoll"
|
||||||
|
|
||||||
/* Define to the full name and version of this package. */
|
/* Define to the full name and version of this package. */
|
||||||
#define PACKAGE_STRING "Recoll 1.33.0"
|
#define PACKAGE_STRING "Recoll 1.30.2"
|
||||||
|
|
||||||
/* Define to the one symbol short name of this package. */
|
/* Define to the one symbol short name of this package. */
|
||||||
#define PACKAGE_TARNAME "recoll"
|
#define PACKAGE_TARNAME "recoll"
|
||||||
@ -127,13 +127,13 @@
|
|||||||
#define PACKAGE_URL ""
|
#define PACKAGE_URL ""
|
||||||
|
|
||||||
/* Define to the version of this package. */
|
/* Define to the version of this package. */
|
||||||
#define PACKAGE_VERSION "1.33.0"
|
#define PACKAGE_VERSION "1.30.2"
|
||||||
|
|
||||||
/* putenv parameter is const */
|
/* putenv parameter is const */
|
||||||
/* #undef PUTENV_ARG_CONST */
|
/* #undef PUTENV_ARG_CONST */
|
||||||
|
|
||||||
/* Real time monitoring option */
|
/* Real time monitoring option */
|
||||||
#define RCL_MONITOR 1
|
#undef RCL_MONITOR
|
||||||
|
|
||||||
/* Split camelCase words */
|
/* Split camelCase words */
|
||||||
/* #undef RCL_SPLIT_CAMELCASE */
|
/* #undef RCL_SPLIT_CAMELCASE */
|
||||||
|
|||||||
@ -67,18 +67,4 @@ typedef int ssize_t;
|
|||||||
# define PRETEND_USE(expr) ((void)(expr))
|
# define PRETEND_USE(expr) ((void)(expr))
|
||||||
#endif /* PRETEND_USE */
|
#endif /* PRETEND_USE */
|
||||||
|
|
||||||
// It's complicated to really detect gnu gcc because other compilers define __GNUC__
|
|
||||||
// See stackoverflow questions/38499462/how-to-tell-clang-to-stop-pretending-to-be-other-compilers
|
|
||||||
#if defined(__GNUC__) && !defined(__llvm__) && !defined(__INTEL_COMPILER)
|
|
||||||
#define REAL_GCC __GNUC__ // probably
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef REAL_GCC
|
|
||||||
// Older gcc versions pretended to supply std::regex, but the resulting programs mostly crashed.
|
|
||||||
#include <features.h>
|
|
||||||
#if ! __GNUC_PREREQ(6,0)
|
|
||||||
#define NO_STD_REGEX 1
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* INCLUDED */
|
#endif /* INCLUDED */
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
/* Copyright (C) 2004-2022 J.F.Dockes
|
/* Copyright (C) 2004 J.F.Dockes
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
* the Free Software Foundation; either version 2 of the License, or
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
@ -33,11 +33,14 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <list>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
#include <fstream>
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
#include <iterator>
|
||||||
|
|
||||||
#include "cstr.h"
|
#include "cstr.h"
|
||||||
#include "pathut.h"
|
#include "pathut.h"
|
||||||
@ -55,16 +58,6 @@
|
|||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
// Naming the directory for platform-specific default config files, overriding the top-level ones
|
|
||||||
// E.g. /usr/share/recoll/examples/windows
|
|
||||||
#ifdef _WIN32
|
|
||||||
static const string confsysdir{"windows"};
|
|
||||||
#elif defined(_APPLE__)
|
|
||||||
static const string confsysdir{"macos"};
|
|
||||||
#else
|
|
||||||
static const string confsysdir;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Static, logically const, RclConfig members or module static
|
// Static, logically const, RclConfig members or module static
|
||||||
// variables are initialized once from the first object build during
|
// variables are initialized once from the first object build during
|
||||||
// process initialization.
|
// process initialization.
|
||||||
@ -97,8 +90,9 @@ void RclConfig::setPlusMinus(const string& sbase, const set<string>& upd,
|
|||||||
stringToStrings(sbase, base);
|
stringToStrings(sbase, base);
|
||||||
|
|
||||||
vector<string> diff;
|
vector<string> diff;
|
||||||
auto it = set_difference(base.begin(), base.end(), upd.begin(), upd.end(),
|
auto it =
|
||||||
std::inserter(diff, diff.begin()));
|
set_difference(base.begin(), base.end(), upd.begin(), upd.end(),
|
||||||
|
std::inserter(diff, diff.begin()));
|
||||||
sminus = stringsToString(diff);
|
sminus = stringsToString(diff);
|
||||||
|
|
||||||
diff.clear();
|
diff.clear();
|
||||||
@ -145,7 +139,7 @@ bool ParamStale::needrecompute()
|
|||||||
string newvalue;
|
string newvalue;
|
||||||
conffile->get(paramnames[i], newvalue, parent->m_keydir);
|
conffile->get(paramnames[i], newvalue, parent->m_keydir);
|
||||||
LOGDEB1("ParamStale::needrecompute: " << paramnames[i] << " -> " <<
|
LOGDEB1("ParamStale::needrecompute: " << paramnames[i] << " -> " <<
|
||||||
newvalue << " keydir " << parent->m_keydir << "\n");
|
newvalue << " keydir " << parent->m_keydir << endl);
|
||||||
if (newvalue.compare(savedvalues[i])) {
|
if (newvalue.compare(savedvalues[i])) {
|
||||||
savedvalues[i] = newvalue;
|
savedvalues[i] = newvalue;
|
||||||
needrecomp = true;
|
needrecomp = true;
|
||||||
@ -182,7 +176,8 @@ void ParamStale::init(ConfNull *cnf)
|
|||||||
|
|
||||||
bool RclConfig::isDefaultConfig() const
|
bool RclConfig::isDefaultConfig() const
|
||||||
{
|
{
|
||||||
string defaultconf = path_cat(path_homedata(), path_defaultrecollconfsubdir());
|
string defaultconf = path_cat(path_homedata(),
|
||||||
|
path_defaultrecollconfsubdir());
|
||||||
path_catslash(defaultconf);
|
path_catslash(defaultconf);
|
||||||
string specifiedconf = path_canon(m_confdir);
|
string specifiedconf = path_canon(m_confdir);
|
||||||
path_catslash(specifiedconf);
|
path_catslash(specifiedconf);
|
||||||
@ -192,7 +187,8 @@ bool RclConfig::isDefaultConfig() const
|
|||||||
|
|
||||||
RclConfig::RclConfig(const RclConfig &r)
|
RclConfig::RclConfig(const RclConfig &r)
|
||||||
: m_oldstpsuffstate(this, "recoll_noindex"),
|
: m_oldstpsuffstate(this, "recoll_noindex"),
|
||||||
m_stpsuffstate(this, {"noContentSuffixes", "noContentSuffixes+", "noContentSuffixes-"}),
|
m_stpsuffstate(this, {"noContentSuffixes", "noContentSuffixes+",
|
||||||
|
"noContentSuffixes-"}),
|
||||||
m_skpnstate(this, {"skippedNames", "skippedNames+", "skippedNames-"}),
|
m_skpnstate(this, {"skippedNames", "skippedNames+", "skippedNames-"}),
|
||||||
m_onlnstate(this, "onlyNames"),
|
m_onlnstate(this, "onlyNames"),
|
||||||
m_rmtstate(this, "indexedmimetypes"),
|
m_rmtstate(this, "indexedmimetypes"),
|
||||||
@ -204,7 +200,8 @@ RclConfig::RclConfig(const RclConfig &r)
|
|||||||
|
|
||||||
RclConfig::RclConfig(const string *argcnf)
|
RclConfig::RclConfig(const string *argcnf)
|
||||||
: m_oldstpsuffstate(this, "recoll_noindex"),
|
: m_oldstpsuffstate(this, "recoll_noindex"),
|
||||||
m_stpsuffstate(this, {"noContentSuffixes", "noContentSuffixes+", "noContentSuffixes-"}),
|
m_stpsuffstate(this, {"noContentSuffixes", "noContentSuffixes+",
|
||||||
|
"noContentSuffixes-"}),
|
||||||
m_skpnstate(this, {"skippedNames", "skippedNames+", "skippedNames-"}),
|
m_skpnstate(this, {"skippedNames", "skippedNames+", "skippedNames-"}),
|
||||||
m_onlnstate(this, "onlyNames"),
|
m_onlnstate(this, "onlyNames"),
|
||||||
m_rmtstate(this, "indexedmimetypes"),
|
m_rmtstate(this, "indexedmimetypes"),
|
||||||
@ -233,7 +230,8 @@ RclConfig::RclConfig(const string *argcnf)
|
|||||||
if (argcnf && !argcnf->empty()) {
|
if (argcnf && !argcnf->empty()) {
|
||||||
m_confdir = path_absolute(*argcnf);
|
m_confdir = path_absolute(*argcnf);
|
||||||
if (m_confdir.empty()) {
|
if (m_confdir.empty()) {
|
||||||
m_reason = string("Cant turn [") + *argcnf + "] into absolute path";
|
m_reason =
|
||||||
|
string("Cant turn [") + *argcnf + "] into absolute path";
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -251,8 +249,9 @@ RclConfig::RclConfig(const string *argcnf)
|
|||||||
// this is the default conf
|
// this is the default conf
|
||||||
if (!autoconfdir && !isDefaultConfig()) {
|
if (!autoconfdir && !isDefaultConfig()) {
|
||||||
if (!path_exists(m_confdir)) {
|
if (!path_exists(m_confdir)) {
|
||||||
m_reason = std::string("Explicitly specified configuration [") + m_confdir +
|
m_reason = "Explicitly specified configuration "
|
||||||
"] directory must exist (won't be automatically created). Use mkdir first";
|
"directory must exist"
|
||||||
|
" (won't be automatically created). Use mkdir first";
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -292,7 +291,8 @@ RclConfig::RclConfig(const string *argcnf)
|
|||||||
o_localecharset = string(cstr_cp1252);
|
o_localecharset = string(cstr_cp1252);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
LOGDEB1("RclConfig::getDefCharset: localecharset [" << o_localecharset << "]\n");
|
LOGDEB1("RclConfig::getDefCharset: localecharset [" <<
|
||||||
|
o_localecharset << "]\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
const char *cp;
|
const char *cp;
|
||||||
@ -310,15 +310,8 @@ RclConfig::RclConfig(const string *argcnf)
|
|||||||
m_cdirs.push_back(cp);
|
m_cdirs.push_back(cp);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Base/installation config, and its platform-specific overrides
|
// Base/installation config
|
||||||
std::string defaultsdir = path_cat(m_datadir, "examples");
|
m_cdirs.push_back(path_cat(m_datadir, "examples"));
|
||||||
if (!confsysdir.empty()) {
|
|
||||||
std::string sdir = path_cat(defaultsdir, confsysdir);
|
|
||||||
if (path_isdir(sdir)) {
|
|
||||||
m_cdirs.push_back(sdir);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
m_cdirs.push_back(defaultsdir);
|
|
||||||
|
|
||||||
string cnferrloc;
|
string cnferrloc;
|
||||||
for (const auto& dir : m_cdirs) {
|
for (const auto& dir : m_cdirs) {
|
||||||
@ -346,14 +339,17 @@ RclConfig::RclConfig(const string *argcnf)
|
|||||||
// there are several. This only uses the distributed file, not any
|
// there are several. This only uses the distributed file, not any
|
||||||
// local customization (too complicated).
|
// local customization (too complicated).
|
||||||
if (mime_suffixes.empty()) {
|
if (mime_suffixes.empty()) {
|
||||||
ConfSimple mm(path_cat(path_cat(m_datadir, "examples"), "mimemap").c_str());
|
ConfSimple mm(
|
||||||
|
path_cat(path_cat(m_datadir, "examples"), "mimemap").c_str());
|
||||||
vector<ConfLine> order = mm.getlines();
|
vector<ConfLine> order = mm.getlines();
|
||||||
for (const auto& entry: order) {
|
for (const auto& entry: order) {
|
||||||
if (entry.m_kind == ConfLine::CFL_VAR) {
|
if (entry.m_kind == ConfLine::CFL_VAR) {
|
||||||
LOGDEB1("CONFIG: " << entry.m_data << " -> " << entry.m_value << "\n");
|
LOGDEB1("CONFIG: " << entry.m_data << " -> " << entry.m_value <<
|
||||||
|
endl);
|
||||||
// Remember: insert() only does anything for new keys,
|
// Remember: insert() only does anything for new keys,
|
||||||
// so we only have the first value in the map
|
// so we only have the first value in the map
|
||||||
mime_suffixes.insert(pair<string,string>(entry.m_value, entry.m_data));
|
mime_suffixes.insert(
|
||||||
|
pair<string,string>(entry.m_value, entry.m_data));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -388,9 +384,9 @@ RclConfig::RclConfig(const string *argcnf)
|
|||||||
|
|
||||||
bool RclConfig::updateMainConfig()
|
bool RclConfig::updateMainConfig()
|
||||||
{
|
{
|
||||||
ConfStack<ConfTree> *newconf = new ConfStack<ConfTree>("recoll.conf", m_cdirs, true);
|
ConfStack<ConfTree> *newconf =
|
||||||
|
new ConfStack<ConfTree>("recoll.conf", m_cdirs, true);
|
||||||
if (newconf == 0 || !newconf->ok()) {
|
if (newconf == 0 || !newconf->ok()) {
|
||||||
std::cerr << "updateMainConfig: new Confstack not ok\n";
|
|
||||||
if (m_conf)
|
if (m_conf)
|
||||||
return false;
|
return false;
|
||||||
m_ok = false;
|
m_ok = false;
|
||||||
@ -520,7 +516,8 @@ bool RclConfig::getConfParam(const string &name, vector<int> *vip,
|
|||||||
char *ep;
|
char *ep;
|
||||||
vip->push_back(strtol(vs[i].c_str(), &ep, 0));
|
vip->push_back(strtol(vs[i].c_str(), &ep, 0));
|
||||||
if (ep == vs[i].c_str()) {
|
if (ep == vs[i].c_str()) {
|
||||||
LOGDEB("RclConfig::getConfParam: bad int value in [" << name << "]\n");
|
LOGDEB("RclConfig::getConfParam: bad int value in [" << name <<
|
||||||
|
"]\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -589,10 +586,12 @@ void RclConfig::initThrConf()
|
|||||||
out:
|
out:
|
||||||
ostringstream sconf;
|
ostringstream sconf;
|
||||||
for (unsigned int i = 0; i < 3; i++) {
|
for (unsigned int i = 0; i < 3; i++) {
|
||||||
sconf << "(" << m_thrConf[i].first << ", " << m_thrConf[i].second << ") ";
|
sconf << "(" << m_thrConf[i].first << ", " << m_thrConf[i].second <<
|
||||||
|
") ";
|
||||||
}
|
}
|
||||||
|
|
||||||
LOGDEB("RclConfig::initThrConf: chosen config (ql,nt): " << sconf.str() << "\n");
|
LOGDEB("RclConfig::initThrConf: chosen config (ql,nt): " << sconf.str() <<
|
||||||
|
"\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
pair<int,int> RclConfig::getThrConf(ThrStage who) const
|
pair<int,int> RclConfig::getThrConf(ThrStage who) const
|
||||||
@ -685,7 +684,7 @@ public:
|
|||||||
class SuffCmp {
|
class SuffCmp {
|
||||||
public:
|
public:
|
||||||
int operator()(const SfString& s1, const SfString& s2) const {
|
int operator()(const SfString& s1, const SfString& s2) const {
|
||||||
//cout << "Comparing " << s1.m_str << " and " << s2.m_str << "\n";
|
//cout << "Comparing " << s1.m_str << " and " << s2.m_str << endl;
|
||||||
string::const_reverse_iterator
|
string::const_reverse_iterator
|
||||||
r1 = s1.m_str.rbegin(), re1 = s1.m_str.rend(),
|
r1 = s1.m_str.rbegin(), re1 = s1.m_str.rend(),
|
||||||
r2 = s2.m_str.rbegin(), re2 = s2.m_str.rend();
|
r2 = s2.m_str.rbegin(), re2 = s2.m_str.rend();
|
||||||
@ -735,7 +734,8 @@ vector<string>& RclConfig::getStopSuffixes()
|
|||||||
m_maxsufflen = int(entry.length());
|
m_maxsufflen = int(entry.length());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
LOGDEB1("RclConfig::getStopSuffixes: ->" << stringsToString(m_stopsuffvec) << "\n");
|
LOGDEB1("RclConfig::getStopSuffixes: ->" <<
|
||||||
|
stringsToString(m_stopsuffvec) << endl);
|
||||||
return m_stopsuffvec;
|
return m_stopsuffvec;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -845,22 +845,16 @@ string RclConfig::getMimeHandlerDef(const string &mtype, bool filtertypes, const
|
|||||||
if (!m_excludeMTypes.empty() && m_excludeMTypes.count(stringtolower(mtype))) {
|
if (!m_excludeMTypes.empty() && m_excludeMTypes.count(stringtolower(mtype))) {
|
||||||
IdxDiags::theDiags().record(IdxDiags::ExcludedMime, fn, mtype);
|
IdxDiags::theDiags().record(IdxDiags::ExcludedMime, fn, mtype);
|
||||||
LOGDEB1("RclConfig::getMimeHandlerDef: " << mtype << " in excluded mime list (fn " <<
|
LOGDEB1("RclConfig::getMimeHandlerDef: " << mtype << " in excluded mime list (fn " <<
|
||||||
fn << ")\n");
|
fn << ")\n");
|
||||||
return hs;
|
return hs;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!mimeconf->get(mtype, hs, "index")) {
|
if (!mimeconf->get(mtype, hs, "index")) {
|
||||||
if (mtype.find("text/") == 0) {
|
|
||||||
bool alltext{false};
|
|
||||||
getConfParam("textunknownasplain", &alltext);
|
|
||||||
if (alltext && mimeconf->get("text/plain", hs, "index")) {
|
|
||||||
return hs;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (mtype != "inode/directory") {
|
if (mtype != "inode/directory") {
|
||||||
IdxDiags::theDiags().record(IdxDiags::NoHandler, fn, mtype);
|
IdxDiags::theDiags().record(IdxDiags::NoHandler, fn, mtype);
|
||||||
LOGDEB1("getMimeHandlerDef: no handler for '" << mtype << "' (fn " << fn << ")\n");
|
LOGDEB1("getMimeHandlerDef: no handler for '" << mtype << "' (fn " <<
|
||||||
|
fn << ")\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return hs;
|
return hs;
|
||||||
@ -879,11 +873,12 @@ const vector<MDReaper>& RclConfig::getMDReapers()
|
|||||||
ConfSimple attrs;
|
ConfSimple attrs;
|
||||||
valueSplitAttributes(sreapers, value, attrs);
|
valueSplitAttributes(sreapers, value, attrs);
|
||||||
vector<string> nmlst = attrs.getNames(cstr_null);
|
vector<string> nmlst = attrs.getNames(cstr_null);
|
||||||
for (const auto& nm : nmlst) {
|
for (vector<string>::const_iterator it = nmlst.begin();
|
||||||
|
it != nmlst.end(); it++) {
|
||||||
MDReaper reaper;
|
MDReaper reaper;
|
||||||
reaper.fieldname = fieldCanon(nm);
|
reaper.fieldname = fieldCanon(*it);
|
||||||
string s;
|
string s;
|
||||||
attrs.get(nm, s);
|
attrs.get(*it, s);
|
||||||
stringToStrings(s, reaper.cmdv);
|
stringToStrings(s, reaper.cmdv);
|
||||||
m_mdreapers.push_back(reaper);
|
m_mdreapers.push_back(reaper);
|
||||||
}
|
}
|
||||||
@ -909,17 +904,11 @@ bool RclConfig::getGuiFilter(const string& catfiltername, string& frag) const
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RclConfig::valueSplitAttributes(const string& whole, string& value, ConfSimple& attrs)
|
bool RclConfig::valueSplitAttributes(const string& whole, string& value,
|
||||||
|
ConfSimple& attrs)
|
||||||
{
|
{
|
||||||
bool inquote{false};
|
/* There is currently no way to escape a semi-colon */
|
||||||
string::size_type semicol0;
|
string::size_type semicol0 = whole.find_first_of(";");
|
||||||
for (semicol0 = 0; semicol0 < whole.size(); semicol0++) {
|
|
||||||
if (whole[semicol0] == '"') {
|
|
||||||
inquote = !inquote;
|
|
||||||
} else if (whole[semicol0] == ';' && !inquote) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
value = whole.substr(0, semicol0);
|
value = whole.substr(0, semicol0);
|
||||||
trimstring(value);
|
trimstring(value);
|
||||||
string attrstr;
|
string attrstr;
|
||||||
@ -1025,14 +1014,15 @@ bool RclConfig::readFieldsConfig(const string& cnferrloc)
|
|||||||
valuetype = FieldTraits::INT;
|
valuetype = FieldTraits::INT;
|
||||||
} else {
|
} else {
|
||||||
LOGERR("readFieldsConfig: bad type for value for " <<
|
LOGERR("readFieldsConfig: bad type for value for " <<
|
||||||
fieldname << " : " << tval << "\n");
|
fieldname << " : " << tval << endl);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
int valuelen = (int)attrs.getInt("len", 0);
|
int valuelen = (int)attrs.getInt("len", 0);
|
||||||
// Find or insert traits entry
|
// Find or insert traits entry
|
||||||
const auto pit =
|
const auto pit =
|
||||||
m_fldtotraits.insert(pair<string, FieldTraits>(canonic, FieldTraits())).first;
|
m_fldtotraits.insert(
|
||||||
|
pair<string, FieldTraits>(canonic, FieldTraits())).first;
|
||||||
pit->second.valueslot = valueslot;
|
pit->second.valueslot = valueslot;
|
||||||
pit->second.valuetype = valuetype;
|
pit->second.valuetype = valuetype;
|
||||||
pit->second.valuelen = valuelen;
|
pit->second.valuelen = valuelen;
|
||||||
@ -1109,7 +1099,8 @@ bool RclConfig::getFieldTraits(const string& _fld, const FieldTraits **ftpp,
|
|||||||
pit->second.pfx << "]\n");
|
pit->second.pfx << "]\n");
|
||||||
return true;
|
return true;
|
||||||
} else {
|
} else {
|
||||||
LOGDEB1("RclConfig::getFieldTraits: no prefix for field [" << fld << "]\n");
|
LOGDEB1("RclConfig::getFieldTraits: no prefix for field [" << fld <<
|
||||||
|
"]\n");
|
||||||
*ftpp = 0;
|
*ftpp = 0;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -1131,7 +1122,8 @@ string RclConfig::fieldCanon(const string& f) const
|
|||||||
string fld = stringtolower(f);
|
string fld = stringtolower(f);
|
||||||
const auto it = m_aliastocanon.find(fld);
|
const auto it = m_aliastocanon.find(fld);
|
||||||
if (it != m_aliastocanon.end()) {
|
if (it != m_aliastocanon.end()) {
|
||||||
LOGDEB1("RclConfig::fieldCanon: [" << f << "] -> [" << it->second << "]\n");
|
LOGDEB1("RclConfig::fieldCanon: [" << f << "] -> [" << it->second <<
|
||||||
|
"]\n");
|
||||||
return it->second;
|
return it->second;
|
||||||
}
|
}
|
||||||
LOGDEB1("RclConfig::fieldCanon: [" << f << "] -> [" << fld << "]\n");
|
LOGDEB1("RclConfig::fieldCanon: [" << f << "] -> [" << fld << "]\n");
|
||||||
@ -1142,7 +1134,8 @@ string RclConfig::fieldQCanon(const string& f) const
|
|||||||
{
|
{
|
||||||
const auto it = m_aliastoqcanon.find(stringtolower(f));
|
const auto it = m_aliastoqcanon.find(stringtolower(f));
|
||||||
if (it != m_aliastoqcanon.end()) {
|
if (it != m_aliastoqcanon.end()) {
|
||||||
LOGDEB1("RclConfig::fieldQCanon: [" << f << "] -> [" << it->second << "]\n");
|
LOGDEB1("RclConfig::fieldQCanon: [" << f << "] -> [" << it->second <<
|
||||||
|
"]\n");
|
||||||
return it->second;
|
return it->second;
|
||||||
}
|
}
|
||||||
return fieldCanon(f);
|
return fieldCanon(f);
|
||||||
@ -1172,14 +1165,15 @@ set<string> RclConfig::getMimeViewerAllEx() const
|
|||||||
|
|
||||||
string base, plus, minus;
|
string base, plus, minus;
|
||||||
mimeview->get("xallexcepts", base, "");
|
mimeview->get("xallexcepts", base, "");
|
||||||
LOGDEB1("RclConfig::getMimeViewerAllEx(): base: " << base << "\n");
|
LOGDEB1("RclConfig::getMimeViewerAllEx(): base: " << base << endl);
|
||||||
mimeview->get("xallexcepts+", plus, "");
|
mimeview->get("xallexcepts+", plus, "");
|
||||||
LOGDEB1("RclConfig::getMimeViewerAllEx(): plus: " << plus << "\n");
|
LOGDEB1("RclConfig::getMimeViewerAllEx(): plus: " << plus << endl);
|
||||||
mimeview->get("xallexcepts-", minus, "");
|
mimeview->get("xallexcepts-", minus, "");
|
||||||
LOGDEB1("RclConfig::getMimeViewerAllEx(): minus: " << minus << "\n");
|
LOGDEB1("RclConfig::getMimeViewerAllEx(): minus: " << minus << endl);
|
||||||
|
|
||||||
computeBasePlusMinus(res, base, plus, minus);
|
computeBasePlusMinus(res, base, plus, minus);
|
||||||
LOGDEB1("RclConfig::getMimeViewerAllEx(): res: " << stringsToString(res) << "\n");
|
LOGDEB1("RclConfig::getMimeViewerAllEx(): res: " << stringsToString(res)
|
||||||
|
<< endl);
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1206,9 +1200,11 @@ bool RclConfig::setMimeViewerAllEx(const set<string>& allex)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
string RclConfig::getMimeViewerDef(const string &mtype, const string& apptag, bool useall) const
|
string RclConfig::getMimeViewerDef(const string &mtype, const string& apptag,
|
||||||
|
bool useall) const
|
||||||
{
|
{
|
||||||
LOGDEB2("RclConfig::getMimeViewerDef: mtype [" << mtype << "] apptag [" << apptag << "]\n");
|
LOGDEB2("RclConfig::getMimeViewerDef: mtype [" << mtype << "] apptag ["
|
||||||
|
<< apptag << "]\n");
|
||||||
string hs;
|
string hs;
|
||||||
if (mimeview == 0)
|
if (mimeview == 0)
|
||||||
return hs;
|
return hs;
|
||||||
@ -1235,18 +1231,9 @@ string RclConfig::getMimeViewerDef(const string &mtype, const string& apptag, bo
|
|||||||
// Fallthrough to normal case.
|
// Fallthrough to normal case.
|
||||||
}
|
}
|
||||||
|
|
||||||
if (apptag.empty() || !mimeview->get(mtype + string("|") + apptag, hs, "view"))
|
if (apptag.empty() || !mimeview->get(mtype + string("|") + apptag,
|
||||||
|
hs, "view"))
|
||||||
mimeview->get(mtype, hs, "view");
|
mimeview->get(mtype, hs, "view");
|
||||||
|
|
||||||
// Last try for text/xxx if alltext is set
|
|
||||||
if (hs.empty() && mtype.find("text/") == 0 && mtype != "text/plain") {
|
|
||||||
bool alltext{false};
|
|
||||||
getConfParam("textunknownasplain", &alltext);
|
|
||||||
if (alltext) {
|
|
||||||
return getMimeViewerDef("text/plain", apptag, useall);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return hs;
|
return hs;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1255,8 +1242,9 @@ bool RclConfig::getMimeViewerDefs(vector<pair<string, string> >& defs) const
|
|||||||
if (mimeview == 0)
|
if (mimeview == 0)
|
||||||
return false;
|
return false;
|
||||||
vector<string>tps = mimeview->getNames("view");
|
vector<string>tps = mimeview->getNames("view");
|
||||||
for (const auto& tp : tps) {
|
for (vector<string>::const_iterator it = tps.begin();
|
||||||
defs.push_back(pair<string, string>(tp, getMimeViewerDef(tp, "", 0)));
|
it != tps.end();it++) {
|
||||||
|
defs.push_back(pair<string, string>(*it, getMimeViewerDef(*it, "", 0)));
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -1410,39 +1398,17 @@ string RclConfig::getIdxStatusFile() const
|
|||||||
// Thanks to user Madhu for this fix.
|
// Thanks to user Madhu for this fix.
|
||||||
string RclConfig::getPidfile() const
|
string RclConfig::getPidfile() const
|
||||||
{
|
{
|
||||||
static string fn;
|
const char *p = getenv("XDG_RUNTIME_DIR");
|
||||||
if (fn.empty()) {
|
if (p) {
|
||||||
#ifndef _WIN32
|
string base = path_canon(p);
|
||||||
const char *p = getenv("XDG_RUNTIME_DIR");
|
string digest, hex;
|
||||||
string rundir;
|
string cfdir = path_canon(getConfDir());
|
||||||
if (nullptr == p) {
|
path_catslash(cfdir);
|
||||||
// Problem is, we may have been launched outside the desktop, maybe by cron. Basing
|
MD5String(cfdir, digest);
|
||||||
// everything on XDG_RUNTIME_DIR was a mistake, sometimes resulting in different pidfiles
|
MD5HexPrint(digest, hex);
|
||||||
// being used by recollindex instances. So explicitely test for /run/user/$uid, still
|
return path_cat(base, "/recoll-" + hex + "-index.pid");
|
||||||
// leaving open the remote possibility that XDG_RUNTIME_DIR would be set to something
|
|
||||||
// else...
|
|
||||||
rundir = path_cat("/run/user", lltodecstr(getuid()));
|
|
||||||
if (path_isdir(rundir)) {
|
|
||||||
p = rundir.c_str();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (p) {
|
|
||||||
string base = path_canon(p);
|
|
||||||
string digest, hex;
|
|
||||||
string cfdir = path_canon(getConfDir());
|
|
||||||
path_catslash(cfdir);
|
|
||||||
MD5String(cfdir, digest);
|
|
||||||
MD5HexPrint(digest, hex);
|
|
||||||
fn = path_cat(base, "recoll-" + hex + "-index.pid");
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
#endif // ! _WIN32
|
|
||||||
|
|
||||||
fn = path_cat(getCacheDir(), "index.pid");
|
|
||||||
out:
|
|
||||||
LOGINF("RclConfig: pid/lock file: " << fn << "\n");
|
|
||||||
}
|
}
|
||||||
return fn;
|
return path_cat(getCacheDir(), "index.pid");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -1475,7 +1441,7 @@ static string path_diffstems(const string& p1, const string& p2,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
//cerr << "Common length = " << cl << "\n";
|
//cerr << "Common length = " << cl << endl;
|
||||||
if (cl == 0) {
|
if (cl == 0) {
|
||||||
reason = "Input paths are empty or have no common part";
|
reason = "Input paths are empty or have no common part";
|
||||||
return reason;
|
return reason;
|
||||||
@ -1507,12 +1473,13 @@ void RclConfig::urlrewrite(const string& dbdir, string& url) const
|
|||||||
cur_confdir = m_confdir;
|
cur_confdir = m_confdir;
|
||||||
}
|
}
|
||||||
LOGDEB1("RclConfig::urlrewrite: orgidxconfdir: " << orig_confdir <<
|
LOGDEB1("RclConfig::urlrewrite: orgidxconfdir: " << orig_confdir <<
|
||||||
" cur_confdir " << cur_confdir << "\n");
|
" cur_confdir " << cur_confdir << endl);
|
||||||
string reason = path_diffstems(orig_confdir, cur_confdir, confstemorg, confstemrep);
|
string reason = path_diffstems(orig_confdir, cur_confdir,
|
||||||
|
confstemorg, confstemrep);
|
||||||
if (!reason.empty()) {
|
if (!reason.empty()) {
|
||||||
LOGERR("urlrewrite: path_diffstems failed: " << reason <<
|
LOGERR("urlrewrite: path_diffstems failed: " << reason <<
|
||||||
" : orig_confdir [" << orig_confdir <<
|
" : orig_confdir [" << orig_confdir <<
|
||||||
"] cur_confdir [" << cur_confdir << "\n");
|
"] cur_confdir [" << cur_confdir << endl);
|
||||||
confstemorg = confstemrep = "";
|
confstemorg = confstemrep = "";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1520,7 +1487,8 @@ void RclConfig::urlrewrite(const string& dbdir, string& url) const
|
|||||||
// Do path translations exist for this index ?
|
// Do path translations exist for this index ?
|
||||||
bool needptrans = true;
|
bool needptrans = true;
|
||||||
if (m_ptrans == 0 || !m_ptrans->hasSubKey(dbdir)) {
|
if (m_ptrans == 0 || !m_ptrans->hasSubKey(dbdir)) {
|
||||||
LOGDEB2("RclConfig::urlrewrite: no paths translations (m_ptrans " << m_ptrans << ")\n");
|
LOGDEB2("RclConfig::urlrewrite: no paths translations (m_ptrans " <<
|
||||||
|
m_ptrans << ")\n");
|
||||||
needptrans = false;
|
needptrans = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1671,7 +1639,6 @@ vector<string> RclConfig::getDaemSkippedPaths() const
|
|||||||
// and filtersdir from the config file to the PATH, then use execmd::which()
|
// and filtersdir from the config file to the PATH, then use execmd::which()
|
||||||
string RclConfig::findFilter(const string &icmd) const
|
string RclConfig::findFilter(const string &icmd) const
|
||||||
{
|
{
|
||||||
LOGDEB2("findFilter: " << icmd << "\n");
|
|
||||||
// If the path is absolute, this is it
|
// If the path is absolute, this is it
|
||||||
if (path_isabsolute(icmd))
|
if (path_isabsolute(icmd))
|
||||||
return icmd;
|
return icmd;
|
||||||
@ -1719,18 +1686,12 @@ bool RclConfig::processFilterCmd(std::vector<std::string>& cmd) const
|
|||||||
LOGDEB0("processFilterCmd: in: " << stringsToString(cmd) << "\n");
|
LOGDEB0("processFilterCmd: in: " << stringsToString(cmd) << "\n");
|
||||||
auto it = cmd.begin();
|
auto it = cmd.begin();
|
||||||
|
|
||||||
#ifdef _WIN32
|
// Special-case python and perl on windows: we need to also locate the
|
||||||
// Special-case interpreters on windows: we used to have an additional 1st argument "python" in
|
// first argument which is the script name "python somescript.py".
|
||||||
// mimeconf, but we now rely on the .py extension for better sharing of mimeconf.
|
// On Unix, thanks to #!, we usually just run "somescript.py", but need
|
||||||
std::string ext = path_suffix(*it);
|
// the same change if we ever want to use the same cmd line as windows
|
||||||
if ("py" == ext) {
|
bool hasinterp = !stringlowercmp("python", *it) ||
|
||||||
it = cmd.insert(it, findFilter("python"));
|
!stringlowercmp("perl", *it);
|
||||||
it++;
|
|
||||||
} else if ("pl" == ext) {
|
|
||||||
it = cmd.insert(it, findFilter("perl"));
|
|
||||||
it++;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Note that, if the cmd vector size is 1, post-incrementing the
|
// Note that, if the cmd vector size is 1, post-incrementing the
|
||||||
// iterator in the following statement, which works on x86, leads
|
// iterator in the following statement, which works on x86, leads
|
||||||
@ -1739,15 +1700,25 @@ bool RclConfig::processFilterCmd(std::vector<std::string>& cmd) const
|
|||||||
// whatever... We do it later then.
|
// whatever... We do it later then.
|
||||||
*it = findFilter(*it);
|
*it = findFilter(*it);
|
||||||
|
|
||||||
|
if (hasinterp) {
|
||||||
|
if (cmd.size() < 2) {
|
||||||
|
LOGERR("processFilterCmd: python/perl cmd: no script?. [" <<
|
||||||
|
stringsToString(cmd) << "]\n");
|
||||||
|
return false;
|
||||||
|
} else {
|
||||||
|
++it;
|
||||||
|
*it = findFilter(*it);
|
||||||
|
}
|
||||||
|
}
|
||||||
LOGDEB0("processFilterCmd: out: " << stringsToString(cmd) << "\n");
|
LOGDEB0("processFilterCmd: out: " << stringsToString(cmd) << "\n");
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// This now does nothing more than processFilterCmd (after we changed to relying on the py extension)
|
bool RclConfig::pythonCmd(const std::string& scriptname,
|
||||||
bool RclConfig::pythonCmd(const std::string& scriptname, std::vector<std::string>& cmd) const
|
std::vector<std::string>& cmd) const
|
||||||
{
|
{
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
cmd = {scriptname};
|
cmd = {"python", scriptname};
|
||||||
#else
|
#else
|
||||||
cmd = {scriptname};
|
cmd = {scriptname};
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -103,14 +103,6 @@ public:
|
|||||||
freeAll();
|
freeAll();
|
||||||
}
|
}
|
||||||
|
|
||||||
RclConfig& operator=(const RclConfig &r) {
|
|
||||||
if (this != &r) {
|
|
||||||
freeAll();
|
|
||||||
initFrom(r);
|
|
||||||
}
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return a writable clone of the main config. This belongs to the
|
// Return a writable clone of the main config. This belongs to the
|
||||||
// caller (must delete it when done)
|
// caller (must delete it when done)
|
||||||
ConfNull *cloneMainConfig();
|
ConfNull *cloneMainConfig();
|
||||||
@ -259,18 +251,12 @@ public:
|
|||||||
string getMimeHandlerDef(const string &mimetype, bool filtertypes=false,
|
string getMimeHandlerDef(const string &mimetype, bool filtertypes=false,
|
||||||
const std::string& fn = std::string());
|
const std::string& fn = std::string());
|
||||||
|
|
||||||
/** For lines like: [name = some value; attr1 = value1; attr2 = val2]
|
/** For lines like: "name = some value; attr1 = value1; attr2 = val2"
|
||||||
* Separate the value and store the attributes in a ConfSimple
|
* Separate the value and store the attributes in a ConfSimple
|
||||||
*
|
* @param whole the raw value. No way to escape a semi-colon in there.
|
||||||
* In the value part, semi-colons inside double quotes are ignored, and double quotes are
|
|
||||||
* conserved. In the common case where the string is then processed by stringToStrings() to
|
|
||||||
* build a command line, this allows having semi-colons inside arguments. However, no backslash
|
|
||||||
* escaping is possible, so that, for example "bla\"1;2\"" would not work (the value part
|
|
||||||
* would stop at the semi-colon).
|
|
||||||
*
|
|
||||||
* @param whole the raw value.
|
|
||||||
*/
|
*/
|
||||||
static bool valueSplitAttributes(const string& whole, string& value, ConfSimple& attrs) ;
|
static bool valueSplitAttributes(const string& whole, string& value,
|
||||||
|
ConfSimple& attrs) ;
|
||||||
|
|
||||||
/** Compute difference between 'base' and 'changed', as elements to be
|
/** Compute difference between 'base' and 'changed', as elements to be
|
||||||
* added and substracted from base. Input and output strings are in
|
* added and substracted from base. Input and output strings are in
|
||||||
@ -376,6 +362,14 @@ public:
|
|||||||
return o_origcwd;
|
return o_origcwd;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
RclConfig& operator=(const RclConfig &r) {
|
||||||
|
if (this != &r) {
|
||||||
|
freeAll();
|
||||||
|
initFrom(r);
|
||||||
|
}
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
friend class ParamStale;
|
friend class ParamStale;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|||||||
@ -312,7 +312,7 @@ RclConfig *recollinit(int flags,
|
|||||||
#if defined(MACPORTS)
|
#if defined(MACPORTS)
|
||||||
PATH = string("/opt/local/bin/") + ":" + PATH;
|
PATH = string("/opt/local/bin/") + ":" + PATH;
|
||||||
#elif defined(HOMEBREW)
|
#elif defined(HOMEBREW)
|
||||||
PATH = string("/opt/homebrew/bin:/usr/local/bin/") + ":" + PATH;
|
PATH = string("/usr/local/bin/") + ":" + PATH;
|
||||||
#else
|
#else
|
||||||
// Native qt build. Add our own directory to the path so that
|
// Native qt build. Add our own directory to the path so that
|
||||||
// recoll finds recollindex pkgdatadir:
|
// recoll finds recollindex pkgdatadir:
|
||||||
|
|||||||
@ -81,8 +81,6 @@ unsigned int TextSplit::o_CJKNgramLen{2};
|
|||||||
bool TextSplit::o_noNumbers{false};
|
bool TextSplit::o_noNumbers{false};
|
||||||
bool TextSplit::o_deHyphenate{false};
|
bool TextSplit::o_deHyphenate{false};
|
||||||
int TextSplit::o_maxWordLength{40};
|
int TextSplit::o_maxWordLength{40};
|
||||||
int TextSplit::o_maxWordsInSpan{6};
|
|
||||||
|
|
||||||
static const int o_CJKMaxNgramLen{5};
|
static const int o_CJKMaxNgramLen{5};
|
||||||
bool o_exthangultagger{false};
|
bool o_exthangultagger{false};
|
||||||
|
|
||||||
@ -92,7 +90,6 @@ static char underscoreatend = '_';
|
|||||||
void TextSplit::staticConfInit(RclConfig *config)
|
void TextSplit::staticConfInit(RclConfig *config)
|
||||||
{
|
{
|
||||||
config->getConfParam("maxtermlength", &o_maxWordLength);
|
config->getConfParam("maxtermlength", &o_maxWordLength);
|
||||||
config->getConfParam("maxwordsinspan", &o_maxWordsInSpan);
|
|
||||||
|
|
||||||
bool bvalue{false};
|
bool bvalue{false};
|
||||||
if (config->getConfParam("nocjk", &bvalue) && bvalue == true) {
|
if (config->getConfParam("nocjk", &bvalue) && bvalue == true) {
|
||||||
@ -209,26 +206,32 @@ public:
|
|||||||
};
|
};
|
||||||
static const CharClassInit charClassInitInstance;
|
static const CharClassInit charClassInitInstance;
|
||||||
|
|
||||||
static inline bool isvisiblewhite(int c)
|
static inline int whatcc(unsigned int c, char *asciirep = nullptr)
|
||||||
{
|
|
||||||
return visiblewhite.find(c) != visiblewhite.end();
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline int whatcc(unsigned int c)
|
|
||||||
{
|
{
|
||||||
if (c <= 127) {
|
if (c <= 127) {
|
||||||
return charclasses[c];
|
return charclasses[c];
|
||||||
} else {
|
} else {
|
||||||
if (c == 0x2010 || c == 0x2019 || c == 0x275c || c == 0x02bc) {
|
if (c == 0x2010) {
|
||||||
|
// Special treatment for hyphen: handle as ascii minus. See
|
||||||
|
// doc/notes/minus-hyphen-dash.txt
|
||||||
|
if (asciirep)
|
||||||
|
*asciirep = '-';
|
||||||
|
return c;
|
||||||
|
} else if (c == 0x2019 || c == 0x275c || c == 0x02bc) {
|
||||||
|
// Things sometimes replacing a single quote. Use single
|
||||||
|
// quote so that span processing works ok
|
||||||
|
if (asciirep)
|
||||||
|
*asciirep = '\'';
|
||||||
return c;
|
return c;
|
||||||
} else if (sskip.find(c) != sskip.end()) {
|
} else if (sskip.find(c) != sskip.end()) {
|
||||||
return SKIP;
|
return SKIP;
|
||||||
} else if (spunc.find(c) != spunc.end()) {
|
} else if (spunc.find(c) != spunc.end()) {
|
||||||
return SPACE;
|
return SPACE;
|
||||||
} else {
|
} else {
|
||||||
auto it = lower_bound(vpuncblocks.begin(), vpuncblocks.end(), c);
|
vector<unsigned int>::iterator it =
|
||||||
if (it == vpuncblocks.end())
|
lower_bound(vpuncblocks.begin(), vpuncblocks.end(), c);
|
||||||
return LETTER;
|
if (it == vpuncblocks.end())
|
||||||
|
return LETTER;
|
||||||
if (c == *it)
|
if (c == *it)
|
||||||
return SPACE;
|
return SPACE;
|
||||||
if ((it - vpuncblocks.begin()) % 2 == 1) {
|
if ((it - vpuncblocks.begin()) % 2 == 1) {
|
||||||
@ -242,16 +245,16 @@ static inline int whatcc(unsigned int c)
|
|||||||
|
|
||||||
// testing whatcc...
|
// testing whatcc...
|
||||||
#if 0
|
#if 0
|
||||||
unsigned int testvalues[] = {'a', '0', 0x80, 0xbf, 0xc0, 0x05c3, 0x1000,
|
unsigned int testvalues[] = {'a', '0', 0x80, 0xbf, 0xc0, 0x05c3, 0x1000,
|
||||||
0x2000, 0x2001, 0x206e, 0x206f, 0x20d0, 0x2399,
|
0x2000, 0x2001, 0x206e, 0x206f, 0x20d0, 0x2399,
|
||||||
0x2400, 0x2401, 0x243f, 0x2440, 0xff65};
|
0x2400, 0x2401, 0x243f, 0x2440, 0xff65};
|
||||||
int ntest = sizeof(testvalues) / sizeof(int);
|
int ntest = sizeof(testvalues) / sizeof(int);
|
||||||
for (int i = 0; i < ntest; i++) {
|
for (int i = 0; i < ntest; i++) {
|
||||||
int ret = whatcc(testvalues[i]);
|
int ret = whatcc(testvalues[i]);
|
||||||
printf("Tested value 0x%x, returned value %d %s\n",
|
printf("Tested value 0x%x, returned value %d %s\n",
|
||||||
testvalues[i], ret, ret == LETTER ? "LETTER" :
|
testvalues[i], ret, ret == LETTER ? "LETTER" :
|
||||||
ret == SPACE ? "SPACE" : "OTHER");
|
ret == SPACE ? "SPACE" : "OTHER");
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// CJK Unicode character detection. CJK text is indexed using an n-gram
|
// CJK Unicode character detection. CJK text is indexed using an n-gram
|
||||||
@ -284,16 +287,16 @@ for (int i = 0; i < ntest; i++) {
|
|||||||
// FF00..FFEF; Halfwidth and Fullwidth Forms
|
// FF00..FFEF; Halfwidth and Fullwidth Forms
|
||||||
// 20000..2A6DF; CJK Unified Ideographs Extension B
|
// 20000..2A6DF; CJK Unified Ideographs Extension B
|
||||||
// 2F800..2FA1F; CJK Compatibility Ideographs Supplement
|
// 2F800..2FA1F; CJK Compatibility Ideographs Supplement
|
||||||
#define UNICODE_IS_CJK(p) \
|
#define UNICODE_IS_CJK(p) \
|
||||||
(((p) >= 0x1100 && (p) <= 0x11FF) || \
|
(((p) >= 0x1100 && (p) <= 0x11FF) || \
|
||||||
((p) >= 0x2E80 && (p) <= 0x2EFF) || \
|
((p) >= 0x2E80 && (p) <= 0x2EFF) || \
|
||||||
((p) >= 0x3000 && (p) <= 0x9FFF) || \
|
((p) >= 0x3000 && (p) <= 0x9FFF) || \
|
||||||
((p) >= 0xA700 && (p) <= 0xA71F) || \
|
((p) >= 0xA700 && (p) <= 0xA71F) || \
|
||||||
((p) >= 0xAC00 && (p) <= 0xD7AF) || \
|
((p) >= 0xAC00 && (p) <= 0xD7AF) || \
|
||||||
((p) >= 0xF900 && (p) <= 0xFAFF) || \
|
((p) >= 0xF900 && (p) <= 0xFAFF) || \
|
||||||
((p) >= 0xFE30 && (p) <= 0xFE4F) || \
|
((p) >= 0xFE30 && (p) <= 0xFE4F) || \
|
||||||
((p) >= 0xFF00 && (p) <= 0xFFEF) || \
|
((p) >= 0xFF00 && (p) <= 0xFFEF) || \
|
||||||
((p) >= 0x20000 && (p) <= 0x2A6DF) || \
|
((p) >= 0x20000 && (p) <= 0x2A6DF) || \
|
||||||
((p) >= 0x2F800 && (p) <= 0x2FA1F))
|
((p) >= 0x2F800 && (p) <= 0x2FA1F))
|
||||||
|
|
||||||
// We should probably map 'fullwidth ascii variants' and 'halfwidth
|
// We should probably map 'fullwidth ascii variants' and 'halfwidth
|
||||||
@ -301,9 +304,9 @@ for (int i = 0; i < ntest; i++) {
|
|||||||
// filter, KuromojiNormalizeFilter.java
|
// filter, KuromojiNormalizeFilter.java
|
||||||
// 309F is Hiragana.
|
// 309F is Hiragana.
|
||||||
#ifdef KATAKANA_AS_WORDS
|
#ifdef KATAKANA_AS_WORDS
|
||||||
#define UNICODE_IS_KATAKANA(p) \
|
#define UNICODE_IS_KATAKANA(p) \
|
||||||
((p) != 0x309F && \
|
((p) != 0x309F && \
|
||||||
(((p) >= 0x3099 && (p) <= 0x30FF) || \
|
(((p) >= 0x3099 && (p) <= 0x30FF) || \
|
||||||
((p) >= 0x31F0 && (p) <= 0x31FF)))
|
((p) >= 0x31F0 && (p) <= 0x31FF)))
|
||||||
#else
|
#else
|
||||||
#define UNICODE_IS_KATAKANA(p) false
|
#define UNICODE_IS_KATAKANA(p) false
|
||||||
@ -312,14 +315,14 @@ for (int i = 0; i < ntest; i++) {
|
|||||||
#ifdef HANGUL_AS_WORDS
|
#ifdef HANGUL_AS_WORDS
|
||||||
// If no external tagger is configured, we process HANGUL as generic
|
// If no external tagger is configured, we process HANGUL as generic
|
||||||
// cjk (n-grams)
|
// cjk (n-grams)
|
||||||
#define UNICODE_IS_HANGUL(p) ( \
|
#define UNICODE_IS_HANGUL(p) ( \
|
||||||
o_exthangultagger && \
|
o_exthangultagger && \
|
||||||
(((p) >= 0x1100 && (p) <= 0x11FF) || \
|
(((p) >= 0x1100 && (p) <= 0x11FF) || \
|
||||||
((p) >= 0x3130 && (p) <= 0x318F) || \
|
((p) >= 0x3130 && (p) <= 0x318F) || \
|
||||||
((p) >= 0x3200 && (p) <= 0x321e) || \
|
((p) >= 0x3200 && (p) <= 0x321e) || \
|
||||||
((p) >= 0x3248 && (p) <= 0x327F) || \
|
((p) >= 0x3248 && (p) <= 0x327F) || \
|
||||||
((p) >= 0x3281 && (p) <= 0x32BF) || \
|
((p) >= 0x3281 && (p) <= 0x32BF) || \
|
||||||
((p) >= 0xAC00 && (p) <= 0xD7AF)) \
|
((p) >= 0xAC00 && (p) <= 0xD7AF)) \
|
||||||
)
|
)
|
||||||
#else
|
#else
|
||||||
#define UNICODE_IS_HANGUL(p) false
|
#define UNICODE_IS_HANGUL(p) false
|
||||||
@ -348,16 +351,19 @@ bool TextSplit::isNGRAMMED(int c)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// This is used to detect katakana/other transitions, which must trigger a word split (there is not
|
// This is used to detect katakana/other transitions, which must
|
||||||
// always a separator, and katakana is otherwise treated like other, in the same routine, unless cjk
|
// trigger a word split (there is not always a separator, and katakana
|
||||||
|
// is otherwise treated like other, in the same routine, unless cjk
|
||||||
// which has its span reader causing a word break)
|
// which has its span reader causing a word break)
|
||||||
enum CharSpanClass {CSC_HANGUL, CSC_CJK, CSC_KATAKANA, CSC_OTHER};
|
enum CharSpanClass {CSC_HANGUL, CSC_CJK, CSC_KATAKANA, CSC_OTHER};
|
||||||
std::vector<CharFlags> csc_names {CHARFLAGENTRY(CSC_HANGUL), CHARFLAGENTRY(CSC_CJK),
|
std::vector<CharFlags> csc_names {CHARFLAGENTRY(CSC_HANGUL),
|
||||||
CHARFLAGENTRY(CSC_KATAKANA), CHARFLAGENTRY(CSC_OTHER)};
|
CHARFLAGENTRY(CSC_CJK), CHARFLAGENTRY(CSC_KATAKANA),
|
||||||
|
CHARFLAGENTRY(CSC_OTHER)};
|
||||||
|
|
||||||
// Final term checkpoint: do some checking (the kind which is simpler to do here than in the main
|
// Final term checkpoint: do some checking (the kind which is simpler
|
||||||
// loop), then send term to our client.
|
// to do here than in the main loop), then send term to our client.
|
||||||
inline bool TextSplit::emitterm(bool isspan, string &w, int pos, size_t btstart, size_t btend)
|
inline bool TextSplit::emitterm(bool isspan, string &w, int pos,
|
||||||
|
size_t btstart, size_t btend)
|
||||||
{
|
{
|
||||||
LOGDEB2("TextSplit::emitterm: [" << w << "] pos " << pos << "\n");
|
LOGDEB2("TextSplit::emitterm: [" << w << "] pos " << pos << "\n");
|
||||||
|
|
||||||
@ -372,38 +378,39 @@ inline bool TextSplit::emitterm(bool isspan, string &w, int pos, size_t btstart,
|
|||||||
PRETEND_USE(isspan);
|
PRETEND_USE(isspan);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (l == 0 || l > o_maxWordLength) {
|
if (l > 0 && l <= o_maxWordLength) {
|
||||||
return true;
|
// 1 byte word: we index single ascii letters and digits, but
|
||||||
}
|
// nothing else. We might want to turn this into a test for a
|
||||||
if (l == 1) {
|
// single utf8 character instead ?
|
||||||
// 1 byte word: we index single ascii letters and digits, but nothing else. We might want to
|
if (l == 1) {
|
||||||
// turn this into a test for a single utf8 character instead ?
|
unsigned int c = ((unsigned int)w[0]) & 0xff;
|
||||||
unsigned int c = ((unsigned int)w[0]) & 0xff;
|
if (charclasses[c] != A_ULETTER && charclasses[c] != A_LLETTER &&
|
||||||
if (charclasses[c] != A_ULETTER && charclasses[c] != A_LLETTER &&
|
charclasses[c] != DIGIT &&
|
||||||
charclasses[c] != DIGIT &&
|
(!(m_flags & TXTS_KEEPWILD) || charclasses[c] != WILD)
|
||||||
(!(m_flags & TXTS_KEEPWILD) || charclasses[c] != WILD)
|
) {
|
||||||
) {
|
//cerr << "ERASING single letter term " << c << endl;
|
||||||
//cerr << "ERASING single letter term " << c << endl;
|
return true;
|
||||||
return true;
|
}
|
||||||
}
|
}
|
||||||
|
if (pos != m_prevpos || l != m_prevlen) {
|
||||||
|
bool ret = takeword(w, pos, int(btstart), int(btend));
|
||||||
|
m_prevpos = pos;
|
||||||
|
m_prevlen = int(w.length());
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
LOGDEB2("TextSplit::emitterm:dup: [" << w << "] pos " << pos << "\n");
|
||||||
}
|
}
|
||||||
if (pos != m_prevpos || l != m_prevlen) {
|
|
||||||
bool ret = takeword(w, pos, int(btstart), int(btend));
|
|
||||||
m_prevpos = pos;
|
|
||||||
m_prevlen = int(w.length());
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
LOGDEB2("TextSplit::emitterm:dup: [" << w << "] pos " << pos << "\n");
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check for an acronym/abbreviation ie I.B.M. This only works with ascii (we do not detect
|
// Check for an acronym/abbreviation ie I.B.M. This only works with
|
||||||
// non-ascii utf-8 acronyms)
|
// ascii (no non-ascii utf-8 acronym are possible)
|
||||||
bool TextSplit::span_is_acronym(string *acronym)
|
bool TextSplit::span_is_acronym(string *acronym)
|
||||||
{
|
{
|
||||||
bool acron = false;
|
bool acron = false;
|
||||||
|
|
||||||
if (m_wordLen != m_span.length() && m_span.length() > 2 && m_span.length() <= 20) {
|
if (m_wordLen != m_span.length() &&
|
||||||
|
m_span.length() > 2 && m_span.length() <= 20) {
|
||||||
acron = true;
|
acron = true;
|
||||||
// Check odd chars are '.'
|
// Check odd chars are '.'
|
||||||
for (unsigned int i = 1 ; i < m_span.length(); i += 2) {
|
for (unsigned int i = 1 ; i < m_span.length(); i += 2) {
|
||||||
@ -432,23 +439,27 @@ bool TextSplit::span_is_acronym(string *acronym)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Generate terms from span. Have to take into account the flags: ONLYSPANS, NOSPANS, noNumbers
|
// Generate terms from span. Have to take into account the
|
||||||
|
// flags: ONLYSPANS, NOSPANS, noNumbers
|
||||||
bool TextSplit::words_from_span(size_t bp)
|
bool TextSplit::words_from_span(size_t bp)
|
||||||
{
|
{
|
||||||
#if 0
|
#if 0
|
||||||
cerr << "Span: [" << m_span << "] " << " bp " << bp <<
|
cerr << "Span: [" << m_span << "] " << " w_i_s size: " <<
|
||||||
" w_i_s size: " << m_words_in_span.size() << " : ";
|
m_words_in_span.size() << " : ";
|
||||||
for (unsigned int i = 0; i < m_words_in_span.size(); i++) {
|
for (unsigned int i = 0; i < m_words_in_span.size(); i++) {
|
||||||
cerr << " [" << m_words_in_span[i].first << " " << m_words_in_span[i].second << "] ";
|
cerr << " [" << m_words_in_span[i].first << " " <<
|
||||||
|
m_words_in_span[i].second << "] ";
|
||||||
|
|
||||||
}
|
}
|
||||||
cerr << endl;
|
cerr << endl;
|
||||||
#endif
|
#endif
|
||||||
int spanwords = int(m_words_in_span.size());
|
int spanwords = int(m_words_in_span.size());
|
||||||
// It seems that something like: tv_combo-sample_util.Po@am_quote can get the splitter to call
|
// It seems that something like: tv_combo-sample_util.Po@am_quote
|
||||||
// doemit with a span of '@' and words_in_span==0, which then causes a crash when accessing
|
// can get the splitter to call doemit with a span of '@' and
|
||||||
// words_in_span[0] if the stl assertions are active (e.g. Fedora RPM build). Not too sure what
|
// words_in_span==0, which then causes a crash when accessing
|
||||||
// the right fix would be, but for now, just defend against it
|
// words_in_span[0] if the stl assertions are active (e.g. Fedora
|
||||||
|
// RPM build). Not too sure what the right fix would be, but for
|
||||||
|
// now, just defend against it
|
||||||
if (spanwords == 0) {
|
if (spanwords == 0) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -456,17 +467,21 @@ bool TextSplit::words_from_span(size_t bp)
|
|||||||
// Byte position of the span start
|
// Byte position of the span start
|
||||||
size_t spboffs = bp - m_span.size();
|
size_t spboffs = bp - m_span.size();
|
||||||
|
|
||||||
if (o_deHyphenate && spanwords == 2 && m_span[m_words_in_span[0].second] == '-') {
|
if (o_deHyphenate && spanwords == 2 &&
|
||||||
|
m_span[m_words_in_span[0].second] == '-') {
|
||||||
unsigned int s0 = m_words_in_span[0].first;
|
unsigned int s0 = m_words_in_span[0].first;
|
||||||
unsigned int l0 = m_words_in_span[0].second - m_words_in_span[0].first;
|
unsigned int l0 = m_words_in_span[0].second - m_words_in_span[0].first;
|
||||||
unsigned int s1 = m_words_in_span[1].first;
|
unsigned int s1 = m_words_in_span[1].first;
|
||||||
unsigned int l1 = m_words_in_span[1].second - m_words_in_span[1].first;
|
unsigned int l1 = m_words_in_span[1].second - m_words_in_span[1].first;
|
||||||
string word = m_span.substr(s0, l0) + m_span.substr(s1, l1);
|
string word = m_span.substr(s0, l0) + m_span.substr(s1, l1);
|
||||||
if (l0 && l1)
|
if (l0 && l1)
|
||||||
emitterm(false, word, m_spanpos, spboffs, spboffs + m_words_in_span[1].second);
|
emitterm(false, word,
|
||||||
|
m_spanpos, spboffs, spboffs + m_words_in_span[1].second);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < ((m_flags&TXTS_ONLYSPANS) ? 1 : spanwords); i++) {
|
for (int i = 0;
|
||||||
|
i < ((m_flags&TXTS_ONLYSPANS) ? 1 : spanwords);
|
||||||
|
i++) {
|
||||||
|
|
||||||
int deb = m_words_in_span[i].first;
|
int deb = m_words_in_span[i].first;
|
||||||
bool noposinc = m_words_in_span[i].second == deb;
|
bool noposinc = m_words_in_span[i].second == deb;
|
||||||
@ -475,7 +490,8 @@ bool TextSplit::words_from_span(size_t bp)
|
|||||||
j++) {
|
j++) {
|
||||||
|
|
||||||
int fin = m_words_in_span[j].second;
|
int fin = m_words_in_span[j].second;
|
||||||
//cerr << "i " << i << " j " << j << " deb " << deb << " fin " << fin << endl;
|
//cerr << "i " << i << " j " << j << " deb " << deb <<
|
||||||
|
//" fin " << fin << endl;
|
||||||
if (fin - deb > int(m_span.size()))
|
if (fin - deb > int(m_span.size()))
|
||||||
break;
|
break;
|
||||||
string word(m_span.substr(deb, fin-deb));
|
string word(m_span.substr(deb, fin-deb));
|
||||||
@ -503,7 +519,7 @@ bool TextSplit::words_from_span(size_t bp)
|
|||||||
*
|
*
|
||||||
* @return true if ok, false for error. Splitting should stop in this case.
|
* @return true if ok, false for error. Splitting should stop in this case.
|
||||||
* @param spanerase Set if the current span is at its end. Process it.
|
* @param spanerase Set if the current span is at its end. Process it.
|
||||||
* @param bp The current BYTE position in the stream (it's beyond the current span data).
|
* @param bp The current BYTE position in the stream
|
||||||
*/
|
*/
|
||||||
inline bool TextSplit::doemit(bool spanerase, size_t _bp)
|
inline bool TextSplit::doemit(bool spanerase, size_t _bp)
|
||||||
{
|
{
|
||||||
@ -516,7 +532,7 @@ inline bool TextSplit::doemit(bool spanerase, size_t _bp)
|
|||||||
if (m_wordLen) {
|
if (m_wordLen) {
|
||||||
// We have a current word. Remember it
|
// We have a current word. Remember it
|
||||||
|
|
||||||
if (int(m_words_in_span.size()) >= o_maxWordsInSpan) {
|
if (m_words_in_span.size() >= 6) {
|
||||||
// Limit max span word count
|
// Limit max span word count
|
||||||
spanerase = true;
|
spanerase = true;
|
||||||
}
|
}
|
||||||
@ -534,13 +550,38 @@ inline bool TextSplit::doemit(bool spanerase, size_t _bp)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Span is done (too long or span-terminating character). Produce terms and reset it.
|
|
||||||
|
// Span is done (too long or span-terminating character). Produce
|
||||||
|
// terms and reset it.
|
||||||
string acronym;
|
string acronym;
|
||||||
if (span_is_acronym(&acronym)) {
|
if (span_is_acronym(&acronym)) {
|
||||||
if (!emitterm(false, acronym, m_spanpos, bp - m_span.length(), bp))
|
if (!emitterm(false, acronym, m_spanpos, bp - m_span.length(), bp))
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Maybe trim at end. These are chars that we might keep
|
||||||
|
// inside a span, but not at the end.
|
||||||
|
string::size_type trimsz{0};
|
||||||
|
while (trimsz < m_span.length()) {
|
||||||
|
auto c = m_span[m_span.length() - 1 - trimsz];
|
||||||
|
if (c == '.' || c == '-' || c == ',' || c == '@' || c == '\'' ||
|
||||||
|
c == underscoreatend) {
|
||||||
|
trimsz++;
|
||||||
|
if (m_words_in_span.size() &&
|
||||||
|
m_words_in_span.back().second > int(m_span.size())) {
|
||||||
|
m_words_in_span.back().second = int(m_span.size());
|
||||||
|
}
|
||||||
|
if (--bp < 0) {
|
||||||
|
bp = 0;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (trimsz > 0) {
|
||||||
|
m_span.resize(m_span.length() - trimsz);
|
||||||
|
}
|
||||||
|
|
||||||
if (!words_from_span(bp)) {
|
if (!words_from_span(bp)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -599,7 +640,6 @@ bool TextSplit::text_to_words(const string &in)
|
|||||||
clearsplitstate();
|
clearsplitstate();
|
||||||
|
|
||||||
bool pagepending = false;
|
bool pagepending = false;
|
||||||
bool nlpending = false;
|
|
||||||
bool softhyphenpending = false;
|
bool softhyphenpending = false;
|
||||||
|
|
||||||
// Running count of non-alphanum chars. Reset when we see one;
|
// Running count of non-alphanum chars. Reset when we see one;
|
||||||
@ -673,7 +713,8 @@ bool TextSplit::text_to_words(const string &in)
|
|||||||
prev_csc = csc;
|
prev_csc = csc;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int cc = whatcc(c);
|
char asciirep = 0;
|
||||||
|
int cc = whatcc(c, &asciirep);
|
||||||
|
|
||||||
switch (cc) {
|
switch (cc) {
|
||||||
case SKIP:
|
case SKIP:
|
||||||
@ -709,10 +750,6 @@ bool TextSplit::text_to_words(const string &in)
|
|||||||
pagepending = false;
|
pagepending = false;
|
||||||
newpage(m_wordpos);
|
newpage(m_wordpos);
|
||||||
}
|
}
|
||||||
if (nlpending) {
|
|
||||||
nlpending = false;
|
|
||||||
newline(m_wordpos);
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case WILD:
|
case WILD:
|
||||||
@ -736,7 +773,7 @@ bool TextSplit::text_to_words(const string &in)
|
|||||||
}
|
}
|
||||||
} else if (m_inNumber) {
|
} else if (m_inNumber) {
|
||||||
if ((m_span[m_span.length() - 1] == 'e' ||
|
if ((m_span[m_span.length() - 1] == 'e' ||
|
||||||
m_span[m_span.length() - 1] == 'E')) {
|
m_span[m_span.length() - 1] == 'E')) {
|
||||||
if (isdigit(whatcc(it[it.getCpos()+1]), m_flags)) {
|
if (isdigit(whatcc(it[it.getCpos()+1]), m_flags)) {
|
||||||
m_wordLen += it.appendchartostring(m_span);
|
m_wordLen += it.appendchartostring(m_span);
|
||||||
STATS_INC_WORDCHARS;
|
STATS_INC_WORDCHARS;
|
||||||
@ -744,24 +781,17 @@ bool TextSplit::text_to_words(const string &in)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
int nextc = it[it.getCpos()+1];
|
|
||||||
if (cc == '+') {
|
if (cc == '+') {
|
||||||
if (nextc == '+' || nextc == -1 || isvisiblewhite(nextc)) {
|
int nextc = it[it.getCpos()+1];
|
||||||
|
if (nextc == '+' || nextc == -1 || visiblewhite.find(nextc)
|
||||||
|
!= visiblewhite.end()) {
|
||||||
// someword++[+...] !
|
// someword++[+...] !
|
||||||
m_wordLen += it.appendchartostring(m_span);
|
m_wordLen += it.appendchartostring(m_span);
|
||||||
STATS_INC_WORDCHARS;
|
STATS_INC_WORDCHARS;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Note about dangling hyphens: we always strip '-' found before whitespace,
|
// Treat '-' inside span as glue char
|
||||||
// even before a newline, then generate two terms, before and after the line
|
|
||||||
// break. We have no way to know if '-' is there because a word was broken by
|
|
||||||
// justification or if it was part of an actual compound word (would need a
|
|
||||||
// dictionary to check). As soft-hyphen *should* be used if the '-' is not part
|
|
||||||
// of the text.
|
|
||||||
if (nextc == -1 || isvisiblewhite(nextc)) {
|
|
||||||
goto SPACE;
|
|
||||||
}
|
|
||||||
if (!doemit(false, it.getBpos()))
|
if (!doemit(false, it.getBpos()))
|
||||||
return false;
|
return false;
|
||||||
m_inNumber = false;
|
m_inNumber = false;
|
||||||
@ -797,7 +827,8 @@ bool TextSplit::text_to_words(const string &in)
|
|||||||
m_inNumber = true;
|
m_inNumber = true;
|
||||||
m_wordLen += it.appendchartostring(m_span);
|
m_wordLen += it.appendchartostring(m_span);
|
||||||
} else {
|
} else {
|
||||||
m_words_in_span.push_back(pair<int,int>(m_wordStart, m_wordStart));
|
m_words_in_span.
|
||||||
|
push_back(pair<int,int>(m_wordStart, m_wordStart));
|
||||||
m_wordStart += it.appendchartostring(m_span);
|
m_wordStart += it.appendchartostring(m_span);
|
||||||
}
|
}
|
||||||
STATS_INC_WORDCHARS;
|
STATS_INC_WORDCHARS;
|
||||||
@ -814,28 +845,38 @@ bool TextSplit::text_to_words(const string &in)
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x2010: // hyphen
|
case 0x2010:
|
||||||
case 0x2019: // variations on single quote
|
case 0x2019:
|
||||||
case 0x275c:
|
case 0x275c:
|
||||||
case 0x02bc:
|
case 0x02bc:
|
||||||
|
// Unicode chars which we replace with ascii for
|
||||||
|
// processing (2010 -> -,others -> '). It happens that
|
||||||
|
// they all work as glue chars and use the same code, but
|
||||||
|
// there might be cases needing different processing.
|
||||||
|
// Hyphen is replaced with ascii minus
|
||||||
|
if (m_wordLen) {
|
||||||
|
// Inside span: glue char
|
||||||
|
if (!doemit(false, it.getBpos()))
|
||||||
|
return false;
|
||||||
|
m_inNumber = false;
|
||||||
|
m_span += asciirep;
|
||||||
|
m_wordStart++;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
goto SPACE;
|
||||||
|
|
||||||
case '@':
|
case '@':
|
||||||
case '_': // If underscoreasletter is set, we'll never get this
|
case '_': // If underscoreasletter is set, we'll never get this
|
||||||
case '\'':
|
case '\'':
|
||||||
{
|
|
||||||
// If in word, potential span: o'brien, jf@dockes.org,
|
// If in word, potential span: o'brien, jf@dockes.org,
|
||||||
// else just ignore
|
// else just ignore
|
||||||
int nextc = it[it.getCpos()+1];
|
|
||||||
if (nextc == -1 || isvisiblewhite(nextc)) {
|
|
||||||
goto SPACE;
|
|
||||||
}
|
|
||||||
if (m_wordLen) {
|
if (m_wordLen) {
|
||||||
if (!doemit(false, it.getBpos()))
|
if (!doemit(false, it.getBpos()))
|
||||||
return false;
|
return false;
|
||||||
m_inNumber = false;
|
m_inNumber = false;
|
||||||
m_wordStart += it.appendchartostring(m_span);
|
m_wordStart += it.appendchartostring(m_span);
|
||||||
}
|
}
|
||||||
}
|
break;
|
||||||
break;
|
|
||||||
|
|
||||||
case '#': {
|
case '#': {
|
||||||
int w = whatcc(it[it.getCpos()+1]);
|
int w = whatcc(it[it.getCpos()+1]);
|
||||||
@ -858,10 +899,19 @@ bool TextSplit::text_to_words(const string &in)
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case '\n':
|
case '\n':
|
||||||
nlpending = true;
|
|
||||||
/* FALLTHROUGH */
|
|
||||||
case '\r':
|
case '\r':
|
||||||
if (softhyphenpending) {
|
if (m_span.length() && *m_span.rbegin() == '-') {
|
||||||
|
// if '-' is the last char before end of line, we
|
||||||
|
// strip it. We have no way to know if this is added
|
||||||
|
// because of the line split or if it was part of an
|
||||||
|
// actual compound word (would need a dictionary to
|
||||||
|
// check). As soft-hyphen *should* be used if the '-'
|
||||||
|
// is not part of the text, it is better to properly
|
||||||
|
// process a real compound word, and produce wrong
|
||||||
|
// output from wrong text. The word-emitting routine
|
||||||
|
// will strip the trailing '-'.
|
||||||
|
goto SPACE;
|
||||||
|
} else if (softhyphenpending) {
|
||||||
// Don't reset soft-hyphen
|
// Don't reset soft-hyphen
|
||||||
continue;
|
continue;
|
||||||
} else {
|
} else {
|
||||||
@ -1057,7 +1107,7 @@ bool TextSplit::cjk_to_words(Utf8Iter& it, unsigned int *cp)
|
|||||||
|
|
||||||
// Specialization for countWords
|
// Specialization for countWords
|
||||||
class TextSplitCW : public TextSplit {
|
class TextSplitCW : public TextSplit {
|
||||||
public:
|
public:
|
||||||
int wcnt;
|
int wcnt;
|
||||||
TextSplitCW(Flags flags) : TextSplit(flags), wcnt(0) {}
|
TextSplitCW(Flags flags) : TextSplit(flags), wcnt(0) {}
|
||||||
bool takeword(const string &, int, int, int) {
|
bool takeword(const string &, int, int, int) {
|
||||||
@ -1082,7 +1132,7 @@ bool TextSplit::hasVisibleWhite(const string &in)
|
|||||||
LOGERR("hasVisibleWhite: error while scanning UTF-8 string\n");
|
LOGERR("hasVisibleWhite: error while scanning UTF-8 string\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (isvisiblewhite(c))
|
if (visiblewhite.find(c) != visiblewhite.end())
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
@ -1107,7 +1157,7 @@ template <class T> bool u8stringToStrings(const string &s, T &tokens)
|
|||||||
}
|
}
|
||||||
|
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case '"':
|
case '"':
|
||||||
switch(state) {
|
switch(state) {
|
||||||
case SPACE: state = INQUOTE; continue;
|
case SPACE: state = INQUOTE; continue;
|
||||||
case TOKEN: goto push_char;
|
case TOKEN: goto push_char;
|
||||||
@ -1116,7 +1166,7 @@ template <class T> bool u8stringToStrings(const string &s, T &tokens)
|
|||||||
state = SPACE; continue;
|
state = SPACE; continue;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case '\\':
|
case '\\':
|
||||||
switch(state) {
|
switch(state) {
|
||||||
case SPACE:
|
case SPACE:
|
||||||
case TOKEN: state=TOKEN; goto push_char;
|
case TOKEN: state=TOKEN; goto push_char;
|
||||||
@ -1125,25 +1175,25 @@ template <class T> bool u8stringToStrings(const string &s, T &tokens)
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case ' ':
|
case ' ':
|
||||||
case '\t':
|
case '\t':
|
||||||
case '\n':
|
case '\n':
|
||||||
case '\r':
|
case '\r':
|
||||||
switch(state) {
|
switch(state) {
|
||||||
case SPACE: continue;
|
case SPACE: continue;
|
||||||
case TOKEN: tokens.push_back(current); current.clear();
|
case TOKEN: tokens.push_back(current); current.clear();
|
||||||
state = SPACE; continue;
|
state = SPACE; continue;
|
||||||
case INQUOTE:
|
case INQUOTE:
|
||||||
case ESCAPE: goto push_char;
|
case ESCAPE: goto push_char;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
switch(state) {
|
switch(state) {
|
||||||
case ESCAPE: state = INQUOTE; break;
|
case ESCAPE: state = INQUOTE; break;
|
||||||
case SPACE: state = TOKEN; break;
|
case SPACE: state = TOKEN; break;
|
||||||
case TOKEN:
|
case TOKEN:
|
||||||
case INQUOTE: break;
|
case INQUOTE: break;
|
||||||
}
|
}
|
||||||
push_char:
|
push_char:
|
||||||
it.appendchartostring(current);
|
it.appendchartostring(current);
|
||||||
@ -1164,3 +1214,4 @@ bool TextSplit::stringToStrings(const string &s, vector<string> &tokens)
|
|||||||
{
|
{
|
||||||
return u8stringToStrings<vector<string> >(s, tokens);
|
return u8stringToStrings<vector<string> >(s, tokens);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -50,8 +50,6 @@ public:
|
|||||||
TextSplit(Flags flags = Flags(TXTS_NONE))
|
TextSplit(Flags flags = Flags(TXTS_NONE))
|
||||||
: m_flags(flags) {}
|
: m_flags(flags) {}
|
||||||
virtual ~TextSplit() {}
|
virtual ~TextSplit() {}
|
||||||
TextSplit(const TextSplit&) = delete;
|
|
||||||
TextSplit& operator=(const TextSplit&) = delete;
|
|
||||||
|
|
||||||
/** Call at program initialization to read non default values from the
|
/** Call at program initialization to read non default values from the
|
||||||
configuration */
|
configuration */
|
||||||
@ -73,9 +71,6 @@ public:
|
|||||||
* just don't know about pages. */
|
* just don't know about pages. */
|
||||||
virtual void newpage(int /*pos*/) {}
|
virtual void newpage(int /*pos*/) {}
|
||||||
|
|
||||||
/** Called when we encounter newline \n 0x0a. Override to use the event. */
|
|
||||||
virtual void newline(int /*pos*/) {}
|
|
||||||
|
|
||||||
// Static utility functions:
|
// Static utility functions:
|
||||||
|
|
||||||
/** Count words in string, as the splitter would generate them */
|
/** Count words in string, as the splitter would generate them */
|
||||||
@ -162,16 +157,12 @@ private:
|
|||||||
static bool o_deHyphenate; // false
|
static bool o_deHyphenate; // false
|
||||||
static unsigned int o_CJKNgramLen; // 2
|
static unsigned int o_CJKNgramLen; // 2
|
||||||
static int o_maxWordLength; // 40
|
static int o_maxWordLength; // 40
|
||||||
static int o_maxWordsInSpan; // 6
|
|
||||||
|
|
||||||
Flags m_flags;
|
Flags m_flags;
|
||||||
|
|
||||||
// Current span. Might be jf.dockes@wanadoo.f
|
// Current span. Might be jf.dockes@wanadoo.f
|
||||||
std::string m_span;
|
std::string m_span;
|
||||||
|
|
||||||
// Words in span: byte positions of start and end of words in m_span. For example:
|
|
||||||
// 0 4 9
|
|
||||||
// bill@some.com -> (0,4) (5,9) (10,13)
|
|
||||||
std::vector <std::pair<int, int> > m_words_in_span;
|
std::vector <std::pair<int, int> > m_words_in_span;
|
||||||
|
|
||||||
// Current word: no punctuation at all in there. Byte offset
|
// Current word: no punctuation at all in there. Byte offset
|
||||||
|
|||||||
@ -133,7 +133,7 @@ bool TextSplit::ko_to_words(Utf8Iter *itp, unsigned int *cp)
|
|||||||
unordered_map<string, string> args;
|
unordered_map<string, string> args;
|
||||||
|
|
||||||
args.insert(pair<string,string>{"data", string()});
|
args.insert(pair<string,string>{"data", string()});
|
||||||
string& inputdata(args.begin()->second);
|
string& inputdata{args.begin()->second};
|
||||||
|
|
||||||
// We send the tagger name every time but it's only used the first
|
// We send the tagger name every time but it's only used the first
|
||||||
// one: can't change it after init. We could avoid sending it
|
// one: can't change it after init. We could avoid sending it
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
/* Copyright (C) 2004-2021 J.F.Dockes
|
/* Copyright (C) 2004-2019 J.F.Dockes
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
* the Free Software Foundation; either version 2 of the License, or
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
@ -29,7 +29,7 @@
|
|||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
bool unacmaybefold(const string &in, string &out,
|
bool unacmaybefold(const string &in, string &out,
|
||||||
const char *encoding, UnacOp what)
|
const char *encoding, UnacOp what)
|
||||||
{
|
{
|
||||||
char *cout = 0;
|
char *cout = 0;
|
||||||
size_t out_len;
|
size_t out_len;
|
||||||
@ -37,13 +37,16 @@ bool unacmaybefold(const string &in, string &out,
|
|||||||
|
|
||||||
switch (what) {
|
switch (what) {
|
||||||
case UNACOP_UNAC:
|
case UNACOP_UNAC:
|
||||||
status = unac_string(encoding, in.c_str(), in.length(), &cout, &out_len);
|
status = unac_string(encoding, in.c_str(), in.length(),
|
||||||
|
&cout, &out_len);
|
||||||
break;
|
break;
|
||||||
case UNACOP_UNACFOLD:
|
case UNACOP_UNACFOLD:
|
||||||
status = unacfold_string(encoding, in.c_str(), in.length(), &cout, &out_len);
|
status = unacfold_string(encoding, in.c_str(), in.length(),
|
||||||
|
&cout, &out_len);
|
||||||
break;
|
break;
|
||||||
case UNACOP_FOLD:
|
case UNACOP_FOLD:
|
||||||
status = fold_string(encoding, in.c_str(), in.length(), &cout, &out_len);
|
status = fold_string(encoding, in.c_str(), in.length(),
|
||||||
|
&cout, &out_len);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -34,8 +34,6 @@ class WebStore {
|
|||||||
public:
|
public:
|
||||||
WebStore(RclConfig *config);
|
WebStore(RclConfig *config);
|
||||||
~WebStore();
|
~WebStore();
|
||||||
WebStore(const WebStore&) = delete;
|
|
||||||
WebStore& operator=(const WebStore&) = delete;
|
|
||||||
|
|
||||||
bool getFromCache(const std::string& udi, Rcl::Doc &doc, std::string& data,
|
bool getFromCache(const std::string& udi, Rcl::Doc &doc, std::string& data,
|
||||||
std::string *hittype = 0);
|
std::string *hittype = 0);
|
||||||
|
|||||||
279
src/configure.ac
279
src/configure.ac
@ -1,7 +1,7 @@
|
|||||||
AC_INIT([Recoll],[m4_esyscmd_s(cat RECOLL-VERSION.txt)])
|
AC_INIT([Recoll], m4_esyscmd_s(cat VERSION))
|
||||||
AC_CONFIG_HEADERS([common/autoconfig.h])
|
AC_CONFIG_HEADERS([common/autoconfig.h])
|
||||||
AH_BOTTOM([#include "conf_post.h"])
|
AH_BOTTOM([#include "conf_post.h"])
|
||||||
AC_PREREQ([2.69])
|
AC_PREREQ(2.53)
|
||||||
AC_CONFIG_SRCDIR(index/recollindex.cpp)
|
AC_CONFIG_SRCDIR(index/recollindex.cpp)
|
||||||
|
|
||||||
AM_INIT_AUTOMAKE([1.10 no-define subdir-objects foreign])
|
AM_INIT_AUTOMAKE([1.10 no-define subdir-objects foreign])
|
||||||
@ -21,7 +21,7 @@ if test C$CXX = C ; then
|
|||||||
AC_MSG_ERROR([C++ compiler needed. Please install one (ie: gnu g++)])
|
AC_MSG_ERROR([C++ compiler needed. Please install one (ie: gnu g++)])
|
||||||
fi
|
fi
|
||||||
AC_LANG_PUSH([C++])
|
AC_LANG_PUSH([C++])
|
||||||
AC_LINK_IFELSE([AC_LANG_PROGRAM([[]], [[]])],[rcl_link_ok=yes],[rcl_link_ok=no])
|
AC_TRY_LINK([],[], rcl_link_ok=yes, rcl_link_ok=no)
|
||||||
if test "$rcl_link_ok" = "no" ; then
|
if test "$rcl_link_ok" = "no" ; then
|
||||||
AC_MSG_ERROR([No working C++ compiler was found])
|
AC_MSG_ERROR([No working C++ compiler was found])
|
||||||
fi
|
fi
|
||||||
@ -42,7 +42,7 @@ esac
|
|||||||
|
|
||||||
AC_PROG_YACC
|
AC_PROG_YACC
|
||||||
|
|
||||||
LT_INIT
|
AC_PROG_LIBTOOL
|
||||||
AC_C_BIGENDIAN
|
AC_C_BIGENDIAN
|
||||||
|
|
||||||
AC_SYS_LARGEFILE
|
AC_SYS_LARGEFILE
|
||||||
@ -53,7 +53,8 @@ AC_CHECK_HEADERS([sys/param.h, spawn.h])
|
|||||||
|
|
||||||
if test "x$ac_cv_func_posix_spawn" = xyes; then :
|
if test "x$ac_cv_func_posix_spawn" = xyes; then :
|
||||||
AC_ARG_ENABLE(posix_spawn,
|
AC_ARG_ENABLE(posix_spawn,
|
||||||
AS_HELP_STRING([--enable-posix_spawn],[Enable the use of posix_spawn().]),
|
AC_HELP_STRING([--enable-posix_spawn],
|
||||||
|
[Enable the use of posix_spawn().]),
|
||||||
posixSpawnEnabled=$enableval, posixSpawnEnabled=no)
|
posixSpawnEnabled=$enableval, posixSpawnEnabled=no)
|
||||||
fi
|
fi
|
||||||
if test X$posixSpawnEnabled = Xyes ; then
|
if test X$posixSpawnEnabled = Xyes ; then
|
||||||
@ -68,35 +69,11 @@ AC_CHECK_HEADERS([sys/mount.h sys/statfs.h sys/statvfs.h sys/vfs.h malloc.h mall
|
|||||||
|
|
||||||
AC_CHECK_FUNCS([posix_spawn setrlimit kqueue vsnprintf malloc_trim posix_fadvise])
|
AC_CHECK_FUNCS([posix_spawn setrlimit kqueue vsnprintf malloc_trim posix_fadvise])
|
||||||
|
|
||||||
AC_CHECK_FUNCS(mkdtemp)
|
|
||||||
AC_CHECK_LIB([pthread], [pthread_create], [], [])
|
|
||||||
AC_SEARCH_LIBS([dlopen], [dl], [], [])
|
|
||||||
if test X$ac_cv_search_function != Xno ; then
|
|
||||||
AC_DEFINE(HAVE_DLOPEN, 1, [dlopen function is available])
|
|
||||||
fi
|
|
||||||
AC_CHECK_LIB([z], [zlibVersion], [], [])
|
|
||||||
|
|
||||||
############# Putenv
|
|
||||||
AC_MSG_CHECKING(for type of string parameter to putenv)
|
|
||||||
AC_LANG_PUSH([C++])
|
|
||||||
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
|
|
||||||
#include <stdlib.h>
|
|
||||||
]], [[
|
|
||||||
putenv((const char *)0);
|
|
||||||
]])],[rcl_putenv_string_const="1"],[rcl_putenv_string_const="0"])
|
|
||||||
if test X$rcl_putenv_string_const = X1 ; then
|
|
||||||
AC_DEFINE(PUTENV_ARG_CONST, 1, [putenv parameter is const])
|
|
||||||
fi
|
|
||||||
AC_LANG_POP([C++])
|
|
||||||
|
|
||||||
|
|
||||||
PKG_CHECK_MODULES([XSLT], [libxslt], [], AC_MSG_ERROR([libxslt]))
|
|
||||||
|
|
||||||
|
|
||||||
# Use specific 'file' command ? (Useful on solaris to specify
|
# Use specific 'file' command ? (Useful on solaris to specify
|
||||||
# /usr/local/bin/file instead of the system's which doesn't understand '-i'
|
# /usr/local/bin/file instead of the system's which doesn't understand '-i'
|
||||||
AC_ARG_WITH(file-command,
|
AC_ARG_WITH(file-command,
|
||||||
AS_HELP_STRING([--with-file-command],[Specify version of 'file' command (ie: --with-file-command=/usr/local/bin/file)]),
|
AC_HELP_STRING([--with-file-command],
|
||||||
|
[Specify version of 'file' command (ie: --with-file-command=/usr/local/bin/file)]),
|
||||||
withFileCommand=$withval, withFileCommand=file)
|
withFileCommand=$withval, withFileCommand=file)
|
||||||
case $withFileCommand in
|
case $withFileCommand in
|
||||||
file)
|
file)
|
||||||
@ -119,7 +96,8 @@ AC_DEFINE(USE_SYSTEM_FILE_COMMAND, 1,
|
|||||||
# we do compile the aspell module using an internal copy of aspell.h
|
# we do compile the aspell module using an internal copy of aspell.h
|
||||||
# Only --with-aspell=no will completely disable aspell support
|
# Only --with-aspell=no will completely disable aspell support
|
||||||
AC_ARG_WITH(aspell,
|
AC_ARG_WITH(aspell,
|
||||||
AS_HELP_STRING([--without-aspell],[Disable use of aspell spelling package to provide term expansion to other spellings]),
|
AC_HELP_STRING([--without-aspell],
|
||||||
|
[Disable use of aspell spelling package to provide term expansion to other spellings]),
|
||||||
withAspell=$withval, withAspell=yes)
|
withAspell=$withval, withAspell=yes)
|
||||||
case $withAspell in
|
case $withAspell in
|
||||||
no);;
|
no);;
|
||||||
@ -148,7 +126,8 @@ fi
|
|||||||
|
|
||||||
# Real time monitoring with inotify
|
# Real time monitoring with inotify
|
||||||
AC_ARG_WITH(inotify,
|
AC_ARG_WITH(inotify,
|
||||||
AS_HELP_STRING([--with-inotify],[Use inotify for almost real time indexing of modified files (the default
|
AC_HELP_STRING([--with-inotify],
|
||||||
|
[Use inotify for almost real time indexing of modified files (the default
|
||||||
is yes on Linux).]),
|
is yes on Linux).]),
|
||||||
withInotify=$withval, withInotify=$inot_default)
|
withInotify=$withval, withInotify=$inot_default)
|
||||||
|
|
||||||
@ -162,7 +141,8 @@ fi
|
|||||||
|
|
||||||
# Real time monitoring with FAM
|
# Real time monitoring with FAM
|
||||||
AC_ARG_WITH(fam,
|
AC_ARG_WITH(fam,
|
||||||
AS_HELP_STRING([--with-fam],[Use File Alteration Monitor for almost real time indexing of modified files. Give the fam/gamin library as argument (ie: /usr/lib/libfam.so) if configure does not find the right one.]),
|
AC_HELP_STRING([--with-fam],
|
||||||
|
[Use File Alteration Monitor for almost real time indexing of modified files. Give the fam/gamin library as argument (ie: /usr/lib/libfam.so) if configure does not find the right one.]),
|
||||||
withFam=$withval, withFam=yes)
|
withFam=$withval, withFam=yes)
|
||||||
|
|
||||||
if test X$withFam != Xno -a X$withInotify != Xno ; then
|
if test X$withFam != Xno -a X$withInotify != Xno ; then
|
||||||
@ -226,15 +206,21 @@ if test X$idxthreadsEnabled = Xyes ; then
|
|||||||
AC_DEFINE(IDX_THREADS, 1, [Use multiple threads for indexing])
|
AC_DEFINE(IDX_THREADS, 1, [Use multiple threads for indexing])
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
AC_ARG_ENABLE(testmains,
|
||||||
|
AC_HELP_STRING([--enable-testmains],
|
||||||
|
[Enable building small test drivers. These are not unit tests.]),
|
||||||
|
buildtestmains=$enableval, buildtestmains=no)
|
||||||
|
AM_CONDITIONAL([COND_TESTMAINS], [test "$buildtestmains" = yes])
|
||||||
|
|
||||||
# Enable CamelCase word splitting. This is optional because it causes
|
# Enable CamelCase word splitting. This is optional because it causes
|
||||||
# problems with phrases: with camelcase enabled, "MySQL manual"
|
# problems with phrases: with camelcase enabled, "MySQL manual"
|
||||||
# will be matched by "MySQL manual" and "my sql manual" but not
|
# will be matched by "MySQL manual" and "my sql manual" but not
|
||||||
# "mysql manual" (which would need increased slack as manual is now at pos
|
# "mysql manual" (which would need increased slack as manual is now at pos
|
||||||
# 2 instead of 1
|
# 2 instead of 1
|
||||||
AC_ARG_ENABLE(camelcase,
|
AC_ARG_ENABLE(camelcase,
|
||||||
AS_HELP_STRING([--enable-camelcase],
|
AC_HELP_STRING([--enable-camelcase],
|
||||||
[Enable splitting camelCase words. This is not enabled by default as
|
[Enable splitting camelCase words. This is not enabled by default as
|
||||||
it makes phrase matches more difficult: you need to use matching
|
this makes phrase matches more difficult: you need to use matching
|
||||||
case in the phrase query to get a match. Ie querying for
|
case in the phrase query to get a match. Ie querying for
|
||||||
"MySQL manual" and "my sql manual" are the same, but not the same as
|
"MySQL manual" and "my sql manual" are the same, but not the same as
|
||||||
"mysql manual" (in phrases only and you could raise the phrase slack to
|
"mysql manual" (in phrases only and you could raise the phrase slack to
|
||||||
@ -244,46 +230,109 @@ if test X$camelcaseEnabled = Xyes ; then
|
|||||||
AC_DEFINE(RCL_SPLIT_CAMELCASE, 1, [Split camelCase words])
|
AC_DEFINE(RCL_SPLIT_CAMELCASE, 1, [Split camelCase words])
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
||||||
AC_ARG_ENABLE(testmains,
|
|
||||||
AS_HELP_STRING([--enable-testmains],[Enable building small test drivers. These are not unit tests.]),
|
|
||||||
buildtestmains=$enableval, buildtestmains=no)
|
|
||||||
AM_CONDITIONAL([COND_TESTMAINS], [test "$buildtestmains" = yes])
|
|
||||||
|
|
||||||
AC_ARG_ENABLE(rclgrep,
|
|
||||||
AS_HELP_STRING([--enable-rclgrep],[Enable building the index-less search tool.]),
|
|
||||||
buildrclgrep=$enableval, buildrclgrep=no)
|
|
||||||
AM_CONDITIONAL([COND_RCLGREP], [test "$buildrclgrep" = yes])
|
|
||||||
|
|
||||||
# Disable building the python module.
|
# Disable building the python module.
|
||||||
AC_ARG_ENABLE(python-module,
|
AC_ARG_ENABLE(python-module,
|
||||||
AS_HELP_STRING([--disable-python-module],[Do not build the Python module.]),
|
AC_HELP_STRING([--disable-python-module],
|
||||||
|
[Do not build the Python module.]),
|
||||||
pythonEnabled=$enableval, pythonEnabled=yes)
|
pythonEnabled=$enableval, pythonEnabled=yes)
|
||||||
|
|
||||||
AM_CONDITIONAL(MAKEPYTHON, [test X$pythonEnabled = Xyes])
|
AM_CONDITIONAL(MAKEPYTHON, [test X$pythonEnabled = Xyes])
|
||||||
|
|
||||||
# Disable building the libchm python wrapper
|
# Disable building the libchm python wrapper
|
||||||
AC_ARG_ENABLE(python-chm,
|
AC_ARG_ENABLE(python-chm, AC_HELP_STRING([--disable-python-chm],
|
||||||
AS_HELP_STRING([--disable-python-chm], [Do not build the libchm Python wrapper.]),
|
[Do not build the libchm Python wrapper.]),
|
||||||
pythonChmEnabled=$enableval, pythonChmEnabled=yes)
|
pythonChmEnabled=$enableval, pythonChmEnabled=yes)
|
||||||
|
|
||||||
if test X$pythonChmEnabled = Xyes; then
|
if test X$pythonChmEnabled = Xyes; then
|
||||||
AC_CHECK_LIB([chm], [chm_resolve_object], [],
|
AC_CHECK_LIB([chm], [chm_resolve_object], [],
|
||||||
[AC_MSG_ERROR([--enable-python-chm is set but libchm is not found])])
|
[AC_MSG_ERROR([--enable-python-chm is set but libchm is not found])])
|
||||||
fi
|
fi
|
||||||
|
|
||||||
AM_CONDITIONAL(MAKEPYTHONCHM, [test X$pythonChmEnabled = Xyes])
|
AM_CONDITIONAL(MAKEPYTHONCHM, [test X$pythonChmEnabled = Xyes])
|
||||||
|
|
||||||
|
|
||||||
AC_ARG_ENABLE(indexer,
|
AC_CHECK_FUNCS(mkdtemp)
|
||||||
AS_HELP_STRING([--disable-indexer],[Disable building the recollindex indexer.]),
|
AC_CHECK_LIB([pthread], [pthread_create], [], [])
|
||||||
enableINDEXER=$enableval, enableINDEXER="yes")
|
AC_SEARCH_LIBS([dlopen], [dl], [], [])
|
||||||
AM_CONDITIONAL(MAKEINDEXER, [test X$enableINDEXER = Xyes])
|
# The preceding AC_SEARCH_LIBS call for dlopen caches its result in
# ac_cv_search_dlopen: "no" when not found, else "none required" or "-lLIB".
# The value may contain a space, so it must be quoted.
if test "X$ac_cv_search_dlopen" != "Xno" ; then
|
||||||
|
AC_DEFINE(HAVE_DLOPEN, 1, [dlopen function is available])
|
||||||
|
fi
|
||||||
|
AC_CHECK_LIB([z], [zlibVersion], [], [])
|
||||||
|
|
||||||
|
############# Putenv
|
||||||
|
AC_MSG_CHECKING(for type of string parameter to putenv)
|
||||||
|
AC_LANG_PUSH([C++])
|
||||||
|
AC_TRY_COMPILE([
|
||||||
|
#include <stdlib.h>
|
||||||
|
],[
|
||||||
|
putenv((const char *)0);
|
||||||
|
], rcl_putenv_string_const="1", rcl_putenv_string_const="0")
|
||||||
|
if test X$rcl_putenv_string_const = X1 ; then
|
||||||
|
AC_DEFINE(PUTENV_ARG_CONST, 1, [putenv parameter is const])
|
||||||
|
fi
|
||||||
|
AC_LANG_POP([C++])
|
||||||
|
|
||||||
|
|
||||||
|
#### Look for Xapian. Done in a strange way to work around autoconf
|
||||||
|
# cache
|
||||||
|
XAPIAN_CONFIG=${XAPIAN_CONFIG:-no}
|
||||||
|
if test "$XAPIAN_CONFIG" = "no"; then
|
||||||
|
AC_PATH_PROG(XAPIAN_CONFIG0, [xapian-config], no)
|
||||||
|
XAPIAN_CONFIG=$XAPIAN_CONFIG0
|
||||||
|
fi
|
||||||
|
if test "$XAPIAN_CONFIG" = "no"; then
|
||||||
|
AC_PATH_PROG(XAPIAN_CONFIG1, [xapian-config-1.3], no)
|
||||||
|
XAPIAN_CONFIG=$XAPIAN_CONFIG1
|
||||||
|
fi
|
||||||
|
if test "$XAPIAN_CONFIG" = "no"; then
|
||||||
|
AC_PATH_PROG(XAPIAN_CONFIG2, [xapian-config-1.1], no)
|
||||||
|
XAPIAN_CONFIG=$XAPIAN_CONFIG2
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test "$XAPIAN_CONFIG" = "no" ; then
|
||||||
|
AC_MSG_ERROR([Cannot find xapian-config command in $PATH. Is
|
||||||
|
xapian-core installed ?])
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
LIBXAPIAN=`$XAPIAN_CONFIG --libs`
|
||||||
|
# The --static option fails with older Xapian versions. Fortunately they don't
|
||||||
|
# need it either, because there are no extra libraries to link (no uuid, and we
|
||||||
|
# deal explicitly with libz).
|
||||||
|
LIBXAPIANSTATICEXTRA=`$XAPIAN_CONFIG --static --libs 2> /dev/null`
|
||||||
|
# Workaround for problem in xapian-config in some versions: wrongly lists
|
||||||
|
# libstdc++.la in the lib list
|
||||||
|
for i in $LIBXAPIAN ; do
|
||||||
|
case $i in
|
||||||
|
*stdc++*|-lm|-lgcc_s|-lc);;
|
||||||
|
*) tmpxaplib="$tmpxaplib $i";;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
LIBXAPIAN=$tmpxaplib
|
||||||
|
LIBXAPIANDIR=`$XAPIAN_CONFIG --libs | awk '{print $1}'`
|
||||||
|
case A"$LIBXAPIANDIR" in
|
||||||
|
A-L*) LIBXAPIANDIR=`echo $LIBXAPIANDIR | sed -e 's/-L//'`;;
|
||||||
|
*) LIBXAPIANDIR="";;
|
||||||
|
esac
|
||||||
|
XAPIANCXXFLAGS=`$XAPIAN_CONFIG --cxxflags`
|
||||||
|
|
||||||
|
#echo XAPIAN_CONFIG: $XAPIAN_CONFIG
|
||||||
|
#echo LIBXAPIAN: $LIBXAPIAN
|
||||||
|
#echo LIBXAPIANDIR: $LIBXAPIANDIR
|
||||||
|
#echo LIBXAPIANSTATICEXTRA: $LIBXAPIANSTATICEXTRA
|
||||||
|
#echo XAPIANCXXFLAGS: $XAPIANCXXFLAGS
|
||||||
|
|
||||||
|
|
||||||
|
PKG_CHECK_MODULES([XSLT], [libxslt], [], AC_MSG_ERROR([libxslt]))
|
||||||
|
|
||||||
AC_ARG_ENABLE(xadump,
|
AC_ARG_ENABLE(xadump,
|
||||||
AS_HELP_STRING([--enable-xadump],[Enable building the xadump low level Xapian access program.]),
|
AC_HELP_STRING([--enable-xadump],
|
||||||
|
[Enable building the xadump low level Xapian access program.]),
|
||||||
enableXADUMP=$enableval, enableXADUMP="no")
|
enableXADUMP=$enableval, enableXADUMP="no")
|
||||||
AM_CONDITIONAL(MAKEXADUMP, [test X$enableXADUMP = Xyes])
|
AM_CONDITIONAL(MAKEXADUMP, [test X$enableXADUMP = Xyes])
|
||||||
|
|
||||||
AC_ARG_ENABLE(userdoc,
|
AC_ARG_ENABLE(userdoc,
|
||||||
AS_HELP_STRING([--disable-userdoc],[Disable building the user manual. (Avoids the need for docbook xml/xsl files and TeX tools.)]),
|
AC_HELP_STRING([--disable-userdoc],
|
||||||
|
[Disable building the user manual. (Avoids the need for docbook xml/xsl files and TeX tools.)]),
|
||||||
enableUserdoc=$enableval, enableUserdoc="yes")
|
enableUserdoc=$enableval, enableUserdoc="yes")
|
||||||
AM_CONDITIONAL(MAKEUSERDOC, [test X$enableUserdoc = Xyes])
|
AM_CONDITIONAL(MAKEUSERDOC, [test X$enableUserdoc = Xyes])
|
||||||
|
|
||||||
@ -311,12 +360,14 @@ AM_CONDITIONAL(MAKEUSERDOC, [test X$enableUserdoc = Xyes])
|
|||||||
# will have failed, and we tell the user to check his environment.
|
# will have failed, and we tell the user to check his environment.
|
||||||
#
|
#
|
||||||
AC_ARG_ENABLE(qtgui,
|
AC_ARG_ENABLE(qtgui,
|
||||||
AS_HELP_STRING([--disable-qtgui],[Disable the QT-based graphical user interface.]),
|
AC_HELP_STRING([--disable-qtgui],
|
||||||
|
[Disable the QT-based graphical user interface.]),
|
||||||
enableQT=$enableval, enableQT="yes")
|
enableQT=$enableval, enableQT="yes")
|
||||||
AM_CONDITIONAL(MAKEQT, [test X$enableQT = Xyes])
|
AM_CONDITIONAL(MAKEQT, [test X$enableQT = Xyes])
|
||||||
|
|
||||||
AC_ARG_ENABLE(recollq,
|
AC_ARG_ENABLE(recollq,
|
||||||
AS_HELP_STRING([--enable-recollq],[Enable building the recollq command line query tool (recoll -t without
|
AC_HELP_STRING([--enable-recollq],
|
||||||
|
[Enable building the recollq command line query tool (recoll -t without
|
||||||
need for Qt). This is done by default if --disable-qtgui is set but this
|
need for Qt). This is done by default if --disable-qtgui is set but this
|
||||||
option enables forcing it.]),
|
option enables forcing it.]),
|
||||||
enableRECOLLQ=$enableval, enableRECOLLQ="no")
|
enableRECOLLQ=$enableval, enableRECOLLQ="no")
|
||||||
@ -347,11 +398,28 @@ if test X$enableQT = Xyes ; then
|
|||||||
qt development files and tools and/or set the QTDIR environment variable?])
|
qt development files and tools and/or set the QTDIR environment variable?])
|
||||||
fi
|
fi
|
||||||
QMAKE=$QMAKEPATH
|
QMAKE=$QMAKEPATH
|
||||||
QTGUI=qtgui
|
|
||||||
|
# Check Qt version
|
||||||
|
qmakevers="`${QMAKE} --version 2>&1`"
|
||||||
|
#echo "qmake version: $qmakevers"
|
||||||
|
v4=`expr "$qmakevers" : '.*Qt[ ][ ]*version[ ][ ]*4.*'`
|
||||||
|
v5=`expr "$qmakevers" : '.*Qt[ ][ ]*version[ ][ ]*5.*'`
|
||||||
|
if test X$v4 = X0 -a X$v5 = X0; then
|
||||||
|
AC_MSG_ERROR([Bad qt/qmake version string (not 4 or 5?): $qmakevers])
|
||||||
|
else
|
||||||
|
if test X$v4 != X0 ; then
|
||||||
|
AC_MSG_ERROR([Qt version (from qmake found with QMAKE/QTDIR/PATH) is 4 but Recoll now needs version 5])
|
||||||
|
else
|
||||||
|
AC_MSG_NOTICE([using qt version 5 user interface])
|
||||||
|
fi
|
||||||
|
QTGUI=qtgui
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
##### Using Qt webkit for reslist display? Else Qt textbrowser
|
##### Using Qt webkit for reslist display? Else Qt textbrowser
|
||||||
AC_ARG_ENABLE(webkit,
|
AC_ARG_ENABLE(webkit,
|
||||||
AS_HELP_STRING([--disable-webkit],[Disable use of qt-webkit (only meaningful if qtgui is enabled).]),
|
AC_HELP_STRING([--disable-webkit],
|
||||||
|
[Disable use of qt-webkit (only meaningful if qtgui is enabled).]),
|
||||||
enableWebkit=$enableval, enableWebkit="yes")
|
enableWebkit=$enableval, enableWebkit="yes")
|
||||||
|
|
||||||
if test "$enableWebkit" = "yes" ; then
|
if test "$enableWebkit" = "yes" ; then
|
||||||
@ -363,7 +431,8 @@ if test X$enableQT = Xyes ; then
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
AC_ARG_ENABLE(webengine,
|
AC_ARG_ENABLE(webengine,
|
||||||
AS_HELP_STRING([--enable-webengine],[Enable use of qt-webengine (only meaningful if qtgui is enabled), in
|
AC_HELP_STRING([--enable-webengine],
|
||||||
|
[Enable use of qt-webengine (only meaningful if qtgui is enabled), in
|
||||||
place of qt-webkit.]),
|
place of qt-webkit.]),
|
||||||
enableWebengine=$enableval, enableWebengine="no")
|
enableWebengine=$enableval, enableWebengine="no")
|
||||||
|
|
||||||
@ -379,7 +448,8 @@ if test X$enableQT = Xyes ; then
|
|||||||
|
|
||||||
##### Using QZeitGeist lib ? Default no for now
|
##### Using QZeitGeist lib ? Default no for now
|
||||||
AC_ARG_WITH(qzeitgeist,
|
AC_ARG_WITH(qzeitgeist,
|
||||||
AS_HELP_STRING([--with-qzeitgeist],[Enable the use of the qzeitgeist library to send zeitgeist events.]),
|
AC_HELP_STRING([--with-qzeitgeist],
|
||||||
|
[Enable the use of the qzeitgeist library to send zeitgeist events.]),
|
||||||
withQZeitgeist=$withval, withQZeitgeist="no")
|
withQZeitgeist=$withval, withQZeitgeist="no")
|
||||||
|
|
||||||
case "$withQZeitgeist" in
|
case "$withQZeitgeist" in
|
||||||
@ -396,73 +466,22 @@ if test X$enableQT = Xyes ; then
|
|||||||
QMAKE_DISABLE_ZEITGEIST=""
|
QMAKE_DISABLE_ZEITGEIST=""
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Retain debugging symbols in GUI recoll ? This makes it enormous (~50MB)
|
|
||||||
AC_ARG_ENABLE(guidebug,
|
|
||||||
AS_HELP_STRING([--enable-guidebug],[Generate and retain debug symbols in GUI program (makes the file very big).]),
|
|
||||||
enableGuiDebug=$enableval, enableGuiDebug="no")
|
|
||||||
|
|
||||||
if test "$enableGuiDebug" = "yes" ; then
|
|
||||||
QMAKE_ENABLE_GUIDEBUG=""
|
|
||||||
else
|
|
||||||
QMAKE_ENABLE_GUIDEBUG="#"
|
|
||||||
fi
|
|
||||||
|
|
||||||
AC_CONFIG_FILES($QTGUI/recoll.pro)
|
AC_CONFIG_FILES($QTGUI/recoll.pro)
|
||||||
|
|
||||||
##################### End QT stuff
|
##################### End QT stuff
|
||||||
fi
|
fi
|
||||||
|
|
||||||
dnl Borrow a macro definition from pkg.config,
|
|
||||||
dnl for older installs that lack it.
|
|
||||||
m4_ifndef([PKG_CHECK_VAR], [
|
|
||||||
dnl PKG_CHECK_VAR(VARIABLE, MODULE, CONFIG-VARIABLE,
|
|
||||||
dnl [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
|
|
||||||
dnl -------------------------------------------
|
|
||||||
dnl Retrieves the value of the pkg-config variable for the given module.
|
|
||||||
AC_DEFUN([PKG_CHECK_VAR],
|
|
||||||
[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
|
|
||||||
AC_ARG_VAR([$1], [value of $3 for $2, overriding pkg-config])dnl
|
|
||||||
|
|
||||||
_PKG_CONFIG([$1], [variable="][$3]["], [$2])
|
|
||||||
AS_VAR_COPY([$1], [pkg_cv_][$1])
|
|
||||||
|
|
||||||
AS_VAR_IF([$1], [""], [$5], [$4])dnl
|
|
||||||
])dnl PKG_CHECK_VAR
|
|
||||||
])
|
|
||||||
|
|
||||||
### Systemd
|
|
||||||
AC_ARG_WITH([systemd],
|
|
||||||
AS_HELP_STRING([--without-systemd],[Disable installation of the systemd unit files.]))
|
|
||||||
AC_ARG_WITH([system-unit-dir],
|
|
||||||
AS_HELP_STRING([--with-system-unit-dir=DIR],[Install location for systemd system unit files]),
|
|
||||||
[SYSTEMD_SYSTEM_UNIT_DIR="$withval"],
|
|
||||||
[PKG_CHECK_VAR([SYSTEMD_SYSTEM_UNIT_DIR], [systemd], [systemdsystemunitdir])])
|
|
||||||
AC_ARG_WITH([user-unit-dir],
|
|
||||||
AS_HELP_STRING([--with-user-unit-dir=DIR],[Install location for systemd user unit files]),
|
|
||||||
[SYSTEMD_USER_UNIT_DIR="$withval"],
|
|
||||||
[PKG_CHECK_VAR([SYSTEMD_USER_UNIT_DIR], [systemd], [systemduserunitdir])])
|
|
||||||
|
|
||||||
if test X$enableINDEXER = Xno -o "x$SYSTEMD_SYSTEM_UNIT_DIR" = "x" -o \
|
|
||||||
"x$SYSTEMD_USER_UNIT_DIR" = "x"; then
|
|
||||||
with_systemd="no"
|
|
||||||
fi
|
|
||||||
|
|
||||||
AM_CONDITIONAL([INSTALL_SYSTEMD_UNITS], [test "X$with_systemd" != "Xno"])
|
|
||||||
|
|
||||||
### X11: this is needed for the session monitoring code (in recollindex -m)
|
### X11: this is needed for the session monitoring code (in recollindex -m)
|
||||||
AC_ARG_ENABLE(x11mon,
|
AC_ARG_ENABLE(x11mon,
|
||||||
AS_HELP_STRING([--disable-x11mon],[Disable recollindex support for X11 session monitoring.]),
|
AC_HELP_STRING([--disable-x11mon],
|
||||||
|
[Disable recollindex support for X11 session monitoring.]),
|
||||||
enableX11mon=$enableval, enableX11mon="yes")
|
enableX11mon=$enableval, enableX11mon="yes")
|
||||||
|
|
||||||
if test X$enableINDEXER = Xno ; then
|
if test X$withInotify = Xno -a X$withFam = Xno ; then
|
||||||
enableX11mon=no
|
enableX11mon=no
|
||||||
else
|
|
||||||
if test X$withInotify = Xno -a X$withFam = Xno ; then
|
|
||||||
enableX11mon=no
|
|
||||||
fi
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if test "$enableX11mon" = yes ; then
|
if test "$enableX11mon" = "yes" ; then
|
||||||
AC_PATH_XTRA
|
AC_PATH_XTRA
|
||||||
X_LIBX11=-lX11
|
X_LIBX11=-lX11
|
||||||
else
|
else
|
||||||
@ -472,17 +491,6 @@ fi
|
|||||||
#echo X_CFLAGS "'$X_CFLAGS'" X_PRE_LIBS "'$X_PRE_LIBS'" X_LIBS \
|
#echo X_CFLAGS "'$X_CFLAGS'" X_PRE_LIBS "'$X_PRE_LIBS'" X_LIBS \
|
||||||
# "'$X_LIBS'" X_LIBX11 "'$X_LIBX11'" X_EXTRA_LIBS "'$X_EXTRA_LIBS'"
|
# "'$X_LIBS'" X_LIBX11 "'$X_LIBX11'" X_EXTRA_LIBS "'$X_EXTRA_LIBS'"
|
||||||
|
|
||||||
# Check if anything needs Xapian. We also need to build the shared lib if this is the case.
|
|
||||||
xapian_needed=yes
|
|
||||||
if test X$buildtestmains = Xno -a X$pythonEnabled = Xno -a X$enableINDEXER = Xno \
|
|
||||||
-a X$enableXADUMP = Xno -a X$enableQT = Xno -a X$enableRECOLLQ = Xno ; then
|
|
||||||
xapian_needed=no
|
|
||||||
fi
|
|
||||||
if test X$xapian_needed = Xyes; then
|
|
||||||
PKG_CHECK_MODULES([XAPIAN], xapian-core, [], AC_MSG_ERROR([libxapian]))
|
|
||||||
fi
|
|
||||||
AM_CONDITIONAL([MAKE_RECOLL_LIB], [test X$xapian_needed = Xyes])
|
|
||||||
|
|
||||||
# For communicating the value of RECOLL_DATADIR to non-make-based
|
# For communicating the value of RECOLL_DATADIR to non-make-based
|
||||||
# subpackages like python-recoll, we have to expand prefix in here, because
|
# subpackages like python-recoll, we have to expand prefix in here, because
|
||||||
# things like "datadir = ${prefix}/share" (which is what we'd get by
|
# things like "datadir = ${prefix}/share" (which is what we'd get by
|
||||||
@ -506,17 +514,17 @@ AC_SUBST(X_LIBX11)
|
|||||||
AC_SUBST(X_EXTRA_LIBS)
|
AC_SUBST(X_EXTRA_LIBS)
|
||||||
AC_SUBST(INCICONV)
|
AC_SUBST(INCICONV)
|
||||||
AC_SUBST(LIBICONV)
|
AC_SUBST(LIBICONV)
|
||||||
AC_SUBST(XAPIAN_LIBS)
|
AC_SUBST(LIBXAPIAN)
|
||||||
AC_SUBST(XAPIAN_CFLAGS)
|
AC_SUBST(LIBXAPIANDIR)
|
||||||
|
AC_SUBST(LIBXAPIANSTATICEXTRA)
|
||||||
AC_SUBST(LIBFAM)
|
AC_SUBST(LIBFAM)
|
||||||
AC_SUBST(QMAKE)
|
AC_SUBST(QMAKE)
|
||||||
AC_SUBST(QTGUI)
|
AC_SUBST(QTGUI)
|
||||||
|
AC_SUBST(XAPIANCXXFLAGS)
|
||||||
AC_SUBST(QMAKE_ENABLE_WEBKIT)
|
AC_SUBST(QMAKE_ENABLE_WEBKIT)
|
||||||
AC_SUBST(QMAKE_DISABLE_WEBKIT)
|
AC_SUBST(QMAKE_DISABLE_WEBKIT)
|
||||||
AC_SUBST(QMAKE_ENABLE_WEBENGINE)
|
AC_SUBST(QMAKE_ENABLE_WEBENGINE)
|
||||||
AC_SUBST(QMAKE_DISABLE_WEBENGINE)
|
AC_SUBST(QMAKE_DISABLE_WEBENGINE)
|
||||||
AC_SUBST(QMAKE_ENABLE_GUIDEBUG)
|
|
||||||
AC_SUBST(QMAKE_DISABLE_GUIDEBUG)
|
|
||||||
AC_SUBST(QMAKE_ENABLE_ZEITGEIST)
|
AC_SUBST(QMAKE_ENABLE_ZEITGEIST)
|
||||||
AC_SUBST(QMAKE_DISABLE_ZEITGEIST)
|
AC_SUBST(QMAKE_DISABLE_ZEITGEIST)
|
||||||
AC_SUBST(LIBQZEITGEIST)
|
AC_SUBST(LIBQZEITGEIST)
|
||||||
@ -524,8 +532,6 @@ AC_SUBST(RCLVERSION)
|
|||||||
AC_SUBST(RCLLIBVERSION)
|
AC_SUBST(RCLLIBVERSION)
|
||||||
AC_SUBST(XSLT_CFLAGS)
|
AC_SUBST(XSLT_CFLAGS)
|
||||||
AC_SUBST(XSLT_LIBS)
|
AC_SUBST(XSLT_LIBS)
|
||||||
AC_SUBST([SYSTEMD_SYSTEM_UNIT_DIR])
|
|
||||||
AC_SUBST([SYSTEMD_USER_UNIT_DIR])
|
|
||||||
|
|
||||||
AC_CONFIG_FILES([Makefile python/recoll/setup.py
|
AC_CONFIG_FILES([Makefile python/recoll/setup.py
|
||||||
python/pychm/setup.py])
|
python/pychm/setup.py])
|
||||||
@ -533,8 +539,5 @@ AC_CONFIG_FILES([Makefile python/recoll/setup.py
|
|||||||
if test X$buildtestmains = Xyes ; then
|
if test X$buildtestmains = Xyes ; then
|
||||||
AC_CONFIG_FILES([testmains/Makefile])
|
AC_CONFIG_FILES([testmains/Makefile])
|
||||||
fi
|
fi
|
||||||
if test X$buildrclgrep = Xyes ; then
|
|
||||||
AC_CONFIG_FILES([rclgrep/Makefile])
|
|
||||||
fi
|
|
||||||
|
|
||||||
AC_OUTPUT
|
AC_OUTPUT
|
||||||
|
|||||||
@ -1,12 +0,0 @@
|
|||||||
.TH RCLGREP 1 "20 September 2022"
|
|
||||||
.SH NAME
|
|
||||||
rclgrep \- grep-like program based on the recoll data extraction functions
|
|
||||||
.SH SYNOPSIS
|
|
||||||
.B rclgrep
|
|
||||||
[
|
|
||||||
.B \--config
|
|
||||||
<configdir>
|
|
||||||
]
|
|
||||||
|
|
||||||
.SH DESCRIPTION
|
|
||||||
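\fBrclgrep\fP is a grep-like search tool built on the Recoll data extraction functions. It works without an index.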
|
|
||||||
@ -148,7 +148,7 @@ not set, the daemon uses skippedPaths.
|
|||||||
.TP
|
.TP
|
||||||
.BI "zipUseSkippedNames = "bool
|
.BI "zipUseSkippedNames = "bool
|
||||||
Use skippedNames inside Zip archives. Fetched
|
Use skippedNames inside Zip archives. Fetched
|
||||||
directly by the rclzip.py handler. Skip the patterns defined by skippedNames
|
directly by the rclzip handler. Skip the patterns defined by skippedNames
|
||||||
inside Zip archives. Can be redefined for subdirectories.
|
inside Zip archives. Can be redefined for subdirectories.
|
||||||
See https://www.lesbonscomptes.com/recoll/faqsandhowtos/FilteringOutZipArchiveMembers.html
|
See https://www.lesbonscomptes.com/recoll/faqsandhowtos/FilteringOutZipArchiveMembers.html
|
||||||
|
|
||||||
@ -195,7 +195,7 @@ lets you turn off md5 computation for selected types. It is global (no
|
|||||||
redefinition for subtrees). At the moment, it only has an effect for
|
redefinition for subtrees). At the moment, it only has an effect for
|
||||||
external handlers (exec and execm). The file types can be specified by
|
external handlers (exec and execm). The file types can be specified by
|
||||||
listing either MIME types (e.g. audio/mpeg) or handler names
|
listing either MIME types (e.g. audio/mpeg) or handler names
|
||||||
(e.g. rclaudio.py).
|
(e.g. rclaudio).
|
||||||
.TP
|
.TP
|
||||||
.BI "compressedfilemaxkbs = "int
|
.BI "compressedfilemaxkbs = "int
|
||||||
Size limit for compressed
|
Size limit for compressed
|
||||||
@ -613,7 +613,8 @@ location before copy, to allow path translation computations. For
|
|||||||
example if a dataset originally indexed as '/home/me/mydata/config' has
|
example if a dataset originally indexed as '/home/me/mydata/config' has
|
||||||
been mounted to '/media/me/mydata', and the GUI is running from a copied
|
been mounted to '/media/me/mydata', and the GUI is running from a copied
|
||||||
configuration, orgidxconfdir would be '/home/me/mydata/config', and
|
configuration, orgidxconfdir would be '/home/me/mydata/config', and
|
||||||
curidxconfdir (as set in the copied configuration) would be '/media/me/mydata/config'.
|
curidxconfdir (as set in the copied configuration) would be
|
||||||
|
'/media/me/mydata/config'.
|
||||||
.TP
|
.TP
|
||||||
.BI "idxrundir = "dfn
|
.BI "idxrundir = "dfn
|
||||||
Indexing process current directory. The input
|
Indexing process current directory. The input
|
||||||
|
|||||||
@ -59,10 +59,6 @@ recollq \- command line / standard output Recoll query command.
|
|||||||
.B \-F
|
.B \-F
|
||||||
<quoted space separated field name list>
|
<quoted space separated field name list>
|
||||||
]
|
]
|
||||||
[
|
|
||||||
.B \--extract-to
|
|
||||||
<file path>
|
|
||||||
]
|
|
||||||
<query string>
|
<query string>
|
||||||
|
|
||||||
.B recollq \-P
|
.B recollq \-P
|
||||||
@ -124,10 +120,9 @@ sorts the results according to the specified field. Use
|
|||||||
for descending order.
|
for descending order.
|
||||||
.PP
|
.PP
|
||||||
.B \-n
|
.B \-n
|
||||||
<[first-]cnt>
|
<cnt>
|
||||||
can be used to set the maximum number of results that should be
|
can be used to set the maximum number of results that should be
|
||||||
printed. The default is 2000. Use a value of 0 for no limit. If the argument is of the form
|
printed. The default is 2000. Use a value of 0 for no limit.
|
||||||
first-cnt, it also defines the first result to output (from 0).
|
|
||||||
.PP
|
.PP
|
||||||
.B \-s
|
.B \-s
|
||||||
<language>
|
<language>
|
||||||
@ -149,11 +144,6 @@ base64 and separated by one space character. Empty fields are indicated by
|
|||||||
consecutive space characters. There is one additional space character at
|
consecutive space characters. There is one additional space character at
|
||||||
the end of each line.
|
the end of each line.
|
||||||
.PP
|
.PP
|
||||||
.B \--extract-to
|
|
||||||
<file path>
|
|
||||||
Will extract the first result document of the query to the argument path, which must not exist. Use
|
|
||||||
-n first-cnt to select the document.
|
|
||||||
.PP
|
|
||||||
.B recollq \-P
|
.B recollq \-P
|
||||||
(Period) will print the minimum and maximum modification years for
|
(Period) will print the minimum and maximum modification years for
|
||||||
documents in the index.
|
documents in the index.
|
||||||
|
|||||||
@ -13,7 +13,6 @@
|
|||||||
#XSLDIR="/opt/local/share/xsl/docbook-xsl/"
|
#XSLDIR="/opt/local/share/xsl/docbook-xsl/"
|
||||||
#Linux
|
#Linux
|
||||||
XSLDIR="/usr/share/xml/docbook/stylesheet/docbook-xsl/"
|
XSLDIR="/usr/share/xml/docbook/stylesheet/docbook-xsl/"
|
||||||
UTILBUILDS=/home/dockes/tmp/builds/medocutils/
|
|
||||||
|
|
||||||
|
|
||||||
# Options common to the single-file and chunked versions
|
# Options common to the single-file and chunked versions
|
||||||
@ -49,10 +48,11 @@ index.html: usermanual.xml recoll.conf.xml
|
|||||||
usermanual.pdf: usermanual.xml recoll.conf.xml
|
usermanual.pdf: usermanual.xml recoll.conf.xml
|
||||||
dblatex --xslt-opts="--xinclude" -tpdf $<
|
dblatex --xslt-opts="--xinclude" -tpdf $<
|
||||||
|
|
||||||
recoll.conf.xml: ../../sampleconf/recoll.conf
|
UTILBUILDS=/home/dockes/tmp/builds/medocutils/
|
||||||
test -x $(UTILBUILDS)/confxml && $(UTILBUILDS)/confxml --docbook \
|
recoll-conf-xml:
|
||||||
|
$(UTILBUILDS)/confxml --docbook \
|
||||||
--idprefix=RCL.INSTALL.CONFIG.RECOLLCONF \
|
--idprefix=RCL.INSTALL.CONFIG.RECOLLCONF \
|
||||||
../../sampleconf/recoll.conf > recoll.conf.xml || touch recoll.conf.xml
|
../../sampleconf/recoll.conf > recoll.conf.xml
|
||||||
|
|
||||||
# Generating a restructured text version, for uploading to readthedocs.
|
# Generating a restructured text version, for uploading to readthedocs.
|
||||||
# Does not really work, the internal links are botched. pandoc
|
# Does not really work, the internal links are botched. pandoc
|
||||||
@ -65,7 +65,7 @@ recoll.conf.xml: ../../sampleconf/recoll.conf
|
|||||||
# script.
|
# script.
|
||||||
# Also could not get readthedocs to generate the left pane TOC? could
|
# Also could not get readthedocs to generate the left pane TOC? could
|
||||||
# probably be fixed...
|
# probably be fixed...
|
||||||
#usermanual-rst: recoll.conf.xml
|
#usermanual-rst: recoll-conf-xml
|
||||||
# tail -n +2 recoll.conf.xml > rcl-conf-tail.xml
|
# tail -n +2 recoll.conf.xml > rcl-conf-tail.xml
|
||||||
# sed -e '/xi:include/r rcl-conf-tail.xml' \
|
# sed -e '/xi:include/r rcl-conf-tail.xml' \
|
||||||
# < usermanual.xml > full-man.xml
|
# < usermanual.xml > full-man.xml
|
||||||
|
|||||||
@ -8,34 +8,28 @@
|
|||||||
<listitem><para>Space-separated list of files or
|
<listitem><para>Space-separated list of files or
|
||||||
directories to recursively index. Default to ~ (indexes
|
directories to recursively index. Default to ~ (indexes
|
||||||
$HOME). You can use symbolic links in the list, they will be followed,
|
$HOME). You can use symbolic links in the list, they will be followed,
|
||||||
independently of the value of the followLinks variable.
|
independently of the value of the followLinks variable.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MONITORDIRS">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MONITORDIRS">
|
||||||
<term><varname>monitordirs</varname></term>
|
<term><varname>monitordirs</varname></term>
|
||||||
<listitem><para>Space-separated list of files or directories to monitor for
|
<listitem><para>Space-separated list of files or directories to monitor for
|
||||||
updates. When running the real-time indexer, this allows monitoring only a
|
updates. When running the real-time indexer, this allows monitoring only a
|
||||||
subset of the whole indexed area. The elements must be included in the
|
subset of the whole indexed area. The elements must be included in the
|
||||||
tree defined by the 'topdirs' members.
|
tree defined by the 'topdirs' members.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.SKIPPEDNAMES">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.SKIPPEDNAMES">
|
||||||
<term><varname>skippedNames</varname></term>
|
<term><varname>skippedNames</varname></term>
|
||||||
<listitem><para>Files and directories which should be ignored. White space separated list of wildcard patterns (simple ones, not paths, must contain no
|
<listitem><para>Files and directories which should be ignored.
|
||||||
'/' characters), which will be tested against file and directory names.
|
White space separated list of wildcard patterns (simple ones, not paths,
|
||||||
</para><para>
|
must contain no / ), which will be tested against file and directory
|
||||||
Have a look at the default configuration for the initial value, some entries may not suit your
|
names. The list in the default configuration does not exclude hidden
|
||||||
situation. The easiest way to see it is through the GUI Index configuration "local parameters"
|
directories (names beginning with a dot), which means that it may index
|
||||||
panel.
|
quite a few things that you do not want. On the other hand, email user
|
||||||
</para><para>
|
agents like Thunderbird usually store messages in hidden directories, and
|
||||||
The list in the default configuration does not exclude hidden directories (names beginning with a
|
you probably want this indexed. One possible solution is to have ".*" in
|
||||||
dot), which means that it may index quite a few things that you do not want. On the other hand,
|
"skippedNames", and add things like "~/.thunderbird" "~/.evolution" to
|
||||||
email user agents like Thunderbird usually store messages in hidden directories, and you probably
|
"topdirs". Not even the file names are indexed for patterns in this
|
||||||
want this indexed. One possible solution is to have ".*" in "skippedNames", and add things like
|
list, see the "noContentSuffixes" variable for an alternative approach
|
||||||
"~/.thunderbird" "~/.evolution" to "topdirs".
|
which indexes the file names. Can be redefined for any
|
||||||
</para><para>
|
subtree.</para></listitem></varlistentry>
|
||||||
Not even the file names are indexed for patterns in this list, see the "noContentSuffixes"
|
|
||||||
variable for an alternative approach which indexes the file names. Can be redefined for any
|
|
||||||
subtree.
|
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.SKIPPEDNAMES-">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.SKIPPEDNAMES-">
|
||||||
<term><varname>skippedNames-</varname></term>
|
<term><varname>skippedNames-</varname></term>
|
||||||
<listitem><para>List of name endings to remove from the default skippedNames
|
<listitem><para>List of name endings to remove from the default skippedNames
|
||||||
@ -48,8 +42,7 @@ list. </para></listitem></varlistentry>
|
|||||||
<term><varname>onlyNames</varname></term>
|
<term><varname>onlyNames</varname></term>
|
||||||
<listitem><para>Regular file name filter patterns. If this is set, only the file names not in skippedNames and
|
<listitem><para>Regular file name filter patterns. If this is set, only the file names not in skippedNames and
|
||||||
matching one of the patterns will be considered for indexing. Can be
|
matching one of the patterns will be considered for indexing. Can be
|
||||||
redefined per subtree. Does not apply to directories.
|
redefined per subtree. Does not apply to directories.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.NOCONTENTSUFFIXES">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.NOCONTENTSUFFIXES">
|
||||||
<term><varname>noContentSuffixes</varname></term>
|
<term><varname>noContentSuffixes</varname></term>
|
||||||
<listitem><para>List of name endings (not necessarily dot-separated suffixes) for
|
<listitem><para>List of name endings (not necessarily dot-separated suffixes) for
|
||||||
@ -60,8 +53,7 @@ which will go away in a future release (the move from mimemap to
|
|||||||
recoll.conf allows editing the list through the GUI). This is different
|
recoll.conf allows editing the list through the GUI). This is different
|
||||||
from skippedNames because these are name ending matches only (not
|
from skippedNames because these are name ending matches only (not
|
||||||
wildcard patterns), and the file name itself gets indexed normally. This
|
wildcard patterns), and the file name itself gets indexed normally. This
|
||||||
can be redefined for subdirectories.
|
can be redefined for subdirectories.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.NOCONTENTSUFFIXES-">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.NOCONTENTSUFFIXES-">
|
||||||
<term><varname>noContentSuffixes-</varname></term>
|
<term><varname>noContentSuffixes-</varname></term>
|
||||||
<listitem><para>List of name endings to remove from the default noContentSuffixes
|
<listitem><para>List of name endings to remove from the default noContentSuffixes
|
||||||
@ -72,26 +64,19 @@ list. </para></listitem></varlistentry>
|
|||||||
list. </para></listitem></varlistentry>
|
list. </para></listitem></varlistentry>
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.SKIPPEDPATHS">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.SKIPPEDPATHS">
|
||||||
<term><varname>skippedPaths</varname></term>
|
<term><varname>skippedPaths</varname></term>
|
||||||
<listitem><para>Absolute paths we should not go into. Space-separated list of wildcard expressions for absolute filesystem paths (for files or
|
<listitem><para>Absolute paths we should not go into. Space-separated list of wildcard expressions for absolute
|
||||||
directories). The variable must be defined at the top level of the configuration file, not in a
|
filesystem paths. Must be defined at the top level of the configuration
|
||||||
subsection.
|
file, not in a subsection. Can contain files and directories. The database and
|
||||||
</para><para>
|
configuration directories will automatically be added. The expressions
|
||||||
Any value in the list must be textually consistent with the values in topdirs, no attempts are
|
are matched using 'fnmatch(3)' with the FNM_PATHNAME flag set by
|
||||||
made to resolve symbolic links. In practice, if, as is frequently the case, /home is a link to
|
default. This means that '/' characters must be matched explicitly. You
|
||||||
/usr/home, your default topdirs will have a single entry '~' which will be translated to
|
can set 'skippedPathsFnmPathname' to 0 to disable the use of FNM_PATHNAME
|
||||||
'/home/yourlogin'. In this case, any skippedPaths entry should start with '/home/yourlogin' *not*
|
(meaning that '/*/dir3' will match '/dir1/dir2/dir3'). The default value
|
||||||
with '/usr/home/yourlogin'.
|
contains the usual mount point for removable media to remind you that it
|
||||||
</para><para>
|
is a bad idea to have Recoll work on these (esp. with the monitor: media
|
||||||
The index and configuration directories will automatically be added to the list.
|
gets indexed on mount, all data gets erased on unmount). Explicitly
|
||||||
</para><para>
|
adding '/media/xxx' to the 'topdirs' variable will override
|
||||||
The expressions are matched using 'fnmatch(3)' with the FNM_PATHNAME flag set by default. This
|
this.</para></listitem></varlistentry>
|
||||||
means that '/' characters must be matched explicitly. You can set 'skippedPathsFnmPathname' to 0
|
|
||||||
to disable the use of FNM_PATHNAME (meaning that '/*/dir3' will match '/dir1/dir2/dir3').
|
|
||||||
</para><para>
|
|
||||||
The default value contains the usual mount point for removable media to remind you that it is in
|
|
||||||
most cases a bad idea to have Recoll work on these. Explicitly adding '/media/xxx' to the 'topdirs'
|
|
||||||
variable will override this.
|
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.SKIPPEDPATHSFNMPATHNAME">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.SKIPPEDPATHSFNMPATHNAME">
|
||||||
<term><varname>skippedPathsFnmPathname</varname></term>
|
<term><varname>skippedPathsFnmPathname</varname></term>
|
||||||
<listitem><para>Set to 0 to
|
<listitem><para>Set to 0 to
|
||||||
@ -100,19 +85,17 @@ paths. </para></listitem></varlistentry>
|
|||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.NOWALKFN">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.NOWALKFN">
|
||||||
<term><varname>nowalkfn</varname></term>
|
<term><varname>nowalkfn</varname></term>
|
||||||
<listitem><para>File name which will cause its parent directory to be skipped. Any directory containing a file with this name will be skipped as
|
<listitem><para>File name which will cause its parent directory to be skipped. Any directory containing a file with this name will be skipped as
|
||||||
if it was part of the skippedPaths list. Ex: .recoll-noindex
|
if it was part of the skippedPaths list. Ex: .recoll-noindex</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.DAEMSKIPPEDPATHS">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.DAEMSKIPPEDPATHS">
|
||||||
<term><varname>daemSkippedPaths</varname></term>
|
<term><varname>daemSkippedPaths</varname></term>
|
||||||
<listitem><para>skippedPaths equivalent specific to
|
<listitem><para>skippedPaths equivalent specific to
|
||||||
real time indexing. This enables having parts of the tree
|
real time indexing. This enables having parts of the tree
|
||||||
which are initially indexed but not monitored. If daemSkippedPaths is
|
which are initially indexed but not monitored. If daemSkippedPaths is
|
||||||
not set, the daemon uses skippedPaths.
|
not set, the daemon uses skippedPaths.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.ZIPUSESKIPPEDNAMES">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.ZIPUSESKIPPEDNAMES">
|
||||||
<term><varname>zipUseSkippedNames</varname></term>
|
<term><varname>zipUseSkippedNames</varname></term>
|
||||||
<listitem><para>Use skippedNames inside Zip archives. Fetched
|
<listitem><para>Use skippedNames inside Zip archives. Fetched
|
||||||
directly by the rclzip.py handler. Skip the patterns defined by skippedNames
|
directly by the rclzip handler. Skip the patterns defined by skippedNames
|
||||||
inside Zip archives. Can be redefined for subdirectories.
|
inside Zip archives. Can be redefined for subdirectories.
|
||||||
See https://www.lesbonscomptes.com/recoll/faqsandhowtos/FilteringOutZipArchiveMembers.html
|
See https://www.lesbonscomptes.com/recoll/faqsandhowtos/FilteringOutZipArchiveMembers.html
|
||||||
</para></listitem></varlistentry>
|
</para></listitem></varlistentry>
|
||||||
@ -134,8 +117,7 @@ multiple indexing of linked files. No effort is made to avoid duplication
|
|||||||
when this option is set to true. This option can be set individually for
|
when this option is set to true. This option can be set individually for
|
||||||
each of the 'topdirs' members by using sections. It can not be changed
|
each of the 'topdirs' members by using sections. It can not be changed
|
||||||
below the 'topdirs' level. Links in the 'topdirs' list itself are always
|
below the 'topdirs' level. Links in the 'topdirs' list itself are always
|
||||||
followed.
|
followed.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.INDEXEDMIMETYPES">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.INDEXEDMIMETYPES">
|
||||||
<term><varname>indexedmimetypes</varname></term>
|
<term><varname>indexedmimetypes</varname></term>
|
||||||
<listitem><para>Restrictive list of
|
<listitem><para>Restrictive list of
|
||||||
@ -144,16 +126,14 @@ supported types are indexed). If it is set, only the types from the list
|
|||||||
will have their contents indexed. The names will be indexed anyway if
|
will have their contents indexed. The names will be indexed anyway if
|
||||||
indexallfilenames is set (default). MIME type names should be taken from
|
indexallfilenames is set (default). MIME type names should be taken from
|
||||||
the mimemap file (the values may be different from xdg-mime or file -i
|
the mimemap file (the values may be different from xdg-mime or file -i
|
||||||
output in some cases). Can be redefined for subtrees.
|
output in some cases). Can be redefined for subtrees.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.EXCLUDEDMIMETYPES">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.EXCLUDEDMIMETYPES">
|
||||||
<term><varname>excludedmimetypes</varname></term>
|
<term><varname>excludedmimetypes</varname></term>
|
||||||
<listitem><para>List of excluded MIME
|
<listitem><para>List of excluded MIME
|
||||||
types. Lets you exclude some types from indexing. MIME type
|
types. Lets you exclude some types from indexing. MIME type
|
||||||
names should be taken from the mimemap file (the values may be different
|
names should be taken from the mimemap file (the values may be different
|
||||||
from xdg-mime or file -i output in some cases). Can be redefined for
|
from xdg-mime or file -i output in some cases). Can be redefined for
|
||||||
subtrees.
|
subtrees.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.NOMD5TYPES">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.NOMD5TYPES">
|
||||||
<term><varname>nomd5types</varname></term>
|
<term><varname>nomd5types</varname></term>
|
||||||
<listitem><para>Don't compute md5 for these types. md5 checksums are used only for deduplicating results, and can be
|
<listitem><para>Don't compute md5 for these types. md5 checksums are used only for deduplicating results, and can be
|
||||||
@ -162,43 +142,32 @@ lets you turn off md5 computation for selected types. It is global (no
|
|||||||
redefinition for subtrees). At the moment, it only has an effect for
|
redefinition for subtrees). At the moment, it only has an effect for
|
||||||
external handlers (exec and execm). The file types can be specified by
|
external handlers (exec and execm). The file types can be specified by
|
||||||
listing either MIME types (e.g. audio/mpeg) or handler names
|
listing either MIME types (e.g. audio/mpeg) or handler names
|
||||||
(e.g. rclaudio.py).
|
(e.g. rclaudio).</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.COMPRESSEDFILEMAXKBS">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.COMPRESSEDFILEMAXKBS">
|
||||||
<term><varname>compressedfilemaxkbs</varname></term>
|
<term><varname>compressedfilemaxkbs</varname></term>
|
||||||
<listitem><para>Size limit for compressed
|
<listitem><para>Size limit for compressed
|
||||||
files. We need to decompress these in a
|
files. We need to decompress these in a
|
||||||
temporary directory for identification, which can be wasteful in some
|
temporary directory for identification, which can be wasteful in some
|
||||||
cases. Limit the waste. Negative means no limit. 0 results in no
|
cases. Limit the waste. Negative means no limit. 0 results in no
|
||||||
processing of any compressed file. Default 100 MB.
|
processing of any compressed file. Default 50 MB.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.TEXTFILEMAXMBS">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.TEXTFILEMAXMBS">
|
||||||
<term><varname>textfilemaxmbs</varname></term>
|
<term><varname>textfilemaxmbs</varname></term>
|
||||||
<listitem><para>Size limit for text files. Mostly for skipping monster logs. Default 20 MB. Use a value of -1 to
|
<listitem><para>Size limit for text
|
||||||
disable.
|
files. Mostly for skipping monster
|
||||||
</para></listitem></varlistentry>
|
logs. Default 20 MB.</para></listitem></varlistentry>
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.TEXTUNKNOWNASPLAIN">
|
|
||||||
<term><varname>textunknownasplain</varname></term>
|
|
||||||
<listitem><para>Process unknown text/xxx files as text/plain. Allows indexing misc. text files identified as text/whatever by 'file' or 'xdg-mime'
|
|
||||||
without having to explicitly set config entries for them. This works fine for indexing (but will
|
|
||||||
cause processing of a lot of garbage though), but the documents indexed this way will be opened by
|
|
||||||
the desktop viewer, even if text/plain has a specific editor.
|
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.INDEXALLFILENAMES">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.INDEXALLFILENAMES">
|
||||||
<term><varname>indexallfilenames</varname></term>
|
<term><varname>indexallfilenames</varname></term>
|
||||||
<listitem><para>Index the file names of
|
<listitem><para>Index the file names of
|
||||||
unprocessed files. Index the names of files the contents of
|
unprocessed files. Index the names of files the contents of
|
||||||
which we don't index because of an excluded or unsupported MIME
|
which we don't index because of an excluded or unsupported MIME
|
||||||
type.
|
type.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.USESYSTEMFILECOMMAND">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.USESYSTEMFILECOMMAND">
|
||||||
<term><varname>usesystemfilecommand</varname></term>
|
<term><varname>usesystemfilecommand</varname></term>
|
||||||
<listitem><para>Use a system command
|
<listitem><para>Use a system command
|
||||||
for file MIME type guessing as a final step in file type
|
for file MIME type guessing as a final step in file type
|
||||||
identification. This is generally useful, but will usually
|
identification. This is generally useful, but will usually
|
||||||
cause the indexing of many bogus 'text' files. See 'systemfilecommand'
|
cause the indexing of many bogus 'text' files. See 'systemfilecommand'
|
||||||
for the command used.
|
for the command used.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.SYSTEMFILECOMMAND">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.SYSTEMFILECOMMAND">
|
||||||
<term><varname>systemfilecommand</varname></term>
|
<term><varname>systemfilecommand</varname></term>
|
||||||
<listitem><para>Command used to guess
|
<listitem><para>Command used to guess
|
||||||
@ -206,14 +175,12 @@ MIME types if the internal methods fails This should be a
|
|||||||
"file -i" workalike. The file path will be added as a last parameter to
|
"file -i" workalike. The file path will be added as a last parameter to
|
||||||
the command line. "xdg-mime" works better than the traditional "file"
|
the command line. "xdg-mime" works better than the traditional "file"
|
||||||
command, and is now the configured default (with a hard-coded fallback to
|
command, and is now the configured default (with a hard-coded fallback to
|
||||||
"file")
|
"file")</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.PROCESSWEBQUEUE">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.PROCESSWEBQUEUE">
|
||||||
<term><varname>processwebqueue</varname></term>
|
<term><varname>processwebqueue</varname></term>
|
||||||
<listitem><para>Decide if we process the
|
<listitem><para>Decide if we process the
|
||||||
Web queue. The queue is a directory where the Recoll Web
|
Web queue. The queue is a directory where the Recoll Web
|
||||||
browser plugins create the copies of visited pages.
|
browser plugins create the copies of visited pages.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.TEXTFILEPAGEKBS">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.TEXTFILEPAGEKBS">
|
||||||
<term><varname>textfilepagekbs</varname></term>
|
<term><varname>textfilepagekbs</varname></term>
|
||||||
<listitem><para>Page size for text
|
<listitem><para>Page size for text
|
||||||
@ -222,14 +189,12 @@ into documents of approximately this size. Will reduce memory usage at
|
|||||||
index time and help with loading data in the preview window at query
|
index time and help with loading data in the preview window at query
|
||||||
time. Particularly useful with very big files, such as application or
|
time. Particularly useful with very big files, such as application or
|
||||||
system logs. Also see textfilemaxmbs and
|
system logs. Also see textfilemaxmbs and
|
||||||
compressedfilemaxkbs.
|
compressedfilemaxkbs.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MEMBERMAXKBS">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MEMBERMAXKBS">
|
||||||
<term><varname>membermaxkbs</varname></term>
|
<term><varname>membermaxkbs</varname></term>
|
||||||
<listitem><para>Size limit for archive
|
<listitem><para>Size limit for archive
|
||||||
members. This is passed to the filters in the environment
|
members. This is passed to the filters in the environment
|
||||||
as RECOLL_FILTER_MAXMEMBERKB.
|
as RECOLL_FILTER_MAXMEMBERKB.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
</variablelist></sect3>
|
</variablelist></sect3>
|
||||||
<sect3 id="RCL.INSTALL.CONFIG.RECOLLCONF.TERMS">
|
<sect3 id="RCL.INSTALL.CONFIG.RECOLLCONF.TERMS">
|
||||||
<title>Parameters affecting how we generate terms and organize the index </title><variablelist>
|
<title>Parameters affecting how we generate terms and organize the index </title><variablelist>
|
||||||
@ -241,34 +206,28 @@ searches sensitive to case and diacritics can be performed, but the index
|
|||||||
will be bigger, and some marginal weirdness may sometimes occur. The
|
will be bigger, and some marginal weirdness may sometimes occur. The
|
||||||
default is a stripped index. When using multiple indexes for a search,
|
default is a stripped index. When using multiple indexes for a search,
|
||||||
this parameter must be defined identically for all. Changing the value
|
this parameter must be defined identically for all. Changing the value
|
||||||
implies an index reset.
|
implies an index reset.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.INDEXSTOREDOCTEXT">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.INDEXSTOREDOCTEXT">
|
||||||
<term><varname>indexStoreDocText</varname></term>
|
<term><varname>indexStoreDocText</varname></term>
|
||||||
<listitem><para>Decide if we store the
|
<listitem><para>Decide if we store the
|
||||||
documents' text content in the index. Storing the text
|
documents' text content in the index. Storing the text
|
||||||
allows extracting snippets from it at query time, instead of building
|
allows extracting snippets from it at query time, instead of building
|
||||||
them from index position data.
|
them from index position data.
|
||||||
</para><para>
|
|
||||||
Newer Xapian index formats have rendered our use of positions list
|
Newer Xapian index formats have rendered our use of positions list
|
||||||
unacceptably slow in some cases. The last Xapian index format with good
|
unacceptably slow in some cases. The last Xapian index format with good
|
||||||
performance for the old method is Chert, which is default for 1.2, still
|
performance for the old method is Chert, which is default for 1.2, still
|
||||||
supported but not default in 1.4 and will be dropped in 1.6.
|
supported but not default in 1.4 and will be dropped in 1.6.
|
||||||
</para><para>
|
|
||||||
The stored document text is translated from its original format to UTF-8
|
The stored document text is translated from its original format to UTF-8
|
||||||
plain text, but not stripped of upper-case, diacritics, or punctuation
|
plain text, but not stripped of upper-case, diacritics, or punctuation
|
||||||
signs. Storing it increases the index size by 10-20% typically, but also
|
signs. Storing it increases the index size by 10-20% typically, but also
|
||||||
allows for nicer snippets, so it may be worth enabling it even if not
|
allows for nicer snippets, so it may be worth enabling it even if not
|
||||||
strictly needed for performance if you can afford the space.
|
strictly needed for performance if you can afford the space.
|
||||||
</para><para>
|
|
||||||
The variable only has an effect when creating an index, meaning that the
|
The variable only has an effect when creating an index, meaning that the
|
||||||
xapiandb directory must not exist yet. Its exact effect depends on the
|
xapiandb directory must not exist yet. Its exact effect depends on the
|
||||||
Xapian version.
|
Xapian version.
|
||||||
</para><para>
|
|
||||||
For Xapian 1.4, if the variable is set to 0, the Chert format will be
|
For Xapian 1.4, if the variable is set to 0, the Chert format will be
|
||||||
used, and the text will not be stored. If the variable is 1, Glass will
|
used, and the text will not be stored. If the variable is 1, Glass will
|
||||||
be used, and the text stored.
|
be used, and the text stored.
|
||||||
</para><para>
|
|
||||||
For Xapian 1.2, and for versions 1.5 and newer, the index format is
|
For Xapian 1.2, and for versions 1.5 and newer, the index format is
|
||||||
always the default, but the variable controls if the text is stored or
|
always the default, but the variable controls if the text is stored or
|
||||||
not, and the abstract generation method. With Xapian 1.5 and later, and
|
not, and the abstract generation method. With Xapian 1.5 and later, and
|
||||||
@ -285,31 +244,26 @@ still be). Numbers are often quite interesting to search for, and this
|
|||||||
should probably not be set except for special situations, i.e., scientific
|
should probably not be set except for special situations, i.e., scientific
|
||||||
documents with huge amounts of numbers in them, where setting nonumbers
|
documents with huge amounts of numbers in them, where setting nonumbers
|
||||||
will reduce the index size. This can only be set for a whole index, not
|
will reduce the index size. This can only be set for a whole index, not
|
||||||
for a subtree.
|
for a subtree.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.DEHYPHENATE">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.DEHYPHENATE">
|
||||||
<term><varname>dehyphenate</varname></term>
|
<term><varname>dehyphenate</varname></term>
|
||||||
<listitem><para>Determines if we index 'coworker'
|
<listitem><para>Determines if we index 'coworker'
|
||||||
also when the input is 'co-worker'. This is new
|
also when the input is 'co-worker'. This is new
|
||||||
in version 1.22, and on by default. Setting the variable to off allows
|
in version 1.22, and on by default. Setting the variable to off allows
|
||||||
restoring the previous behaviour.
|
restoring the previous behaviour.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.BACKSLASHASLETTER">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.BACKSLASHASLETTER">
|
||||||
<term><varname>backslashasletter</varname></term>
|
<term><varname>backslashasletter</varname></term>
|
||||||
<listitem><para>Process backslash as normal letter. This may make sense for people wanting to index TeX commands as
|
<listitem><para>Process backslash as normal letter. This may make sense for people wanting to index TeX commands as
|
||||||
such but is not of much general use.
|
such but is not of much general use.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.UNDERSCOREASLETTER">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.UNDERSCOREASLETTER">
|
||||||
<term><varname>underscoreasletter</varname></term>
|
<term><varname>underscoreasletter</varname></term>
|
||||||
<listitem><para>Process underscore as normal letter. This makes sense in so many cases that one wonders if it should
|
<listitem><para>Process underscore as normal letter. This makes sense in so many cases that one wonders if it should
|
||||||
not be the default.
|
not be the default.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MAXTERMLENGTH">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MAXTERMLENGTH">
|
||||||
<term><varname>maxtermlength</varname></term>
|
<term><varname>maxtermlength</varname></term>
|
||||||
<listitem><para>Maximum term length. Words longer than this will be discarded.
|
<listitem><para>Maximum term length. Words longer than this will be discarded.
|
||||||
The default is 40 and used to be hard-coded, but it can now be
|
The default is 40 and used to be hard-coded, but it can now be
|
||||||
adjusted. You need an index reset if you change the value.
|
adjusted. You need an index reset if you change the value.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.NOCJK">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.NOCJK">
|
||||||
<term><varname>nocjk</varname></term>
|
<term><varname>nocjk</varname></term>
|
||||||
<listitem><para>Decides if specific East Asian
|
<listitem><para>Decides if specific East Asian
|
||||||
@ -317,23 +271,20 @@ adjusted. You need an index reset if you change the value.
|
|||||||
off. This will save a small amount of CPU if you have no CJK
|
off. This will save a small amount of CPU if you have no CJK
|
||||||
documents. If your document base does include such text but you are not
|
documents. If your document base does include such text but you are not
|
||||||
interested in searching it, setting nocjk may be a
|
interested in searching it, setting nocjk may be a
|
||||||
significant time and space saver.
|
significant time and space saver.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.CJKNGRAMLEN">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.CJKNGRAMLEN">
|
||||||
<term><varname>cjkngramlen</varname></term>
|
<term><varname>cjkngramlen</varname></term>
|
||||||
<listitem><para>This lets you adjust the size of
|
<listitem><para>This lets you adjust the size of
|
||||||
n-grams used for indexing CJK text. The default value of 2 is
|
n-grams used for indexing CJK text. The default value of 2 is
|
||||||
probably appropriate in most cases. A value of 3 would allow more precision
|
probably appropriate in most cases. A value of 3 would allow more precision
|
||||||
and efficiency on longer words, but the index will be approximately twice
|
and efficiency on longer words, but the index will be approximately twice
|
||||||
as large.
|
as large.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.INDEXSTEMMINGLANGUAGES">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.INDEXSTEMMINGLANGUAGES">
|
||||||
<term><varname>indexstemminglanguages</varname></term>
|
<term><varname>indexstemminglanguages</varname></term>
|
||||||
<listitem><para>Languages for which to create stemming expansion
|
<listitem><para>Languages for which to create stemming expansion
|
||||||
data. Stemmer names can be found by executing 'recollindex
|
data. Stemmer names can be found by executing 'recollindex
|
||||||
-l', or this can also be set from a list in the GUI. The values are full
|
-l', or this can also be set from a list in the GUI. The values are full
|
||||||
language names, e.g. english, french...
|
language names, e.g. english, french...</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.DEFAULTCHARSET">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.DEFAULTCHARSET">
|
||||||
<term><varname>defaultcharset</varname></term>
|
<term><varname>defaultcharset</varname></term>
|
||||||
<listitem><para>Default character
|
<listitem><para>Default character
|
||||||
@ -344,39 +295,37 @@ set, the default character set is the one defined by the NLS environment
|
|||||||
($LC_ALL, $LC_CTYPE, $LANG), or ultimately iso-8859-1 (cp-1252 in fact).
|
($LC_ALL, $LC_CTYPE, $LANG), or ultimately iso-8859-1 (cp-1252 in fact).
|
||||||
If for some reason you want a general default which does not match your
|
If for some reason you want a general default which does not match your
|
||||||
LANG and is not 8859-1, use this variable. This can be redefined for any
|
LANG and is not 8859-1, use this variable. This can be redefined for any
|
||||||
sub-directory.
|
sub-directory.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.UNAC_EXCEPT_TRANS">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.UNAC_EXCEPT_TRANS">
|
||||||
<term><varname>unac_except_trans</varname></term>
|
<term><varname>unac_except_trans</varname></term>
|
||||||
<listitem><para>A list of characters, encoded in UTF-8, which should be handled specially when converting
|
<listitem><para>A list of characters,
|
||||||
text to unaccented lowercase. For example, in Swedish, the letter a with diaeresis has full alphabet citizenship and
|
encoded in UTF-8, which should be handled specially
|
||||||
should not be turned into an a. Each element in the space-separated list has the special
|
when converting text to unaccented lowercase. For
|
||||||
character as first element and the translation following. The handling of both the lowercase and
|
example, in Swedish, the letter a with diaeresis has full alphabet
|
||||||
upper-case versions of a character should be specified, as appartenance to the list will turn-off
|
citizenship and should not be turned into an a.
|
||||||
both standard accent and case processing. The value is global and affects both indexing and
|
Each element in the space-separated list has the special character as
|
||||||
querying. We also convert a few confusing Unicode characters (quotes, hyphen) to their ASCII
|
first element and the translation following. The handling of both the
|
||||||
equivalent to avoid "invisible" search failures.
|
lowercase and upper-case versions of a character should be specified, as
|
||||||
</para><para>
|
appartenance to the list will turn-off both standard accent and case
|
||||||
|
processing. The value is global and affects both indexing and querying.
|
||||||
Examples:
|
Examples:
|
||||||
Swedish:
|
Swedish:
|
||||||
unac_except_trans = ää Ää öö Öö üü Üü ßss œoe Œoe æae Æae ffff fifi flfl åå Åå ’' ❜' ʼ' ‐-
|
unac_except_trans = ää Ää öö Öö üü Üü ßss œoe Œoe æae Æae ffff fifi flfl åå Åå
|
||||||
. German:
|
. German:
|
||||||
unac_except_trans = ää Ää öö Öö üü Üü ßss œoe Œoe æae Æae ffff fifi flfl ’' ❜' ʼ' ‐-
|
unac_except_trans = ää Ää öö Öö üü Üü ßss œoe Œoe æae Æae ffff fifi flfl
|
||||||
. French: you probably want to decompose oe and ae and nobody would type
|
In French, you probably want to decompose oe and ae and nobody would type
|
||||||
a German ß
|
a German ß
|
||||||
unac_except_trans = ßss œoe Œoe æae Æae ffff fifi flfl ’' ❜' ʼ' ‐-
|
unac_except_trans = ßss œoe Œoe æae Æae ffff fifi flfl
|
||||||
. The default for all until someone protests follows. These decompositions
|
. The default for all until someone protests follows. These decompositions
|
||||||
are not performed by unac, but it is unlikely that someone would type the
|
are not performed by unac, but it is unlikely that someone would type the
|
||||||
composed forms in a search.
|
composed forms in a search.
|
||||||
unac_except_trans = ßss œoe Œoe æae Æae ffff fifi flfl ’' ❜' ʼ' ‐-
|
unac_except_trans = ßss œoe Œoe æae Æae ffff fifi flfl</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MAILDEFCHARSET">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MAILDEFCHARSET">
|
||||||
<term><varname>maildefcharset</varname></term>
|
<term><varname>maildefcharset</varname></term>
|
||||||
<listitem><para>Overrides the default
|
<listitem><para>Overrides the default
|
||||||
character set for email messages which don't specify
|
character set for email messages which don't specify
|
||||||
one. This is mainly useful for readpst (libpst) dumps,
|
one. This is mainly useful for readpst (libpst) dumps,
|
||||||
which are utf-8 but do not say so.
|
which are utf-8 but do not say so.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.LOCALFIELDS">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.LOCALFIELDS">
|
||||||
<term><varname>localfields</varname></term>
|
<term><varname>localfields</varname></term>
|
||||||
<listitem><para>Set fields on all files
|
<listitem><para>Set fields on all files
|
||||||
@ -384,8 +333,7 @@ which are utf-8 but do not say so.
|
|||||||
name = value ; attr1 = val1 ; [...]
|
name = value ; attr1 = val1 ; [...]
|
||||||
value is empty so this needs an initial semi-colon. This is useful, e.g.,
|
value is empty so this needs an initial semi-colon. This is useful, e.g.,
|
||||||
for setting the rclaptg field for application selection inside
|
for setting the rclaptg field for application selection inside
|
||||||
mimeview.
|
mimeview.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.TESTMODIFUSEMTIME">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.TESTMODIFUSEMTIME">
|
||||||
<term><varname>testmodifusemtime</varname></term>
|
<term><varname>testmodifusemtime</varname></term>
|
||||||
<listitem><para>Use mtime instead of
|
<listitem><para>Use mtime instead of
|
||||||
@ -407,12 +355,12 @@ undetected). Perform a full index reset after changing this.
|
|||||||
<term><varname>noxattrfields</varname></term>
|
<term><varname>noxattrfields</varname></term>
|
||||||
<listitem><para>Disable extended attributes
|
<listitem><para>Disable extended attributes
|
||||||
conversion to metadata fields. This probably needs to be
|
conversion to metadata fields. This probably needs to be
|
||||||
set if testmodifusemtime is set.
|
set if testmodifusemtime is set.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.METADATACMDS">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.METADATACMDS">
|
||||||
<term><varname>metadatacmds</varname></term>
|
<term><varname>metadatacmds</varname></term>
|
||||||
<listitem><para>Define commands to
|
<listitem><para>Define commands to
|
||||||
gather external metadata, e.g. tmsu tags. There can be several entries, separated by semi-colons, each defining
|
gather external metadata, e.g. tmsu tags.
|
||||||
|
There can be several entries, separated by semi-colons, each defining
|
||||||
which field name the data goes into and the command to use. Don't forget the
|
which field name the data goes into and the command to use. Don't forget the
|
||||||
initial semi-colon. All the field names must be different. You can use
|
initial semi-colon. All the field names must be different. You can use
|
||||||
aliases in the "field" file if necessary.
|
aliases in the "field" file if necessary.
|
||||||
@ -437,15 +385,13 @@ cachedir is ~/.cache/recoll, the default dbdir would be
|
|||||||
mboxcachedir, aspellDicDir, which can still be individually specified to
|
mboxcachedir, aspellDicDir, which can still be individually specified to
|
||||||
override cachedir. Note that if you have multiple configurations, each
|
override cachedir. Note that if you have multiple configurations, each
|
||||||
must have a different cachedir: there is no automatic computation of a
|
must have a different cachedir: there is no automatic computation of a
|
||||||
subpath under cachedir.
|
subpath under cachedir.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MAXFSOCCUPPC">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MAXFSOCCUPPC">
|
||||||
<term><varname>maxfsoccuppc</varname></term>
|
<term><varname>maxfsoccuppc</varname></term>
|
||||||
<listitem><para>Maximum file system occupation
|
<listitem><para>Maximum file system occupation
|
||||||
over which we stop indexing. The value is a percentage,
|
over which we stop indexing. The value is a percentage,
|
||||||
corresponding to what the "Capacity" df output column shows. The default
|
corresponding to what the "Capacity" df output column shows. The default
|
||||||
value is 0, meaning no checking.
|
value is 0, meaning no checking.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.DBDIR">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.DBDIR">
|
||||||
<term><varname>dbdir</varname></term>
|
<term><varname>dbdir</varname></term>
|
||||||
<listitem><para>Xapian database directory
|
<listitem><para>Xapian database directory
|
||||||
@ -453,43 +399,36 @@ location. This will be created on first indexing. If the
|
|||||||
value is not an absolute path, it will be interpreted as relative to
|
value is not an absolute path, it will be interpreted as relative to
|
||||||
cachedir if set, or the configuration directory (-c argument or
|
cachedir if set, or the configuration directory (-c argument or
|
||||||
$RECOLL_CONFDIR). If nothing is specified, the default is then
|
$RECOLL_CONFDIR). If nothing is specified, the default is then
|
||||||
~/.recoll/xapiandb/
|
~/.recoll/xapiandb/</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.IDXSTATUSFILE">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.IDXSTATUSFILE">
|
||||||
<term><varname>idxstatusfile</varname></term>
|
<term><varname>idxstatusfile</varname></term>
|
||||||
<listitem><para>Name of the scratch file where the indexer process updates its
|
<listitem><para>Name of the scratch file where the indexer process updates its
|
||||||
status. Default: idxstatus.txt inside the configuration
|
status. Default: idxstatus.txt inside the configuration
|
||||||
directory.
|
directory.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MBOXCACHEDIR">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MBOXCACHEDIR">
|
||||||
<term><varname>mboxcachedir</varname></term>
|
<term><varname>mboxcachedir</varname></term>
|
||||||
<listitem><para>Directory location for storing mbox message offsets cache
|
<listitem><para>Directory location for storing mbox message offsets cache
|
||||||
files. This is normally 'mboxcache' under cachedir if set,
|
files. This is normally 'mboxcache' under cachedir if set,
|
||||||
or else under the configuration directory, but it may be useful to share
|
or else under the configuration directory, but it may be useful to share
|
||||||
a directory between different configurations.
|
a directory between different configurations.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MBOXCACHEMINMBS">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MBOXCACHEMINMBS">
|
||||||
<term><varname>mboxcacheminmbs</varname></term>
|
<term><varname>mboxcacheminmbs</varname></term>
|
||||||
<listitem><para>Minimum mbox file size over which we cache the offsets. There is really no sense in caching offsets for small files. The
|
<listitem><para>Minimum mbox file size over which we cache the offsets. There is really no sense in caching offsets for small files. The
|
||||||
default is 5 MB.
|
default is 5 MB.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MBOXMAXMSGMBS">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MBOXMAXMSGMBS">
|
||||||
<term><varname>mboxmaxmsgmbs</varname></term>
|
<term><varname>mboxmaxmsgmbs</varname></term>
|
||||||
<listitem><para>Maximum mbox member message size in megabytes. Size over which we assume that the mbox format is bad or we
|
<listitem><para>Maximum mbox member message size in megabytes. Size over which we assume that the mbox format is bad or we
|
||||||
misinterpreted it, at which point we just stop processing the file.
|
misinterpreted it, at which point we just stop processing the file.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.WEBCACHEDIR">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.WEBCACHEDIR">
|
||||||
<term><varname>webcachedir</varname></term>
|
<term><varname>webcachedir</varname></term>
|
||||||
<listitem><para>Directory where we store the archived web pages. This is only used by the web history indexing code.
|
<listitem><para>Directory where we store the archived web pages. This is only used by the web history indexing code.
|
||||||
Default: cachedir/webcache if cachedir is set, else
|
Default: cachedir/webcache if cachedir is set, else
|
||||||
$RECOLL_CONFDIR/webcache
|
$RECOLL_CONFDIR/webcache</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.WEBCACHEMAXMBS">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.WEBCACHEMAXMBS">
|
||||||
<term><varname>webcachemaxmbs</varname></term>
|
<term><varname>webcachemaxmbs</varname></term>
|
||||||
<listitem><para>Maximum size in MB of the Web archive. This is only used by the web history indexing code.
|
<listitem><para>Maximum size in MB of the Web archive. This is only used by the web history indexing code.
|
||||||
Default: 40 MB.
|
Default: 40 MB.
|
||||||
Reducing the size will not physically truncate the file.
|
Reducing the size will not physically truncate the file.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.WEBQUEUEDIR">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.WEBQUEUEDIR">
|
||||||
<term><varname>webqueuedir</varname></term>
|
<term><varname>webqueuedir</varname></term>
|
||||||
<listitem><para>The path to the Web indexing queue. This used to be
|
<listitem><para>The path to the Web indexing queue. This used to be
|
||||||
@ -497,42 +436,29 @@ hard-coded in the old plugin as ~/.recollweb/ToIndex so there would be no
|
|||||||
need or possibility to change it, but the WebExtensions plugin now downloads
|
need or possibility to change it, but the WebExtensions plugin now downloads
|
||||||
the files to the user Downloads directory, and a script moves them to
|
the files to the user Downloads directory, and a script moves them to
|
||||||
webqueuedir. The script reads this value from the config so it has become
|
webqueuedir. The script reads this value from the config so it has become
|
||||||
possible to change it.
|
possible to change it.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.WEBDOWNLOADSDIR">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.WEBDOWNLOADSDIR">
|
||||||
<term><varname>webdownloadsdir</varname></term>
|
<term><varname>webdownloadsdir</varname></term>
|
||||||
<listitem><para>The path to the browser downloads directory. This is
|
<listitem><para>The path to the browser downloads directory. This is
|
||||||
where the new browser add-on extension has to create the files. They are
|
where the new browser add-on extension has to create the files. They are
|
||||||
then moved by a script to webqueuedir.
|
then moved by a script to webqueuedir.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.WEBCACHEKEEPINTERVAL">
|
|
||||||
<term><varname>webcachekeepinterval</varname></term>
|
|
||||||
<listitem><para>Page recycle interval. By default, only one instance of a URL is kept in the cache. This
|
|
||||||
can be changed by setting this to a value determining at what frequency
|
|
||||||
we keep multiple instances ('day', 'week', 'month',
|
|
||||||
'year'). Note that increasing the interval will not erase existing
|
|
||||||
entries.
|
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.ASPELLDICDIR">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.ASPELLDICDIR">
|
||||||
<term><varname>aspellDicDir</varname></term>
|
<term><varname>aspellDicDir</varname></term>
|
||||||
<listitem><para>Aspell dictionary storage directory location. The
|
<listitem><para>Aspell dictionary storage directory location. The
|
||||||
aspell dictionary (aspdict.(lang).rws) is normally stored in the
|
aspell dictionary (aspdict.(lang).rws) is normally stored in the
|
||||||
directory specified by cachedir if set, or under the configuration
|
directory specified by cachedir if set, or under the configuration
|
||||||
directory.
|
directory.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.FILTERSDIR">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.FILTERSDIR">
|
||||||
<term><varname>filtersdir</varname></term>
|
<term><varname>filtersdir</varname></term>
|
||||||
<listitem><para>Directory location for executable input handlers. If
|
<listitem><para>Directory location for executable input handlers. If
|
||||||
RECOLL_FILTERSDIR is set in the environment, we use it instead. Defaults
|
RECOLL_FILTERSDIR is set in the environment, we use it instead. Defaults
|
||||||
to $prefix/share/recoll/filters. Can be redefined for
|
to $prefix/share/recoll/filters. Can be redefined for
|
||||||
subdirectories.
|
subdirectories.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.ICONSDIR">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.ICONSDIR">
|
||||||
<term><varname>iconsdir</varname></term>
|
<term><varname>iconsdir</varname></term>
|
||||||
<listitem><para>Directory location for icons. The only reason to
|
<listitem><para>Directory location for icons. The only reason to
|
||||||
change this would be if you want to change the icons displayed in the
|
change this would be if you want to change the icons displayed in the
|
||||||
result list. Defaults to $prefix/share/recoll/images
|
result list. Defaults to $prefix/share/recoll/images</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
</variablelist></sect3>
|
</variablelist></sect3>
|
||||||
<sect3 id="RCL.INSTALL.CONFIG.RECOLLCONF.PERFS">
|
<sect3 id="RCL.INSTALL.CONFIG.RECOLLCONF.PERFS">
|
||||||
<title>Parameters affecting indexing performance and resource usage </title><variablelist>
|
<title>Parameters affecting indexing performance and resource usage </title><variablelist>
|
||||||
@ -550,24 +476,20 @@ value (from this file) is now 50 MB, and should be ok in many cases.
|
|||||||
You can set it as low as 10 to conserve memory, but if you are looking
|
You can set it as low as 10 to conserve memory, but if you are looking
|
||||||
for maximum speed, you may want to experiment with values between 20 and
|
for maximum speed, you may want to experiment with values between 20 and
|
||||||
200. In my experience, values beyond this are always counterproductive. If
|
200. In my experience, values beyond this are always counterproductive. If
|
||||||
you find otherwise, please drop me a note.
|
you find otherwise, please drop me a note.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.FILTERMAXSECONDS">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.FILTERMAXSECONDS">
|
||||||
<term><varname>filtermaxseconds</varname></term>
|
<term><varname>filtermaxseconds</varname></term>
|
||||||
<listitem><para>Maximum external filter execution time in
|
<listitem><para>Maximum external filter execution time in
|
||||||
seconds. Default 1200 (20 min). Set to 0 for no limit. This
|
seconds. Default 1200 (20 min). Set to 0 for no limit. This
|
||||||
is mainly to avoid infinite loops in postscript files
|
is mainly to avoid infinite loops in postscript files
|
||||||
(loop.ps)
|
(loop.ps)</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.FILTERMAXMBYTES">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.FILTERMAXMBYTES">
|
||||||
<term><varname>filtermaxmbytes</varname></term>
|
<term><varname>filtermaxmbytes</varname></term>
|
||||||
<listitem><para>Maximum virtual memory space for filter processes
|
<listitem><para>Maximum virtual memory space for filter processes
|
||||||
(setrlimit(RLIMIT_AS)), in megabytes. Note that this includes any mapped libs (there is no reliable
|
(setrlimit(RLIMIT_AS)), in megabytes. Note that this
|
||||||
Linux way to limit the data space only), so we need to be a bit generous
|
includes any mapped libs (there is no reliable Linux way to limit the
|
||||||
here. Anything over 2000 will be ignored on 32-bit machines. The
|
data space only), so we need to be a bit generous here. Anything over
|
||||||
previous default value of 2000 would prevent java pdftk from working when
|
2000 will be ignored on 32-bit machines.</para></listitem></varlistentry>
|
||||||
executed from Python rclpdf.py.
|
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.THRQSIZES">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.THRQSIZES">
|
||||||
<term><varname>thrQSizes</varname></term>
|
<term><varname>thrQSizes</varname></term>
|
||||||
<listitem><para>Stage input queues configuration. There are three
|
<listitem><para>Stage input queues configuration. There are three
|
||||||
@ -579,8 +501,7 @@ next stage. In practise, deep queues have not been shown to increase
|
|||||||
performance. Default: a value of 0 for the first queue tells Recoll to
|
performance. Default: a value of 0 for the first queue tells Recoll to
|
||||||
perform autoconfiguration based on the detected number of CPUs (no need
|
perform autoconfiguration based on the detected number of CPUs (no need
|
||||||
for the two other values in this case). Use thrQSizes = -1 -1 -1 to
|
for the two other values in this case). Use thrQSizes = -1 -1 -1 to
|
||||||
disable multithreading entirely.
|
disable multithreading entirely.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.THRTCOUNTS">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.THRTCOUNTS">
|
||||||
<term><varname>thrTCounts</varname></term>
|
<term><varname>thrTCounts</varname></term>
|
||||||
<listitem><para>Number of threads used for each indexing stage. The
|
<listitem><para>Number of threads used for each indexing stage. The
|
||||||
@ -590,8 +511,7 @@ in thrQSizes: if the first queue depth is 0, all counts are ignored
|
|||||||
(autoconfigured); if a value of -1 is used for a queue depth, the
|
(autoconfigured); if a value of -1 is used for a queue depth, the
|
||||||
corresponding thread count is ignored. It makes no sense to use a value
|
corresponding thread count is ignored. It makes no sense to use a value
|
||||||
other than 1 for the last stage because updating the Xapian index is
|
other than 1 for the last stage because updating the Xapian index is
|
||||||
necessarily single-threaded (and protected by a mutex).
|
necessarily single-threaded (and protected by a mutex).</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
</variablelist></sect3>
|
</variablelist></sect3>
|
||||||
<sect3 id="RCL.INSTALL.CONFIG.RECOLLCONF.MISC">
|
<sect3 id="RCL.INSTALL.CONFIG.RECOLLCONF.MISC">
|
||||||
<title>Miscellaneous parameters </title><variablelist>
|
<title>Miscellaneous parameters </title><variablelist>
|
||||||
@ -599,8 +519,7 @@ necessarily single-threaded (and protected by a mutex).
|
|||||||
<term><varname>loglevel</varname></term>
|
<term><varname>loglevel</varname></term>
|
||||||
<listitem><para>Log file verbosity 1-6. A value of 2 will print
|
<listitem><para>Log file verbosity 1-6. A value of 2 will print
|
||||||
only errors and warnings. 3 will print information like document updates,
|
only errors and warnings. 3 will print information like document updates,
|
||||||
4 is quite verbose and 6 very verbose.
|
4 is quite verbose and 6 very verbose.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.LOGFILENAME">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.LOGFILENAME">
|
||||||
<term><varname>logfilename</varname></term>
|
<term><varname>logfilename</varname></term>
|
||||||
<listitem><para>Log file destination. Use 'stderr' (default) to write to the
|
<listitem><para>Log file destination. Use 'stderr' (default) to write to the
|
||||||
@ -611,25 +530,16 @@ console. </para></listitem></varlistentry>
|
|||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.IDXLOGFILENAME">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.IDXLOGFILENAME">
|
||||||
<term><varname>idxlogfilename</varname></term>
|
<term><varname>idxlogfilename</varname></term>
|
||||||
<listitem><para>Override logfilename for the indexer. </para></listitem></varlistentry>
|
<listitem><para>Override logfilename for the indexer. </para></listitem></varlistentry>
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.HELPERLOGFILENAME">
|
|
||||||
<term><varname>helperlogfilename</varname></term>
|
|
||||||
<listitem><para>Destination file for external helpers standard error output. The external program error output is left alone by default,
|
|
||||||
e.g. going to the terminal when the recoll[index] program is executed
|
|
||||||
from the command line. Use /dev/null or a file inside a non-existent
|
|
||||||
directory to completely suppress the output.
|
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.DAEMLOGLEVEL">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.DAEMLOGLEVEL">
|
||||||
<term><varname>daemloglevel</varname></term>
|
<term><varname>daemloglevel</varname></term>
|
||||||
<listitem><para>Override loglevel for the indexer in real time
|
<listitem><para>Override loglevel for the indexer in real time
|
||||||
mode. The default is to use the idx... values if set, else
|
mode. The default is to use the idx... values if set, else
|
||||||
the log... values.
|
the log... values.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.DAEMLOGFILENAME">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.DAEMLOGFILENAME">
|
||||||
<term><varname>daemlogfilename</varname></term>
|
<term><varname>daemlogfilename</varname></term>
|
||||||
<listitem><para>Override logfilename for the indexer in real time
|
<listitem><para>Override logfilename for the indexer in real time
|
||||||
mode. The default is to use the idx... values if set, else
|
mode. The default is to use the idx... values if set, else
|
||||||
the log... values.
|
the log... values.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.PYLOGLEVEL">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.PYLOGLEVEL">
|
||||||
<term><varname>pyloglevel</varname></term>
|
<term><varname>pyloglevel</varname></term>
|
||||||
<listitem><para>Override loglevel for the python module. </para></listitem></varlistentry>
|
<listitem><para>Override loglevel for the python module. </para></listitem></varlistentry>
|
||||||
@ -642,8 +552,7 @@ the log... values.
|
|||||||
configuration directory inside the directory tree makes it possible to
|
configuration directory inside the directory tree makes it possible to
|
||||||
provide automatic query time path translations once the data set has
|
provide automatic query time path translations once the data set has
|
||||||
moved (for example, because it has been mounted on another
|
moved (for example, because it has been mounted on another
|
||||||
location).
|
location).</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.CURIDXCONFDIR">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.CURIDXCONFDIR">
|
||||||
<term><varname>curidxconfdir</varname></term>
|
<term><varname>curidxconfdir</varname></term>
|
||||||
<listitem><para>Current location of the configuration directory. Complement orgidxconfdir for movable datasets. This should be used
|
<listitem><para>Current location of the configuration directory. Complement orgidxconfdir for movable datasets. This should be used
|
||||||
@ -655,8 +564,7 @@ example if a dataset originally indexed as '/home/me/mydata/config' has
|
|||||||
been mounted to '/media/me/mydata', and the GUI is running from a copied
|
been mounted to '/media/me/mydata', and the GUI is running from a copied
|
||||||
configuration, orgidxconfdir would be '/home/me/mydata/config', and
|
configuration, orgidxconfdir would be '/home/me/mydata/config', and
|
||||||
curidxconfdir (as set in the copied configuration) would be
|
curidxconfdir (as set in the copied configuration) would be
|
||||||
'/media/me/mydata/config'.
|
'/media/me/mydata/config'.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.IDXRUNDIR">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.IDXRUNDIR">
|
||||||
<term><varname>idxrundir</varname></term>
|
<term><varname>idxrundir</varname></term>
|
||||||
<listitem><para>Indexing process current directory. The input
|
<listitem><para>Indexing process current directory. The input
|
||||||
@ -665,22 +573,17 @@ makes sense to have recollindex chdir to some temporary directory. If the
|
|||||||
value is empty, the current directory is not changed. If the
|
value is empty, the current directory is not changed. If the
|
||||||
value is (literal) tmp, we use the temporary directory as set by the
|
value is (literal) tmp, we use the temporary directory as set by the
|
||||||
environment (RECOLL_TMPDIR else TMPDIR else /tmp). If the value is an
|
environment (RECOLL_TMPDIR else TMPDIR else /tmp). If the value is an
|
||||||
absolute path to a directory, we go there.
|
absolute path to a directory, we go there.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.CHECKNEEDRETRYINDEXSCRIPT">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.CHECKNEEDRETRYINDEXSCRIPT">
|
||||||
<term><varname>checkneedretryindexscript</varname></term>
|
<term><varname>checkneedretryindexscript</varname></term>
|
||||||
<listitem><para>Script used to heuristically check if we need to retry indexing
|
<listitem><para>Script used to heuristically check if we need to retry indexing
|
||||||
files which previously failed. The default script checks
|
files which previously failed. The default script checks
|
||||||
the modified dates on /usr/bin and /usr/local/bin. A relative path will
|
the modified dates on /usr/bin and /usr/local/bin. A relative path will
|
||||||
be looked up in the filters dirs, then in the path. Use an absolute path
|
be looked up in the filters dirs, then in the path. Use an absolute path
|
||||||
to do otherwise.
|
to do otherwise.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.RECOLLHELPERPATH">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.RECOLLHELPERPATH">
|
||||||
<term><varname>recollhelperpath</varname></term>
|
<term><varname>recollhelperpath</varname></term>
|
||||||
<listitem><para>Additional places to search for helper executables. This is used, e.g., on Windows by the Python code, and on Mac OS by the bundled recoll.app
|
<listitem><para>Additional places to search for helper executables. This is only used on Windows for now.</para></listitem></varlistentry>
|
||||||
(because I could find no reliable way to tell launchd to set the PATH). The example below is for
|
|
||||||
Windows. Use ':' as the entry separator for Mac and Unix-like systems; ';' is for Windows only.
|
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.IDXABSMLEN">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.IDXABSMLEN">
|
||||||
<term><varname>idxabsmlen</varname></term>
|
<term><varname>idxabsmlen</varname></term>
|
||||||
<listitem><para>Length of abstracts we store while indexing. Recoll stores an abstract for each indexed file.
|
<listitem><para>Length of abstracts we store while indexing. Recoll stores an abstract for each indexed file.
|
||||||
@ -692,72 +595,57 @@ defines the size of the stored abstract. The default value is 250
|
|||||||
bytes. The search interface gives you the choice to display this stored
|
bytes. The search interface gives you the choice to display this stored
|
||||||
text or a synthetic abstract built by extracting text around the search
|
text or a synthetic abstract built by extracting text around the search
|
||||||
terms. If you always prefer the synthetic abstract, you can reduce this
|
terms. If you always prefer the synthetic abstract, you can reduce this
|
||||||
value and save a little space.
|
value and save a little space.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.IDXMETASTOREDLEN">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.IDXMETASTOREDLEN">
|
||||||
<term><varname>idxmetastoredlen</varname></term>
|
<term><varname>idxmetastoredlen</varname></term>
|
||||||
<listitem><para>Truncation length of stored metadata fields. This
|
<listitem><para>Truncation length of stored metadata fields. This
|
||||||
does not affect indexing (the whole field is processed anyway), just the
|
does not affect indexing (the whole field is processed anyway), just the
|
||||||
amount of data stored in the index for the purpose of displaying fields
|
amount of data stored in the index for the purpose of displaying fields
|
||||||
inside result lists or previews. The default value is 150 bytes which
|
inside result lists or previews. The default value is 150 bytes which
|
||||||
may be too low if you have custom fields.
|
may be too low if you have custom fields.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.IDXTEXTTRUNCATELEN">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.IDXTEXTTRUNCATELEN">
|
||||||
<term><varname>idxtexttruncatelen</varname></term>
|
<term><varname>idxtexttruncatelen</varname></term>
|
||||||
<listitem><para>Truncation length for all document texts. Only index
|
<listitem><para>Truncation length for all document texts. Only index
|
||||||
the beginning of documents. This is not recommended except if you are
|
the beginning of documents. This is not recommended except if you are
|
||||||
sure that the interesting keywords are at the top and have severe disk
|
sure that the interesting keywords are at the top and have severe disk
|
||||||
space issues.
|
space issues.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.IDXSYNONYMS">
|
|
||||||
<term><varname>idxsynonyms</varname></term>
|
|
||||||
<listitem><para>Name of the index-time synonyms file. This is used for indexing multiword synonyms as single terms,
|
|
||||||
which in turn is only useful if you want to perform proximity searches
|
|
||||||
with such terms.
|
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.ASPELLLANGUAGE">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.ASPELLLANGUAGE">
|
||||||
<term><varname>aspellLanguage</varname></term>
|
<term><varname>aspellLanguage</varname></term>
|
||||||
<listitem><para>Language definitions to use when creating the aspell
|
<listitem><para>Language definitions to use when creating the aspell
|
||||||
dictionary. The value must match a set of aspell language
|
dictionary. The value must match a set of aspell language
|
||||||
definition files. You can type "aspell dicts" to see a list. The default
|
definition files. You can type "aspell dicts" to see a list. The default
|
||||||
if this is not set is to use the NLS environment to guess the value. The
|
if this is not set is to use the NLS environment to guess the value. The
|
||||||
values are the 2-letter language codes (e.g. 'en', 'fr'...)
|
values are the 2-letter language codes (e.g. 'en', 'fr'...)</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.ASPELLADDCREATEPARAM">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.ASPELLADDCREATEPARAM">
|
||||||
<term><varname>aspellAddCreateParam</varname></term>
|
<term><varname>aspellAddCreateParam</varname></term>
|
||||||
<listitem><para>Additional option and parameter to aspell dictionary creation
|
<listitem><para>Additional option and parameter to aspell dictionary creation
|
||||||
command. Some aspell packages may need an additional option
|
command. Some aspell packages may need an additional option
|
||||||
(e.g. on Debian Jessie: --local-data-dir=/usr/lib/aspell). See Debian bug
|
(e.g. on Debian Jessie: --local-data-dir=/usr/lib/aspell). See Debian bug
|
||||||
772415.
|
772415.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.ASPELLKEEPSTDERR">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.ASPELLKEEPSTDERR">
|
||||||
<term><varname>aspellKeepStderr</varname></term>
|
<term><varname>aspellKeepStderr</varname></term>
|
||||||
<listitem><para>Set this to have a look at aspell dictionary creation
|
<listitem><para>Set this to have a look at aspell dictionary creation
|
||||||
errors. There are always many, so this is mostly for
|
errors. There are always many, so this is mostly for
|
||||||
debugging.
|
debugging.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.NOASPELL">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.NOASPELL">
|
||||||
<term><varname>noaspell</varname></term>
|
<term><varname>noaspell</varname></term>
|
||||||
<listitem><para>Disable aspell use. The aspell dictionary generation
|
<listitem><para>Disable aspell use. The aspell dictionary generation
|
||||||
takes time, and some combinations of aspell version, language, and local
|
takes time, and some combinations of aspell version, language, and local
|
||||||
terms, result in aspell crashing, so it sometimes makes sense to just
|
terms, result in aspell crashing, so it sometimes makes sense to just
|
||||||
disable the thing.
|
disable the thing.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MONAUXINTERVAL">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MONAUXINTERVAL">
|
||||||
<term><varname>monauxinterval</varname></term>
|
<term><varname>monauxinterval</varname></term>
|
||||||
<listitem><para>Auxiliary database update interval. The real time
|
<listitem><para>Auxiliary database update interval. The real time
|
||||||
indexer only updates the auxiliary databases (stemdb, aspell)
|
indexer only updates the auxiliary databases (stemdb, aspell)
|
||||||
periodically, because it would be too costly to do it for every document
|
periodically, because it would be too costly to do it for every document
|
||||||
change. The default period is one hour.
|
change. The default period is one hour.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MONIXINTERVAL">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MONIXINTERVAL">
|
||||||
<term><varname>monixinterval</varname></term>
|
<term><varname>monixinterval</varname></term>
|
||||||
<listitem><para>Minimum interval (seconds) between processings of the indexing
|
<listitem><para>Minimum interval (seconds) between processings of the indexing
|
||||||
queue. The real time indexer does not process each event
|
queue. The real time indexer does not process each event
|
||||||
when it comes in, but lets the queue accumulate, to diminish overhead and
|
when it comes in, but lets the queue accumulate, to diminish overhead and
|
||||||
to aggregate multiple events affecting the same file. Default 30
|
to aggregate multiple events affecting the same file. Default 30
|
||||||
S.
|
S.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MONDELAYPATTERNS">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MONDELAYPATTERNS">
|
||||||
<term><varname>mondelaypatterns</varname></term>
|
<term><varname>mondelaypatterns</varname></term>
|
||||||
<listitem><para>Timing parameters for the real time indexing. Definitions for files which get a longer delay before reindexing
|
<listitem><para>Timing parameters for the real time indexing. Definitions for files which get a longer delay before reindexing
|
||||||
@ -766,25 +654,21 @@ reindexed once in a while. A list of wildcardPattern:seconds pairs. The
|
|||||||
patterns are matched with fnmatch(pattern, path, 0). You can quote entries
|
patterns are matched with fnmatch(pattern, path, 0). You can quote entries
|
||||||
containing white space with double quotes (quote the whole entry, not the
|
containing white space with double quotes (quote the whole entry, not the
|
||||||
pattern). The default is empty.
|
pattern). The default is empty.
|
||||||
Example: mondelaypatterns = *.log:20 "*with spaces.*:30"
|
Example: mondelaypatterns = *.log:20 "*with spaces.*:30"</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.IDXNICEPRIO">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.IDXNICEPRIO">
|
||||||
<term><varname>idxniceprio</varname></term>
|
<term><varname>idxniceprio</varname></term>
|
||||||
<listitem><para>"nice" process priority for the indexing processes. Default: 19
|
<listitem><para>"nice" process priority for the indexing processes. Default: 19
|
||||||
(lowest). Appeared with 1.26.5. Prior versions were fixed at 19.
|
(lowest). Appeared with 1.26.5. Prior versions were fixed at 19.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MONIONICECLASS">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MONIONICECLASS">
|
||||||
<term><varname>monioniceclass</varname></term>
|
<term><varname>monioniceclass</varname></term>
|
||||||
<listitem><para>ionice class for the indexing process. Despite the misleading name, and on platforms where this is
|
<listitem><para>ionice class for the indexing process. Despite the misleading name, and on platforms where this is
|
||||||
supported, this affects all indexing processes,
|
supported, this affects all indexing processes,
|
||||||
not only the real time/monitoring ones. The default value is 3 (use
|
not only the real time/monitoring ones. The default value is 3 (use
|
||||||
lowest "Idle" priority).
|
lowest "Idle" priority).</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MONIONICECLASSDATA">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MONIONICECLASSDATA">
|
||||||
<term><varname>monioniceclassdata</varname></term>
|
<term><varname>monioniceclassdata</varname></term>
|
||||||
<listitem><para>ionice class level parameter if the class supports it. The default is empty, as the default "Idle" class has no
|
<listitem><para>ionice class level parameter if the class supports it. The default is empty, as the default "Idle" class has no
|
||||||
levels.
|
levels.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
</variablelist></sect3>
|
</variablelist></sect3>
|
||||||
<sect3 id="RCL.INSTALL.CONFIG.RECOLLCONF.QUERY">
|
<sect3 id="RCL.INSTALL.CONFIG.RECOLLCONF.QUERY">
|
||||||
<title>Query-time parameters (no impact on the index) </title><variablelist>
|
<title>Query-time parameters (no impact on the index) </title><variablelist>
|
||||||
@ -793,8 +677,7 @@ levels.
|
|||||||
<listitem><para>auto-trigger diacritics sensitivity (raw index only). If the index is not stripped, decide if we automatically trigger
|
<listitem><para>auto-trigger diacritics sensitivity (raw index only). If the index is not stripped, decide if we automatically trigger
|
||||||
diacritics sensitivity if the search term has accented characters (not in
|
diacritics sensitivity if the search term has accented characters (not in
|
||||||
unac_except_trans). Else you need to use the query language and the "D"
|
unac_except_trans). Else you need to use the query language and the "D"
|
||||||
modifier to specify diacritics sensitivity. Default is no.
|
modifier to specify diacritics sensitivity. Default is no.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.AUTOCASESENS">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.AUTOCASESENS">
|
||||||
<term><varname>autocasesens</varname></term>
|
<term><varname>autocasesens</varname></term>
|
||||||
<listitem><para>auto-trigger case sensitivity (raw index only). If
|
<listitem><para>auto-trigger case sensitivity (raw index only). If
|
||||||
@ -802,46 +685,40 @@ the index is not stripped (see indexStripChars), decide if we
|
|||||||
automatically trigger character case sensitivity if the search term has
|
automatically trigger character case sensitivity if the search term has
|
||||||
upper-case characters in any but the first position. Else you need to use
|
upper-case characters in any but the first position. Else you need to use
|
||||||
the query language and the "C" modifier to specify character-case
|
the query language and the "C" modifier to specify character-case
|
||||||
sensitivity. Default is yes.
|
sensitivity. Default is yes.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MAXTERMEXPAND">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MAXTERMEXPAND">
|
||||||
<term><varname>maxTermExpand</varname></term>
|
<term><varname>maxTermExpand</varname></term>
|
||||||
<listitem><para>Maximum query expansion count
|
<listitem><para>Maximum query expansion count
|
||||||
for a single term (e.g.: when using wildcards). This only
|
for a single term (e.g.: when using wildcards). This only
|
||||||
affects queries, not indexing. We used to not limit this at all (except
|
affects queries, not indexing. We used to not limit this at all (except
|
||||||
for filenames where the limit was too low at 1000), but it is
|
for filenames where the limit was too low at 1000), but it is
|
||||||
unreasonable with a big index. Default 10000.
|
unreasonable with a big index. Default 10000.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MAXXAPIANCLAUSES">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MAXXAPIANCLAUSES">
|
||||||
<term><varname>maxXapianClauses</varname></term>
|
<term><varname>maxXapianClauses</varname></term>
|
||||||
<listitem><para>Maximum number of clauses
|
<listitem><para>Maximum number of clauses
|
||||||
we add to a single Xapian query. This only affects queries,
|
we add to a single Xapian query. This only affects queries,
|
||||||
not indexing. In some cases, the result of term expansion can be
|
not indexing. In some cases, the result of term expansion can be
|
||||||
multiplicative, and we want to avoid eating all the memory. Default
|
multiplicative, and we want to avoid eating all the memory. Default
|
||||||
50000.
|
50000.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.SNIPPETMAXPOSWALK">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.SNIPPETMAXPOSWALK">
|
||||||
<term><varname>snippetMaxPosWalk</varname></term>
|
<term><varname>snippetMaxPosWalk</varname></term>
|
||||||
<listitem><para>Maximum number of positions we walk while populating a snippet for
|
<listitem><para>Maximum number of positions we walk while populating a snippet for
|
||||||
the result list. The default of 1,000,000 may be
|
the result list. The default of 1,000,000 may be
|
||||||
insufficient for very big documents, the consequence would be snippets
|
insufficient for very big documents, the consequence would be snippets
|
||||||
with possibly meaning-altering missing words.
|
with possibly meaning-altering missing words.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
</variablelist></sect3>
|
</variablelist></sect3>
|
||||||
<sect3 id="RCL.INSTALL.CONFIG.RECOLLCONF.PDF">
|
<sect3 id="RCL.INSTALL.CONFIG.RECOLLCONF.PDF">
|
||||||
<title>Parameters for the PDF input script </title><variablelist>
|
<title>Parameters for the PDF input script </title><variablelist>
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.PDFOCR">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.PDFOCR">
|
||||||
<term><varname>pdfocr</varname></term>
|
<term><varname>pdfocr</varname></term>
|
||||||
<listitem><para>Attempt OCR of PDF files with no text content. This can be defined in subdirectories. The default is off because
|
<listitem><para>Attempt OCR of PDF files with no text content. This can be defined in subdirectories. The default is off because
|
||||||
OCR is so very slow.
|
OCR is so very slow.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.PDFATTACH">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.PDFATTACH">
|
||||||
<term><varname>pdfattach</varname></term>
|
<term><varname>pdfattach</varname></term>
|
||||||
<listitem><para>Enable PDF attachment extraction by executing pdftk (if
|
<listitem><para>Enable PDF attachment extraction by executing pdftk (if
|
||||||
available). This is
|
available). This is
|
||||||
normally disabled, because it does slow down PDF indexing a bit even if
|
normally disabled, because it does slow down PDF indexing a bit even if
|
||||||
not one attachment is ever found.
|
not one attachment is ever found.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.PDFEXTRAMETA">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.PDFEXTRAMETA">
|
||||||
<term><varname>pdfextrameta</varname></term>
|
<term><varname>pdfextrameta</varname></term>
|
||||||
<listitem><para>Extract text from selected XMP metadata tags. This
|
<listitem><para>Extract text from selected XMP metadata tags. This
|
||||||
@ -849,8 +726,7 @@ is a space-separated list of qualified XMP tag names. Each element can also
|
|||||||
include a translation to a Recoll field name, separated by a '|'
|
include a translation to a Recoll field name, separated by a '|'
|
||||||
character. If the second element is absent, the tag name is used as the
|
character. If the second element is absent, the tag name is used as the
|
||||||
Recoll field name. You will also need to add specifications to the
|
Recoll field name. You will also need to add specifications to the
|
||||||
"fields" file to direct processing of the extracted data.
|
"fields" file to direct processing of the extracted data.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.PDFEXTRAMETAFIX">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.PDFEXTRAMETAFIX">
|
||||||
<term><varname>pdfextrametafix</varname></term>
|
<term><varname>pdfextrametafix</varname></term>
|
||||||
<listitem><para>Define name of XMP field editing script. This
|
<listitem><para>Define name of XMP field editing script. This
|
||||||
@ -859,8 +735,7 @@ values. The script should define a 'MetaFixer' class with a metafix()
|
|||||||
method which will be called with the qualified tag name and value of each
|
method which will be called with the qualified tag name and value of each
|
||||||
selected field, for editing or erasing. A new instance is created for
|
selected field, for editing or erasing. A new instance is created for
|
||||||
each document, so that the object can keep state for, e.g. eliminating
|
each document, so that the object can keep state for, e.g. eliminating
|
||||||
duplicate values.
|
duplicate values.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
</variablelist></sect3>
|
</variablelist></sect3>
|
||||||
<sect3 id="RCL.INSTALL.CONFIG.RECOLLCONF.OCR">
|
<sect3 id="RCL.INSTALL.CONFIG.RECOLLCONF.OCR">
|
||||||
<title>Parameters for OCR processing </title><variablelist>
|
<title>Parameters for OCR processing </title><variablelist>
|
||||||
@ -872,20 +747,17 @@ the input file. Modules for tesseract (tesseract) and ABBYY FineReader
|
|||||||
(abbyy) are present in the standard distribution. For compatibility with
|
(abbyy) are present in the standard distribution. For compatibility with
|
||||||
the previous version, if this is not defined at all, the default value is
|
the previous version, if this is not defined at all, the default value is
|
||||||
"tesseract". Use an explicit empty value if needed. A value of "abbyy
|
"tesseract". Use an explicit empty value if needed. A value of "abbyy
|
||||||
tesseract" will try everything.
|
tesseract" will try everything.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.OCRCACHEDIR">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.OCRCACHEDIR">
|
||||||
<term><varname>ocrcachedir</varname></term>
|
<term><varname>ocrcachedir</varname></term>
|
||||||
<listitem><para>Location for caching OCR data. The default if this is empty or undefined is to store the cached
|
<listitem><para>Location for caching OCR data. The default if this is empty or undefined is to store the cached
|
||||||
OCR data under $RECOLL_CONFDIR/ocrcache.
|
OCR data under $RECOLL_CONFDIR/ocrcache.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.TESSERACTLANG">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.TESSERACTLANG">
|
||||||
<term><varname>tesseractlang</varname></term>
|
<term><varname>tesseractlang</varname></term>
|
||||||
<listitem><para>Language to assume for tesseract OCR. Important for improving the OCR accuracy. This can also be set
|
<listitem><para>Language to assume for tesseract OCR. Important for improving the OCR accuracy. This can also be set
|
||||||
through the contents of a file in
|
through the contents of a file in
|
||||||
the currently processed directory. See the rclocrtesseract.py
|
the currently processed directory. See the rclocrtesseract.py
|
||||||
script. Example values: eng, fra... See the tesseract documentation.
|
script. Example values: eng, fra... See the tesseract documentation.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.TESSERACTCMD">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.TESSERACTCMD">
|
||||||
<term><varname>tesseractcmd</varname></term>
|
<term><varname>tesseractcmd</varname></term>
|
||||||
<listitem><para>Path for the tesseract command. Do not quote. This is mostly useful on Windows, or for specifying a non-default
|
<listitem><para>Path for the tesseract command. Do not quote. This is mostly useful on Windows, or for specifying a non-default
|
||||||
@ -904,19 +776,11 @@ script. Typical values: English, French... See the ABBYY documentation.
|
|||||||
<listitem><para>Path for the abbyy command. The ABBYY directory is usually not in the path, so you should set this.
|
<listitem><para>Path for the abbyy command. The ABBYY directory is usually not in the path, so you should set this.
|
||||||
</para></listitem></varlistentry>
|
</para></listitem></varlistentry>
|
||||||
</variablelist></sect3>
|
</variablelist></sect3>
|
||||||
<sect3 id="RCL.INSTALL.CONFIG.RECOLLCONF.MISCHANDLERPARAMS">
|
|
||||||
<title>Parameters for specific handlers </title><variablelist>
|
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.ORGMODESUBDOCS">
|
|
||||||
<term><varname>orgmodesubdocs</varname></term>
|
|
||||||
<listitem><para>Index org-mode level 1 sections as separate sub-documents. This is the default. If set to false, org-mode files will be indexed as plain text.
|
|
||||||
</para></listitem></varlistentry>
|
|
||||||
</variablelist></sect3>
|
|
||||||
<sect3 id="RCL.INSTALL.CONFIG.RECOLLCONF.SPECLOCATIONS">
|
<sect3 id="RCL.INSTALL.CONFIG.RECOLLCONF.SPECLOCATIONS">
|
||||||
<title>Parameters set for specific locations </title><variablelist>
|
<title>Parameters set for specific locations </title><variablelist>
|
||||||
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MHMBOXQUIRKS">
|
<varlistentry id="RCL.INSTALL.CONFIG.RECOLLCONF.MHMBOXQUIRKS">
|
||||||
<term><varname>mhmboxquirks</varname></term>
|
<term><varname>mhmboxquirks</varname></term>
|
||||||
<listitem><para>Enable thunderbird/mozilla-seamonkey mbox format quirks. Set this for the directory where the email mbox files are
|
<listitem><para>Enable thunderbird/mozilla-seamonkey mbox format quirks. Set this for the directory where the email mbox files are
|
||||||
stored.
|
stored.</para></listitem></varlistentry>
|
||||||
</para></listitem></varlistentry>
|
|
||||||
</variablelist></sect3>
|
</variablelist></sect3>
|
||||||
</sect2>
|
</sect2>
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -20,6 +20,8 @@
|
|||||||
# All data is binary. This is important for Python3
|
# All data is binary. This is important for Python3
|
||||||
# All parameter names are converted to and processed as str/unicode
|
# All parameter names are converted to and processed as str/unicode
|
||||||
|
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
import tempfile
|
import tempfile
|
||||||
@ -27,13 +29,25 @@ import shutil
|
|||||||
import getopt
|
import getopt
|
||||||
import traceback
|
import traceback
|
||||||
|
|
||||||
def makebytes(data):
|
PY3 = sys.version > '3'
|
||||||
if data is None:
|
|
||||||
return b""
|
if PY3:
|
||||||
if isinstance(data, bytes):
|
def makebytes(data):
|
||||||
return data
|
if data is None:
|
||||||
else:
|
return b""
|
||||||
return data.encode("UTF-8")
|
if isinstance(data, bytes):
|
||||||
|
return data
|
||||||
|
else:
|
||||||
|
return data.encode("UTF-8")
|
||||||
|
else:
|
||||||
|
def makebytes(data):
|
||||||
|
if data is None:
|
||||||
|
return ""
|
||||||
|
if isinstance(data, unicode):
|
||||||
|
return data.encode("UTF-8")
|
||||||
|
else:
|
||||||
|
return data
|
||||||
|
|
||||||
|
|
||||||
############################################
|
############################################
|
||||||
# CmdTalk implements the communication protocol with the master
|
# CmdTalk implements the communication protocol with the master
|
||||||
@ -102,7 +116,10 @@ class CmdTalk(object):
|
|||||||
# followed by data. The param name is returned as str/unicode, the data
|
# followed by data. The param name is returned as str/unicode, the data
|
||||||
# as bytes
|
# as bytes
|
||||||
def readparam(self):
|
def readparam(self):
|
||||||
inf = self.infile.buffer
|
if PY3:
|
||||||
|
inf = self.infile.buffer
|
||||||
|
else:
|
||||||
|
inf = self.infile
|
||||||
s = inf.readline()
|
s = inf.readline()
|
||||||
if s == b'':
|
if s == b'':
|
||||||
if self.exitfunc:
|
if self.exitfunc:
|
||||||
@ -126,7 +143,7 @@ class CmdTalk(object):
|
|||||||
(paramsize, len(paramdata)), 1, 1)
|
(paramsize, len(paramdata)), 1, 1)
|
||||||
else:
|
else:
|
||||||
paramdata = b''
|
paramdata = b''
|
||||||
if not self.nodecodeinput:
|
if PY3 and not self.nodecodeinput:
|
||||||
try:
|
try:
|
||||||
paramdata = paramdata.decode('utf-8')
|
paramdata = paramdata.decode('utf-8')
|
||||||
except Exception as ex:
|
except Exception as ex:
|
||||||
@ -137,11 +154,18 @@ class CmdTalk(object):
|
|||||||
# (paramname, paramsize, paramdata))
|
# (paramname, paramsize, paramdata))
|
||||||
return (paramname, paramdata)
|
return (paramname, paramdata)
|
||||||
|
|
||||||
def senditem(self, nm, data):
|
if PY3:
|
||||||
data = makebytes(data)
|
def senditem(self, nm, data):
|
||||||
l = len(data)
|
data = makebytes(data)
|
||||||
self.outfile.buffer.write(makebytes("%s: %d\n" % (nm, l)))
|
l = len(data)
|
||||||
self.breakwrite(self.outfile.buffer, data)
|
self.outfile.buffer.write(makebytes("%s: %d\n" % (nm, l)))
|
||||||
|
self.breakwrite(self.outfile.buffer, data)
|
||||||
|
else:
|
||||||
|
def senditem(self, nm, data):
|
||||||
|
data = makebytes(data)
|
||||||
|
l = len(data)
|
||||||
|
self.outfile.write(makebytes("%s: %d\n" % (nm, l)))
|
||||||
|
self.breakwrite(self.outfile, data)
|
||||||
|
|
||||||
# Send answer: document, ipath, possible eof.
|
# Send answer: document, ipath, possible eof.
|
||||||
def answer(self, outfields):
|
def answer(self, outfields):
|
||||||
@ -218,7 +242,7 @@ def main(proto, processor):
|
|||||||
params[args[2*i]] = args[2*i+1]
|
params[args[2*i]] = args[2*i+1]
|
||||||
res = processor.process(params)
|
res = processor.process(params)
|
||||||
|
|
||||||
ioout = sys.stdout.buffer
|
ioout = sys.stdout.buffer if PY3 else sys.stdout
|
||||||
|
|
||||||
for nm,value in res.items():
|
for nm,value in res.items():
|
||||||
#self.log("Senditem: [%s] -> [%s]" % (nm, value))
|
#self.log("Senditem: [%s] -> [%s]" % (nm, value))
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/python3
|
||||||
#################################
|
#################################
|
||||||
# Copyright (C) 2020 J.F.Dockes
|
# Copyright (C) 2020 J.F.Dockes
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
|||||||
@ -3,7 +3,7 @@
|
|||||||
# 7-Zip file filter for Recoll
|
# 7-Zip file filter for Recoll
|
||||||
|
|
||||||
# Thanks to Recoll user Martin Ziegler
|
# Thanks to Recoll user Martin Ziegler
|
||||||
# This is a modified version of rclzip.py, with some help from rcltar.py
|
# This is a modified version of rclzip, with some help from rcltar
|
||||||
#
|
#
|
||||||
# Normally using py7zr https://github.com/miurahr/py7zr
|
# Normally using py7zr https://github.com/miurahr/py7zr
|
||||||
#
|
#
|
||||||
@ -238,7 +238,7 @@ class AudioTagExtractor(RclBaseHandler):
|
|||||||
if tagname.startswith('APIC:'):
|
if tagname.startswith('APIC:'):
|
||||||
#self.em.rclog("mp3 img: %s" % mutf[tagname].mime)
|
#self.em.rclog("mp3 img: %s" % mutf[tagname].mime)
|
||||||
return 'jpg' if mutf[tagname].mime == 'image/jpeg' else 'png'
|
return 'jpg' if mutf[tagname].mime == 'image/jpeg' else 'png'
|
||||||
elif 'audio/flac' in mime:
|
elif 'audio/x-flac' in mime:
|
||||||
if mutf.pictures:
|
if mutf.pictures:
|
||||||
return 'jpg' if mutf.pictures[0].mime == 'image/jpeg' else 'png'
|
return 'jpg' if mutf.pictures[0].mime == 'image/jpeg' else 'png'
|
||||||
elif 'audio/mp4' in mime:
|
elif 'audio/mp4' in mime:
|
||||||
@ -351,11 +351,6 @@ class AudioTagExtractor(RclBaseHandler):
|
|||||||
# Metadata tags. The names vary depending on the file type. We
|
# Metadata tags. The names vary depending on the file type. We
|
||||||
# just have a big translation dictionary for all
|
# just have a big translation dictionary for all
|
||||||
for tag,val in mutf.items():
|
for tag,val in mutf.items():
|
||||||
#print(f"TAG {tag} VAL {val}", file=sys.stderr)
|
|
||||||
# Mutagen sends out COMM==eng= with tag COMM::eng We don't know what to do with the
|
|
||||||
# language (or possible other attributes), so get rid of it for now:
|
|
||||||
if tag.find("COMM::") == 0:
|
|
||||||
tag = "COMM"
|
|
||||||
if tag.find('TXXX:') == 0:
|
if tag.find('TXXX:') == 0:
|
||||||
tag = tag[5:].upper()
|
tag = tag[5:].upper()
|
||||||
elif tag.find('TXX:') == 0:
|
elif tag.find('TXX:') == 0:
|
||||||
@ -18,6 +18,8 @@
|
|||||||
# Base for extractor classes. With some common generic implementations
|
# Base for extractor classes. With some common generic implementations
|
||||||
# for the boilerplate functions.
|
# for the boilerplate functions.
|
||||||
|
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import rclexecm
|
import rclexecm
|
||||||
|
|||||||
@ -17,15 +17,9 @@
|
|||||||
# with retry set).
|
# with retry set).
|
||||||
#
|
#
|
||||||
|
|
||||||
# If $HOME does not exist, there is nothing we can do (happens, for example when run as upmpdcli)
|
|
||||||
if test ! -d "$HOME" ; then
|
|
||||||
exit 0
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Bin dirs to be tested:
|
# Bin dirs to be tested:
|
||||||
bindirs="/usr/bin /usr/local/bin $HOME/bin /opt/*/bin"
|
bindirs="/usr/bin /usr/local/bin $HOME/bin /opt/*/bin"
|
||||||
|
|
||||||
|
|
||||||
rfiledir=$HOME/.config/Recoll.org
|
rfiledir=$HOME/.config/Recoll.org
|
||||||
rfile=$rfiledir/needidxretrydate
|
rfile=$rfiledir/needidxretrydate
|
||||||
nrfile=$rfiledir/tneedidxretrydate
|
nrfile=$rfiledir/tneedidxretrydate
|
||||||
|
|||||||
@ -1,11 +1,12 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
# dia (http://live.gnome.org/Dia) file filter for recoll
|
# dia (http://live.gnome.org/Dia) file filter for recoll
|
||||||
# stefan.friedel@iwr.uni-heidelberg.de 2012
|
# stefan.friedel@iwr.uni-heidelberg.de 2012
|
||||||
#
|
#
|
||||||
# add the following to ~/.recoll/mimeconf into the [index] section:
|
# add the following to ~/.recoll/mimeconf into the [index] section:
|
||||||
# application/x-dia-diagram = execm rcldia.py;mimetype=text/plain;charset=utf-8
|
# application/x-dia-diagram = execm rcldia;mimetype=text/plain;charset=utf-8
|
||||||
# and into the [icons] section:
|
# and into the [icons] section:
|
||||||
# application/x-dia-diagram = drawing
|
# application/x-dia-diagram = drawing
|
||||||
# and finally under [categories]:
|
# and finally under [categories]:
|
||||||
@ -17,6 +17,8 @@
|
|||||||
|
|
||||||
# Recoll DJVU extractor
|
# Recoll DJVU extractor
|
||||||
|
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import re
|
import re
|
||||||
|
|||||||
@ -1,4 +1,5 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
import rclexecm
|
import rclexecm
|
||||||
import rclexec1
|
import rclexec1
|
||||||
|
|||||||
@ -1,5 +1,6 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/python3
|
||||||
"""Extract Html content from an EPUB file (.epub)"""
|
"""Extract Html content from an EPUB file (.epub)"""
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
rclepub_html_mtype = "text/html"
|
rclepub_html_mtype = "text/html"
|
||||||
|
|
||||||
@ -1,5 +1,6 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""Extract Html content from an EPUB file (.chm), concatenating all sections"""
|
"""Extract Html content from an EPUB file (.chm), concatenating all sections"""
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
@ -26,6 +26,8 @@
|
|||||||
# this would be to slow. So this helps implementing a permanent script
|
# this would be to slow. So this helps implementing a permanent script
|
||||||
# to repeatedly execute single commands.
|
# to repeatedly execute single commands.
|
||||||
|
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
import subprocess
|
import subprocess
|
||||||
import rclexecm
|
import rclexecm
|
||||||
from rclbasehandler import RclBaseHandler
|
from rclbasehandler import RclBaseHandler
|
||||||
|
|||||||
@ -20,6 +20,8 @@
|
|||||||
# All data is binary. This is important for Python3
|
# All data is binary. This is important for Python3
|
||||||
# All parameter names are converted to and processed as str/unicode
|
# All parameter names are converted to and processed as str/unicode
|
||||||
|
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
import tempfile
|
import tempfile
|
||||||
@ -28,6 +30,7 @@ import getopt
|
|||||||
import rclconfig
|
import rclconfig
|
||||||
import cmdtalk
|
import cmdtalk
|
||||||
|
|
||||||
|
PY3 = (sys.version > '3')
|
||||||
_g_mswindows = (sys.platform == "win32")
|
_g_mswindows = (sys.platform == "win32")
|
||||||
_g_execdir = os.path.dirname(sys.argv[0])
|
_g_execdir = os.path.dirname(sys.argv[0])
|
||||||
|
|
||||||
@ -59,11 +62,12 @@ def makebytes(data):
|
|||||||
# Possibly decode binary file name for use as subprocess argument,
|
# Possibly decode binary file name for use as subprocess argument,
|
||||||
# depending on platform.
|
# depending on platform.
|
||||||
def subprocfile(fn):
|
def subprocfile(fn):
|
||||||
# On Windows Python 3 the list2cmdline() method in subprocess assumes that all args are str, and
|
# On Windows PY3 the list2cmdline() method in subprocess assumes that
|
||||||
# we receive file names as UTF-8. So we need to convert.
|
# all args are str, and we receive file names as UTF-8. So we need
|
||||||
# On Unix all list elements get converted to bytes in the C _posixsubprocess module, nothing to
|
# to convert.
|
||||||
# do.
|
# On Unix all list elements get converted to bytes in the C
|
||||||
if _g_mswindows and type(fn) != type(''):
|
# _posixsubprocess module, nothing to do.
|
||||||
|
if PY3 and _g_mswindows and type(fn) != type(''):
|
||||||
return fn.decode('UTF-8')
|
return fn.decode('UTF-8')
|
||||||
else:
|
else:
|
||||||
return fn
|
return fn
|
||||||
@ -261,29 +265,18 @@ def execPythonScript(icmd):
|
|||||||
|
|
||||||
# Temp dir helper
|
# Temp dir helper
|
||||||
class SafeTmpDir:
|
class SafeTmpDir:
|
||||||
def __init__(self, tag, em=None):
|
def __init__(self, em):
|
||||||
self.tag = tag
|
|
||||||
self.em = em
|
self.em = em
|
||||||
self.toptmp = None
|
self.toptmp = ""
|
||||||
self.tmpdir = None
|
self.tmpdir = ""
|
||||||
|
|
||||||
def __del__(self):
|
def __del__(self):
|
||||||
if self.toptmp:
|
try:
|
||||||
try:
|
if self.toptmp:
|
||||||
if self.tmpdir:
|
shutil.rmtree(self.tmpdir, True)
|
||||||
shutil.rmtree(self.tmpdir, True)
|
|
||||||
os.rmdir(self.toptmp)
|
os.rmdir(self.toptmp)
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
if self.em:
|
self.em.rclog("delete dir failed for " + self.toptmp)
|
||||||
self.em.rclog("delete dir failed for " + self.toptmp)
|
|
||||||
|
|
||||||
def vacuumdir(self):
|
|
||||||
if self.tmpdir:
|
|
||||||
for fn in os.listdir(self.tmpdir):
|
|
||||||
path = os.path.join(self.tmpdir, fn)
|
|
||||||
if os.path.isfile(path):
|
|
||||||
os.unlink(path)
|
|
||||||
return True
|
|
||||||
|
|
||||||
def getpath(self):
|
def getpath(self):
|
||||||
if not self.tmpdir:
|
if not self.tmpdir:
|
||||||
@ -293,7 +286,7 @@ class SafeTmpDir:
|
|||||||
else:
|
else:
|
||||||
self.toptmp = tempfile.mkdtemp(prefix='rcltmp')
|
self.toptmp = tempfile.mkdtemp(prefix='rcltmp')
|
||||||
|
|
||||||
self.tmpdir = os.path.join(self.toptmp, self.tag)
|
self.tmpdir = os.path.join(self.toptmp, 'rclsofftmp')
|
||||||
os.makedirs(self.tmpdir)
|
os.makedirs(self.tmpdir)
|
||||||
|
|
||||||
return self.tmpdir
|
return self.tmpdir
|
||||||
@ -312,7 +305,8 @@ def main(proto, extract):
|
|||||||
# Not running the main loop: either acting as single filter (when called
|
# Not running the main loop: either acting as single filter (when called
|
||||||
# from other filter for example), or debugging
|
# from other filter for example), or debugging
|
||||||
def usage():
|
def usage():
|
||||||
print("Usage: rclexecm.py [-d] [-f] [-h] [-i ipath] [-s] <filename>", file=sys.stderr)
|
print("Usage: rclexecm.py [-d] [-s] [-i ipath] <filename>",
|
||||||
|
file=sys.stderr)
|
||||||
print(" rclexecm.py -w <prog>", file=sys.stderr)
|
print(" rclexecm.py -w <prog>", file=sys.stderr)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
@ -367,7 +361,7 @@ def main(proto, extract):
|
|||||||
|
|
||||||
params = {'filename' : makebytes(path)}
|
params = {'filename' : makebytes(path)}
|
||||||
|
|
||||||
# Some filters (e.g. rclaudio.py) need/get a MIME type from the indexer.
|
# Some filters (e.g. rclaudio) need/get a MIME type from the indexer.
|
||||||
# We make a half-assed attempt to emulate:
|
# We make a half-assed attempt to emulate:
|
||||||
mimetype = _g_config.mimeType(path)
|
mimetype = _g_config.mimeType(path)
|
||||||
if not mimetype and not _g_mswindows:
|
if not mimetype and not _g_mswindows:
|
||||||
@ -379,7 +373,10 @@ def main(proto, extract):
|
|||||||
print("Open error", file=sys.stderr)
|
print("Open error", file=sys.stderr)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
ioout = sys.stdout.buffer
|
if PY3:
|
||||||
|
ioout = sys.stdout.buffer
|
||||||
|
else:
|
||||||
|
ioout = sys.stdout
|
||||||
if ipath != b"" or actAsSingle:
|
if ipath != b"" or actAsSingle:
|
||||||
params['ipath'] = ipath
|
params['ipath'] = ipath
|
||||||
ok, data, ipath, eof = extract.getipath(params)
|
ok, data, ipath, eof = extract.getipath(params)
|
||||||
|
|||||||
@ -16,6 +16,8 @@
|
|||||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
######################################
|
######################################
|
||||||
|
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import rclexecm
|
import rclexecm
|
||||||
import rclxslt
|
import rclxslt
|
||||||
|
|||||||
@ -18,6 +18,8 @@
|
|||||||
|
|
||||||
# Base class for simple (one stylesheet) xslt-based handlers
|
# Base class for simple (one stylesheet) xslt-based handlers
|
||||||
|
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import rclxslt
|
import rclxslt
|
||||||
import gzip
|
import gzip
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/python3
|
||||||
# Copyright (C) 2020 J.F.Dockes
|
# Copyright (C) 2020 J.F.Dockes
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License as published by
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
|||||||
@ -1,4 +1,5 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
# Read an ICS file, break it into "documents" which are events, todos,
|
# Read an ICS file, break it into "documents" which are events, todos,
|
||||||
# or journal entries, and interface with recoll execm
|
# or journal entries, and interface with recoll execm
|
||||||
@ -6,6 +6,7 @@
|
|||||||
#
|
#
|
||||||
# Uses pyexiv2. Also tried Pillow, found it useless for tags.
|
# Uses pyexiv2. Also tried Pillow, found it useless for tags.
|
||||||
#
|
#
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
|
|||||||
@ -3,6 +3,8 @@
|
|||||||
# Read a file in GNU info format and output its nodes as subdocs,
|
# Read a file in GNU info format and output its nodes as subdocs,
|
||||||
# interfacing with recoll execm
|
# interfacing with recoll execm
|
||||||
|
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
import rclexecm
|
import rclexecm
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
@ -139,7 +141,7 @@ class InfoSimpleSplitter:
|
|||||||
if name == b'File':
|
if name == b'File':
|
||||||
infofile = value
|
infofile = value
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
print("rclinfo.py: bad line in %s: [%s] %s\n" % \
|
print("rclinfo: bad line in %s: [%s] %s\n" % \
|
||||||
(infofile, line, err), file = sys.stderr)
|
(infofile, line, err), file = sys.stderr)
|
||||||
nodename = prevnodename
|
nodename = prevnodename
|
||||||
node += line
|
node += line
|
||||||
@ -1,59 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
# Copyright (C) 2021 J.F.Dockes
|
|
||||||
# This program is free software; you can redistribute it and/or modify
|
|
||||||
# it under the terms of the GNU General Public License as published by
|
|
||||||
# the Free Software Foundation; either version 2 of the License, or
|
|
||||||
# (at your option) any later version.
|
|
||||||
#
|
|
||||||
# This program is distributed in the hope that it will be useful,
|
|
||||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
# GNU General Public License for more details.
|
|
||||||
#
|
|
||||||
# You should have received a copy of the GNU General Public License
|
|
||||||
# along with this program; if not, write to the
|
|
||||||
# Free Software Foundation, Inc.,
|
|
||||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
||||||
|
|
||||||
# Recoll handler for iPython / Jupyter notebook files.
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import json
|
|
||||||
|
|
||||||
import rclexecm
|
|
||||||
from rclbasehandler import RclBaseHandler
|
|
||||||
|
|
||||||
class IPYNBextractor(RclBaseHandler):
|
|
||||||
|
|
||||||
def __init__(self, em):
|
|
||||||
super(IPYNBextractor, self).__init__(em)
|
|
||||||
|
|
||||||
def html_text(self, fn):
|
|
||||||
text = open(fn, 'rb').read()
|
|
||||||
data = json.loads(text)
|
|
||||||
mdtext = ""
|
|
||||||
if "worksheets" in data:
|
|
||||||
cells = data["worksheets"][0]["cells"]
|
|
||||||
else:
|
|
||||||
cells = data["cells"]
|
|
||||||
for cell in cells:
|
|
||||||
if cell["cell_type"] == "markdown":
|
|
||||||
mdtext += "\n"
|
|
||||||
for line in cell["source"]:
|
|
||||||
mdtext += "# " + line + "\n"
|
|
||||||
elif cell["cell_type"] == "code":
|
|
||||||
mdtext += "\n\n"
|
|
||||||
key = "source" if "source" in cell else "input"
|
|
||||||
for line in cell[key]:
|
|
||||||
mdtext += line
|
|
||||||
mdtext += "\n"
|
|
||||||
#print("%s"%mdtext, file=sys.stderr)
|
|
||||||
self.outputmimetype = 'text/plain'
|
|
||||||
return mdtext
|
|
||||||
|
|
||||||
|
|
||||||
# Main program: create protocol handler and extractor and run them
|
|
||||||
proto = rclexecm.RclExecM()
|
|
||||||
extract = IPYNBextractor(proto)
|
|
||||||
rclexecm.main(proto, extract)
|
|
||||||
@ -1,6 +1,8 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
# Read a .kar midi karaoke file and translate to recoll indexable format
|
# Read a .kar midi karaoke file and translate to recoll indexable format
|
||||||
|
# This does not work with Python3 yet because python:midi doesn't
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
import rclexecm
|
import rclexecm
|
||||||
import sys
|
import sys
|
||||||
@ -44,7 +46,11 @@ htmltemplate = '''
|
|||||||
|
|
||||||
nlbytes = b'\n'
|
nlbytes = b'\n'
|
||||||
bsbytes = b'\\'
|
bsbytes = b'\\'
|
||||||
nullchar = 0
|
PY3 = sys.version > '3'
|
||||||
|
if PY3:
|
||||||
|
nullchar = 0
|
||||||
|
else:
|
||||||
|
nullchar = chr(0)
|
||||||
|
|
||||||
class KarTextExtractor(RclBaseHandler):
|
class KarTextExtractor(RclBaseHandler):
|
||||||
# Afaik, the only charset encodings with null bytes are variations on
|
# Afaik, the only charset encodings with null bytes are variations on
|
||||||
@ -13,7 +13,12 @@ epsilon with dasia (in unicode but not iso). Can this be replaced by either epsi
|
|||||||
with acute accent ?
|
with acute accent ?
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
|
PY3 = sys.version > '3'
|
||||||
|
if not PY3:
|
||||||
|
import string
|
||||||
import glob
|
import glob
|
||||||
import os
|
import os
|
||||||
import os.path
|
import os.path
|
||||||
@ -33,7 +38,10 @@ class European8859TextClassifier:
|
|||||||
# Table to translate from punctuation to spaces
|
# Table to translate from punctuation to spaces
|
||||||
self.punct = b'''0123456789<>/*?[].@+-,#_$%&={};.,:!"''' + b"'\n\r"
|
self.punct = b'''0123456789<>/*?[].@+-,#_$%&={};.,:!"''' + b"'\n\r"
|
||||||
spaces = len(self.punct) * b' '
|
spaces = len(self.punct) * b' '
|
||||||
self.spacetable = bytes.maketrans(self.punct, spaces)
|
if PY3:
|
||||||
|
self.spacetable = bytes.maketrans(self.punct, spaces)
|
||||||
|
else:
|
||||||
|
self.spacetable = string.maketrans(self.punct, spaces)
|
||||||
|
|
||||||
def readlanguages(self, langzip):
|
def readlanguages(self, langzip):
|
||||||
"""Extract the stop words lists from the zip file.
|
"""Extract the stop words lists from the zip file.
|
||||||
|
|||||||
@ -23,15 +23,24 @@
|
|||||||
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
# SOFTWARE.
|
# SOFTWARE.
|
||||||
#
|
#
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
from struct import unpack, pack
|
from struct import unpack, pack
|
||||||
import six
|
import six
|
||||||
|
|
||||||
def next_byte_as_int(data):
|
PY3 = sys.version > '3'
|
||||||
return next(data)
|
|
||||||
def next_byte_as_char(data):
|
if PY3:
|
||||||
return bytes([next(data)])
|
def next_byte_as_int(data):
|
||||||
|
return next(data)
|
||||||
|
def next_byte_as_char(data):
|
||||||
|
return bytes([next(data)])
|
||||||
|
else:
|
||||||
|
def next_byte_as_int(data):
|
||||||
|
return ord(data.next())
|
||||||
|
def next_byte_as_char(data):
|
||||||
|
return next(data)
|
||||||
|
|
||||||
##
|
##
|
||||||
## Constants
|
## Constants
|
||||||
@ -261,8 +270,12 @@ class NoteEvent(Event):
|
|||||||
self.velocity)
|
self.velocity)
|
||||||
|
|
||||||
def decode_data(self):
|
def decode_data(self):
|
||||||
self.pitch = self.data[0]
|
if PY3:
|
||||||
self.velocity = self.data[1]
|
self.pitch = self.data[0]
|
||||||
|
self.velocity = self.data[1]
|
||||||
|
else:
|
||||||
|
self.pitch = ord(self.data[0])
|
||||||
|
self.velocity = ord(self.data[1])
|
||||||
|
|
||||||
|
|
||||||
class NoteOnEvent(NoteEvent):
|
class NoteOnEvent(NoteEvent):
|
||||||
@ -296,8 +309,12 @@ class ControlChangeEvent(Event):
|
|||||||
hex(ord(self.data[1])))
|
hex(ord(self.data[1])))
|
||||||
|
|
||||||
def decode_data(self):
|
def decode_data(self):
|
||||||
self.control = self.data[0]
|
if PY3:
|
||||||
self.value = self.data[1]
|
self.control = self.data[0]
|
||||||
|
self.value = self.data[1]
|
||||||
|
else:
|
||||||
|
self.control = ord(self.data[0])
|
||||||
|
self.value = ord(self.data[1])
|
||||||
|
|
||||||
|
|
||||||
class ProgramChangeEvent(Event):
|
class ProgramChangeEvent(Event):
|
||||||
@ -311,7 +328,10 @@ class ProgramChangeEvent(Event):
|
|||||||
hex(ord(self.data[0])))
|
hex(ord(self.data[0])))
|
||||||
|
|
||||||
def decode_data(self):
|
def decode_data(self):
|
||||||
self.value = self.data[0]
|
if PY3:
|
||||||
|
self.value = self.data[0]
|
||||||
|
else:
|
||||||
|
self.value = ord(self.data[0])
|
||||||
|
|
||||||
|
|
||||||
class ChannelAfterTouchEvent(Event):
|
class ChannelAfterTouchEvent(Event):
|
||||||
@ -336,8 +356,12 @@ class PitchWheelEvent(Event):
|
|||||||
hex(ord(self.data[1])))
|
hex(ord(self.data[1])))
|
||||||
|
|
||||||
def decode_data(self):
|
def decode_data(self):
|
||||||
first = self.data[0]
|
if PY3:
|
||||||
second = self.data[1]
|
first = self.data[0]
|
||||||
|
second = self.data[1]
|
||||||
|
else:
|
||||||
|
first = ord(self.data[0])
|
||||||
|
second = ord(self.data[1])
|
||||||
self.value = ((second << 7) | first) - 0x2000
|
self.value = ((second << 7) | first) - 0x2000
|
||||||
|
|
||||||
|
|
||||||
@ -437,7 +461,10 @@ class PortEvent(MetaEvent):
|
|||||||
|
|
||||||
def decode_data(self):
|
def decode_data(self):
|
||||||
assert(len(self.data) == 1)
|
assert(len(self.data) == 1)
|
||||||
self.port = self.data[0]
|
if PY3:
|
||||||
|
self.port = self.data[0]
|
||||||
|
else:
|
||||||
|
self.port = ord(self.data[0])
|
||||||
|
|
||||||
class TrackLoopEvent(MetaEvent):
|
class TrackLoopEvent(MetaEvent):
|
||||||
name = 'Track Loop'
|
name = 'Track Loop'
|
||||||
@ -471,7 +498,13 @@ class SetTempoEvent(MetaEvent):
|
|||||||
|
|
||||||
def decode_data(self):
|
def decode_data(self):
|
||||||
assert(len(self.data) == 3)
|
assert(len(self.data) == 3)
|
||||||
self.mpqn = (self.data[0] << 16) + (self.data[1] << 8) + self.data[2]
|
if PY3:
|
||||||
|
self.mpqn = (self.data[0] << 16) + (self.data[1] << 8) \
|
||||||
|
+ self.data[2]
|
||||||
|
else:
|
||||||
|
self.mpqn = (ord(self.data[0]) << 16) + (ord(self.data[1]) << 8) \
|
||||||
|
+ ord(self.data[2])
|
||||||
|
|
||||||
self.tempo = float(6e7) / self.mpqn
|
self.tempo = float(6e7) / self.mpqn
|
||||||
|
|
||||||
|
|
||||||
@ -490,13 +523,22 @@ class TimeSignatureEvent(MetaEvent):
|
|||||||
(super(TimeSignatureEvent, self).__str__(),
|
(super(TimeSignatureEvent, self).__str__(),
|
||||||
self.numerator, self.denominator,
|
self.numerator, self.denominator,
|
||||||
self.metronome, self.thirtyseconds)
|
self.metronome, self.thirtyseconds)
|
||||||
def decode_data(self):
|
if PY3:
|
||||||
assert(len(self.data) == 4)
|
def decode_data(self):
|
||||||
self.numerator = self.data[0]
|
assert(len(self.data) == 4)
|
||||||
# Weird: the denominator is two to the power of the data variable
|
self.numerator = self.data[0]
|
||||||
self.denominator = 2 ** self.data[1]
|
# Weird: the denominator is two to the power of the data variable
|
||||||
self.metronome = self.data[2]
|
self.denominator = 2 ** self.data[1]
|
||||||
self.thirtyseconds = self.data[3]
|
self.metronome = self.data[2]
|
||||||
|
self.thirtyseconds = self.data[3]
|
||||||
|
else:
|
||||||
|
def decode_data(self):
|
||||||
|
assert(len(self.data) == 4)
|
||||||
|
self.numerator = ord(self.data[0])
|
||||||
|
# Weird: the denominator is two to the power of the data variable
|
||||||
|
self.denominator = 2 ** ord(self.data[1])
|
||||||
|
self.metronome = ord(self.data[2])
|
||||||
|
self.thirtyseconds = ord(self.data[3])
|
||||||
|
|
||||||
|
|
||||||
class KeySignatureEvent(MetaEvent):
|
class KeySignatureEvent(MetaEvent):
|
||||||
|
|||||||
@ -26,8 +26,6 @@
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import atexit
|
|
||||||
import signal
|
|
||||||
import importlib.util
|
import importlib.util
|
||||||
|
|
||||||
import rclconfig
|
import rclconfig
|
||||||
@ -35,27 +33,7 @@ import rclocrcache
|
|||||||
import rclexecm
|
import rclexecm
|
||||||
|
|
||||||
def _deb(s):
|
def _deb(s):
|
||||||
rclexecm.logmsg("rclocr: %s" % s)
|
rclexecm.logmsg(s)
|
||||||
|
|
||||||
ocrcleanupmodule = None
|
|
||||||
@atexit.register
|
|
||||||
def finalcleanup():
|
|
||||||
if ocrcleanupmodule:
|
|
||||||
ocrcleanupmodule.cleanocr()
|
|
||||||
|
|
||||||
def signal_handler(sig, frame):
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
# Not all signals necessary exist on all systems, use catch
|
|
||||||
try: signal.signal(signal.SIGHUP, signal_handler)
|
|
||||||
except: pass
|
|
||||||
try: signal.signal(signal.SIGINT, signal_handler)
|
|
||||||
except: pass
|
|
||||||
try: signal.signal(signal.SIGQUIT, signal_handler)
|
|
||||||
except: pass
|
|
||||||
try: signal.signal(signal.SIGTERM, signal_handler)
|
|
||||||
except: pass
|
|
||||||
|
|
||||||
|
|
||||||
def Usage():
|
def Usage():
|
||||||
_deb("Usage: rclocr.py <imagefilename>")
|
_deb("Usage: rclocr.py <imagefilename>")
|
||||||
@ -94,7 +72,7 @@ if incache:
|
|||||||
try:
|
try:
|
||||||
breakwrite(sys.stdout.buffer, data)
|
breakwrite(sys.stdout.buffer, data)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
_deb("error writing: %s" % e)
|
_deb("RCLOCR error writing: %s" % e)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
@ -134,7 +112,6 @@ if not ok:
|
|||||||
|
|
||||||
# The OCR module will retrieve its specific parameters from the
|
# The OCR module will retrieve its specific parameters from the
|
||||||
# configuration
|
# configuration
|
||||||
ocrcleanupmodule = ocr
|
|
||||||
status, data = ocr.runocr(config, path)
|
status, data = ocr.runocr(config, path)
|
||||||
|
|
||||||
if not status:
|
if not status:
|
||||||
|
|||||||
@ -42,9 +42,6 @@ abbyocrdir = ""
|
|||||||
def _deb(s):
|
def _deb(s):
|
||||||
rclexecm.logmsg(s)
|
rclexecm.logmsg(s)
|
||||||
|
|
||||||
def cleanocr():
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Return true if abbyy appears to be available
|
# Return true if abbyy appears to be available
|
||||||
def ocrpossible(config, path):
|
def ocrpossible(config, path):
|
||||||
global abbyyocrcmd
|
global abbyyocrcmd
|
||||||
|
|||||||
@ -22,63 +22,37 @@
|
|||||||
# OCR is extremely slow, caching the results is necessary.
|
# OCR is extremely slow, caching the results is necessary.
|
||||||
#
|
#
|
||||||
# The cache stores 2 kinds of objects:
|
# The cache stores 2 kinds of objects:
|
||||||
# - Path files are named from the hash of the image file path and contain the
|
# - Path files are named from the hash of the image file path and
|
||||||
# image data hash, the modification time and size of the image file at the
|
# contain the image data hash, the modification time and size of the
|
||||||
# time the OCR'd data was stored in the cache, and the image path itself (the
|
# image file at the time the OCR'd data was stored in the cache, and
|
||||||
# last is for purging only).
|
# the image path itself (the last is for purging only).
|
||||||
# - Data files are named with the hash of the image data and contain the
|
# - Data files are named with the hash of the image data and contain
|
||||||
# zlib-compressed OCR'd data.
|
# the zlib-compressed OCR'd data.
|
||||||
# - The cache Path and Data files are stored under top subdirectories: objects/
|
|
||||||
# and paths/.
|
|
||||||
#
|
#
|
||||||
# When retrieving data from the cache:
|
# When retrieving data from the cache:
|
||||||
# - We first use the image file size and modification time: if an entry exists
|
# - We first use the image file size and modification time: if an
|
||||||
# for the imagepath/mtime/size triplet, and is up to date, the corresponding
|
# entry exists for the imagepath/mtime/size triplet, and is up to
|
||||||
# data is obtained from the data file and returned.
|
# date, the corresponding data is obtained from the data file and
|
||||||
# - Else we then use the image data: if an entry exists for the computed hashed
|
# returned.
|
||||||
# value of the data, it is returned. This allows moving files around without
|
# - Else we then use the image data: if an entry exists for the
|
||||||
# needing to run OCR again, but of course, it is more expensive than the
|
# computed hashed value of the data, it is returned. This allows
|
||||||
# first step
|
# moving files around without needing to run OCR again, but of
|
||||||
|
# course, it is more expensive than the first step
|
||||||
#
|
#
|
||||||
# In both cases, the paths are hashed with sha1, and the first two characters of
|
# If we need to use the second step, as a side effect, a path file is
|
||||||
# the hash are used as a top level directory, the rest as a file name. E.g. for:
|
# created or updated so that the data will be found with the first
|
||||||
# pd,pf = self._hashpath(path), the result would be stored under pd/pf
|
# step next time around.
|
||||||
#
|
#
|
||||||
# If we need to use the second step, as a side effect, a path file is created or
|
# Purging the cache of obsolete data.
|
||||||
# updated so that the data will be found with the first step next time around.
|
|
||||||
#
|
#
|
||||||
# When processing embedded documents like email attachments, recoll uses
|
# - The cache path and data files are stored under 2 different
|
||||||
# temporary copies in TMPDIR (which defaults to /tmp) or RECOLL_TMPDIR. Of
|
# directories (objects, paths) to make purging easier.
|
||||||
# course the paths for the temporary files changes when re-processing a given
|
# - Purging the paths tree just involves walking it, reading the
|
||||||
# document. We do not store the Path file for data stored in TMPDIR or
|
# files, and checking the existence of the recorded paths.
|
||||||
# RECOLL_TMPDIR, because doing so would cause an indefinite accumulation of
|
# - There is no easy way to purge the data tree. The only possibility
|
||||||
# unusable Path files. This means that access to the OCR data for these
|
# is to input a list of possible source files (e.g. result of a
|
||||||
# documents always causes the computation of the data hash, and is slower. With
|
# find in the image files area), and compute all the hashes. Data
|
||||||
# recent Recoll versions which cache the text content in the index, this only
|
# files which do not match one of the hashes are deleted.
|
||||||
# occurs when reindexing (with older versions, this could also occur for
|
|
||||||
# Preview).
|
|
||||||
#
|
|
||||||
# Purging the cache of obsolete data:
|
|
||||||
#
|
|
||||||
# This can be done by running this file as a top level script with a --purge
|
|
||||||
# option (possibly completed by a --purgedata option but see below)
|
|
||||||
# - Purging the paths tree just involves walking it, reading the files, and
|
|
||||||
# checking the existence of the recorded paths. Path files for non-existent
|
|
||||||
# files are deleted.
|
|
||||||
# - Purging the data tree: we make a list of all Data files referenced by at
|
|
||||||
# least one Path file, then walk the data tree, deleting unreferenced
|
|
||||||
# files. This means that Data files from temporary document copies (see
|
|
||||||
# above) will be deleted, which is quite unsatisfying. This would be
|
|
||||||
# difficult to change:
|
|
||||||
# - There is no way to detect the affected files because the Data files store
|
|
||||||
# no origin information
|
|
||||||
# - Even if we wanted to store an indication that the data file comes from a
|
|
||||||
# temporary document, we'd have no way to access the original document
|
|
||||||
# because the full ipath is not available. Changing this would be close to
|
|
||||||
# impossible because internfile...
|
|
||||||
# In consequence the --purgedata option must be explicitely added for a data
|
|
||||||
# purge to be performed. Only set it if re-OCRing all embedded documents is reasonable.
|
|
||||||
|
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
@ -87,18 +61,10 @@ import urllib.parse
|
|||||||
import zlib
|
import zlib
|
||||||
import glob
|
import glob
|
||||||
|
|
||||||
from rclexecm import logmsg as _deb
|
import rclexecm
|
||||||
|
|
||||||
def _catslash(p):
|
def _deb(s):
|
||||||
if p and p[-1] != "/":
|
rclexecm.logmsg(s)
|
||||||
p += "/"
|
|
||||||
return p
|
|
||||||
|
|
||||||
|
|
||||||
_tmpdir = os.environ["TMPDIR"] if "TMPDIR" in os.environ else "/tmp"
|
|
||||||
_tmpdir = _catslash(_tmpdir)
|
|
||||||
_recoll_tmpdir = os.environ["RECOLL_TMPDIR"] if "RECOLL_TMPDIR" in os.environ else None
|
|
||||||
_recoll_tmpdir = _catslash(_recoll_tmpdir)
|
|
||||||
|
|
||||||
|
|
||||||
class OCRCache(object):
|
class OCRCache(object):
|
||||||
@ -124,7 +90,7 @@ class OCRCache(object):
|
|||||||
|
|
||||||
# Compute sha1 of path data contents, as two parts of 2 and 38 chars
|
# Compute sha1 of path data contents, as two parts of 2 and 38 chars
|
||||||
def _hashdata(self, path):
|
def _hashdata(self, path):
|
||||||
# _deb("Hashing DATA")
|
#_deb("Hashing DATA")
|
||||||
m = hashlib.sha1()
|
m = hashlib.sha1()
|
||||||
with open(path, "rb") as f:
|
with open(path, "rb") as f:
|
||||||
while True:
|
while True:
|
||||||
@ -135,35 +101,35 @@ class OCRCache(object):
|
|||||||
h = m.hexdigest()
|
h = m.hexdigest()
|
||||||
return h[0:2], h[2:]
|
return h[0:2], h[2:]
|
||||||
|
|
||||||
|
|
||||||
def _readpathfile(self, ppf):
|
def _readpathfile(self, ppf):
|
||||||
'''Read path file and return values. We do not decode the image path
|
'''Read path file and return values. We do not decode the image path
|
||||||
as this is only used for purging'''
|
as this is only used for purging'''
|
||||||
with open(ppf, 'r') as f:
|
with open(ppf, 'r') as f:
|
||||||
line = f.read()
|
line = f.read()
|
||||||
dd, df, tm, sz, pth = line.split()
|
dd,df,tm,sz,pth = line.split()
|
||||||
tm = int(tm)
|
tm = int(tm)
|
||||||
sz = int(sz)
|
sz = int(sz)
|
||||||
return dd, df, tm, sz, pth
|
return dd,df,tm,sz,pth
|
||||||
|
|
||||||
# Try to read the stored attributes for a given path: data hash,
|
# Try to read the stored attributes for a given path: data hash,
|
||||||
# modification time and size. If this fails, the path itself is
|
# modification time and size. If this fails, the path itself is
|
||||||
# not cached (but the data still might be, maybe the file was moved)
|
# not cached (but the data still might be, maybe the file was moved)
|
||||||
def _cachedpathattrs(self, path):
|
def _cachedpathattrs(self, path):
|
||||||
pd, pf = self._hashpath(path)
|
pd,pf = self._hashpath(path)
|
||||||
pathfilepath = os.path.join(self.pathdir, pd, pf)
|
pathfilepath = os.path.join(self.pathdir, pd, pf)
|
||||||
if not os.path.exists(pathfilepath):
|
if not os.path.exists(pathfilepath):
|
||||||
return False, None, None, None, None
|
return False, None, None, None, None
|
||||||
try:
|
try:
|
||||||
dd, df, tm, sz, pth = self._readpathfile(pathfilepath)
|
dd, df, tm, sz, pth = self._readpathfile(pathfilepath)
|
||||||
return True, dd, df, tm, sz
|
return True, dd, df, tm, sz
|
||||||
except Exception as ex:
|
except:
|
||||||
_deb(f"Error while trying to access pathfile {pathfilepath}: {ex}")
|
|
||||||
return False, None, None, None, None
|
return False, None, None, None, None
|
||||||
|
|
||||||
# Compute the path hash, and get the mtime and size for given
|
# Compute the path hash, and get the mtime and size for given
|
||||||
# path, for updating the cache path file
|
# path, for updating the cache path file
|
||||||
def _newpathattrs(self, path):
|
def _newpathattrs(self, path):
|
||||||
pd, pf = self._hashpath(path)
|
pd,pf = self._hashpath(path)
|
||||||
tm = int(os.path.getmtime(path))
|
tm = int(os.path.getmtime(path))
|
||||||
sz = int(os.path.getsize(path))
|
sz = int(os.path.getsize(path))
|
||||||
return pd, pf, tm, sz
|
return pd, pf, tm, sz
|
||||||
@ -176,25 +142,31 @@ class OCRCache(object):
|
|||||||
if not ret:
|
if not ret:
|
||||||
return False, None, None
|
return False, None, None
|
||||||
pd, pf, ntm, nsz = self._newpathattrs(path)
|
pd, pf, ntm, nsz = self._newpathattrs(path)
|
||||||
# _deb(" tm %d sz %d" % (ntm, nsz))
|
#_deb(" tm %d sz %d" % (ntm, nsz))
|
||||||
# _deb("otm %d osz %d" % (otm, osz))
|
#_deb("otm %d osz %d" % (otm, osz))
|
||||||
if otm != ntm or osz != nsz:
|
if otm != ntm or osz != nsz:
|
||||||
return False, None, None
|
return False, None, None
|
||||||
return True, od, of
|
return True, od, of
|
||||||
|
|
||||||
|
# Check if cache appears up to date for path (no data check),
|
||||||
|
# return True/False
|
||||||
|
def pathincache(self, path):
|
||||||
|
ret, dd, df = self._pathincache(path)
|
||||||
|
return ret
|
||||||
|
|
||||||
# Compute the data file name for path. Expensive: we compute the data hash.
|
# Compute the data file name for path. Expensive: we compute the data hash.
|
||||||
# Return both the data file path and path elements (for storage in path file)
|
# Return both the data file path and path elements (for storage in path file)
|
||||||
def _datafilename(self, path):
|
def _datafilename(self, path):
|
||||||
d, f = self._hashdata(path)
|
d, f = self._hashdata(path)
|
||||||
return os.path.join(self.objdir, d, f), d, f
|
return os.path.join(self.objdir, d, f), d, f
|
||||||
|
|
||||||
|
# Check if the data for path is in cache: expensive, needs to
|
||||||
|
# compute the hash for the path's data contents. Returns True/False
|
||||||
|
def dataincache(self, path):
|
||||||
|
return os.path.exists(self._datafilename(path)[0])
|
||||||
|
|
||||||
# Create path file with given elements.
|
# Create path file with given elements.
|
||||||
def _updatepathfile(self, pd, pf, dd, df, tm, sz, path):
|
def _updatepathfile(self, pd, pf, dd, df, tm, sz, path):
|
||||||
global _tmpdir, _recoll_tmpdir
|
|
||||||
if (_tmpdir and path.startswith(_tmpdir)) or \
|
|
||||||
(_recoll_tmpdir and path.startswith(_recoll_tmpdir)):
|
|
||||||
_deb(f"ocrcache: not storing path data for temporary file {path}")
|
|
||||||
return
|
|
||||||
dir = os.path.join(self.pathdir, pd)
|
dir = os.path.join(self.pathdir, pd)
|
||||||
if not os.path.exists(dir):
|
if not os.path.exists(dir):
|
||||||
os.makedirs(dir)
|
os.makedirs(dir)
|
||||||
@ -206,7 +178,7 @@ class OCRCache(object):
|
|||||||
# Store data for path. Only rewrite an existing data file if told
|
# Store data for path. Only rewrite an existing data file if told
|
||||||
# to do so: this is only useful if we are forcing an OCR re-run.
|
# to do so: this is only useful if we are forcing an OCR re-run.
|
||||||
def store(self, path, datatostore, force=False):
|
def store(self, path, datatostore, force=False):
|
||||||
dd, df = self._hashdata(path)
|
dd,df = self._hashdata(path)
|
||||||
pd, pf, tm, sz = self._newpathattrs(path)
|
pd, pf, tm, sz = self._newpathattrs(path)
|
||||||
self._updatepathfile(pd, pf, dd, df, tm, sz, path)
|
self._updatepathfile(pd, pf, dd, df, tm, sz, path)
|
||||||
dir = os.path.join(self.objdir, dd)
|
dir = os.path.join(self.objdir, dd)
|
||||||
@ -214,7 +186,7 @@ class OCRCache(object):
|
|||||||
os.makedirs(dir)
|
os.makedirs(dir)
|
||||||
dfile = os.path.join(dir, df)
|
dfile = os.path.join(dir, df)
|
||||||
if force or not os.path.exists(dfile):
|
if force or not os.path.exists(dfile):
|
||||||
# _deb("Storing data")
|
#_deb("Storing data")
|
||||||
cpressed = zlib.compress(datatostore)
|
cpressed = zlib.compress(datatostore)
|
||||||
with open(dfile, "wb") as f:
|
with open(dfile, "wb") as f:
|
||||||
f.write(cpressed)
|
f.write(cpressed)
|
||||||
@ -231,12 +203,11 @@ class OCRCache(object):
|
|||||||
dfn, dd, df = self._datafilename(path)
|
dfn, dd, df = self._datafilename(path)
|
||||||
|
|
||||||
if not os.path.exists(dfn):
|
if not os.path.exists(dfn):
|
||||||
_deb(f"ocrcache: no existing OCR data file for {path}")
|
|
||||||
return False, b""
|
return False, b""
|
||||||
|
|
||||||
if not pincache:
|
if not pincache:
|
||||||
# File may have moved. Create/Update path file for next time
|
# File has moved. create/Update path file for next time
|
||||||
_deb(f"ocrcache::get: data ok but path file for {path} does not exist: creating it")
|
_deb("ocrcache::get file %s was moved, updating path data" % path)
|
||||||
pd, pf, tm, sz = self._newpathattrs(path)
|
pd, pf, tm, sz = self._newpathattrs(path)
|
||||||
self._updatepathfile(pd, pf, dd, df, tm, sz, path)
|
self._updatepathfile(pd, pf, dd, df, tm, sz, path)
|
||||||
|
|
||||||
@ -252,7 +223,7 @@ class OCRCache(object):
|
|||||||
ntm = int(os.path.getmtime(origpath))
|
ntm = int(os.path.getmtime(origpath))
|
||||||
nsz = int(os.path.getsize(origpath))
|
nsz = int(os.path.getsize(origpath))
|
||||||
if ntm != otm or nsz != osz:
|
if ntm != otm or nsz != osz:
|
||||||
# _deb("Purgepaths otm %d ntm %d osz %d nsz %d"%(otm, ntm, osz, nsz))
|
#_deb("Purgepaths otm %d ntm %d osz %d nsz %d"%(otm, ntm, osz, nsz))
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@ -280,15 +251,15 @@ class OCRCache(object):
|
|||||||
def _pgdt_pathcb(self, f):
|
def _pgdt_pathcb(self, f):
|
||||||
'''Get a pathfile name, read it, and record datafile identifier
|
'''Get a pathfile name, read it, and record datafile identifier
|
||||||
(concatenate data file subdir and file name)'''
|
(concatenate data file subdir and file name)'''
|
||||||
# _deb("_pgdt_pathcb: %s" % f)
|
#_deb("_pgdt_pathcb: %s" % f)
|
||||||
dd, df, tm, sz, orgpath = self._readpathfile(f)
|
dd, df, tm, sz, orgpath = self._readpathfile(f)
|
||||||
self._pgdt_alldatafns.add(dd+df)
|
self._pgdt_alldatafns.add(dd+df)
|
||||||
|
|
||||||
def _pgdt_datacb(self, datafn):
|
def _pgdt_datacb(self, datafn):
|
||||||
'''Get a datafile name and check that it is referenced by a previously
|
'''Get a datafile name and check that it is referenced by a previously
|
||||||
seen pathfile'''
|
seen pathfile'''
|
||||||
p1, fn = os.path.split(datafn)
|
p1,fn = os.path.split(datafn)
|
||||||
p2, dn = os.path.split(p1)
|
p2,dn = os.path.split(p1)
|
||||||
tst = dn+fn
|
tst = dn+fn
|
||||||
if tst in self._pgdt_alldatafns:
|
if tst in self._pgdt_alldatafns:
|
||||||
_deb("purgedata: ok : %s" % datafn)
|
_deb("purgedata: ok : %s" % datafn)
|
||||||
@ -311,59 +282,48 @@ class OCRCache(object):
|
|||||||
self._walk(self.objdir, self._pgdt_datacb)
|
self._walk(self.objdir, self._pgdt_datacb)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
import rclconfig
|
import rclconfig
|
||||||
import getopt
|
def _Usage():
|
||||||
|
_deb("Usage: rclocrcache.py --purge")
|
||||||
def Usage(f=sys.stderr):
|
|
||||||
print("Usage: rclocrcache.py --purge [--purgedata]", file=f)
|
|
||||||
print("Usage: rclocrcache.py --store <imgdatapath> <ocrdatapath>", file=f)
|
|
||||||
print("Usage: rclocrcache.py --get <imgdatapath>", file=f)
|
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
if len(sys.argv) != 2:
|
||||||
|
_Usage()
|
||||||
|
if sys.argv[1] != "--purge":
|
||||||
|
_Usage()
|
||||||
|
|
||||||
conf = rclconfig.RclConfig()
|
conf = rclconfig.RclConfig()
|
||||||
cache = OCRCache(conf)
|
cache = OCRCache(conf)
|
||||||
opts, args = getopt.getopt(sys.argv[1:], "h", ["help", "purge", "purgedata", "store", "get"])
|
cache.purgepaths()
|
||||||
purgedata = False
|
cache.purgedata()
|
||||||
purge = False
|
sys.exit(0)
|
||||||
|
|
||||||
for opt, arg in opts:
|
|
||||||
if opt in ['-h', '--help']:
|
|
||||||
Usage(sys.stdout)
|
|
||||||
elif opt in ['--purgedata']:
|
|
||||||
purgedata = True
|
|
||||||
elif opt in ['--purge']:
|
|
||||||
if len(args) != 0:
|
|
||||||
Usage()
|
|
||||||
purge = True
|
|
||||||
elif opt in ['--store']:
|
|
||||||
if len(args) != 2:
|
|
||||||
Usage()
|
|
||||||
imgdatapath = args[0]
|
|
||||||
ocrdatapath = args[1]
|
|
||||||
ocrdata = open(ocrdatapath, "rb").read()
|
|
||||||
cache.store(imgdatapath, ocrdata, force=False)
|
|
||||||
sys.exit(0)
|
|
||||||
elif opt in ['--get']:
|
|
||||||
if len(args) != 1:
|
|
||||||
Usage()
|
|
||||||
imgdatapath = args[0]
|
|
||||||
incache, data = cache.get(imgdatapath)
|
|
||||||
if incache:
|
|
||||||
print(f"OCR data from cache {data}")
|
|
||||||
sys.exit(0)
|
|
||||||
else:
|
|
||||||
print("OCR Data was not found in cache", file=sys.stderr)
|
|
||||||
sys.exit(1)
|
|
||||||
else:
|
|
||||||
print(f"Unknown option {opt}", file=sys.stderr)
|
|
||||||
Usage()
|
|
||||||
|
|
||||||
# End options. Need purging ?
|
|
||||||
if purge:
|
|
||||||
cache.purgepaths()
|
|
||||||
if purgedata:
|
|
||||||
cache.purgedata()
|
|
||||||
|
|
||||||
Usage()
|
|
||||||
|
|
||||||
|
# def trycache(p):
|
||||||
|
# _deb("== CACHE tests for %s"%p)
|
||||||
|
# ret = cache.pathincache(p)
|
||||||
|
# s = "" if ret else " not"
|
||||||
|
# _deb("path for %s%s in cache" % (p, s))
|
||||||
|
# if not ret:
|
||||||
|
# return False
|
||||||
|
# ret = cache.dataincache(p)
|
||||||
|
# s = "" if ret else " not"
|
||||||
|
# _deb("data for %s%s in cache" % (p, s))
|
||||||
|
# return ret
|
||||||
|
# def trystore(p):
|
||||||
|
# _deb("== STORE test for %s" % p)
|
||||||
|
# cache.store(p, b"my OCR'd text is one line\n", force=False)
|
||||||
|
# def tryget(p):
|
||||||
|
# _deb("== GET test for %s" % p)
|
||||||
|
# incache, data = cache.get(p)
|
||||||
|
# if incache:
|
||||||
|
# _deb("Data from cache [%s]" % data)
|
||||||
|
# else:
|
||||||
|
# _deb("Data was not found in cache")
|
||||||
|
# return incache, data
|
||||||
|
# if False:
|
||||||
|
# path = sys.argv[1]
|
||||||
|
# incache, data = tryget(path)
|
||||||
|
# if not incache:
|
||||||
|
# trystore(path)
|
||||||
|
#
|
||||||
|
|||||||
@ -21,6 +21,7 @@
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
import atexit
|
||||||
import tempfile
|
import tempfile
|
||||||
import subprocess
|
import subprocess
|
||||||
import glob
|
import glob
|
||||||
@ -37,28 +38,39 @@ _okexts = ('.tif', '.tiff', '.jpg', '.png', '.jpeg')
|
|||||||
|
|
||||||
tesseractcmd = None
|
tesseractcmd = None
|
||||||
pdftoppmcmd = None
|
pdftoppmcmd = None
|
||||||
pdftocairocmd = None
|
|
||||||
|
|
||||||
def _deb(s):
|
def _deb(s):
|
||||||
rclexecm.logmsg("rclocrtesseract: %s" % s)
|
rclexecm.logmsg(s)
|
||||||
|
|
||||||
|
|
||||||
|
def vacuumdir(dir):
|
||||||
|
if dir:
|
||||||
|
for fn in os.listdir(dir):
|
||||||
|
path = os.path.join(dir, fn)
|
||||||
|
if os.path.isfile(path):
|
||||||
|
os.unlink(path)
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
tmpdir = None
|
tmpdir = None
|
||||||
|
|
||||||
def _maybemaketmpdir():
|
def _maybemaketmpdir():
|
||||||
global tmpdir
|
global tmpdir
|
||||||
if tmpdir:
|
if tmpdir:
|
||||||
if not tmpdir.vacuumdir():
|
if not vacuumdir(tmpdir):
|
||||||
_deb("openfile: vacuumdir %s failed" % tmpdir.getpath())
|
_deb("openfile: vacuumdir %s failed" % tmpdir)
|
||||||
return False
|
return False
|
||||||
else:
|
else:
|
||||||
tmpdir = rclexecm.SafeTmpDir("rclocrtesseract")
|
tmpdir = tempfile.mkdtemp(prefix='rclmpdf')
|
||||||
|
|
||||||
|
|
||||||
def cleanocr():
|
def finalcleanup():
|
||||||
global tmpdir
|
|
||||||
if tmpdir:
|
if tmpdir:
|
||||||
del tmpdir
|
vacuumdir(tmpdir)
|
||||||
tmpdir = None
|
os.rmdir(tmpdir)
|
||||||
|
|
||||||
|
|
||||||
|
atexit.register(finalcleanup)
|
||||||
|
|
||||||
|
|
||||||
# Return true if tesseract and the appropriate conversion program for
|
# Return true if tesseract and the appropriate conversion program for
|
||||||
@ -95,16 +107,12 @@ def ocrpossible(config, path):
|
|||||||
# legacy code used pdftoppm for some reason, and it appears
|
# legacy code used pdftoppm for some reason, and it appears
|
||||||
# that the newest builds from conda-forge do not include
|
# that the newest builds from conda-forge do not include
|
||||||
# pdftocairo. So stay with pdftoppm.
|
# pdftocairo. So stay with pdftoppm.
|
||||||
global pdftoppmcmd, pdftocairocmd
|
global pdftoppmcmd
|
||||||
if not pdftoppmcmd and not pdftocairocmd:
|
if not pdftoppmcmd:
|
||||||
pdftocairocmd = rclexecm.which("pdftocairo")
|
pdftoppmcmd = rclexecm.which("pdftoppm")
|
||||||
if not pdftocairocmd:
|
if not pdftoppmcmd:
|
||||||
pdftocairocmd = rclexecm.which("poppler/pdftocairo")
|
pdftoppmcmd = rclexecm.which("poppler/pdftoppm")
|
||||||
if not pdftocairocmd:
|
if pdftoppmcmd:
|
||||||
pdftoppmcmd = rclexecm.which("pdftoppm")
|
|
||||||
if not pdftoppmcmd:
|
|
||||||
pdftoppmcmd = rclexecm.which("poppler/pdftoppm")
|
|
||||||
if pdftoppmcmd or pdftocairocmd:
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
return False
|
return False
|
||||||
@ -161,17 +169,14 @@ def _pdftesseract(config, path):
|
|||||||
|
|
||||||
tesseractlang = _guesstesseractlang(config, path)
|
tesseractlang = _guesstesseractlang(config, path)
|
||||||
|
|
||||||
#tesserrorfile = os.path.join(tmpdir.getpath(), "tesserrorfile")
|
#tesserrorfile = os.path.join(tmpdir, "tesserrorfile")
|
||||||
tmpfile = os.path.join(tmpdir.getpath(), "ocrXXXXXX")
|
tmpfile = os.path.join(tmpdir, "ocrXXXXXX")
|
||||||
|
|
||||||
# Split pdf pages
|
# Split pdf pages
|
||||||
try:
|
try:
|
||||||
tmpdir.vacuumdir()
|
vacuumdir(tmpdir)
|
||||||
if pdftocairocmd:
|
cmd = [pdftoppmcmd, "-r", "300", path, tmpfile]
|
||||||
cmd = [pdftocairocmd, "-tiff", "-tiffcompression", "lzw", "-r", "300", path, tmpfile]
|
#_deb("Executing %s" % cmd)
|
||||||
else:
|
|
||||||
cmd = [pdftoppmcmd, "-r", "300", path, tmpfile]
|
|
||||||
#_deb("Executing %s" % cmd)
|
|
||||||
subprocess.check_call(cmd)
|
subprocess.check_call(cmd)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
_deb("%s failed: %s" % (pdftoppmcmd,e))
|
_deb("%s failed: %s" % (pdftoppmcmd,e))
|
||||||
@ -181,8 +186,8 @@ def _pdftesseract(config, path):
|
|||||||
# system is full. There is no really good way to check for
|
# system is full. There is no really good way to check for
|
||||||
# this. We consider any empty file to signal an error
|
# this. We consider any empty file to signal an error
|
||||||
|
|
||||||
pages = glob.glob(tmpfile + "*")
|
ppmfiles = glob.glob(tmpfile + "*")
|
||||||
for f in pages:
|
for f in ppmfiles:
|
||||||
size = os.path.getsize(f)
|
size = os.path.getsize(f)
|
||||||
if os.path.getsize(f) == 0:
|
if os.path.getsize(f) == 0:
|
||||||
_deb("pdftoppm created empty files. "
|
_deb("pdftoppm created empty files. "
|
||||||
@ -198,7 +203,7 @@ def _pdftesseract(config, path):
|
|||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
for f in sorted(pages):
|
for f in sorted(ppmfiles):
|
||||||
out = b''
|
out = b''
|
||||||
try:
|
try:
|
||||||
out = subprocess.check_output(
|
out = subprocess.check_output(
|
||||||
|
|||||||
@ -1,37 +1,18 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# Copyright (C) 2020-2022 J.F.Dockes
|
from __future__ import print_function
|
||||||
# This program is free software; you can redistribute it and/or modify
|
|
||||||
# it under the terms of the GNU General Public License as published by
|
|
||||||
# the Free Software Foundation; either version 2 of the License, or
|
|
||||||
# (at your option) any later version.
|
|
||||||
#
|
|
||||||
# This program is distributed in the hope that it will be useful,
|
|
||||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
# GNU General Public License for more details.
|
|
||||||
#
|
|
||||||
# You should have received a copy of the GNU General Public License
|
|
||||||
# along with this program; if not, write to the
|
|
||||||
# Free Software Foundation, Inc.,
|
|
||||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
||||||
|
|
||||||
'''Read an org-mode file, optionally break it into subdocs" along level 1 headings'''
|
# Read an org-mode file, break it into "documents" along the separator lines
|
||||||
|
# and interface with recoll execm
|
||||||
import sys
|
|
||||||
import re
|
|
||||||
|
|
||||||
import rclexecm
|
import rclexecm
|
||||||
import rclconfig
|
import sys
|
||||||
import conftree
|
import re
|
||||||
|
|
||||||
class OrgModeExtractor:
|
class OrgModeExtractor:
|
||||||
def __init__(self, em):
|
def __init__(self, em):
|
||||||
self.file = ""
|
self.file = ""
|
||||||
|
self.contents = []
|
||||||
self.em = em
|
self.em = em
|
||||||
self.selftext = ""
|
|
||||||
self.docs = []
|
|
||||||
config = rclconfig.RclConfig()
|
|
||||||
self.createsubdocs = conftree.valToBool(config.getConfParam("orgmodesubdocs"))
|
|
||||||
|
|
||||||
def extractone(self, index):
|
def extractone(self, index):
|
||||||
if index >= len(self.docs):
|
if index >= len(self.docs):
|
||||||
@ -42,7 +23,7 @@ class OrgModeExtractor:
|
|||||||
iseof = rclexecm.RclExecM.noteof
|
iseof = rclexecm.RclExecM.noteof
|
||||||
if self.currentindex >= len(self.docs) -1:
|
if self.currentindex >= len(self.docs) -1:
|
||||||
iseof = rclexecm.RclExecM.eofnext
|
iseof = rclexecm.RclExecM.eofnext
|
||||||
self.em.setmimetype("text/x-orgmode-sub")
|
self.em.setmimetype("text/plain")
|
||||||
try:
|
try:
|
||||||
self.em.setfield("title", docdata.splitlines()[0])
|
self.em.setfield("title", docdata.splitlines()[0])
|
||||||
except:
|
except:
|
||||||
@ -52,6 +33,7 @@ class OrgModeExtractor:
|
|||||||
###### File type handler api, used by rclexecm ---------->
|
###### File type handler api, used by rclexecm ---------->
|
||||||
def openfile(self, params):
|
def openfile(self, params):
|
||||||
self.file = params["filename"]
|
self.file = params["filename"]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
data = open(self.file, "rb").read()
|
data = open(self.file, "rb").read()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@ -59,15 +41,9 @@ class OrgModeExtractor:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
self.currentindex = -1
|
self.currentindex = -1
|
||||||
if not self.createsubdocs:
|
|
||||||
self.selftext = data
|
|
||||||
return True
|
|
||||||
|
|
||||||
res = rb'''^\* '''
|
res = rb'''^\* '''
|
||||||
self.docs = re.compile(res, flags=re.MULTILINE).split(data)
|
self.docs = re.compile(res, flags=re.MULTILINE).split(data)
|
||||||
# Note that there can be text before the first heading. This goes into the self doc,
|
|
||||||
# because it's not a proper entry.
|
|
||||||
self.selftext = self.docs[0]
|
|
||||||
self.docs = self.docs[1:]
|
self.docs = self.docs[1:]
|
||||||
#self.em.rclog("openfile: Entry count: %d" % len(self.docs))
|
#self.em.rclog("openfile: Entry count: %d" % len(self.docs))
|
||||||
return True
|
return True
|
||||||
@ -83,8 +59,6 @@ class OrgModeExtractor:
|
|||||||
return self.extractone(index)
|
return self.extractone(index)
|
||||||
|
|
||||||
def getnext(self, params):
|
def getnext(self, params):
|
||||||
if not self.createsubdocs:
|
|
||||||
return (True, self.selftext, "", rclexecm.RclExecM.eofnext)
|
|
||||||
|
|
||||||
if self.currentindex == -1:
|
if self.currentindex == -1:
|
||||||
# Return "self" doc
|
# Return "self" doc
|
||||||
@ -94,7 +68,7 @@ class OrgModeExtractor:
|
|||||||
eof = rclexecm.RclExecM.eofnext
|
eof = rclexecm.RclExecM.eofnext
|
||||||
else:
|
else:
|
||||||
eof = rclexecm.RclExecM.noteof
|
eof = rclexecm.RclExecM.noteof
|
||||||
return (True, self.selftext, "", eof)
|
return (True, "", "", eof)
|
||||||
|
|
||||||
if self.currentindex >= len(self.docs):
|
if self.currentindex >= len(self.docs):
|
||||||
self.em.rclog("getnext: EOF hit")
|
self.em.rclog("getnext: EOF hit")
|
||||||
|
|||||||
@ -33,7 +33,6 @@ import glob
|
|||||||
import traceback
|
import traceback
|
||||||
import atexit
|
import atexit
|
||||||
import signal
|
import signal
|
||||||
import time
|
|
||||||
|
|
||||||
import rclexecm
|
import rclexecm
|
||||||
import rclconfig
|
import rclconfig
|
||||||
@ -67,17 +66,11 @@ _htmlprefix =b'''<html><head>
|
|||||||
_htmlsuffix = b'''</pre></body></html>'''
|
_htmlsuffix = b'''</pre></body></html>'''
|
||||||
|
|
||||||
def finalcleanup():
|
def finalcleanup():
|
||||||
global tmpdir
|
|
||||||
if tmpdir:
|
if tmpdir:
|
||||||
del tmpdir
|
vacuumdir(tmpdir)
|
||||||
tmpdir = None
|
os.rmdir(tmpdir)
|
||||||
|
|
||||||
ocrproc = None
|
|
||||||
def signal_handler(signal, frame):
|
def signal_handler(signal, frame):
|
||||||
global ocrproc
|
|
||||||
if ocrproc:
|
|
||||||
ocrproc.wait()
|
|
||||||
ocrproc = None
|
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
atexit.register(finalcleanup)
|
atexit.register(finalcleanup)
|
||||||
@ -92,6 +85,14 @@ except: pass
|
|||||||
try: signal.signal(signal.SIGTERM, signal_handler)
|
try: signal.signal(signal.SIGTERM, signal_handler)
|
||||||
except: pass
|
except: pass
|
||||||
|
|
||||||
|
def vacuumdir(dir):
|
||||||
|
if dir:
|
||||||
|
for fn in os.listdir(dir):
|
||||||
|
path = os.path.join(dir, fn)
|
||||||
|
if os.path.isfile(path):
|
||||||
|
os.unlink(path)
|
||||||
|
return True
|
||||||
|
|
||||||
class PDFExtractor:
|
class PDFExtractor:
|
||||||
def __init__(self, em):
|
def __init__(self, em):
|
||||||
self.currentindex = 0
|
self.currentindex = 0
|
||||||
@ -212,7 +213,7 @@ class PDFExtractor:
|
|||||||
# no big deal
|
# no big deal
|
||||||
return True
|
return True
|
||||||
try:
|
try:
|
||||||
tmpdir.vacuumdir()
|
vacuumdir(tmpdir)
|
||||||
# Note: the java version of pdftk sometimes/often fails
|
# Note: the java version of pdftk sometimes/often fails
|
||||||
# here with writing to stdout:
|
# here with writing to stdout:
|
||||||
# Error occurred during initialization of VM
|
# Error occurred during initialization of VM
|
||||||
@ -222,9 +223,9 @@ class PDFExtractor:
|
|||||||
# output, until we fix the error or preferably find a way
|
# output, until we fix the error or preferably find a way
|
||||||
# to do it with poppler...
|
# to do it with poppler...
|
||||||
subprocess.check_call(
|
subprocess.check_call(
|
||||||
[self.pdftk, self.filename, "unpack_files", "output", tmpdir.getpath()],
|
[self.pdftk, self.filename, "unpack_files", "output",
|
||||||
stdout=sys.stderr)
|
tmpdir], stdout=sys.stderr)
|
||||||
self.attachlist = sorted(os.listdir(tmpdir.getpath()))
|
self.attachlist = sorted(os.listdir(tmpdir))
|
||||||
return True
|
return True
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.em.rclog("extractAttach: failed: %s" % e)
|
self.em.rclog("extractAttach: failed: %s" % e)
|
||||||
@ -398,12 +399,11 @@ class PDFExtractor:
|
|||||||
def maybemaketmpdir(self):
|
def maybemaketmpdir(self):
|
||||||
global tmpdir
|
global tmpdir
|
||||||
if tmpdir:
|
if tmpdir:
|
||||||
if not tmpdir.vacuumdir():
|
if not vacuumdir(tmpdir):
|
||||||
self.em.rclog("openfile: vacuumdir %s failed" % tmpdir.getpath())
|
self.em.rclog("openfile: vacuumdir %s failed" % tmpdir)
|
||||||
return False
|
return False
|
||||||
else:
|
else:
|
||||||
tmpdir = rclexecm.SafeTmpDir("rclpdf", self.em)
|
tmpdir = tempfile.mkdtemp(prefix='rclmpdf')
|
||||||
#self.em.rclog("Using temporary directory %s" % tmpdir.getpath())
|
|
||||||
if self.pdftk and re.match("/snap/", self.pdftk):
|
if self.pdftk and re.match("/snap/", self.pdftk):
|
||||||
# We know this is Unix (Ubuntu actually). Check that tmpdir
|
# We know this is Unix (Ubuntu actually). Check that tmpdir
|
||||||
# belongs to the user as snap commands can't use /tmp to share
|
# belongs to the user as snap commands can't use /tmp to share
|
||||||
@ -415,7 +415,9 @@ class PDFExtractor:
|
|||||||
if st.st_uid == os.getuid():
|
if st.st_uid == os.getuid():
|
||||||
ok = True
|
ok = True
|
||||||
if not ok:
|
if not ok:
|
||||||
self.em.rclog("pdftk is a snap command and needs TMPDIR to be owned by you")
|
self.em.rclog(
|
||||||
|
"pdftk is a snap command and needs TMPDIR to be "
|
||||||
|
"a directory you own")
|
||||||
|
|
||||||
def _process_annotations(self, html):
|
def _process_annotations(self, html):
|
||||||
doc = Poppler.Document.new_from_file(
|
doc = Poppler.Document.new_from_file(
|
||||||
@ -489,11 +491,9 @@ class PDFExtractor:
|
|||||||
s = self.config.getConfParam("pdfocr")
|
s = self.config.getConfParam("pdfocr")
|
||||||
if rclexecm.configparamtrue(s):
|
if rclexecm.configparamtrue(s):
|
||||||
try:
|
try:
|
||||||
cmd = [sys.executable, os.path.join(_execdir, "rclocr.py"), self.filename]
|
cmd = [sys.executable, os.path.join(_execdir, "rclocr.py"),
|
||||||
global ocrproc
|
self.filename]
|
||||||
ocrproc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
|
data = subprocess.check_output(cmd)
|
||||||
data, stderr = ocrproc.communicate()
|
|
||||||
ocrproc = None
|
|
||||||
html = _htmlprefix + rclexecm.htmlescape(data) + _htmlsuffix
|
html = _htmlprefix + rclexecm.htmlescape(data) + _htmlsuffix
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.em.rclog("%s failed: %s" % (cmd, e))
|
self.em.rclog("%s failed: %s" % (cmd, e))
|
||||||
@ -520,9 +520,7 @@ class PDFExtractor:
|
|||||||
if not self.attextractdone:
|
if not self.attextractdone:
|
||||||
if not self.extractAttach():
|
if not self.extractAttach():
|
||||||
return (False, "", "", rclexecm.RclExecM.eofnow)
|
return (False, "", "", rclexecm.RclExecM.eofnow)
|
||||||
if type(ipath) != type(""):
|
path = os.path.join(tmpdir, ipath)
|
||||||
ipath = ipath.decode('utf-8')
|
|
||||||
path = os.path.join(tmpdir.getpath(), ipath)
|
|
||||||
if os.path.isfile(path):
|
if os.path.isfile(path):
|
||||||
f = open(path, "rb")
|
f = open(path, "rb")
|
||||||
docdata = f.read();
|
docdata = f.read();
|
||||||
|
|||||||
@ -2,6 +2,8 @@
|
|||||||
|
|
||||||
# Recoll PPT text extractor
|
# Recoll PPT text extractor
|
||||||
|
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
import rclexecm
|
import rclexecm
|
||||||
import rclexec1
|
import rclexec1
|
||||||
import re
|
import re
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/python3
|
||||||
#################################
|
#################################
|
||||||
# Copyright (C) 2019 J.F.Dockes
|
# Copyright (C) 2019 J.F.Dockes
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
@ -28,14 +28,12 @@ import os
|
|||||||
import pathlib
|
import pathlib
|
||||||
import email.parser
|
import email.parser
|
||||||
import email.policy
|
import email.policy
|
||||||
import email.message
|
|
||||||
import mailbox
|
import mailbox
|
||||||
import subprocess
|
import subprocess
|
||||||
import rclexecm
|
import rclexecm
|
||||||
import rclconfig
|
import rclconfig
|
||||||
import conftree
|
import conftree
|
||||||
import base64
|
import base64
|
||||||
import traceback
|
|
||||||
|
|
||||||
_mswindows = (sys.platform == "win32" or sys.platform == "msys")
|
_mswindows = (sys.platform == "win32" or sys.platform == "msys")
|
||||||
if _mswindows:
|
if _mswindows:
|
||||||
@ -97,26 +95,14 @@ class EmailBuilder(object):
|
|||||||
newmsg = email.message.EmailMessage(policy=email.policy.default)
|
newmsg = email.message.EmailMessage(policy=email.policy.default)
|
||||||
headerstr = self.headers.decode("UTF-8", errors='replace')
|
headerstr = self.headers.decode("UTF-8", errors='replace')
|
||||||
# print("%s" % headerstr)
|
# print("%s" % headerstr)
|
||||||
try:
|
headers = self.parser.parsestr(headerstr, headersonly=True)
|
||||||
headers = self.parser.parsestr(headerstr, headersonly=True)
|
|
||||||
except:
|
|
||||||
# This sometimes fails, for example with 'day is out of range for month'. Try to go on
|
|
||||||
# without headers
|
|
||||||
headers = email.message.EmailMessage()
|
|
||||||
|
|
||||||
#self.log("EmailBuilder: content-type %s" % headers['content-type'])
|
#self.log("EmailBuilder: content-type %s" % headers['content-type'])
|
||||||
for nm in ('from', 'subject', 'date'):
|
for nm in ('from', 'subject'):
|
||||||
if nm in headers:
|
if nm in headers:
|
||||||
try:
|
newmsg.add_header(nm, headers[nm])
|
||||||
newmsg.add_header(nm, headers[nm])
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
for h in ('to', 'cc'):
|
for h in ('to', 'cc'):
|
||||||
try:
|
tolist = headers.get_all(h)
|
||||||
tolist = headers.get_all(h)
|
|
||||||
except:
|
|
||||||
tolist = []
|
|
||||||
if not tolist:
|
if not tolist:
|
||||||
continue
|
continue
|
||||||
alldests = ""
|
alldests = ""
|
||||||
@ -127,10 +113,7 @@ class EmailBuilder(object):
|
|||||||
alldests += sd + ", "
|
alldests += sd + ", "
|
||||||
if alldests:
|
if alldests:
|
||||||
alldests = alldests.rstrip(", ")
|
alldests = alldests.rstrip(", ")
|
||||||
try:
|
newmsg.add_header(h, alldests)
|
||||||
newmsg.add_header(h, alldests)
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Decoding the body: the .pst contains the text value decoded from qp
|
# Decoding the body: the .pst contains the text value decoded from qp
|
||||||
# or base64 (at least that's what libpff sends). Unfortunately, it
|
# or base64 (at least that's what libpff sends). Unfortunately, it
|
||||||
@ -152,13 +135,8 @@ class EmailBuilder(object):
|
|||||||
charset = headers.get_content_charset()
|
charset = headers.get_content_charset()
|
||||||
body = ''
|
body = ''
|
||||||
if charset:
|
if charset:
|
||||||
if charset == 'unicode':
|
body = self.body.decode(charset, errors='replace')
|
||||||
charset = 'utf-16'
|
#self.log("DECODE FROM HEADER CHARSET %s SUCCEEDED"% charset)
|
||||||
try:
|
|
||||||
body = self.body.decode(charset, errors='replace')
|
|
||||||
#self.log("DECODE FROM HEADER CHARSET %s SUCCEEDED"% charset)
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
body = self.body.decode('utf-8')
|
body = self.body.decode('utf-8')
|
||||||
@ -399,7 +377,6 @@ class PstExtractor(object):
|
|||||||
return(False, "", "", rclexecm.RclExecM.eofnow)
|
return(False, "", "", rclexecm.RclExecM.eofnow)
|
||||||
except Exception as ex:
|
except Exception as ex:
|
||||||
self.em.rclog("getnext: exception: %s" % ex)
|
self.em.rclog("getnext: exception: %s" % ex)
|
||||||
traceback.print_exc()
|
|
||||||
return(False, "", "", rclexecm.RclExecM.eofnow)
|
return(False, "", "", rclexecm.RclExecM.eofnow)
|
||||||
|
|
||||||
return (True, doc, ipath, rclexecm.RclExecM.noteof)
|
return (True, doc, ipath, rclexecm.RclExecM.noteof)
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/python3
|
||||||
|
|
||||||
# Rclpython is based on "colorize.py" from:
|
# Rclpython is based on "colorize.py" from:
|
||||||
# http://chrisarndt.de/en/software/python/colorize.html
|
# http://chrisarndt.de/en/software/python/colorize.html
|
||||||
@ -51,12 +51,6 @@ _css_classes = {
|
|||||||
_TEXT: 'text',
|
_TEXT: 'text',
|
||||||
}
|
}
|
||||||
|
|
||||||
# python3.8 token.py sends an ENCODING token which we ignore
|
|
||||||
try:
|
|
||||||
token_encoding_type = token.ENCODING
|
|
||||||
except:
|
|
||||||
token_encoding_type = 62
|
|
||||||
|
|
||||||
_HTML_HEADER = """\
|
_HTML_HEADER = """\
|
||||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
|
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
|
||||||
"http://www.w3.org/TR/html4/loose.dtd">
|
"http://www.w3.org/TR/html4/loose.dtd">
|
||||||
@ -152,21 +146,17 @@ class Parser:
|
|||||||
def __call__(self, toktype, toktext, startpos, endpos, line):
|
def __call__(self, toktype, toktext, startpos, endpos, line):
|
||||||
""" Token handler.
|
""" Token handler.
|
||||||
"""
|
"""
|
||||||
srow, scol = startpos
|
|
||||||
erow, ecol = endpos
|
|
||||||
if 0:
|
if 0:
|
||||||
print("type %s %s text %s start %s %s end %s %s<br>\n" % \
|
print("type %s %s text %s start %s %s end %s %s<br>\n" % \
|
||||||
(toktype, token.tok_name[toktype], toktext, \
|
(toktype, token.tok_name[toktype], toktext, \
|
||||||
srow, scol,erow,ecol), file=sys.stderr)
|
srow, scol,erow,ecol))
|
||||||
|
srow, scol = startpos
|
||||||
|
erow, ecol = endpos
|
||||||
# calculate new positions
|
# calculate new positions
|
||||||
oldpos = self.pos
|
oldpos = self.pos
|
||||||
newpos = self.lines[srow] + scol
|
newpos = self.lines[srow] + scol
|
||||||
self.pos = newpos + len(toktext)
|
self.pos = newpos + len(toktext)
|
||||||
|
|
||||||
if toktype == token_encoding_type:
|
|
||||||
return
|
|
||||||
|
|
||||||
# handle newlines
|
# handle newlines
|
||||||
if toktype in [token.NEWLINE, tokenize.NL]:
|
if toktype in [token.NEWLINE, tokenize.NL]:
|
||||||
self.out.write(b'\n')
|
self.out.write(b'\n')
|
||||||
|
|||||||
@ -18,6 +18,8 @@
|
|||||||
# Free Software Foundation, Inc.,
|
# Free Software Foundation, Inc.,
|
||||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import rclexecm
|
import rclexecm
|
||||||
import os
|
import os
|
||||||
@ -58,7 +60,7 @@ except Exception as ex:
|
|||||||
# (https://www.rarlab.com/rar_add.htm). The unrar-free version fails
|
# (https://www.rarlab.com/rar_add.htm). The unrar-free version fails
|
||||||
# with the message "Failed the read enough data"
|
# with the message "Failed the read enough data"
|
||||||
#
|
#
|
||||||
# This is identical to rclzip.py except I did a search/replace from zip
|
# This is identical to rclzip except I did a search/replace from zip
|
||||||
# to rar, and changed this comment.
|
# to rar, and changed this comment.
|
||||||
class RarExtractor:
|
class RarExtractor:
|
||||||
def __init__(self, em):
|
def __init__(self, em):
|
||||||
@ -1,4 +1,5 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
import rclexecm
|
import rclexecm
|
||||||
import rclexec1
|
import rclexec1
|
||||||
|
|||||||
@ -2,10 +2,12 @@
|
|||||||
|
|
||||||
# Tar-file filter for Recoll
|
# Tar-file filter for Recoll
|
||||||
# Thanks to Recoll user Martin Ziegler
|
# Thanks to Recoll user Martin Ziegler
|
||||||
# This is a modified version of /usr/share/recoll/filters/rclzip.py
|
# This is a modified version of /usr/share/recoll/filters/rclzip
|
||||||
# It works not only for tar-files, but automatically for gzipped and
|
# It works not only for tar-files, but automatically for gzipped and
|
||||||
# bzipped tar-files at well.
|
# bzipped tar-files at well.
|
||||||
|
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
import rclexecm
|
import rclexecm
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -18,6 +18,8 @@
|
|||||||
# Wrapping a text file. Recoll does it internally in most cases, but
|
# Wrapping a text file. Recoll does it internally in most cases, but
|
||||||
# this is for use by another filter.
|
# this is for use by another filter.
|
||||||
|
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
import rclexecm
|
import rclexecm
|
||||||
import sys
|
import sys
|
||||||
from rclbasehandler import RclBaseHandler
|
from rclbasehandler import RclBaseHandler
|
||||||
|
|||||||
@ -2,6 +2,7 @@
|
|||||||
"""Index text lines as document (execm handler sample). This exists
|
"""Index text lines as document (execm handler sample). This exists
|
||||||
to demonstrate the execm interface and is not meant to be useful or
|
to demonstrate the execm interface and is not meant to be useful or
|
||||||
efficient"""
|
efficient"""
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
|
|||||||
@ -1,4 +1,5 @@
|
|||||||
# No shebang: this is only used on Windows. We use a shell script on Linux
|
# No shebang: this is only used on Windows. We use a shell script on Linux
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
import rclexecm
|
import rclexecm
|
||||||
import sys
|
import sys
|
||||||
|
|||||||
@ -2,6 +2,8 @@
|
|||||||
|
|
||||||
# WAR web archive filter for recoll. War file are gzipped tar files
|
# WAR web archive filter for recoll. War file are gzipped tar files
|
||||||
|
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
import rclexecm
|
import rclexecm
|
||||||
import tarfile
|
import tarfile
|
||||||
|
|
||||||
@ -16,6 +16,7 @@
|
|||||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
# Code to extract XMP tags using libexempi and python-xmp
|
# Code to extract XMP tags using libexempi and python-xmp
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
can_xmp = True
|
can_xmp = True
|
||||||
try:
|
try:
|
||||||
|
|||||||
@ -18,11 +18,11 @@
|
|||||||
|
|
||||||
# Zip file extractor for Recoll
|
# Zip file extractor for Recoll
|
||||||
|
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import posixpath
|
import posixpath
|
||||||
import fnmatch
|
import fnmatch
|
||||||
import datetime
|
|
||||||
|
|
||||||
import rclexecm
|
import rclexecm
|
||||||
from zipfile import ZipFile
|
from zipfile import ZipFile
|
||||||
|
|
||||||
@ -49,7 +49,7 @@ if not hasrclconfig:
|
|||||||
# and stores it in the catalog as an unicode object. Else it uses the
|
# and stores it in the catalog as an unicode object. Else it uses the
|
||||||
# binary string, which it decodes as CP437 (zip standard).
|
# binary string, which it decodes as CP437 (zip standard).
|
||||||
#
|
#
|
||||||
# When reading the file, the input file name is used by rclzip.py
|
# When reading the file, the input file name is used by rclzip
|
||||||
# directly as an index into the catalog.
|
# directly as an index into the catalog.
|
||||||
#
|
#
|
||||||
# When we send the file name data to the indexer, we have to serialize
|
# When we send the file name data to the indexer, we have to serialize
|
||||||
@ -119,8 +119,6 @@ class ZipExtractor:
|
|||||||
# element).
|
# element).
|
||||||
filename = posixpath.basename(ipath)
|
filename = posixpath.basename(ipath)
|
||||||
self.em.setfield("filename", filename)
|
self.em.setfield("filename", filename)
|
||||||
dt = datetime.datetime(*info.date_time)
|
|
||||||
self.em.setfield("modificationdate", str(int(dt.timestamp())))
|
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
ok = True
|
ok = True
|
||||||
@ -153,11 +151,14 @@ class ZipExtractor:
|
|||||||
if skipped is not None:
|
if skipped is not None:
|
||||||
self.skiplist += conftree.stringToStrings(skipped)
|
self.skiplist += conftree.stringToStrings(skipped)
|
||||||
try:
|
try:
|
||||||
# Note: py3 ZipFile wants an str file name, which
|
if rclexecm.PY3:
|
||||||
# is wrong: file names are binary. But it accepts an
|
# Note: py3 ZipFile wants an str file name, which
|
||||||
# open file, and open() has no such restriction
|
# is wrong: file names are binary. But it accepts an
|
||||||
self.f = open(filename, 'rb')
|
# open file, and open() has no such restriction
|
||||||
self.zip = ZipFile(self.f)
|
self.f = open(filename, 'rb')
|
||||||
|
self.zip = ZipFile(self.f)
|
||||||
|
else:
|
||||||
|
self.zip = ZipFile(filename)
|
||||||
return True
|
return True
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
self.em.rclog("openfile: failed: [%s]" % err)
|
self.em.rclog("openfile: failed: [%s]" % err)
|
||||||
@ -1,5 +1,5 @@
|
|||||||
#!/usr/bin/python3
|
#!/usr/bin/env python3
|
||||||
# Copyright (C) 2017-2022 J.F.Dockes
|
# Copyright (C) 2017 J.F.Dockes
|
||||||
# This program is free software; you can redistribute it and/or modify
|
# This program is free software; you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License as published by
|
# it under the terms of the GNU General Public License as published by
|
||||||
# the Free Software Foundation; either version 2 of the License, or
|
# the Free Software Foundation; either version 2 of the License, or
|
||||||
@ -31,7 +31,6 @@ but it can also be run by hand.
|
|||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import getopt
|
|
||||||
try:
|
try:
|
||||||
from hashlib import md5 as md5
|
from hashlib import md5 as md5
|
||||||
except:
|
except:
|
||||||
@ -95,44 +94,28 @@ def list_all_files(dir):
|
|||||||
return mfiles,cfiles
|
return mfiles,cfiles
|
||||||
|
|
||||||
#######################
|
#######################
|
||||||
def msg(s):
|
|
||||||
print(f"{s}", file=sys.stderr)
|
|
||||||
def usage():
|
def usage():
|
||||||
msg("Usage: recoll-we-move-files.py [-c <recollconfigdir>]")
|
print("Usage: recoll-we-move-files.py [<downloaddir>]", file=sys.stderr)
|
||||||
msg(" The script needs the recoll configuration directory. This can be set either through")
|
|
||||||
msg(" the RECOLL_CONFDIR environment variable or the '-c' command line option (which takes")
|
|
||||||
msg(" precedence). If none is set, the default configuration directory will be used.")
|
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
|
config = rclconfig.RclConfig()
|
||||||
|
|
||||||
opts, args = getopt.getopt(sys.argv[1:], "c:")
|
# Source dir is parameter, else from config else default Downloads directory
|
||||||
if not len(args) == 0:
|
|
||||||
usage()
|
|
||||||
|
|
||||||
configdir = None
|
|
||||||
for opt,val in opts:
|
|
||||||
#logdeb(f"opt {opt} val {val}")
|
|
||||||
if opt == "-c":
|
|
||||||
configdir = val
|
|
||||||
if not os.path.isdir(val):
|
|
||||||
msg(f"{val} is not a directory")
|
|
||||||
usage()
|
|
||||||
else:
|
|
||||||
usage()
|
|
||||||
|
|
||||||
config = rclconfig.RclConfig(argcnf=configdir)
|
|
||||||
|
|
||||||
# Get the directory where the browser extension creates the page files. Our user can set it as a
|
|
||||||
# subdirectory of the default Downloads directory, for tidyness
|
|
||||||
downloadsdir = config.getConfParam("webdownloadsdir")
|
downloadsdir = config.getConfParam("webdownloadsdir")
|
||||||
if not downloadsdir:
|
if not downloadsdir:
|
||||||
downloadsdir = "~/Downloads"
|
downloadsdir = "~/Downloads"
|
||||||
downloadsdir = os.path.expanduser(downloadsdir)
|
downloadsdir = os.path.expanduser(downloadsdir)
|
||||||
if not os.path.isdir(downloadsdir):
|
|
||||||
msg(f"Downloads directory {downloadsdir} does not exist")
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
# Get the target recoll webqueue directory, into which we are going to move the downloaded files.
|
if len(sys.argv) == 2:
|
||||||
|
mydir = sys.argv[1]
|
||||||
|
elif len(sys.argv) == 1:
|
||||||
|
mydir = downloadsdir
|
||||||
|
else:
|
||||||
|
usage()
|
||||||
|
if not os.path.isdir(mydir):
|
||||||
|
usage()
|
||||||
|
|
||||||
|
# Get target webqueue recoll directory from recoll configuration
|
||||||
webqueuedir = config.getConfParam("webqueuedir")
|
webqueuedir = config.getConfParam("webqueuedir")
|
||||||
if not webqueuedir:
|
if not webqueuedir:
|
||||||
if _mswindows:
|
if _mswindows:
|
||||||
@ -142,11 +125,10 @@ if not webqueuedir:
|
|||||||
webqueuedir = os.path.expanduser(webqueuedir)
|
webqueuedir = os.path.expanduser(webqueuedir)
|
||||||
os.makedirs(webqueuedir, exist_ok = True)
|
os.makedirs(webqueuedir, exist_ok = True)
|
||||||
|
|
||||||
|
# logdeb("webqueuedir is %s" % webqueuedir)
|
||||||
#logdeb(f"recoll confdir [{configdir}] downloadsdir [{downloadsdir}] webqueuedir [{webqueuedir}]")
|
|
||||||
|
|
||||||
# Get the lists of all files created by the browser addon
|
# Get the lists of all files created by the browser addon
|
||||||
mfiles, cfiles = list_all_files(downloadsdir)
|
mfiles, cfiles = list_all_files(mydir)
|
||||||
|
|
||||||
# Only keep the last version
|
# Only keep the last version
|
||||||
mfiles = delete_previous_instances(mfiles, downloadsdir)
|
mfiles = delete_previous_instances(mfiles, downloadsdir)
|
||||||
@ -161,7 +143,7 @@ cfiles = delete_previous_instances(cfiles, downloadsdir)
|
|||||||
# The old plugin created the data first, so we move data then meta
|
# The old plugin created the data first, so we move data then meta
|
||||||
for hash in cfiles.keys():
|
for hash in cfiles.keys():
|
||||||
if hash in mfiles.keys():
|
if hash in mfiles.keys():
|
||||||
newname = "firefox-recoll-web-" + hash
|
newname = "firefox-recoll-web-"+hash
|
||||||
shutil.move(os.path.join(downloadsdir, cfiles[hash]),
|
shutil.move(os.path.join(downloadsdir, cfiles[hash]),
|
||||||
os.path.join(webqueuedir, newname))
|
os.path.join(webqueuedir, newname))
|
||||||
shutil.move(os.path.join(downloadsdir, mfiles[hash]),
|
shutil.move(os.path.join(downloadsdir, mfiles[hash]),
|
||||||
|
|||||||
@ -23,6 +23,8 @@
|
|||||||
# the minimum version supported.
|
# the minimum version supported.
|
||||||
|
|
||||||
|
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import xml.sax
|
import xml.sax
|
||||||
|
|
||||||
|
|||||||
@ -61,7 +61,8 @@ public:
|
|||||||
EXEDocFetcher::EXEDocFetcher(const EXEDocFetcher::Internal& _m)
|
EXEDocFetcher::EXEDocFetcher(const EXEDocFetcher::Internal& _m)
|
||||||
{
|
{
|
||||||
m = new Internal(_m);
|
m = new Internal(_m);
|
||||||
LOGDEB("EXEDocFetcher::EXEDocFetcher: fetch is " << stringsToString(m->sfetch) << "\n");
|
LOGDEB("EXEDocFetcher::EXEDocFetcher: fetch is " <<
|
||||||
|
stringsToString(m->sfetch) << "\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
bool EXEDocFetcher::fetch(RclConfig*, const Rcl::Doc& idoc, RawDoc& out)
|
bool EXEDocFetcher::fetch(RclConfig*, const Rcl::Doc& idoc, RawDoc& out)
|
||||||
@ -76,7 +77,8 @@ bool EXEDocFetcher::makesig(RclConfig*, const Rcl::Doc& idoc, string& sig)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Lookup bckid in the config and create an appropriate fetcher.
|
// Lookup bckid in the config and create an appropriate fetcher.
|
||||||
std::unique_ptr<EXEDocFetcher> exeDocFetcherMake(RclConfig *config, const string& bckid)
|
std::unique_ptr<EXEDocFetcher> exeDocFetcherMake(RclConfig *config,
|
||||||
|
const string& bckid)
|
||||||
{
|
{
|
||||||
// The config we only read once, not gonna change.
|
// The config we only read once, not gonna change.
|
||||||
static ConfSimple *bconf;
|
static ConfSimple *bconf;
|
||||||
|
|||||||
@ -40,8 +40,6 @@ public:
|
|||||||
class Internal;
|
class Internal;
|
||||||
EXEDocFetcher(const Internal&);
|
EXEDocFetcher(const Internal&);
|
||||||
virtual ~EXEDocFetcher() {}
|
virtual ~EXEDocFetcher() {}
|
||||||
EXEDocFetcher(const EXEDocFetcher&) = delete;
|
|
||||||
EXEDocFetcher& operator=(const EXEDocFetcher&) = delete;
|
|
||||||
|
|
||||||
virtual bool fetch(RclConfig* cnf, const Rcl::Doc& idoc, RawDoc& out);
|
virtual bool fetch(RclConfig* cnf, const Rcl::Doc& idoc, RawDoc& out);
|
||||||
/** Calls stat to retrieve file signature data */
|
/** Calls stat to retrieve file signature data */
|
||||||
@ -53,6 +51,7 @@ private:
|
|||||||
};
|
};
|
||||||
|
|
||||||
// Lookup bckid in the config and create an appropriate fetcher.
|
// Lookup bckid in the config and create an appropriate fetcher.
|
||||||
std::unique_ptr<EXEDocFetcher> exeDocFetcherMake(RclConfig *config, const std::string& bckid);
|
std::unique_ptr<EXEDocFetcher> exeDocFetcherMake(RclConfig *config,
|
||||||
|
const std::string& bckid);
|
||||||
|
|
||||||
#endif /* _EXEFETCHER_H_INCLUDED_ */
|
#endif /* _EXEFETCHER_H_INCLUDED_ */
|
||||||
|
|||||||
@ -72,18 +72,18 @@ public:
|
|||||||
* @param idoc the data gathered from the index for this doc (udi/ipath)
|
* @param idoc the data gathered from the index for this doc (udi/ipath)
|
||||||
* @param sig output.
|
* @param sig output.
|
||||||
*/
|
*/
|
||||||
virtual bool makesig(RclConfig* cnf, const Rcl::Doc& idoc, std::string& sig) = 0;
|
virtual bool makesig(RclConfig* cnf, const Rcl::Doc& idoc,
|
||||||
|
std::string& sig) = 0;
|
||||||
enum Reason{FetchOk, FetchNotExist, FetchNoPerm, FetchOther};
|
enum Reason{FetchOk, FetchNotExist, FetchNoPerm, FetchOther};
|
||||||
virtual Reason testAccess(RclConfig*, const Rcl::Doc&) {
|
virtual Reason testAccess(RclConfig*, const Rcl::Doc&) {
|
||||||
return FetchOther;
|
return FetchOther;
|
||||||
}
|
}
|
||||||
DocFetcher() {}
|
|
||||||
virtual ~DocFetcher() {}
|
virtual ~DocFetcher() {}
|
||||||
DocFetcher(const DocFetcher&) = delete;
|
|
||||||
DocFetcher& operator=(const DocFetcher&) = delete;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/** Return an appropriate fetcher object given the backend string identifier inside idoc*/
|
/** Return an appropriate fetcher object given the backend string
|
||||||
std::unique_ptr<DocFetcher> docFetcherMake(RclConfig *config, const Rcl::Doc& idoc);
|
* identifier inside idoc*/
|
||||||
|
std::unique_ptr<DocFetcher> docFetcherMake(RclConfig *config,
|
||||||
|
const Rcl::Doc& idoc);
|
||||||
|
|
||||||
#endif /* _FETCHER_H_INCLUDED_ */
|
#endif /* _FETCHER_H_INCLUDED_ */
|
||||||
|
|||||||
@ -23,18 +23,14 @@
|
|||||||
/**
|
/**
|
||||||
* The file-system fetcher:
|
* The file-system fetcher:
|
||||||
*/
|
*/
|
||||||
class FSDocFetcher : public DocFetcher {
|
class FSDocFetcher : public DocFetcher{
|
||||||
public:
|
|
||||||
/** FSDocFetcher::fetch always returns a file name */
|
/** FSDocFetcher::fetch always returns a file name */
|
||||||
virtual bool fetch(RclConfig* cnf, const Rcl::Doc& idoc, RawDoc& out);
|
virtual bool fetch(RclConfig* cnf, const Rcl::Doc& idoc, RawDoc& out);
|
||||||
|
|
||||||
/** Calls stat to retrieve file signature data */
|
/** Calls stat to retrieve file signature data */
|
||||||
virtual bool makesig(RclConfig* cnf,const Rcl::Doc& idoc, std::string& sig);
|
virtual bool makesig(RclConfig* cnf,const Rcl::Doc& idoc, std::string& sig);
|
||||||
virtual DocFetcher::Reason testAccess(RclConfig* cnf, const Rcl::Doc& idoc);
|
virtual DocFetcher::Reason testAccess(RclConfig* cnf, const Rcl::Doc& idoc);
|
||||||
FSDocFetcher() {}
|
|
||||||
virtual ~FSDocFetcher() {}
|
virtual ~FSDocFetcher() {}
|
||||||
FSDocFetcher(const FSDocFetcher&) = delete;
|
|
||||||
FSDocFetcher& operator=(const FSDocFetcher&) = delete;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
extern void fsmakesig(const struct PathStat *stp, std::string& out);
|
extern void fsmakesig(const struct PathStat *stp, std::string& out);
|
||||||
|
|||||||
@ -195,7 +195,6 @@ bool FsIndexer::index(int flags)
|
|||||||
m_walker.setMaxDepth(2);
|
m_walker.setMaxDepth(2);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool walkok(true);
|
|
||||||
for (const auto& topdir : m_tdl) {
|
for (const auto& topdir : m_tdl) {
|
||||||
LOGDEB("FsIndexer::index: Indexing " << topdir << " into " <<
|
LOGDEB("FsIndexer::index: Indexing " << topdir << " into " <<
|
||||||
getDbDir() << "\n");
|
getDbDir() << "\n");
|
||||||
@ -230,46 +229,29 @@ bool FsIndexer::index(int flags)
|
|||||||
if (m_walker.walk(topdir, *this) != FsTreeWalker::FtwOk) {
|
if (m_walker.walk(topdir, *this) != FsTreeWalker::FtwOk) {
|
||||||
LOGERR("FsIndexer::index: error while indexing " << topdir <<
|
LOGERR("FsIndexer::index: error while indexing " << topdir <<
|
||||||
": " << m_walker.getReason() << "\n");
|
": " << m_walker.getReason() << "\n");
|
||||||
// DO NOT return: we need to flush the queues before the Db can be closed !
|
return false;
|
||||||
walkok = false;
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
shutdownQueues(walkok);
|
#ifdef IDX_THREADS
|
||||||
|
if (m_haveInternQ)
|
||||||
|
m_iwqueue.waitIdle();
|
||||||
|
if (m_haveSplitQ)
|
||||||
|
m_dwqueue.waitIdle();
|
||||||
|
m_db->waitUpdIdle();
|
||||||
|
#endif // IDX_THREADS
|
||||||
|
|
||||||
if (m_missing) {
|
if (m_missing) {
|
||||||
string missing;
|
string missing;
|
||||||
m_missing->getMissingDescription(missing);
|
m_missing->getMissingDescription(missing);
|
||||||
if (!missing.empty()) {
|
if (!missing.empty()) {
|
||||||
LOGINFO("FsIndexer::index missing helper program(s):\n" << missing << "\n");
|
LOGINFO("FsIndexer::index missing helper program(s):\n" <<
|
||||||
|
missing << "\n");
|
||||||
}
|
}
|
||||||
m_config->storeMissingHelperDesc(missing);
|
m_config->storeMissingHelperDesc(missing);
|
||||||
}
|
}
|
||||||
LOGINFO("fsindexer: status: " << walkok << " index time: " << chron.millis() << " mS\n");
|
LOGINFO("fsindexer index time: " << chron.millis() << " mS\n");
|
||||||
return walkok;
|
return true;
|
||||||
}
|
|
||||||
|
|
||||||
void FsIndexer::shutdownQueues(bool ok)
|
|
||||||
{
|
|
||||||
#ifdef IDX_THREADS
|
|
||||||
if (!ok) {
|
|
||||||
// Error or more probably interrupt. Discard everything for fast shutdown
|
|
||||||
if (m_haveInternQ) {
|
|
||||||
m_iwqueue.closeShop();
|
|
||||||
}
|
|
||||||
if (m_haveSplitQ) {
|
|
||||||
m_dwqueue.closeShop();
|
|
||||||
}
|
|
||||||
m_db->closeQueue();
|
|
||||||
}
|
|
||||||
if (m_haveInternQ) {
|
|
||||||
m_iwqueue.waitIdle();
|
|
||||||
}
|
|
||||||
if (m_haveSplitQ) {
|
|
||||||
m_dwqueue.waitIdle();
|
|
||||||
}
|
|
||||||
m_db->waitUpdIdle();
|
|
||||||
#endif // IDX_THREADS
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool matchesSkipped(
|
static bool matchesSkipped(
|
||||||
@ -377,7 +359,7 @@ bool FsIndexer::indexFiles(list<string>& files, int flags)
|
|||||||
FsTreeWalker walker;
|
FsTreeWalker walker;
|
||||||
walker.setSkippedPaths(m_config->getSkippedPaths());
|
walker.setSkippedPaths(m_config->getSkippedPaths());
|
||||||
|
|
||||||
for (auto it = files.begin(); it != files.end(); ) {
|
for (list<string>::iterator it = files.begin(); it != files.end(); ) {
|
||||||
LOGDEB2("FsIndexer::indexFiles: [" << *it << "]\n");
|
LOGDEB2("FsIndexer::indexFiles: [" << *it << "]\n");
|
||||||
|
|
||||||
m_config->setKeyDir(path_getfather(*it));
|
m_config->setKeyDir(path_getfather(*it));
|
||||||
@ -421,14 +403,22 @@ bool FsIndexer::indexFiles(list<string>& files, int flags)
|
|||||||
|
|
||||||
ret = true;
|
ret = true;
|
||||||
out:
|
out:
|
||||||
shutdownQueues(ret);
|
#ifdef IDX_THREADS
|
||||||
|
if (m_haveInternQ)
|
||||||
|
m_iwqueue.waitIdle();
|
||||||
|
if (m_haveSplitQ)
|
||||||
|
m_dwqueue.waitIdle();
|
||||||
|
m_db->waitUpdIdle();
|
||||||
|
#endif // IDX_THREADS
|
||||||
|
|
||||||
// Purge possible orphan documents
|
// Purge possible orphan documents
|
||||||
if (ret == true) {
|
if (ret == true) {
|
||||||
LOGDEB("Indexfiles: purging orphans\n");
|
LOGDEB("Indexfiles: purging orphans\n");
|
||||||
for (const auto& udi : m_purgeCandidates.getCandidates()) {
|
const vector<string>& purgecandidates = m_purgeCandidates.getCandidates();
|
||||||
LOGDEB("Indexfiles: purging orphans for " << udi << "\n");
|
for (vector<string>::const_iterator it = purgecandidates.begin();
|
||||||
m_db->purgeOrphans(udi);
|
it != purgecandidates.end(); it++) {
|
||||||
|
LOGDEB("Indexfiles: purging orphans for " << *it << "\n");
|
||||||
|
m_db->purgeOrphans(*it);
|
||||||
}
|
}
|
||||||
#ifdef IDX_THREADS
|
#ifdef IDX_THREADS
|
||||||
m_db->waitUpdIdle();
|
m_db->waitUpdIdle();
|
||||||
@ -468,7 +458,13 @@ bool FsIndexer::purgeFiles(list<string>& files)
|
|||||||
|
|
||||||
ret = true;
|
ret = true;
|
||||||
out:
|
out:
|
||||||
shutdownQueues(ret);
|
#ifdef IDX_THREADS
|
||||||
|
if (m_haveInternQ)
|
||||||
|
m_iwqueue.waitIdle();
|
||||||
|
if (m_haveSplitQ)
|
||||||
|
m_dwqueue.waitIdle();
|
||||||
|
m_db->waitUpdIdle();
|
||||||
|
#endif // IDX_THREADS
|
||||||
LOGDEB("FsIndexer::purgeFiles: done\n");
|
LOGDEB("FsIndexer::purgeFiles: done\n");
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
@ -492,9 +488,10 @@ void FsIndexer::localfieldsfromconf()
|
|||||||
ConfSimple attrs;
|
ConfSimple attrs;
|
||||||
m_config->valueSplitAttributes(sfields, value, attrs);
|
m_config->valueSplitAttributes(sfields, value, attrs);
|
||||||
vector<string> nmlst = attrs.getNames(cstr_null);
|
vector<string> nmlst = attrs.getNames(cstr_null);
|
||||||
for (const auto& anm : nmlst) {
|
for (vector<string>::const_iterator it = nmlst.begin();
|
||||||
string nm = m_config->fieldCanon(anm);
|
it != nmlst.end(); it++) {
|
||||||
attrs.get(anm, m_localfields[nm]);
|
string nm = m_config->fieldCanon(*it);
|
||||||
|
attrs.get(*it, m_localfields[nm]);
|
||||||
LOGDEB2("FsIndexer::localfieldsfromconf: [" << nm << "]->[" <<
|
LOGDEB2("FsIndexer::localfieldsfromconf: [" << nm << "]->[" <<
|
||||||
m_localfields[nm] << "]\n");
|
m_localfields[nm] << "]\n");
|
||||||
}
|
}
|
||||||
@ -502,11 +499,12 @@ void FsIndexer::localfieldsfromconf()
|
|||||||
|
|
||||||
void FsIndexer::setlocalfields(const map<string, string>& fields, Rcl::Doc& doc)
|
void FsIndexer::setlocalfields(const map<string, string>& fields, Rcl::Doc& doc)
|
||||||
{
|
{
|
||||||
for (const auto& field : fields) {
|
for (map<string, string>::const_iterator it = fields.begin();
|
||||||
|
it != fields.end(); it++) {
|
||||||
// Being chosen by the user, localfields override values from
|
// Being chosen by the user, localfields override values from
|
||||||
// the filter. The key is already canonic (see
|
// the filter. The key is already canonic (see
|
||||||
// localfieldsfromconf())
|
// localfieldsfromconf())
|
||||||
doc.meta[field.first] = field.second;
|
doc.meta[it->first] = it->second;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -842,7 +840,9 @@ FsTreeWalker::Status FsIndexer::processonefile(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#if defined(HAVE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
|
#if defined(HAVE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
|
||||||
// See framagit issue 26. This is off by default and controlled by a command line switch.
|
// See framagit issue 26. If this appears to be a good idea
|
||||||
|
// after all (not sure), we'll need a command line switch to
|
||||||
|
// control it. For now it's compile-time only.
|
||||||
if (m_cleancache) {
|
if (m_cleancache) {
|
||||||
int fd = open(fn.c_str(), O_RDONLY);
|
int fd = open(fn.c_str(), O_RDONLY);
|
||||||
if (fd >= 0) {
|
if (fd >= 0) {
|
||||||
|
|||||||
@ -27,6 +27,7 @@
|
|||||||
#endif // IDX_THREADS
|
#endif // IDX_THREADS
|
||||||
|
|
||||||
class FIMissingStore;
|
class FIMissingStore;
|
||||||
|
struct PathStat;
|
||||||
|
|
||||||
class DbUpdTask;
|
class DbUpdTask;
|
||||||
class InternfileTask;
|
class InternfileTask;
|
||||||
@ -54,8 +55,6 @@ public:
|
|||||||
*/
|
*/
|
||||||
FsIndexer(RclConfig *cnf, Rcl::Db *db);
|
FsIndexer(RclConfig *cnf, Rcl::Db *db);
|
||||||
virtual ~FsIndexer();
|
virtual ~FsIndexer();
|
||||||
FsIndexer(const FsIndexer&) = delete;
|
|
||||||
FsIndexer& operator=(const FsIndexer&) = delete;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Top level file system tree index method for updating a given database.
|
* Top level file system tree index method for updating a given database.
|
||||||
@ -158,7 +157,6 @@ private:
|
|||||||
processonefile(RclConfig *config, const string &fn,
|
processonefile(RclConfig *config, const string &fn,
|
||||||
const struct PathStat *,
|
const struct PathStat *,
|
||||||
const map<string,string>& localfields);
|
const map<string,string>& localfields);
|
||||||
void shutdownQueues(bool);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* _fsindexer_h_included_ */
|
#endif /* _fsindexer_h_included_ */
|
||||||
|
|||||||
@ -57,8 +57,6 @@ class DbIxStatusUpdater {
|
|||||||
public:
|
public:
|
||||||
DbIxStatusUpdater(const RclConfig *config, bool nox11monitor);
|
DbIxStatusUpdater(const RclConfig *config, bool nox11monitor);
|
||||||
virtual ~DbIxStatusUpdater(){}
|
virtual ~DbIxStatusUpdater(){}
|
||||||
DbIxStatusUpdater(const DbIxStatusUpdater&) = delete;
|
|
||||||
DbIxStatusUpdater& operator=(const DbIxStatusUpdater&) = delete;
|
|
||||||
|
|
||||||
enum Incr {IncrNone, IncrDocsDone = 0x1, IncrFilesDone = 0x2, IncrFileErrors = 0x4};
|
enum Incr {IncrNone, IncrDocsDone = 0x1, IncrFilesDone = 0x2, IncrFileErrors = 0x4};
|
||||||
// Change phase/fn and update
|
// Change phase/fn and update
|
||||||
|
|||||||
@ -59,10 +59,12 @@ bool runWebFilesMoverScript(RclConfig *config)
|
|||||||
static string downloadsdir;
|
static string downloadsdir;
|
||||||
if (downloadsdir.empty()) {
|
if (downloadsdir.empty()) {
|
||||||
if (!config->getConfParam("webdownloadsdir", downloadsdir)) {
|
if (!config->getConfParam("webdownloadsdir", downloadsdir)) {
|
||||||
downloadsdir = "~/Downloads";
|
downloadsdir = path_tildexpand("~/Downloads");
|
||||||
}
|
}
|
||||||
downloadsdir = path_tildexpand(downloadsdir);
|
|
||||||
}
|
}
|
||||||
|
vector<string> cmdvec;
|
||||||
|
config->pythonCmd("recoll-we-move-files.py", cmdvec);
|
||||||
|
|
||||||
/* Arrange to not actually run the script if the directory did not change */
|
/* Arrange to not actually run the script if the directory did not change */
|
||||||
static time_t dirmtime;
|
static time_t dirmtime;
|
||||||
time_t ndirmtime = 0;
|
time_t ndirmtime = 0;
|
||||||
@ -70,17 +72,17 @@ bool runWebFilesMoverScript(RclConfig *config)
|
|||||||
if (path_fileprops(downloadsdir.c_str(), &st) == 0) {
|
if (path_fileprops(downloadsdir.c_str(), &st) == 0) {
|
||||||
ndirmtime = st.pst_mtime;
|
ndirmtime = st.pst_mtime;
|
||||||
}
|
}
|
||||||
// If stat fails, presumably Downloads does not exist or is not accessible, dirmtime and
|
/* If stat fails, presumably Downloads does not exist or is not
|
||||||
// mdirmtime stay at 0, and we never execute the script, which is the right thing.
|
accessible, dirmtime and mdirmtime stay at 0, and we never
|
||||||
|
execute the script, which is the right thing. */
|
||||||
if (dirmtime != ndirmtime) {
|
if (dirmtime != ndirmtime) {
|
||||||
// The script is going to change the directory, so updating dirmtime before it runs means
|
/* The script is going to change the directory, so updating
|
||||||
// that we are going to execute it one time too many (it will run without doing anything),
|
dirmtime before it runs means that we are going to execute
|
||||||
// but we can't set the mtime to after the run in case files are created during the run.
|
it one time too many (it will run without doing anything),
|
||||||
|
but we can't set the mtime to after the run in case files
|
||||||
|
are created during the run. */
|
||||||
dirmtime = ndirmtime;
|
dirmtime = ndirmtime;
|
||||||
vector<string> cmdvec;
|
|
||||||
config->pythonCmd("recoll-we-move-files.py", cmdvec);
|
|
||||||
ExecCmd cmd;
|
ExecCmd cmd;
|
||||||
cmd.putenv("RECOLL_CONFDIR", config->getConfDir());
|
|
||||||
int status = cmd.doexec1(cmdvec);
|
int status = cmd.doexec1(cmdvec);
|
||||||
return status == 0;
|
return status == 0;
|
||||||
}
|
}
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user