diff --git a/packaging/debian/debian/changelog b/packaging/debian/debian/changelog index 1766cd84..17c1f1e4 100644 --- a/packaging/debian/debian/changelog +++ b/packaging/debian/debian/changelog @@ -1,5 +1,5 @@ -recoll (3153-1.1) experimental; urgency=low - +recoll (1.20.0-1.1) raring; urgency=low + * local package testing new release build -- Jean-Francois Dockes Sun, 30 Dec 2012 12:29:46 +0530 diff --git a/packaging/debian/debian/control b/packaging/debian/debian/control index eb6c23fc..129b2ba5 100644 --- a/packaging/debian/debian/control +++ b/packaging/debian/debian/control @@ -10,8 +10,9 @@ Build-Depends: autotools-dev, libx11-dev, libxapian-dev (>= 1.0.15), libz-dev, - python-all-dev (>= 2.6.6-3~) -Standards-Version: 3.9.3 + python-all-dev (>= 2.6.6-3~), + python3-all-dev, +Standards-Version: 3.9.4 X-Python-Version: >= 2.7 Homepage: http://www.lesbonscomptes.com/recoll Vcs-Git: git://anonscm.debian.org/collab-maint/recoll.git @@ -57,7 +58,7 @@ Description: Personal full text search package with a Qt GUI Package: python-recoll Architecture: any Section: python -Depends: recoll (>= ${source:Version}), +Depends: recoll (= ${binary:Version}), ${misc:Depends}, ${python:Depends}, ${shlibs:Depends} @@ -67,3 +68,17 @@ Description: Python extension for recoll . This package provides Python extension module for recoll which can be use to extend recoll such as an Ubuntu Unity Lens. + +Package: python3-recoll +Architecture: any +Section: python +Depends: recoll (= ${binary:Version}), + ${misc:Depends}, + ${python3:Depends}, + ${shlibs:Depends} +Description: Python extension for recoll + Personal full text search package which is based on a very strong backend + (Xapian), for which it provides an easy to use and feature-rich interface. + . + This package provides Python extension module for recoll which can be use to + extend recoll such as an Ubuntu Unity Lens. 
diff --git a/packaging/debian/debian/python-recoll.install b/packaging/debian/debian/python-recoll.install index ca40d507..ae42f11b 100644 --- a/packaging/debian/debian/python-recoll.install +++ b/packaging/debian/debian/python-recoll.install @@ -1,2 +1,2 @@ -usr/lib/python*/*-packages/*.egg-info -usr/lib/python*/*-packages/recoll/* +usr/lib/python2*/*-packages/*.egg-info +usr/lib/python2*/*-packages/recoll/* diff --git a/packaging/debian/debian/python3-recoll.install b/packaging/debian/debian/python3-recoll.install new file mode 100644 index 00000000..b0bb287d --- /dev/null +++ b/packaging/debian/debian/python3-recoll.install @@ -0,0 +1,2 @@ +usr/lib/python3*/*-packages/*.egg-info +usr/lib/python3*/*-packages/recoll/* diff --git a/packaging/debian/debian/rules b/packaging/debian/debian/rules index ea4a247a..64b39b6b 100755 --- a/packaging/debian/debian/rules +++ b/packaging/debian/debian/rules @@ -33,7 +33,7 @@ build-arch: build-stamp build-indep: build-stamp build-stamp: config.status dh_testdir - $(MAKE) + $(MAKE) -j 5 touch $@ clean: @@ -53,6 +53,9 @@ install: (cd python/recoll; python setup.py install \ --install-layout=deb \ --prefix=$(CURDIR)/debian/tmp/usr ) + (cd python/recoll; python3 setup.py install \ + --install-layout=deb \ + --prefix=$(CURDIR)/debian/tmp/usr ) binary-arch: build install dh_testdir @@ -63,7 +66,8 @@ binary-arch: build install dh_installman dh_install --sourcedir=debian/tmp dh_makeshlibs - dh_python2 + dh_python2 -p python-recoll + dh_python3 -p python3-recoll dh_link dh_strip dh_compress diff --git a/src/INSTALL b/src/INSTALL index 17fb5fd8..72961176 100644 --- a/src/INSTALL +++ b/src/INSTALL @@ -294,6 +294,15 @@ Chapter 5. Installation and configuration to manually copy and modify one of the existing files (the new file name should be the output of uname -s). + 5.3.2.1. Building on Solaris + + We did not test building the GUI on Solaris for recent versions. You will + need at least Qt 4.4. 
There are some hints on an old web site page, they + may still be valid. + + Someone did test the 1.19 indexer and Python module build, they do work, + with a few minor glitches. Be sure to use GNU make and install. + 5.3.3. Installation Either type make install or execute recollinstall prefix, in the root of @@ -342,12 +351,25 @@ Chapter 5. Installation and configuration by comments inside the default files, and we will just give a general overview here. - For each index, there are two sets of configuration files. System-wide - configuration files are kept in a directory named like + By default, for each index, there are two sets of configuration files. + System-wide configuration files are kept in a directory named like /usr/[local/]share/recoll/examples, and define default values, shared by all indexes. For each index, a parallel set of files defines the customized parameters. + In addition (as of Recoll version 1.19.7), it is possible to specify two + additional configuration directories which will be stacked before and + after the user configuration directory. These are defined by the + RECOLL_CONFTOP and RECOLL_CONFMID environment variables. Values from + configuration files inside the top directory will override user ones, + values from configuration files inside the middle directory will override + system ones and be overridden by user ones. These two variables may be of + use to applications which augment Recoll functionality, and need to add + configuration data without disturbing the user's files. Please note that + the two, currently single, values will probably be interpreted as + colon-separated lists in the future: do not use colon characters inside + the directory paths. + The default location of the configuration is the .recoll directory in your home. Most people will only use this directory. @@ -411,7 +433,7 @@ Chapter 5. Installation and configuration text files with appropriate encodings, and concatenate them to create the complete configuration. 
- 5.4.1. Main configuration file + 5.4.1. The main configuration file, recoll.conf recoll.conf is the main configuration file. It defines things like what to index (top directories and things to ignore), and the default character @@ -437,7 +459,7 @@ Chapter 5. Installation and configuration skippedNames - A space-separated list of patterns for names of files or + A space-separated list of wildcard patterns for names of files or directories that should be completely ignored. The list defined in the default file is: @@ -488,6 +510,16 @@ Chapter 5. Installation and configuration can set skippedPathsFnmPathname to 0 to disable the use of FNM_PATHNAME (meaning that /*/dir3 will match /dir1/dir2/dir3). + zipSkippedNames + + A space-separated list of patterns for names of files or + directories that should be ignored inside zip archives. This is + used directly by the zip filter, and has a function similar to + skippedNames, but works independently. Can be redefined for + filesystem subdirectories. For versions up to 1.19, you will need + to update the Zip filter and install a supplementary Python + module. The details are described on the Recoll wiki. + followLinks Specifies if the indexer should follow symbolic links while @@ -679,17 +711,41 @@ Chapter 5. Installation and configuration = val, then select specifier viewer with mimetype|tag=... in mimeview. + noxattrfields + + Recoll versions 1.19 and later automatically translate file + extended attributes into document fields (to be processed + according to the parameters from the fields file). Setting this + variable to 1 will disable the behaviour. + metadatacmds This allows executing external commands for each file and storing - the output in a Recoll field. This could be used for example to - index external tag data. The value is a list of field names and - commands, don't forget an initial semi-colon. Example: + the output in Recoll document fields. This could be used for + example to index external tag data. 
The value is a list of field + names and commands, don't forget an initial semi-colon. Example: [/some/area/of/the/fs] metadatacmds = ; tags = tmsu tags %f; otherfield = somecmd -xx %f + As a specially disgusting hack brought by Recoll 1.19.7, if a + "field name" begins with rclmulti, the data returned by the + command is expected to contain multiple field values, in + configuration file format. This allows setting several fields by + executing a single command. Example: + + metadatacmds = ; rclmulti1 = somecmd %f + + + If somecmd returns data in the form of: + + field1 = value1 + field2 = value for field2 + + + field1 and field2 will be set inside the document metadata. + 5.4.1.3. Parameters affecting where and how we store things: dbdir @@ -746,7 +802,7 @@ Chapter 5. Installation and configuration memory, you can try higher values between 20 and 80. In my experience, values beyond 100 are always counterproductive. - 5.4.1.4. Indexing parallelism configuration + 5.4.1.4. Parameters affecting multithread processing The Recoll indexing process recollindex can use multiple threads to speed up indexing on multiprocessor systems. The work done to index files is @@ -774,7 +830,7 @@ Chapter 5. Installation and configuration stage. In practise, deep queues have not been shown to increase performance. A value of 0 for the first queue tells Recoll to perform autoconfiguration (no need for the two other values in - this case)- this is the default configuration. + this case) - this is the default configuration. thrTCounts @@ -804,6 +860,11 @@ Chapter 5. Installation and configuration thrQSizes = 2 -1 -1 thrTCounts = 6 1 1 + The following example would disable multithreading. Indexing will be + performed by a single thread. + + thrQSizes = -1 -1 -1 + 5.4.1.5. 
Miscellaneous parameters: autodiacsens diff --git a/src/Makefile.in b/src/Makefile.in index e50b3292..e608f3fe 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -42,9 +42,9 @@ clean: ${MAKE} -C common clean ${MAKE} -C index clean ${MAKE} -C internfile clean - ${MAKE} -C lib clean ${MAKE} -C query clean ${MAKE} -C utils clean + -${MAKE} -C lib clean -${MAKE} -C desktop/unity-lens-recoll clean -${MAKE} -C python/recoll clean @NOQTMAKE@@-${MAKE} -C $(QTGUI) clean diff --git a/src/README b/src/README index a4740c8b..41e907d3 100644 --- a/src/README +++ b/src/README @@ -174,7 +174,7 @@ More documentation can be found in the doc/ directory or at http://www.recoll.or 5.4. Configuration overview - 5.4.1. Main configuration file + 5.4.1. The main configuration file, recoll.conf 5.4.2. The fields file @@ -416,11 +416,11 @@ Chapter 2. Indexing to be indexed. In the latter case, any type not in the list will be ignored. - Excluding types can be done by adding name patterns to the skippedNames - list, which can be done from the GUI Index configuration menu. It is also - possible to exclude a mime type independantly of the file name by - associating it with the rclnull filter. This can be done by editing the - mimeconf configuration file. + Excluding types can be done by adding wildcard name patterns to the + skippedNames list, which can be done from the GUI Index configuration + menu. It is also possible to exclude a mime type independantly of the file + name by associating it with the rclnull filter. This can be done by + editing the mimeconf configuration file. In order to define a positive list, You need to edit the main configuration file (recoll.conf) and set the indexedmimetypes @@ -627,6 +627,11 @@ Chapter 2. Indexing probably slightly slower, and the feature is still young, so that a certain amount of weirdness cannot be excluded. 
+ One of the most adverse consequences of using a raw index is that some + phrase and proximity searches may become impossible: because each term + needs to be expanded, and all combinations searched for, the + multiplicative expansion may become unmanageable. + 2.3.3. The index configuration GUI Most parameters for a given index configuration can be set from a recoll @@ -860,6 +865,24 @@ Chapter 2. Indexing it if your system is short on resources. Periodic indexing is adequate in most cases. + Increasing resources for inotify + + On Linux systems, monitoring a big tree may imply increasing the resources + available to inotify, which are normally defined in /etc/sysctl.conf. + + ### inotify + # + # cat /proc/sys/fs/inotify/max_queued_events - 16384 + # cat /proc/sys/fs/inotify/max_user_instances - 128 + # cat /proc/sys/fs/inotify/max_user_watches - 16384 + # + # -- Change to: + # + fs.inotify.max_queued_events=32768 + fs.inotify.max_user_instances=256 + fs.inotify.max_user_watches=32768 + + 2.8.1. Slowing down the reindexing rate for fast changing files When using the real time monitor, it may happen that some files need to be @@ -2702,14 +2725,22 @@ Chapter 4. Programming interface 4.3.2.1. Introduction Recoll versions after 1.11 define a Python programming interface, both for - searching and indexing. + searching and indexing. The indexing portion has seen little use, but the + searching one is used in the Recoll Ubuntu Unity Lens and Recoll Web UI. - The API is inspired by the Python database API specification, version 1.0 - for Recoll versions up to 1.18, version 2.0 for Recoll versions 1.19 and - later. The package structure changed with Recoll 1.19 too. We will mostly - describe the new API and package structure here. A paragraph at the end of - this section will explain a few differences and ways to write code - compatible with both versions. + The API is inspired by the Python database API specification. 
There were + two major changes in recent Recoll versions: + + o The basis for the Recoll API changed from Python database API version + 1.0 (Recoll versions up to 1.18.1), to version 2.0 (Recoll 1.18.2 and + later). + o The recoll module became a package (with an internal recoll module) as + of Recoll version 1.19, in order to add more functions. For existing + code, this only changes the way the interface must be imported. + + We will mostly describe the new API and package structure here. A + paragraph at the end of this section will explain a few differences and + ways to write code compatible with both versions. The Python interface can be found in the source package, under python/recoll. @@ -2723,6 +2754,12 @@ Chapter 4. Programming interface python setup.py install + The normal Recoll installer installs the Python API along with the main + code. + + When installing from a repository, and depending on the distribution, the + Python API can sometimes be found in a separate package. + 4.3.2.2. Recoll package The recoll package contains two modules: @@ -2766,7 +2803,17 @@ Chapter 4. Programming interface These aliases return a blank Query object for this index. Db.setAbstractParams(maxchars, contextwords) - Set the parameters used to build snippets. + Set the parameters used to build snippets (sets of keywords in + context text fragments). maxchars defines the maximum total size + of the abstract. contextwords defines how many terms are shown + around the keyword. + + Db.termMatch(match_type, expr, field='', maxlen=-1, casesens=False, + diacsens=False, lang='english') + Expand an expression against the index term list. Performs the + basic function from the GUI term explorer tool. match_type can be + either of wildcard, regexp or stem. Returns a list of terms + expanded from the input expression. The Query class @@ -2794,7 +2841,7 @@ Chapter 4. Programming interface Fetches the next Doc object from the current search results. 
Query.close() - Closes the connection. The object is unusable after the call. + Closes the query. The object is unusable after the call. Query.scroll(value, mode='relative') Adjusts the position in the current result set. mode can be @@ -2803,9 +2850,9 @@ Chapter 4. Programming interface Query.getgroups() Retrieves the expanded query terms as a list of pairs. Meaningful only after executexx In each pair, the first entry is a list of - user terms, the second a list of query terms as derived from the - user terms and used in the Xapian Query. The size of each list is - one for simple terms, or more for group and phrase clauses. + user terms (of size one for simple terms, or more for group and + phrase clauses), the second a list of query terms as derived from + the user terms and used in the Xapian Query. Query.getxquery() Return the Xapian query description as a Unicode string. @@ -2837,8 +2884,8 @@ Chapter 4. Programming interface Query.rownumber Next index to be fetched from results. Normally increments after - each fetchone() call, but can be set/reset before the call effect - seeking. Starts at 0. + each fetchone() call, but can be set/reset before the call to + effect seeking (equivalent to using scroll()). Starts at 0. The Doc class @@ -2887,11 +2934,13 @@ Chapter 4. Programming interface 4.3.2.4. The rclextract module - Document content is not provided by an index query. To access it, the data - extraction part of the indexing process must be performed (subdocument - access and format translation). This is not trivial in general. The - rclextract module currently provides a single class which can be used to - access the data content for result documents. + Index queries do not provide document content (only a partial and + imprecise reconstruction is performed to show the snippets text). In order + to access the actual document data, the data extraction part of the + indexing process must be performed (subdocument access and format + translation). 
This is not trivial in general. The rclextract module + currently provides a single class which can be used to access the data + content for result documents. Classes @@ -2905,13 +2954,23 @@ Chapter 4. Programming interface Extractor.textextract(ipath) Extract document defined by ipath and return a Doc object. The - doc.text field has the document text as either text/plain or - text/html according to doc.mimetype. + doc.text field has the document text converted to either + text/plain or text/html according to doc.mimetype. The typical use + would be as follows: - Extractor.idoctofile() + qdoc = query.fetchone() + extractor = recoll.Extractor(qdoc) + doc = extractor.textextract(qdoc.ipath) + # use doc.text, e.g. for previewing + + Extractor.idoctofile(ipath, targetmtype, outfile='') Extracts document into an output file, which can be given explicitly or will be created as a temporary file to be deleted by - the caller. + the caller. Typical use: + + qdoc = query.fetchone() + extractor = recoll.Extractor(qdoc) + filename = extractor.idoctofile(qdoc.ipath, qdoc.mimetype) 4.3.2.5. Example code @@ -3224,6 +3283,15 @@ Chapter 5. Installation and configuration to manually copy and modify one of the existing files (the new file name should be the output of uname -s). + 5.3.2.1. Building on Solaris + + We did not test building the GUI on Solaris for recent versions. You will + need at least Qt 4.4. There are some hints on an old web site page, they + may still be valid. + + Someone did test the 1.19 indexer and Python module build, they do work, + with a few minor glitches. Be sure to use GNU make and install. + 5.3.3. Installation Either type make install or execute recollinstall prefix, in the root of @@ -3259,12 +3327,25 @@ Chapter 5. Installation and configuration by comments inside the default files, and we will just give a general overview here. - For each index, there are two sets of configuration files. 
System-wide - configuration files are kept in a directory named like + By default, for each index, there are two sets of configuration files. + System-wide configuration files are kept in a directory named like /usr/[local/]share/recoll/examples, and define default values, shared by all indexes. For each index, a parallel set of files defines the customized parameters. + In addition (as of Recoll version 1.19.7), it is possible to specify two + additional configuration directories which will be stacked before and + after the user configuration directory. These are defined by the + RECOLL_CONFTOP and RECOLL_CONFMID environment variables. Values from + configuration files inside the top directory will override user ones, + values from configuration files inside the middle directory will override + system ones and be overridden by user ones. These two variables may be of + use to applications which augment Recoll functionality, and need to add + configuration data without disturbing the user's files. Please note that + the two, currently single, values will probably be interpreted as + colon-separated lists in the future: do not use colon characters inside + the directory paths. + The default location of the configuration is the .recoll directory in your home. Most people will only use this directory. @@ -3328,7 +3409,7 @@ Chapter 5. Installation and configuration text files with appropriate encodings, and concatenate them to create the complete configuration. - 5.4.1. Main configuration file + 5.4.1. The main configuration file, recoll.conf recoll.conf is the main configuration file. It defines things like what to index (top directories and things to ignore), and the default character @@ -3354,7 +3435,7 @@ Chapter 5. Installation and configuration skippedNames - A space-separated list of patterns for names of files or + A space-separated list of wildcard patterns for names of files or directories that should be completely ignored. 
The list defined in the default file is: @@ -3405,6 +3486,16 @@ Chapter 5. Installation and configuration can set skippedPathsFnmPathname to 0 to disable the use of FNM_PATHNAME (meaning that /*/dir3 will match /dir1/dir2/dir3). + zipSkippedNames + + A space-separated list of patterns for names of files or + directories that should be ignored inside zip archives. This is + used directly by the zip filter, and has a function similar to + skippedNames, but works independently. Can be redefined for + filesystem subdirectories. For versions up to 1.19, you will need + to update the Zip filter and install a supplementary Python + module. The details are described on the Recoll wiki. + followLinks Specifies if the indexer should follow symbolic links while @@ -3596,17 +3687,41 @@ Chapter 5. Installation and configuration = val, then select specifier viewer with mimetype|tag=... in mimeview. + noxattrfields + + Recoll versions 1.19 and later automatically translate file + extended attributes into document fields (to be processed + according to the parameters from the fields file). Setting this + variable to 1 will disable the behaviour. + metadatacmds This allows executing external commands for each file and storing - the output in a Recoll field. This could be used for example to - index external tag data. The value is a list of field names and - commands, don't forget an initial semi-colon. Example: + the output in Recoll document fields. This could be used for + example to index external tag data. The value is a list of field + names and commands, don't forget an initial semi-colon. Example: [/some/area/of/the/fs] metadatacmds = ; tags = tmsu tags %f; otherfield = somecmd -xx %f + As a specially disgusting hack brought by Recoll 1.19.7, if a + "field name" begins with rclmulti, the data returned by the + command is expected to contain multiple field values, in + configuration file format. This allows setting several fields by + executing a single command. 
Example: + + metadatacmds = ; rclmulti1 = somecmd %f + + + If somecmd returns data in the form of: + + field1 = value1 + field2 = value for field2 + + + field1 and field2 will be set inside the document metadata. + 5.4.1.3. Parameters affecting where and how we store things: dbdir @@ -3663,7 +3778,7 @@ Chapter 5. Installation and configuration memory, you can try higher values between 20 and 80. In my experience, values beyond 100 are always counterproductive. - 5.4.1.4. Indexing parallelism configuration + 5.4.1.4. Parameters affecting multithread processing The Recoll indexing process recollindex can use multiple threads to speed up indexing on multiprocessor systems. The work done to index files is @@ -3691,7 +3806,7 @@ Chapter 5. Installation and configuration stage. In practise, deep queues have not been shown to increase performance. A value of 0 for the first queue tells Recoll to perform autoconfiguration (no need for the two other values in - this case)- this is the default configuration. + this case) - this is the default configuration. thrTCounts @@ -3721,6 +3836,11 @@ Chapter 5. Installation and configuration thrQSizes = 2 -1 -1 thrTCounts = 6 1 1 + The following example would disable multithreading. Indexing will be + performed by a single thread. + + thrQSizes = -1 -1 -1 + 5.4.1.5. 
Miscellaneous parameters: autodiacsens diff --git a/src/mk/manifest.txt b/src/mk/manifest.txt index 299f36eb..8404afe7 100644 --- a/src/mk/manifest.txt +++ b/src/mk/manifest.txt @@ -46,6 +46,7 @@ common/textsplit.cpp common/textsplit.h common/unacpp.cpp common/unacpp.h +common/unordered_defs.h common/uproplist.h configure configure.ac @@ -83,6 +84,29 @@ desktop/unity-lens-recoll/py-compile desktop/unity-lens-recoll/recollscope/ desktop/unity-lens-recoll/recollscope/__init__.py desktop/unity-lens-recoll/recollscope/rclsearch.py +desktop/unity-scope-recoll/ +desktop/unity-scope-recoll/COPYING +desktop/unity-scope-recoll/MANIFEST.in +desktop/unity-scope-recoll/README +desktop/unity-scope-recoll/__init__.py +desktop/unity-scope-recoll/debian/ +desktop/unity-scope-recoll/debian/changelog +desktop/unity-scope-recoll/debian/compat +desktop/unity-scope-recoll/debian/control +desktop/unity-scope-recoll/debian/copyright +desktop/unity-scope-recoll/debian/rules +desktop/unity-scope-recoll/debian/source/ +desktop/unity-scope-recoll/debian/source/format +desktop/unity-scope-recoll/debian/watch +desktop/unity-scope-recoll/recoll.scope +desktop/unity-scope-recoll/scratch.txt +desktop/unity-scope-recoll/service-recoll.svg +desktop/unity-scope-recoll/setup.py +desktop/unity-scope-recoll/unity-scope-recoll.application.in +desktop/unity-scope-recoll/unity-scope-recoll.desktop +desktop/unity-scope-recoll/unity-scope-recoll.png +desktop/unity-scope-recoll/unity-scope-recoll.service +desktop/unity-scope-recoll/unity_recoll_daemon.py desktop/xdg-utils-1.0.1/ desktop/xdg-utils-1.0.1/LICENSE desktop/xdg-utils-1.0.1/scripts/ @@ -213,6 +237,8 @@ index/subtreelist.h internfile/ internfile/Filter.h internfile/Makefile +internfile/extrameta.cpp +internfile/extrameta.h internfile/htmlparse.cpp internfile/htmlparse.h internfile/indextext.h @@ -260,11 +286,11 @@ kde/kioslave/kio_recoll/recoll.protocol kde/kioslave/kio_recoll/recollf.protocol kde/kioslave/kio_recoll/recollnolist.protocol lib/ 
-lib/Makefile -lib/mkMake +lib/mkMake.in makestaticdist.sh mk/ mk/AIX +mk/CYGWIN mk/Darwin mk/Default mk/FreeBSD diff --git a/src/python/recoll/Makefile b/src/python/recoll/Makefile index 89168f7e..2e1ccced 100644 --- a/src/python/recoll/Makefile +++ b/src/python/recoll/Makefile @@ -4,7 +4,8 @@ all: install: sudo python setup.py install clean: - rm -rf build + rm -rf build rm -f recoll/__init__.pyc + rm -rf recoll/__pycache__ distclean: clean - rm -f setup.py + rm -f setup.py