small mods innocuous or auxiliary to case/diac sensitivity but which can live in main branch

This commit is contained in:
Jean-Francois Dockes 2012-09-13 12:25:01 +02:00
parent 25f4fc3b2c
commit e0bc65bfdd
12 changed files with 247 additions and 159 deletions

View File

@ -96,6 +96,12 @@
/* Use file extended attributes */
#undef RCL_USE_XATTR
/* Use multiple threads for indexing */
#undef IDX_THREADS
/* Remove case and accents from terms */
#undef RCL_INDEX_STRIPCHARS
/* Define to 1 if you have the ANSI C header files. */
#undef STDC_HEADERS

261
src/configure vendored
View File

@ -1,11 +1,9 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.68 for Recoll 1.18.0.
# Generated by GNU Autoconf 2.69 for Recoll 1.18.0.
#
#
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
# 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free Software
# Foundation, Inc.
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
#
#
# This configure script is free software; the Free Software Foundation
@ -134,6 +132,31 @@ export LANGUAGE
# CDPATH.
(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
# Use a proper internal environment variable to ensure we don't fall
# into an infinite loop, continuously re-executing ourselves.
if test x"${_as_can_reexec}" != xno && test "x$CONFIG_SHELL" != x; then
_as_can_reexec=no; export _as_can_reexec;
# We cannot yet assume a decent shell, so we have to provide a
# neutralization value for shells without unset; and this also
# works around shells that cannot unset nonexistent variables.
# Preserve -v and -x to the replacement shell.
BASH_ENV=/dev/null
ENV=/dev/null
(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV
case $- in # ((((
*v*x* | *x*v* ) as_opts=-vx ;;
*v* ) as_opts=-v ;;
*x* ) as_opts=-x ;;
* ) as_opts= ;;
esac
exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"}
# Admittedly, this is quite paranoid, since all the known shells bail
# out after a failed `exec'.
$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2
as_fn_exit 255
fi
# We don't want this to propagate to other subprocesses.
{ _as_can_reexec=; unset _as_can_reexec;}
if test "x$CONFIG_SHELL" = x; then
as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then :
emulate sh
@ -167,7 +190,8 @@ if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then :
else
exitcode=1; echo positional parameters were not saved.
fi
test x\$exitcode = x0 || exit 1"
test x\$exitcode = x0 || exit 1
test -x / || exit 1"
as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO
as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO
eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" &&
@ -212,21 +236,25 @@ IFS=$as_save_IFS
if test "x$CONFIG_SHELL" != x; then :
# We cannot yet assume a decent shell, so we have to provide a
# neutralization value for shells without unset; and this also
# works around shells that cannot unset nonexistent variables.
# Preserve -v and -x to the replacement shell.
BASH_ENV=/dev/null
ENV=/dev/null
(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV
export CONFIG_SHELL
case $- in # ((((
*v*x* | *x*v* ) as_opts=-vx ;;
*v* ) as_opts=-v ;;
*x* ) as_opts=-x ;;
* ) as_opts= ;;
esac
exec "$CONFIG_SHELL" $as_opts "$as_myself" ${1+"$@"}
export CONFIG_SHELL
# We cannot yet assume a decent shell, so we have to provide a
# neutralization value for shells without unset; and this also
# works around shells that cannot unset nonexistent variables.
# Preserve -v and -x to the replacement shell.
BASH_ENV=/dev/null
ENV=/dev/null
(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV
case $- in # ((((
*v*x* | *x*v* ) as_opts=-vx ;;
*v* ) as_opts=-v ;;
*x* ) as_opts=-x ;;
* ) as_opts= ;;
esac
exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"}
# Admittedly, this is quite paranoid, since all the known shells bail
# out after a failed `exec'.
$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2
exit 255
fi
if test x$as_have_required = xno; then :
@ -328,6 +356,14 @@ $as_echo X"$as_dir" |
} # as_fn_mkdir_p
# as_fn_executable_p FILE
# -----------------------
# Test if FILE is an executable regular file.
as_fn_executable_p ()
{
test -f "$1" && test -x "$1"
} # as_fn_executable_p
# as_fn_append VAR VALUE
# ----------------------
# Append the text in VALUE to the end of the definition contained in VAR. Take
@ -449,6 +485,10 @@ as_cr_alnum=$as_cr_Letters$as_cr_digits
chmod +x "$as_me.lineno" ||
{ $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; }
# If we had to re-execute with $CONFIG_SHELL, we're ensured to have
# already done that, so ensure we don't try to do so again and fall
# in an infinite loop. This has already happened in practice.
_as_can_reexec=no; export _as_can_reexec
# Don't try to exec as it changes $[0], causing all sort of problems
# (the dirname of $[0] is not the place where we might find the
# original and so on. Autoconf is especially sensitive to this).
@ -483,16 +523,16 @@ if (echo >conf$$.file) 2>/dev/null; then
# ... but there are two gotchas:
# 1) On MSYS, both `ln -s file dir' and `ln file dir' fail.
# 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable.
# In both cases, we have to default to `cp -p'.
# In both cases, we have to default to `cp -pR'.
ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe ||
as_ln_s='cp -p'
as_ln_s='cp -pR'
elif ln conf$$.file conf$$ 2>/dev/null; then
as_ln_s=ln
else
as_ln_s='cp -p'
as_ln_s='cp -pR'
fi
else
as_ln_s='cp -p'
as_ln_s='cp -pR'
fi
rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file
rmdir conf$$.dir 2>/dev/null
@ -504,28 +544,8 @@ else
as_mkdir_p=false
fi
if test -x / >/dev/null 2>&1; then
as_test_x='test -x'
else
if ls -dL / >/dev/null 2>&1; then
as_ls_L_option=L
else
as_ls_L_option=
fi
as_test_x='
eval sh -c '\''
if test -d "$1"; then
test -d "$1/.";
else
case $1 in #(
-*)set "./$1";;
esac;
case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in #((
???[sx]*):;;*)false;;esac;fi
'\'' sh
'
fi
as_executable_p=$as_test_x
as_test_x='test -x'
as_executable_p=as_fn_executable_p
# Sed expression to map a string onto a valid CPP name.
as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
@ -693,7 +713,9 @@ with_aspell
with_inotify
with_fam
enable_xattr
enable_idxthreads
enable_camelcase
enable_stripchars
enable_python_module
enable_pic
enable_qtgui
@ -1171,8 +1193,6 @@ target=$target_alias
if test "x$host_alias" != x; then
if test "x$build_alias" = x; then
cross_compiling=maybe
$as_echo "$as_me: WARNING: if you wanted to set the --build type, don't use --host.
If a cross compiler is detected then cross compile mode will be used" >&2
elif test "x$build_alias" != "x$host_alias"; then
cross_compiling=yes
fi
@ -1337,6 +1357,8 @@ Optional Features:
creates them on (part of) your data set. You also
need to set up appropriate mappings in the
configuration.
--enable-idxthreads Enable multithread indexing. This can somewhat boost
indexing performance.
--enable-camelcase Enable splitting camelCase words. This is not
enabled by default as this makes phrase matches more
difficult: you need to use matching case in the
@ -1344,12 +1366,16 @@ Optional Features:
manual" and "my sql manual" are the same, but not
the same as "mysql manual" (in phrases only and you
could raise the phrase slack to get a match).
--enable-stripchars Remove diacritics and fold character case in indexed
terms. This will yield less precise searches but the
index will be smaller
--disable-python-module Do not build the Python module.
--enable-pic Do not compile library objects as position
--disable-pic Do not compile library objects as position
independant code. This is incompatible with the php
or python extensions.
--disable-qtgui Disable the QT-based graphical user interface.
--disable-webkit Disable use of qt-webkit.
--disable-webkit Disable use of qt-webkit (only meaningful if qtgui
is enabled).
--disable-x11mon Disable recollindex support for X11 session
monitoring.
@ -1361,7 +1387,7 @@ Optional Packages:
--without-aspell Disable use of aspell spelling package to provide
term expansion to other spellings
--with-inotify Use inotify for almost real time indexing of
modified files.
modified files (the default is yes on Linux).
--with-fam Use File Alteration Monitor for almost real time
indexing of modified files. Give the fam/gamin
library as argument (ie: /usr/lib/libfam.so) if
@ -1451,9 +1477,9 @@ test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
Recoll configure 1.18.0
generated by GNU Autoconf 2.68
generated by GNU Autoconf 2.69
Copyright (C) 2010 Free Software Foundation, Inc.
Copyright (C) 2012 Free Software Foundation, Inc.
This configure script is free software; the Free Software Foundation
gives unlimited permission to copy, distribute and modify it.
_ACEOF
@ -1764,7 +1790,7 @@ $as_echo "$ac_try_echo"; } >&5
test ! -s conftest.err
} && test -s conftest$ac_exeext && {
test "$cross_compiling" = yes ||
$as_test_x conftest$ac_exeext
test -x conftest$ac_exeext
}; then :
ac_retval=0
else
@ -1877,7 +1903,7 @@ $as_echo "$ac_try_echo"; } >&5
test ! -s conftest.err
} && test -s conftest$ac_exeext && {
test "$cross_compiling" = yes ||
$as_test_x conftest$ac_exeext
test -x conftest$ac_exeext
}; then :
ac_retval=0
else
@ -2004,7 +2030,7 @@ This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by Recoll $as_me 1.18.0, which was
generated by GNU Autoconf 2.68. Invocation command line was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@ -2384,7 +2410,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_CXX="$ac_tool_prefix$ac_prog"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@ -2428,7 +2454,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_ac_ct_CXX="$ac_prog"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@ -2907,7 +2933,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_CC="${ac_tool_prefix}gcc"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@ -2947,7 +2973,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_ac_ct_CC="gcc"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@ -3000,7 +3026,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_CC="${ac_tool_prefix}cc"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@ -3041,7 +3067,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then
ac_prog_rejected=yes
continue
@ -3099,7 +3125,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_CC="$ac_tool_prefix$ac_prog"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@ -3143,7 +3169,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_ac_ct_CC="$ac_prog"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@ -3339,8 +3365,7 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#include <stdarg.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
struct stat;
/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */
struct buf { int x; };
FILE * (*rcsopen) (struct buf *, struct stat *, int);
@ -3622,6 +3647,8 @@ _ACEOF
esac
rm -rf conftest*
fi
fi
@ -3780,7 +3807,7 @@ do
for ac_prog in grep ggrep; do
for ac_exec_ext in '' $ac_executable_extensions; do
ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext"
{ test -f "$ac_path_GREP" && $as_test_x "$ac_path_GREP"; } || continue
as_fn_executable_p "$ac_path_GREP" || continue
# Check for GNU ac_path_GREP and select it if it is found.
# Check for GNU $ac_path_GREP
case `"$ac_path_GREP" --version 2>&1` in
@ -3846,7 +3873,7 @@ do
for ac_prog in egrep; do
for ac_exec_ext in '' $ac_executable_extensions; do
ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext"
{ test -f "$ac_path_EGREP" && $as_test_x "$ac_path_EGREP"; } || continue
as_fn_executable_p "$ac_path_EGREP" || continue
# Check for GNU ac_path_EGREP and select it if it is found.
# Check for GNU $ac_path_EGREP
case `"$ac_path_EGREP" --version 2>&1` in
@ -4084,7 +4111,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_path_fileProg="$as_dir/$ac_word$ac_exec_ext"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@ -4160,7 +4187,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_path_aspellProg="$as_dir/$ac_word$ac_exec_ext"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@ -4313,7 +4340,8 @@ _ACEOF
fi
# Enable use of file extended attributes.
# Not by default as these are little used for now.
# Not by default as extended attributes are little used for now, and
# looking for them is not completely trivial
# Check whether --enable-xattr was given.
if test "${enable_xattr+set}" = set; then :
enableval=$enable_xattr; xattrEnabled=$enableval
@ -4328,6 +4356,26 @@ $as_echo "#define RCL_USE_XATTR 1" >>confdefs.h
fi
# Enable use of threads in the indexing pipeline.
# Threads are used in bucket-brigade fashion for the processing steps
# (reading file - text splitting - indexing proper). The performance
# increase can be significant, but this is disabled by default as we
# usually care little about indexing absolute performance (more about
# impact on usability and total resources used).
# Check whether --enable-idxthreads was given.
if test "${enable_idxthreads+set}" = set; then :
enableval=$enable_idxthreads; idxthreadsEnabled=$enableval
else
idxthreadsEnabled=no
fi
if test X$idxthreadsEnabled = Xyes ; then
$as_echo "#define IDX_THREADS 1" >>confdefs.h
fi
# Enable CamelCase word splitting. This is optional because it causes
# problems with phrases: with camelcase enabled, "MySQL manual"
# will be matched by "MySQL manual" and "my sql manual" but not
@ -4347,6 +4395,21 @@ $as_echo "#define RCL_SPLIT_CAMELCASE 1" >>confdefs.h
fi
# Remove diacritics and fold character case in indexed terms. This is
# disabled by default (searches are less precise, but the index is smaller).
# Check whether --enable-stripchars was given.
if test "${enable_stripchars+set}" = set; then :
enableval=$enable_stripchars; stripcharsEnabled=$enableval
else
stripcharsEnabled=no
fi
if test X$stripcharsEnabled = Xyes ; then
$as_echo "#define RCL_INDEX_STRIPCHARS 1" >>confdefs.h
fi
# Disable building the python module. This is built by default, because
# it's really the easiest way to interface and extend recoll. It forces PIC
# objects for everything (indexing performance impact: 1%), because it's
@ -4555,7 +4618,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_path_XAPIAN_CONFIG="$as_dir/$ac_word$ac_exec_ext"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@ -4676,7 +4739,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_path_QMAKEPATH="$as_dir/$ac_word$ac_exec_ext"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@ -4722,7 +4785,7 @@ fi
#echo "qmake version: $qmakevers"
v4=`expr "$qmakevers" : '.*Qt *version *4.*'`
if test X$v4 = X0 ; then
as_fn_error $? "qmake seems to indincate using Qt version 3 which is not supported any more" "$LINENO" 5
as_fn_error $? "qmake seems to be using Qt version 3 which is not supported any more" "$LINENO" 5
QTGUI=qtgui
else
{ $as_echo "$as_me:${as_lineno-$LINENO}: using qt version 4 user interface" >&5
@ -4769,6 +4832,8 @@ fi
QMAKE_DISABLE_WEBKIT=""
fi
##### Using QZeitGeist lib ? Default no for now
# Check whether --with-qzeitgeist was given.
@ -6110,16 +6175,16 @@ if (echo >conf$$.file) 2>/dev/null; then
# ... but there are two gotchas:
# 1) On MSYS, both `ln -s file dir' and `ln file dir' fail.
# 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable.
# In both cases, we have to default to `cp -p'.
# In both cases, we have to default to `cp -pR'.
ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe ||
as_ln_s='cp -p'
as_ln_s='cp -pR'
elif ln conf$$.file conf$$ 2>/dev/null; then
as_ln_s=ln
else
as_ln_s='cp -p'
as_ln_s='cp -pR'
fi
else
as_ln_s='cp -p'
as_ln_s='cp -pR'
fi
rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file
rmdir conf$$.dir 2>/dev/null
@ -6179,28 +6244,16 @@ else
as_mkdir_p=false
fi
if test -x / >/dev/null 2>&1; then
as_test_x='test -x'
else
if ls -dL / >/dev/null 2>&1; then
as_ls_L_option=L
else
as_ls_L_option=
fi
as_test_x='
eval sh -c '\''
if test -d "$1"; then
test -d "$1/.";
else
case $1 in #(
-*)set "./$1";;
esac;
case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in #((
???[sx]*):;;*)false;;esac;fi
'\'' sh
'
fi
as_executable_p=$as_test_x
# as_fn_executable_p FILE
# -----------------------
# Test if FILE is an executable regular file.
as_fn_executable_p ()
{
test -f "$1" && test -x "$1"
} # as_fn_executable_p
as_test_x='test -x'
as_executable_p=as_fn_executable_p
# Sed expression to map a string onto a valid CPP name.
as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
@ -6222,7 +6275,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# values after options handling.
ac_log="
This file was extended by Recoll $as_me 1.18.0, which was
generated by GNU Autoconf 2.68. Invocation command line was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
CONFIG_HEADERS = $CONFIG_HEADERS
@ -6284,10 +6337,10 @@ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
Recoll config.status 1.18.0
configured by $0, generated by GNU Autoconf 2.68,
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
Copyright (C) 2010 Free Software Foundation, Inc.
Copyright (C) 2012 Free Software Foundation, Inc.
This config.status script is free software; the Free Software Foundation
gives unlimited permission to copy, distribute and modify it."
@ -6375,7 +6428,7 @@ fi
_ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
if \$ac_cs_recheck; then
set X '$SHELL' '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion
set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion
shift
\$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6
CONFIG_SHELL='$SHELL'

View File

@ -104,7 +104,8 @@ fi
# Real time monitoring with inotify
AC_ARG_WITH(inotify,
AC_HELP_STRING([--with-inotify],
[Use inotify for almost real time indexing of modified files.]),
[Use inotify for almost real time indexing of modified files (the default
is yes on Linux).]),
withInotify=$withval, withInotify=$inot_default)
if test X$withInotify != Xno ; then
@ -163,7 +164,8 @@ if test X$withFam != Xno ; then
fi
# Enable use of file extended attributes.
# Not by default as these are little used for now.
# Not by default as extended attributes are little used for now, and
# looking for them is not completely trivial
AC_ARG_ENABLE(xattr,
AC_HELP_STRING([--enable-xattr],
[Enable fetching metadata from file extended attributes. This is only
@ -175,6 +177,22 @@ if test X$xattrEnabled = Xyes ; then
AC_DEFINE(RCL_USE_XATTR, 1, [Use file extended attributes])
fi
# Enable use of threads in the indexing pipeline. Threads are used in
# bucket-brigade fashion for the processing steps (reading file - text
# splitting - indexing proper). The performance increase is small in normal
# case (might be a bit more significant if you're using an SSD), and this
# is disabled by default as we usually care little about indexing absolute
# performance (more about impact on usability and total resources used).
AC_ARG_ENABLE(idxthreads,
AC_HELP_STRING([--enable-idxthreads],
[Enable multithread indexing. This can somewhat boost indexing
performance.]),
idxthreadsEnabled=$enableval, idxthreadsEnabled=no)
if test X$idxthreadsEnabled = Xyes ; then
AC_DEFINE(IDX_THREADS, 1, [Use multiple threads for indexing])
fi
# Enable CamelCase word splitting. This is optional because it causes
# problems with phrases: with camelcase enabled, "MySQL manual"
# will be matched by "MySQL manual" and "my sql manual" but not
@ -194,6 +212,17 @@ if test X$camelcaseEnabled = Xyes ; then
AC_DEFINE(RCL_SPLIT_CAMELCASE, 1, [Split camelCase words])
fi
# Remove diacritics and fold character case in indexed terms. This is
# disabled by default (searches are less precise, but the index is smaller).
AC_ARG_ENABLE(stripchars,
AC_HELP_STRING([--enable-stripchars],
[Remove diacritics and fold character case in indexed terms. This will
yield less precise searches but the index will be smaller]),
stripcharsEnabled=$enableval, stripcharsEnabled=no)
if test X$stripcharsEnabled = Xyes ; then
AC_DEFINE(RCL_INDEX_STRIPCHARS, 1, [Remove case and accents from terms])
fi
# Disable building the python module. This is built by default, because
# it's really the easiest way to interface and extend recoll. It forces PIC
# objects for everything (indexing performance impact: 1%), because it's
@ -214,7 +243,7 @@ fi
# Build PIC objects for the library ?
AC_ARG_ENABLE(pic,
AC_HELP_STRING([--enable-pic],
AC_HELP_STRING([--disable-pic],
[Do not compile library objects as position independant code.
This is incompatible with the php or python extensions.]),
picEnabled=$enableval, picEnabled=forpython)
@ -433,7 +462,7 @@ else
##### Using Qt webkit for reslist display? Else Qt textbrowser
AC_ARG_ENABLE(webkit,
AC_HELP_STRING([--disable-webkit],
[Disable use of qt-webkit.]),
[Disable use of qt-webkit (only meaningful if qtgui is enabled).]),
enableWebkit=$enableval, enableWebkit="yes")
if test "$enableWebkit" = "yes" ; then
@ -444,6 +473,8 @@ else
QMAKE_DISABLE_WEBKIT=""
fi
##### Using QZeitGeist lib ? Default no for now
AC_ARG_WITH(qzeitgeist,
AC_HELP_STRING([--with-qzeitgeist],

View File

@ -14,9 +14,7 @@
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifdef HAVE_CONFIG_H
#include "autoconfig.h"
#endif
#include <stdio.h>
#include <sys/stat.h>

View File

@ -258,10 +258,11 @@ bool ConfIndexer::createStemmingDatabases()
bool ConfIndexer::createStemDb(const string &lang)
{
if (!m_db.open(Rcl::Db::DbUpd)) {
if (!m_db.open(Rcl::Db::DbUpd))
return false;
}
return m_db.createStemDbs(vector<string>(1, lang));
vector<string> langs;
stringToStrings(lang, langs);
return m_db.createStemDbs(langs);
}
// The language for the aspell dictionary is handled internally by the aspell

View File

@ -274,15 +274,6 @@ Dijon::Filter *getMimeHandler(const string &mtype, RclConfig *cfg,
// We get here if there was no specific error, but there is no
// identified mime type, or no handler associated.
#ifdef INDEX_UNKNOWN_TEXT_AS_PLAIN
// If the type is an unknown text/xxx, index as text/plain and
// hope for the best (this wouldn't work too well with text/rtf...)
if (mtype.find("text/") == 0) {
h = mhFactory(cstr_textplain);
goto out;
}
#endif
// Finally, unhandled files are either ignored or their name and
// generic metadata is indexed, depending on configuration
{bool indexunknown = false;

View File

@ -102,6 +102,8 @@ static char usage [] =
" -m : dump the whole document meta[] array for each result\n"
" -A : output the document abstracts\n"
" -S fld : sort by field <fld>\n"
" -s stemlang : set stemming language to use (must exist in index...)\n"
" Use -s \"\" to turn off stem expansion\n"
" -D : sort descending\n"
" -i <dbdir> : additional index, several can be given\n"
" -e use url encoding (%xx) for urls\n"

View File

@ -166,6 +166,9 @@ static Rcl::SearchData *wasaQueryToRcl(RclConfig *config, WasaQuery *wasa,
}
// "Regular" processing follows:
unsigned int mods = (unsigned int)(*it)->m_modifiers;
nclause = 0;
switch ((*it)->m_op) {
case WasaQuery::OP_NULL:
case WasaQuery::OP_AND:
@ -192,8 +195,6 @@ static Rcl::SearchData *wasaQueryToRcl(RclConfig *config, WasaQuery *wasa,
}
}
unsigned int mods = (unsigned int)(*it)->m_modifiers;
// I'm not sure I understand the phrase/near detection
// thereafter anymore, maybe it would be better to have an
// explicit flag. Mods can only be set after a double
@ -216,12 +217,6 @@ static Rcl::SearchData *wasaQueryToRcl(RclConfig *config, WasaQuery *wasa,
LOGERR(("wasaQueryToRcl: out of memory\n"));
return 0;
}
if (mods & WasaQuery::WQM_NOSTEM) {
nclause->addModifier(Rcl::SearchDataClause::SDCM_NOSTEMMING);
}
if ((*it)->m_weight != 1.0)
nclause->setWeight((*it)->m_weight);
sdata->addClause(nclause);
}
break;
@ -248,11 +243,6 @@ static Rcl::SearchData *wasaQueryToRcl(RclConfig *config, WasaQuery *wasa,
LOGERR(("wasaQueryToRcl: out of memory\n"));
return 0;
}
if ((*it)->m_modifiers & WasaQuery::WQM_NOSTEM)
nclause->setModifiers(Rcl::SearchDataClause::SDCM_NOSTEMMING);
if ((*it)->m_weight != 1.0)
nclause->setWeight((*it)->m_weight);
sdata->addClause(nclause);
break;
case WasaQuery::OP_OR:
@ -272,10 +262,17 @@ static Rcl::SearchData *wasaQueryToRcl(RclConfig *config, WasaQuery *wasa,
reason = "Out of memory";
return 0;
}
if ((*it)->m_modifiers & WasaQuery::WQM_NOSTEM)
nclause->setModifiers(Rcl::SearchDataClause::SDCM_NOSTEMMING);
sdata->addClause(nclause);
}
if (mods & WasaQuery::WQM_NOSTEM)
nclause->addModifier(Rcl::SearchDataClause::SDCM_NOSTEMMING);
if (mods & WasaQuery::WQM_DIACSENS)
nclause->addModifier(Rcl::SearchDataClause::SDCM_DIACSENS);
if (mods & WasaQuery::WQM_CASESENS)
nclause->addModifier(Rcl::SearchDataClause::SDCM_CASESENS);
if ((*it)->m_weight != 1.0)
nclause->setWeight((*it)->m_weight);
sdata->addClause(nclause);
}
return sdata;

View File

@ -179,7 +179,7 @@ private:
class SearchDataClause {
public:
enum Modifier {SDCM_NONE=0, SDCM_NOSTEMMING=1, SDCM_ANCHORSTART=2,
SDCM_ANCHOREND=4};
SDCM_ANCHOREND=4, SDCM_CASESENS=8, SDCM_DIACSENS=16};
SearchDataClause(SClType tp)
: m_tp(tp), m_parentSearch(0), m_haveWildCards(0),

View File

@ -16,7 +16,8 @@ topdirs = ~
# ignore. If you need to index mozilla/thunderbird mail folders, don't put
# ".*" in there (as was the case with an older sample config)
# These are simple names, not paths (must contain no / )
skippedNames = #* bin CVS Cache cache* caughtspam tmp .thumbnails .svn \
skippedNames = #* bin CVS Cache cache* .cache caughtspam tmp \
.thumbnails .svn \
*~ .beagle .git .hg .bzr loop.ps .xsession-errors \
.recoll* xapiandb recollrc recoll.conf
@ -73,8 +74,13 @@ indexstemminglanguages = english
# first element and the translation following. The handling of both the
# lowercase and upper-case versions of a character should be specified, as
# membership in the list will turn off both standard accent and case
# processing. Example for Swedish:
# processing. Examples:
# Swedish:
# unac_except_trans = åå Åå ää Ää öö Öö
# German:
# unac_except_trans = Ää Öö Üü ää öö üü ßss
# In French, you probably want to decompose oe and ae
# unac_except_trans = œoe Œoe æae Æae
# Where to store the database (directory). This may be an absolute path,
# else it is taken as relative to the configuration directory (-c argument

View File

@ -24,7 +24,9 @@ struct HighlightData {
/** Processed/expanded terms and groups. Used for looking for
* regions to highlight. Terms are just groups with 1 entry. All
* terms in there are unaccented, and the list may include values
* terms are transformed to be compatible with index content
* (unaccented and lowercased as needed depending on
* configuration), and the list may include values
* expanded from the original terms by stem or wildcard expansion.
*/
std::vector<std::vector<std::string> > groups;

View File

@ -20,6 +20,7 @@
#ifdef UTF8ITER_CHECK
#include "assert.h"
#endif
#include <string>
/**
* A small helper class to iterate over utf8 strings. This is not an
@ -30,13 +31,13 @@
*/
class Utf8Iter {
public:
Utf8Iter(const string &in)
Utf8Iter(const std::string &in)
: m_s(in), m_cl(0), m_pos(0), m_charpos(0), m_error(false)
{
update_cl();
}
const string& buffer() const {return m_s;}
const std::string& buffer() const {return m_s;}
void rewind()
{
@ -52,7 +53,7 @@ public:
* current position */
unsigned int operator[](unsigned int charpos) const
{
string::size_type mypos = 0;
std::string::size_type mypos = 0;
unsigned int mycp = 0;
if (charpos >= m_charpos) {
mypos = m_pos;
@ -75,7 +76,7 @@ public:
}
/** Increment current position to next utf-8 char */
string::size_type operator++(int)
std::string::size_type operator++(int)
{
// Note: m_cl may be zero at eof if user's test not right
// this shouldn't crash the program until actual data access
@ -83,7 +84,7 @@ public:
assert(m_cl != 0);
#endif
if (m_cl <= 0)
return string::npos;
return std::string::npos;
m_pos += m_cl;
m_charpos++;
@ -102,7 +103,7 @@ public:
/** Append current utf-8 possibly multi-byte character to string param.
This needs to be fast. No error checking. */
unsigned int appendchartostring(string &out) {
unsigned int appendchartostring(std::string &out) {
#ifdef UTF8ITER_CHECK
assert(m_cl != 0);
#endif
@ -111,7 +112,7 @@ public:
}
/** Return current character as string */
operator string() {
operator std::string() {
#ifdef UTF8ITER_CHECK
assert(m_cl != 0);
#endif
@ -127,39 +128,39 @@ public:
}
/** Return current byte offset in input string */
string::size_type getBpos() const {
std::string::size_type getBpos() const {
return m_pos;
}
/** Return current character length */
string::size_type getBlen() const {
std::string::size_type getBlen() const {
return m_cl;
}
/** Return current unicode character offset in input string */
string::size_type getCpos() const {
std::string::size_type getCpos() const {
return m_charpos;
}
private:
// String we're working with
const string& m_s;
const std::string& m_s;
// Character length at current position. A value of zero indicates
// an error.
unsigned int m_cl;
// Current byte offset in string.
string::size_type m_pos;
std::string::size_type m_pos;
// Current character position
unsigned int m_charpos;
// Am I ok ?
mutable bool m_error;
// Check position and cl against string length
bool poslok(string::size_type p, int l) const {
bool poslok(std::string::size_type p, int l) const {
#ifdef UTF8ITER_CHECK
assert(p != string::npos && l > 0 && p + l <= m_s.length());
assert(p != std::string::npos && l > 0 && p + l <= m_s.length());
#endif
return p != string::npos && l > 0 && p + l <= m_s.length();
return p != std::string::npos && l > 0 && p + l <= m_s.length();
}
// Update current char length in object state, minimum checking
@ -180,7 +181,7 @@ private:
}
// Get character byte length at specified position. Returns 0 for error.
inline int get_cl(string::size_type p) const
inline int get_cl(std::string::size_type p) const
{
unsigned int z = (unsigned char)m_s[p];
if (z <= 127) {
@ -200,7 +201,7 @@ private:
}
// Compute value at given position. No error checking.
inline unsigned int getvalueat(string::size_type p, int l) const
inline unsigned int getvalueat(std::string::size_type p, int l) const
{
switch (l) {
case 1: