diff --git a/src/common/rclinit.cpp b/src/common/rclinit.cpp
index 37391888..1578a9e5 100644
--- a/src/common/rclinit.cpp
+++ b/src/common/rclinit.cpp
@@ -107,9 +107,15 @@ RclConfig *recollinit(RclInitFlags flags,
// Make sure the locale charset is initialized (so that multiple
// threads don't try to do it at once).
config->getDefCharset();
+
// Init unac locking
unac_init_mt();
+ // Init Unac translation exceptions
+ string unacex;
+ if (config->getConfParam("unac_except_trans", unacex) && !unacex.empty())
+ unac_set_except_translations(unacex.c_str());
+
int flushmb;
if (config->getConfParam("idxflushmb", &flushmb) && flushmb > 0) {
LOGDEB1(("rclinit: idxflushmb=%d, set XAPIAN_FLUSH_THRESHOLD to 10E6\n",
diff --git a/src/common/unacpp.cpp b/src/common/unacpp.cpp
index 280e8ef5..11d6775d 100644
--- a/src/common/unacpp.cpp
+++ b/src/common/unacpp.cpp
@@ -88,6 +88,7 @@ using namespace std;
#include "unacpp.h"
#include "readfile.h"
+#include "rclinit.h"
int main(int argc, char **argv)
{
@@ -98,8 +99,13 @@ int main(int argc, char **argv)
}
const char *encoding = argv[1];
string ifn = argv[2];
+ if (!ifn.compare("stdin"))
+ ifn.clear();
const char *ofn = argv[3];
+ string reason;
+ (void)recollinit(RCLINIT_NONE, 0, 0, reason, 0);
+
string odata;
if (!file_to_string(ifn, odata)) {
cerr << "file_to_string: " << odata << endl;
@@ -111,7 +117,12 @@ int main(int argc, char **argv)
exit(1);
}
- int fd = open(ofn, O_CREAT|O_EXCL|O_WRONLY, 0666);
+ int fd;
+ if (strcmp(ofn, "stdout")) {
+ fd = open(ofn, O_CREAT|O_EXCL|O_WRONLY, 0666);
+ } else {
+ fd = 1;
+ }
if (fd < 0) {
cerr << "Open/Create " << ofn << " failed: " << strerror(errno)
<< endl;
diff --git a/src/doc/user/usermanual.sgml b/src/doc/user/usermanual.sgml
index f8ca5075..939d07d4 100644
--- a/src/doc/user/usermanual.sgml
+++ b/src/doc/user/usermanual.sgml
@@ -34,6 +34,9 @@
This document introduces full text search notions
and describes the installation and use of the &RCL;
application. It currently describes &RCL; &RCLVERSION;.
+
@@ -3849,6 +3852,32 @@ skippedPaths = ~/somedir/∗.txt
+ unac_except_trans
+ This is a list of characters which should be
+ handled specially when converting text to unaccented lowercase.
+ For example, in Swedish, the letter a with diaeresis
+ has full alphabet citizenship and should not be
+ turned into an a. Each element in the
+ space-separated list has the special character as its first
+ character, followed by its translation. The handling of both the
+ lowercase and uppercase versions of a character should be
+ specified, as membership in the list will turn off both
+ standard accent and case processing. Example for Swedish:
+
+unac_except_trans = åå Åå ää Ää öö Öö
+
+
+ Note that the translation is not limited to a single
+ character: you could very well have something like
+ üue in the list.
+
+ This parameter is global and can't be defined for
+ subdirectories, because there is no way to apply
+ per-directory values when querying. If you have document sets which
+ would need different values, you will have to index and query them separately.
+
+
+
maildefcharset
This can be used to define the default
character set specifically for email messages which don't
diff --git a/src/doc/user/xmlmake.sh b/src/doc/user/xmlmake.sh
index d3c13ed6..0cc228b6 100644
--- a/src/doc/user/xmlmake.sh
+++ b/src/doc/user/xmlmake.sh
@@ -14,6 +14,9 @@
# Wherever docbook.xsl and chunk.xsl live
XSLDIR="/usr/local/share/xsl/docbook/"
+dochunky=1
+test $# -eq 1 && dochunky=0
+
# Remove the SGML header and uncomment the XML one
sed -e '\!//FreeBSD//DTD!d' \
-e '\!DTD DocBook XML!s/