diff --git a/src/common/rclinit.cpp b/src/common/rclinit.cpp index 37391888..1578a9e5 100644 --- a/src/common/rclinit.cpp +++ b/src/common/rclinit.cpp @@ -107,9 +107,15 @@ RclConfig *recollinit(RclInitFlags flags, // Make sure the locale charset is initialized (so that multiple // threads don't try to do it at once). config->getDefCharset(); + // Init unac locking unac_init_mt(); + // Init Unac translation exceptions + string unacex; + if (config->getConfParam("unac_except_trans", unacex) && !unacex.empty()) + unac_set_except_translations(unacex.c_str()); + int flushmb; if (config->getConfParam("idxflushmb", &flushmb) && flushmb > 0) { LOGDEB1(("rclinit: idxflushmb=%d, set XAPIAN_FLUSH_THRESHOLD to 10E6\n", diff --git a/src/common/unacpp.cpp b/src/common/unacpp.cpp index 280e8ef5..11d6775d 100644 --- a/src/common/unacpp.cpp +++ b/src/common/unacpp.cpp @@ -88,6 +88,7 @@ using namespace std; #include "unacpp.h" #include "readfile.h" +#include "rclinit.h" int main(int argc, char **argv) { @@ -98,8 +99,13 @@ int main(int argc, char **argv) } const char *encoding = argv[1]; string ifn = argv[2]; + if (!ifn.compare("stdin")) + ifn.clear(); const char *ofn = argv[3]; + string reason; + (void)recollinit(RCLINIT_NONE, 0, 0, reason, 0); + string odata; if (!file_to_string(ifn, odata)) { cerr << "file_to_string: " << odata << endl; @@ -111,7 +117,12 @@ int main(int argc, char **argv) exit(1); } - int fd = open(ofn, O_CREAT|O_EXCL|O_WRONLY, 0666); + int fd; + if (strcmp(ofn, "stdout")) { + fd = open(ofn, O_CREAT|O_EXCL|O_WRONLY, 0666); + } else { + fd = 1; + } if (fd < 0) { cerr << "Open/Create " << ofn << " failed: " << strerror(errno) << endl; diff --git a/src/doc/user/usermanual.sgml b/src/doc/user/usermanual.sgml index f8ca5075..939d07d4 100644 --- a/src/doc/user/usermanual.sgml +++ b/src/doc/user/usermanual.sgml @@ -34,6 +34,9 @@ This document introduces full text search notions and describes the installation and use of the &RCL; application. 
It currently describes &RCL; &RCLVERSION;. + @@ -3849,6 +3852,32 @@ skippedPaths = ~/somedir/∗.txt + unac_except_trans + This is a list of characters which should be + handled specially when converting text to unaccented lowercase. + For example, in Swedish, the letter a with diaeresis + has full alphabet citizenship and should not be + turned into an a. Each element in the + space-separated list has the special character as first element + and the translation following. The handling of both the + lowercase and uppercase versions of a character should be + specified, as membership in the list will turn off both + standard accent and case processing. Example for Swedish: + +unac_except_trans = åå Åå ää Ää öö Öö + + + Note that the translation is not limited to a single + character; you could very well have something like + üue in the list. + + This parameter can't be defined for subdirectories, it + is global, because there is no way to do otherwise when + querying. If you have document sets which would need different + values, you will have to index and query them separately. + + + maildefcharset This can be used to define the default character set specifically for email messages which don't diff --git a/src/doc/user/xmlmake.sh b/src/doc/user/xmlmake.sh index d3c13ed6..0cc228b6 100644 --- a/src/doc/user/xmlmake.sh +++ b/src/doc/user/xmlmake.sh @@ -14,6 +14,9 @@ # Wherever docbook.xsl and chunk.xsl live XSLDIR="/usr/local/share/xsl/docbook/" +dochunky=1 +test $# -eq 1 && dochunky=0 + # Remove the SGML header and uncomment the XML one sed -e '\!//FreeBSD//DTD!d' \ -e '\!DTD DocBook XML!s/