diff --git a/src/doc/user/usermanual.sgml b/src/doc/user/usermanual.sgml
index 4f0ca928..0a6e3131 100644
--- a/src/doc/user/usermanual.sgml
+++ b/src/doc/user/usermanual.sgml
@@ -1,4 +1,5 @@
+
@@ -3792,11 +3793,13 @@ while query.next >= 0 and query.next < nres:
List elements with embedded spaces can be quoted using
double-quotes.
- Encoding issues
+
+ Encoding issues
Most of the configuration parameters are plain ASCII. Two
particular sets of values may cause encoding issues:
-
-
+
+
+
File path parameters may contain non-ascii
characters and should use the exact same byte values as found in
the file system directory. Usually, this means that the
@@ -3813,8 +3816,8 @@ while query.next >= 0 and query.next < nres:
configuration.
-
-
+
+
Main configuration file
@@ -4092,12 +4095,12 @@ skippedPaths = ~/somedir/∗.txt
the list will turn-off both standard accent and case
processing. Example for Swedish:
-unac_except_trans = åå Åå ää Ää öö Öö
+unac_except_trans = åå Åå ää Ää öö Öö
Note that the translation is not limited to a single
character, you could very well have something like
- üue in the list.
+ üue in the list.
This parameter can't be defined for subdirectories, it
is global, because there is no way to do otherwise when
diff --git a/src/doc/user/xmlmake.sh b/src/doc/user/xmlmake.sh
index fb4bc164..187e29f9 100644
--- a/src/doc/user/xmlmake.sh
+++ b/src/doc/user/xmlmake.sh
@@ -10,18 +10,27 @@
# usermanual.html#RCL.CONFIG.INDEXING won't work because fragments are
# case-sensitive. This could be solved by converting all ids inside the
# source file to upper-case.
+# - No simple way to produce pdf
# Wherever docbook.xsl and chunk.xsl live
-XSLDIR="/opt/local/share/xsl/docbook-xsl/"
+# FreeBSD
+XSLDIR="/usr/local/share/xsl/docbook/"
+# Mac
+#XSLDIR="/opt/local/share/xsl/docbook-xsl/"
+# Linux
+#XSLDIR="/usr/share/xml/docbook/stylesheet/docbook-xsl/"
dochunky=1
test $# -eq 1 && dochunky=0
-# Remove the SGML header and uncomment the XML one
+# Remove the SGML header and uncomment the XML one, then convert the result
+# from iso-8859-1 to utf-8
sed -e '\!//FreeBSD//DTD!d' \
-e '\!DTD DocBook XML!s///' \
- < usermanual.sgml > usermanual.xml
+ < usermanual.sgml \
+ | iconv -f iso-8859-1 -t utf-8 \
+ > usermanual.xml
# Options common to the single-file and chunked versions
commonoptions="--stringparam section.autolabel 1 \
diff --git a/src/index/fsindexer.cpp b/src/index/fsindexer.cpp
index 14b68598..8ff85709 100644
--- a/src/index/fsindexer.cpp
+++ b/src/index/fsindexer.cpp
@@ -406,10 +406,11 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp,
interner.setMissingStore(m_missing);
// File name transcoded to utf8 for indexing.
- string charset = m_config->getDefCharset(true);
// If this fails, the file name won't be indexed, no big deal
// Note that we used to do the full path here, but I ended up believing
// that it made more sense to use only the file name
+ // The charset used is the one from the locale.
+ string charset = m_config->getDefCharset(true);
string utf8fn; int ercnt;
if (!transcode(path_getsimple(fn), utf8fn, charset, "UTF-8", &ercnt)) {
LOGERR(("processone: fn transcode failure from [%s] to UTF-8: %s\n",
diff --git a/src/qtgui/advsearch_w.cpp b/src/qtgui/advsearch_w.cpp
index 12e05842..e308389d 100644
--- a/src/qtgui/advsearch_w.cpp
+++ b/src/qtgui/advsearch_w.cpp
@@ -428,6 +428,7 @@ void AdvSearch::runSearch()
entries.push_back(subtreeCMB->currentText());
entries.sort();
entries.unique();
+ LOGDEB(("Subtree list now has %d entries\n", entries.size()));
subtreeCMB->clear();
for (list::iterator it = entries.begin();
it != entries.end(); it++) {
diff --git a/src/rcldb/termproc.h b/src/rcldb/termproc.h
index 226a150d..def66d13 100644
--- a/src/rcldb/termproc.h
+++ b/src/rcldb/termproc.h
@@ -19,7 +19,6 @@
#ifndef _TERMPROC_H_INCLUDED_
#define _TERMPROC_H_INCLUDED_
-
#include "textsplit.h"
#include "stoplist.h"
@@ -74,7 +73,7 @@ private:
* Specialized TextSplit class: this will probably replace the base
* TextSplit when we've converted all the code. The takeword() routine in this
* calls a TermProc's instead of being overriden in a user derived class.
- * The text_to_word() method also takes care of flushing.
+ * The text_to_words() method also takes care of flushing.
*/
class TextSplitP : public TextSplit {
public:
@@ -110,7 +109,9 @@ private:
class TermProcPrep : public TermProc {
public:
TermProcPrep(TermProc *nxt)
- : TermProc(nxt), m_totalterms(0), m_unacerrors(0) {}
+ : TermProc(nxt), m_totalterms(0), m_unacerrors(0)
+ {
+ }
virtual bool takeword(const string& itrm, int pos, int bs, int be)
{
@@ -148,7 +149,9 @@ private:
class TermProcStop : public TermProc {
public:
TermProcStop(TermProc *nxt, const Rcl::StopList& stops)
- : TermProc(nxt), m_stops(stops) {}
+ : TermProc(nxt), m_stops(stops)
+ {
+ }
virtual bool takeword(const string& term, int pos, int bs, int be)
{
@@ -171,7 +174,9 @@ private:
class TermProcCommongrams : public TermProc {
public:
TermProcCommongrams(TermProc *nxt, const Rcl::StopList& stops)
- : TermProc(nxt), m_stops(stops), m_onlygrams(false) { }
+ : TermProc(nxt), m_stops(stops), m_onlygrams(false)
+ {
+ }
virtual bool takeword(const string& term, int pos, int bs, int be)
{