diff --git a/src/doc/user/usermanual.sgml b/src/doc/user/usermanual.sgml index 4f0ca928..0a6e3131 100644 --- a/src/doc/user/usermanual.sgml +++ b/src/doc/user/usermanual.sgml @@ -1,4 +1,5 @@ + @@ -3792,11 +3793,13 @@ while query.next >= 0 and query.next < nres: List elements with embedded spaces can be quoted using double-quotes. - Encoding issues + + Encoding issues Most of the configuration parameters are plain ASCII. Two particular sets of values may cause encoding issues: - - + + + File path parameters may contain non-ascii characters and should use the exact same byte values as found in the file system directory. Usually, this means that the @@ -3813,8 +3816,8 @@ while query.next >= 0 and query.next < nres: configuration. - - + + Main configuration file @@ -4092,12 +4095,12 @@ skippedPaths = ~/somedir/∗.txt the list will turn-off both standard accent and case processing. Example for Swedish: -unac_except_trans = åå Ã…Ã¥ ää Ää öö Öö +unac_except_trans = åå Åå ää Ää öö Öö Note that the translation is not limited to a single character, you could very well have something like - üue in the list. + üue in the list. This parameter can't be defined for subdirectories, it is global, because there is no way to do otherwise when diff --git a/src/doc/user/xmlmake.sh b/src/doc/user/xmlmake.sh index fb4bc164..187e29f9 100644 --- a/src/doc/user/xmlmake.sh +++ b/src/doc/user/xmlmake.sh @@ -10,18 +10,27 @@ # usermanual.html#RCL.CONFIG.INDEXING won't work because fragments are # case-sensitive. This could be solved by converting all ids inside the # source file to upper-case. 
+# - No simple way to produce pdf # Wherever docbook.xsl and chunk.xsl live -XSLDIR="/opt/local/share/xsl/docbook-xsl/" +# Fbsd +XSLDIR="/usr/local/share/xsl/docbook/" +# Mac +#XSLDIR="/opt/local/share/xsl/docbook-xsl/" +#Linux +#XSLDIR="/usr/share/xml/docbook/stylesheet/docbook-xsl/" dochunky=1 test $# -eq 1 && dochunky=0 -# Remove the SGML header and uncomment the XML one +# Remove the SGML header and uncomment the XML one + convert from iso-8859-1 +# to utf-8 sed -e '\!//FreeBSD//DTD!d' \ -e '\!DTD DocBook XML!s///' \ - < usermanual.sgml > usermanual.xml + < usermanual.sgml \ + | iconv -f iso-8859-1 -t utf-8 \ + > usermanual.xml # Options common to the single-file and chunked versions commonoptions="--stringparam section.autolabel 1 \ diff --git a/src/index/fsindexer.cpp b/src/index/fsindexer.cpp index 14b68598..8ff85709 100644 --- a/src/index/fsindexer.cpp +++ b/src/index/fsindexer.cpp @@ -406,10 +406,11 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp, interner.setMissingStore(m_missing); // File name transcoded to utf8 for indexing. - string charset = m_config->getDefCharset(true); // If this fails, the file name won't be indexed, no big deal // Note that we used to do the full path here, but I ended up believing // that it made more sense to use only the file name + // The charset used is the one from the locale. 
+ string charset = m_config->getDefCharset(true); string utf8fn; int ercnt; if (!transcode(path_getsimple(fn), utf8fn, charset, "UTF-8", &ercnt)) { LOGERR(("processone: fn transcode failure from [%s] to UTF-8: %s\n", diff --git a/src/qtgui/advsearch_w.cpp b/src/qtgui/advsearch_w.cpp index 12e05842..e308389d 100644 --- a/src/qtgui/advsearch_w.cpp +++ b/src/qtgui/advsearch_w.cpp @@ -428,6 +428,7 @@ void AdvSearch::runSearch() entries.push_back(subtreeCMB->currentText()); entries.sort(); entries.unique(); + LOGDEB(("Subtree list now has %d entries\n", entries.size())); subtreeCMB->clear(); for (list::iterator it = entries.begin(); it != entries.end(); it++) { diff --git a/src/rcldb/termproc.h b/src/rcldb/termproc.h index 226a150d..def66d13 100644 --- a/src/rcldb/termproc.h +++ b/src/rcldb/termproc.h @@ -19,7 +19,6 @@ #ifndef _TERMPROC_H_INCLUDED_ #define _TERMPROC_H_INCLUDED_ - #include "textsplit.h" #include "stoplist.h" @@ -74,7 +73,7 @@ private: * Specialized TextSplit class: this will probably replace the base * TextSplit when we've converted all the code. The takeword() routine in this * calls a TermProc's instead of being overriden in a user derived class. - * The text_to_word() method also takes care of flushing. + * The text_to_words() method also takes care of flushing. 
*/ class TextSplitP : public TextSplit { public: @@ -110,7 +109,9 @@ private: class TermProcPrep : public TermProc { public: TermProcPrep(TermProc *nxt) - : TermProc(nxt), m_totalterms(0), m_unacerrors(0) {} + : TermProc(nxt), m_totalterms(0), m_unacerrors(0) + { + } virtual bool takeword(const string& itrm, int pos, int bs, int be) { @@ -148,7 +149,9 @@ private: class TermProcStop : public TermProc { public: TermProcStop(TermProc *nxt, const Rcl::StopList& stops) - : TermProc(nxt), m_stops(stops) {} + : TermProc(nxt), m_stops(stops) + { + } virtual bool takeword(const string& term, int pos, int bs, int be) { @@ -171,7 +174,9 @@ private: class TermProcCommongrams : public TermProc { public: TermProcCommongrams(TermProc *nxt, const Rcl::StopList& stops) - : TermProc(nxt), m_stops(stops), m_onlygrams(false) { } + : TermProc(nxt), m_stops(stops), m_onlygrams(false) + { + } virtual bool takeword(const string& term, int pos, int bs, int be) {