comments doc and formatting

2012-08-24 10:26:16 +02:00 · 2012-08-24 10:26:16 +02:00 · ee9dbda9fc
commit ee9dbda9fc
parent d24a3afdf8
5 changed files with 35 additions and 16 deletions
--- a/src/doc/user/usermanual.sgml
+++ b/src/doc/user/usermanual.sgml
@ -1,4 +1,5 @@
 <!-- Use this header for the FreeBSD sgml toolchain -->
+<!-- NOTE: the sgml version should be saved as ISO-8859-1. -->
 <!DOCTYPE BOOK PUBLIC "-//FreeBSD//DTD DocBook V4.1-Based Extension//EN" [

 <!-- Use this header for going XML -->
@ -3792,11 +3793,13 @@ while query.next >= 0 and query.next < nres:
        List elements with embedded spaces can be quoted using
        double-quotes.</para>

-      <formalpara><title>Encoding issues</title>
+      <formalpara>
+        <title>Encoding issues</title>
        <para>Most of the configuration parameters are plain ASCII. Two
        particular sets of values may cause encoding issues:</para>
-
-        <itemizedlist>
+      </formalpara>
+      <para>
+      <itemizedlist>
        <listitem><para>File path parameters may contain non-ascii
        characters and should use the exact same byte values as found in
        the file system directory. Usually, this means that the
@ -3813,8 +3816,8 @@ while query.next >= 0 and query.next < nres:
        configuration.</para>
        </listitem>
      </itemizedlist>
-      </formalpara>
-        
+      </para>
+
      <sect2 id="rcl.install.config.recollconf">
        <title>Main configuration file</title>

@ -4092,12 +4095,12 @@ skippedPaths = ~/somedir/&lowast;.txt
            the list will turn-off both standard accent and case
            processing. Example for Swedish:</para>
                <programlisting>
-unac_except_trans =  åå Åå ää Ää öö Öö
+unac_except_trans =  åå Åå ää Ää öö Öö
            </programlisting>

            <para>Note that the translation is not limited to a single
            character, you could very well have something like
-            <literal>üue</literal> in the list.</para>
+            <literal>üue</literal> in the list.</para>

             <para>This parameter can't be defined for subdirectories, it
             is global, because there is no way to do otherwise when
--- a/src/doc/user/xmlmake.sh
+++ b/src/doc/user/xmlmake.sh
@ -10,18 +10,27 @@
 #     usermanual.html#RCL.CONFIG.INDEXING won't work because fragments are
 #     case-sensitive. This could be solved by converting all ids inside the
 #     source file to upper-case.
+#   - No simple way to produce pdf

 # Wherever docbook.xsl and chunk.xsl live
-XSLDIR="/opt/local/share/xsl/docbook-xsl/"
+# FreeBSD
+XSLDIR="/usr/local/share/xsl/docbook/"
+# Mac
+#XSLDIR="/opt/local/share/xsl/docbook-xsl/"
+#Linux
+#XSLDIR="/usr/share/xml/docbook/stylesheet/docbook-xsl/"

 dochunky=1
 test $# -eq 1 && dochunky=0

-# Remove the SGML header and uncomment the XML one
+# Remove the SGML header, uncomment the XML one, and convert the result
+# from ISO-8859-1 to UTF-8
 sed -e '\!//FreeBSD//DTD!d' \
    -e '\!DTD DocBook XML!s/<!--//' \
    -e '\!/docbookx.dtd!s/-->//' \
-    < usermanual.sgml > usermanual.xml
+    < usermanual.sgml \
+    | iconv -f iso-8859-1 -t utf-8 \
+    > usermanual.xml

 # Options common to the single-file and chunked versions
 commonoptions="--stringparam section.autolabel 1 \
--- a/src/index/fsindexer.cpp
+++ b/src/index/fsindexer.cpp
@ -406,10 +406,11 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp,
    interner.setMissingStore(m_missing);

    // File name transcoded to utf8 for indexing. 
-    string charset = m_config->getDefCharset(true);
    // If this fails, the file name won't be indexed, no big deal
    // Note that we used to do the full path here, but I ended up believing
    // that it made more sense to use only the file name
+    // The charset used is the one from the locale.
+    string charset = m_config->getDefCharset(true);
    string utf8fn; int ercnt;
    if (!transcode(path_getsimple(fn), utf8fn, charset, "UTF-8", &ercnt)) {
 	LOGERR(("processone: fn transcode failure from [%s] to UTF-8: %s\n",
--- a/src/qtgui/advsearch_w.cpp
+++ b/src/qtgui/advsearch_w.cpp
@ -428,6 +428,7 @@ void AdvSearch::runSearch()
 	entries.push_back(subtreeCMB->currentText());
 	entries.sort();
 	entries.unique();
+	LOGDEB(("Subtree list now has %d entries\n", entries.size()));
 	subtreeCMB->clear();
 	for (list<QString>::iterator it = entries.begin(); 
 	     it != entries.end(); it++) {
--- a/src/rcldb/termproc.h
+++ b/src/rcldb/termproc.h
@ -19,7 +19,6 @@
 #ifndef _TERMPROC_H_INCLUDED_
 #define _TERMPROC_H_INCLUDED_

-
 #include "textsplit.h"
 #include "stoplist.h"

@ -74,7 +73,7 @@ private:
 * Specialized TextSplit class: this will probably replace the base
 * TextSplit when we've converted all the code. The takeword() routine in this
 * calls a TermProc's takeword() instead of being overridden in a user-derived class.
- * The text_to_word() method also takes care of flushing.
+ * The text_to_words() method also takes care of flushing.
 */
 class TextSplitP : public TextSplit {
 public:
@ -110,7 +109,9 @@ private:
 class TermProcPrep : public TermProc {
 public:
    TermProcPrep(TermProc *nxt)	
-	: TermProc(nxt), m_totalterms(0), m_unacerrors(0) {}
+	: TermProc(nxt), m_totalterms(0), m_unacerrors(0) 
+    {
+    }

    virtual bool takeword(const string& itrm, int pos, int bs, int be)
    {
@ -148,7 +149,9 @@ private:
 class TermProcStop : public TermProc {
 public:
    TermProcStop(TermProc *nxt, const Rcl::StopList& stops)
-	: TermProc(nxt), m_stops(stops) {}
+	: TermProc(nxt), m_stops(stops) 
+    {
+    }

    virtual bool takeword(const string& term, int pos, int bs, int be)
    {
@ -171,7 +174,9 @@ private:
 class TermProcCommongrams : public TermProc {
 public:
    TermProcCommongrams(TermProc *nxt, const Rcl::StopList& stops)
-	: TermProc(nxt), m_stops(stops), m_onlygrams(false) { }
+	: TermProc(nxt), m_stops(stops), m_onlygrams(false) 
+    {
+    }

    virtual bool takeword(const string& term, int pos, int bs, int be)
    {