comments doc and formatting

This commit is contained in:
Jean-Francois Dockes 2012-08-24 10:26:16 +02:00
parent d24a3afdf8
commit ee9dbda9fc
5 changed files with 35 additions and 16 deletions

View File

@ -1,4 +1,5 @@
<!-- Use this header for the FreeBSD sgml toolchain -->
<!-- NOTE: the sgml version should be saved as ISO-8859-1. -->
<!DOCTYPE BOOK PUBLIC "-//FreeBSD//DTD DocBook V4.1-Based Extension//EN" [
<!-- Use this header for going XML -->
@ -3792,11 +3793,13 @@ while query.next >= 0 and query.next < nres:
List elements with embedded spaces can be quoted using
double-quotes.</para>
<formalpara><title>Encoding issues</title>
<formalpara>
<title>Encoding issues</title>
<para>Most of the configuration parameters are plain ASCII. Two
particular sets of values may cause encoding issues:</para>
<itemizedlist>
</formalpara>
<para>
<itemizedlist>
<listitem><para>File path parameters may contain non-ascii
characters and should use the exact same byte values as found in
the file system directory. Usually, this means that the
@ -3813,8 +3816,8 @@ while query.next >= 0 and query.next < nres:
configuration.</para>
</listitem>
</itemizedlist>
</formalpara>
</para>
<sect2 id="rcl.install.config.recollconf">
<title>Main configuration file</title>
@ -4092,12 +4095,12 @@ skippedPaths = ~/somedir/&lowast;.txt
the list will turn off both standard accent and case
processing. Example for Swedish:</para>
<programlisting>
unac_except_trans = åå Åå ää Ää öö Öö
unac_except_trans = åå Åå ää Ää öö Öö
</programlisting>
<para>Note that the translation is not limited to a single
character, you could very well have something like
<literal>üue</literal> in the list.</para>
<literal>üue</literal> in the list.</para>
<para>This parameter can't be defined for subdirectories, it
is global, because there is no way to do otherwise when

View File

@ -10,18 +10,27 @@
# usermanual.html#RCL.CONFIG.INDEXING won't work because fragments are
# case-sensitive. This could be solved by converting all ids inside the
# source file to upper-case.
# - No simple way to produce pdf
# Wherever docbook.xsl and chunk.xsl live
XSLDIR="/opt/local/share/xsl/docbook-xsl/"
# Fbsd
XSLDIR="/usr/local/share/xsl/docbook/"
# Mac
#XSLDIR="/opt/local/share/xsl/docbook-xsl/"
#Linux
#XSLDIR="/usr/share/xml/docbook/stylesheet/docbook-xsl/"
dochunky=1
test $# -eq 1 && dochunky=0
# Remove the SGML header and uncomment the XML one
# Remove the SGML header and uncomment the XML one + convert from iso-8859-1
# to utf-8
sed -e '\!//FreeBSD//DTD!d' \
-e '\!DTD DocBook XML!s/<!--//' \
-e '\!/docbookx.dtd!s/-->//' \
< usermanual.sgml > usermanual.xml
< usermanual.sgml \
| iconv -f iso-8859-1 -t utf-8 \
> usermanual.xml
# Options common to the single-file and chunked versions
commonoptions="--stringparam section.autolabel 1 \

View File

@ -406,10 +406,11 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp,
interner.setMissingStore(m_missing);
// File name transcoded to utf8 for indexing.
string charset = m_config->getDefCharset(true);
// If this fails, the file name won't be indexed, no big deal
// Note that we used to do the full path here, but I ended up believing
// that it made more sense to use only the file name
// The charset used is the one from the locale.
string charset = m_config->getDefCharset(true);
string utf8fn; int ercnt;
if (!transcode(path_getsimple(fn), utf8fn, charset, "UTF-8", &ercnt)) {
LOGERR(("processone: fn transcode failure from [%s] to UTF-8: %s\n",

View File

@ -428,6 +428,7 @@ void AdvSearch::runSearch()
entries.push_back(subtreeCMB->currentText());
entries.sort();
entries.unique();
LOGDEB(("Subtree list now has %d entries\n", entries.size()));
subtreeCMB->clear();
for (list<QString>::iterator it = entries.begin();
it != entries.end(); it++) {

View File

@ -19,7 +19,6 @@
#ifndef _TERMPROC_H_INCLUDED_
#define _TERMPROC_H_INCLUDED_
#include "textsplit.h"
#include "stoplist.h"
@ -74,7 +73,7 @@ private:
* Specialized TextSplit class: this will probably replace the base
* TextSplit when we've converted all the code. The takeword() routine in this
* calls a TermProc's instead of being overridden in a user derived class.
* The text_to_word() method also takes care of flushing.
* The text_to_words() method also takes care of flushing.
*/
class TextSplitP : public TextSplit {
public:
@ -110,7 +109,9 @@ private:
class TermProcPrep : public TermProc {
public:
TermProcPrep(TermProc *nxt)
: TermProc(nxt), m_totalterms(0), m_unacerrors(0) {}
: TermProc(nxt), m_totalterms(0), m_unacerrors(0)
{
}
virtual bool takeword(const string& itrm, int pos, int bs, int be)
{
@ -148,7 +149,9 @@ private:
class TermProcStop : public TermProc {
public:
TermProcStop(TermProc *nxt, const Rcl::StopList& stops)
: TermProc(nxt), m_stops(stops) {}
: TermProc(nxt), m_stops(stops)
{
}
virtual bool takeword(const string& term, int pos, int bs, int be)
{
@ -171,7 +174,9 @@ private:
class TermProcCommongrams : public TermProc {
public:
TermProcCommongrams(TermProc *nxt, const Rcl::StopList& stops)
: TermProc(nxt), m_stops(stops), m_onlygrams(false) { }
: TermProc(nxt), m_stops(stops), m_onlygrams(false)
{
}
virtual bool takeword(const string& term, int pos, int bs, int be)
{