From d2b54d6af2c9432b1ac08cfae8ccfadd8202540b Mon Sep 17 00:00:00 2001 From: dockes Date: Tue, 6 Dec 2005 08:35:48 +0000 Subject: [PATCH] fix nasty html parse bug introduced in 1.0.9 --- src/VERSION | 2 +- src/internfile/internfile.cpp | 6 ++++-- src/internfile/mh_html.cpp | 6 ++++-- src/internfile/myhtmlparse.cpp | 5 ++--- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/VERSION b/src/VERSION index 59e9e604..bb83058e 100644 --- a/src/VERSION +++ b/src/VERSION @@ -1 +1 @@ -1.0.11 +1.0.12 diff --git a/src/internfile/internfile.cpp b/src/internfile/internfile.cpp index bd13541b..aa938b38 100644 --- a/src/internfile/internfile.cpp +++ b/src/internfile/internfile.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: internfile.cpp,v 1.11 2005-11-24 07:16:15 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: internfile.cpp,v 1.12 2005-12-06 08:35:48 dockes Exp $ (C) 2004 J.F.Dockes"; #endif #include #include @@ -161,7 +161,9 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath) m_handler->mkDoc(m_cfg, m_fn, m_mime, doc, ipath); FileInterner::Status ret = FIError; switch (mhs) { - case MimeHandler::MHError: break; + case MimeHandler::MHError: + LOGERR(("FileInterner::internfile: error parsing %s\n", m_fn.c_str())); + break; case MimeHandler::MHDone: ret = FIDone;break; case MimeHandler::MHAgain: ret = FIAgain;break; } diff --git a/src/internfile/mh_html.cpp b/src/internfile/mh_html.cpp index fa659b8c..c8ce2bb1 100644 --- a/src/internfile/mh_html.cpp +++ b/src/internfile/mh_html.cpp @@ -108,8 +108,10 @@ MimeHandlerHtml::mkDoc(RclConfig *conf, const string &, LOGDEB(("textHtmlToDoc: charset '%s' doc charset '%s'," "reparse\n", charset.c_str(),pres.doccharset.c_str())); charset = pres.doccharset; - } else - break; + } else { + LOGERR(("textHtmlToDoc:: error: non charset exception\n")); + return MimeHandler::MHError; + } } } diff --git a/src/internfile/myhtmlparse.cpp b/src/internfile/myhtmlparse.cpp index 307d8253..9d514bd5 100644 --- a/src/internfile/myhtmlparse.cpp +++ b/src/internfile/myhtmlparse.cpp @@ -24,10 +24,9 @@ #include #include "myhtmlparse.h" - #include "indextext.h" // for lowercase_term() - #include "mimeparse.h" +#include "smallut.h" // The original version for this compresses whitespace and suppresses newlines // I can see no good reason to do this, and it actually helps preview to keep @@ -171,7 +170,7 @@ MyHtmlParser::opening_tag(const string &tag, const map &p) if ((k = p.params.find("charset")) != p.params.end()) { doccharset = k->second; - if (doccharset != ocharset) { + if (!samecharset(doccharset, ocharset)) { LOGDEB1(("Doc specified charset '%s' " "differs from announced '%s'\n", doccharset.c_str(),