diff --git a/src/internfile/mh_html.cpp b/src/internfile/mh_html.cpp index c8ce2bb1..56324a46 100644 --- a/src/internfile/mh_html.cpp +++ b/src/internfile/mh_html.cpp @@ -101,8 +101,12 @@ MimeHandlerHtml::mkDoc(RclConfig *conf, const string &, try { p.parse_html(transcoded); - } catch (bool) { + } catch (bool diag) { pres = p; + if (diag == true) + break; + LOGDEB(("textHtmlToDoc: charset [%s] doc charset [%s]\n", + charset.c_str(),pres.doccharset.c_str())); if (!pres.doccharset.empty() && !samecharset(pres.doccharset, pres.ocharset)) { LOGDEB(("textHtmlToDoc: charset '%s' doc charset '%s'," @@ -117,7 +121,7 @@ MimeHandlerHtml::mkDoc(RclConfig *conf, const string &, docout.origcharset = charset; docout.text = pres.dump; - // LOGDEB(("textHtmlToDoc: dump : %s\n", pres.dump.c_str())); + //LOGDEB(("textHtmlToDoc: dump : %s\n", pres.dump.c_str())); docout.title = pres.title; docout.keywords = pres.keywords; docout.abstract = pres.sample; diff --git a/src/internfile/myhtmlparse.cpp b/src/internfile/myhtmlparse.cpp index 9d514bd5..aca5cf35 100644 --- a/src/internfile/myhtmlparse.cpp +++ b/src/internfile/myhtmlparse.cpp @@ -156,7 +156,8 @@ MyHtmlParser::opening_tag(const string &tag, const map &p) if (val.find("none") != string::npos || val.find("noindex") != string::npos) { indexing_allowed = false; - throw true; + LOGDEB1(("myhtmlparse: robots/noindex\n")); + throw false; } } } else if ((j = p.find("http-equiv")) != p.end()) { @@ -175,7 +176,7 @@ MyHtmlParser::opening_tag(const string &tag, const map &p) "differs from announced '%s'\n", doccharset.c_str(), ocharset.c_str())); - throw true; + throw false; } } } @@ -232,6 +233,7 @@ MyHtmlParser::closing_tag(const string &tag) break; case 'b': if (tag == "body") { + LOGDEB1(("Myhtmlparse: body close tag found\n")); throw true; } if (tag == "blockquote" || tag == "br") pending_space = true;