diff --git a/src/internfile/mh_html.cpp b/src/internfile/mh_html.cpp
index c8ce2bb1..56324a46 100644
--- a/src/internfile/mh_html.cpp
+++ b/src/internfile/mh_html.cpp
@@ -101,8 +101,12 @@ MimeHandlerHtml::mkDoc(RclConfig *conf, const string &,
try {
p.parse_html(transcoded);
- } catch (bool) {
+ } catch (bool diag) {
pres = p;
+ if (diag == true)
+ break;
+ LOGDEB(("textHtmlToDoc: charset [%s] doc charset [%s]\n",
+ charset.c_str(),pres.doccharset.c_str()));
if (!pres.doccharset.empty() &&
!samecharset(pres.doccharset, pres.ocharset)) {
LOGDEB(("textHtmlToDoc: charset '%s' doc charset '%s',"
@@ -117,7 +121,7 @@ MimeHandlerHtml::mkDoc(RclConfig *conf, const string &,
docout.origcharset = charset;
docout.text = pres.dump;
- // LOGDEB(("textHtmlToDoc: dump : %s\n", pres.dump.c_str()));
+ //LOGDEB(("textHtmlToDoc: dump : %s\n", pres.dump.c_str()));
docout.title = pres.title;
docout.keywords = pres.keywords;
docout.abstract = pres.sample;
diff --git a/src/internfile/myhtmlparse.cpp b/src/internfile/myhtmlparse.cpp
index 9d514bd5..aca5cf35 100644
--- a/src/internfile/myhtmlparse.cpp
+++ b/src/internfile/myhtmlparse.cpp
@@ -156,7 +156,8 @@ MyHtmlParser::opening_tag(const string &tag, const map &p)
if (val.find("none") != string::npos ||
val.find("noindex") != string::npos) {
indexing_allowed = false;
- throw true;
+ LOGDEB1(("myhtmlparse: robots/noindex\n"));
+ throw false;
}
}
} else if ((j = p.find("http-equiv")) != p.end()) {
@@ -175,7 +176,7 @@ MyHtmlParser::opening_tag(const string &tag, const map &p)
"differs from announced '%s'\n",
doccharset.c_str(),
ocharset.c_str()));
- throw true;
+ throw false;
}
}
}
@@ -232,6 +233,7 @@ MyHtmlParser::closing_tag(const string &tag)
break;
case 'b':
if (tag == "body") {
+ LOGDEB1(("Myhtmlparse: body close tag found\n"));
throw true;
}
if (tag == "blockquote" || tag == "br") pending_space = true;