previous html fix didn't work
This commit is contained in:
parent
d2b54d6af2
commit
c8e18ccc81
@ -101,8 +101,12 @@ MimeHandlerHtml::mkDoc(RclConfig *conf, const string &,
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
p.parse_html(transcoded);
|
p.parse_html(transcoded);
|
||||||
} catch (bool) {
|
} catch (bool diag) {
|
||||||
pres = p;
|
pres = p;
|
||||||
|
if (diag == true)
|
||||||
|
break;
|
||||||
|
LOGDEB(("textHtmlToDoc: charset [%s] doc charset [%s]\n",
|
||||||
|
charset.c_str(),pres.doccharset.c_str()));
|
||||||
if (!pres.doccharset.empty() &&
|
if (!pres.doccharset.empty() &&
|
||||||
!samecharset(pres.doccharset, pres.ocharset)) {
|
!samecharset(pres.doccharset, pres.ocharset)) {
|
||||||
LOGDEB(("textHtmlToDoc: charset '%s' doc charset '%s',"
|
LOGDEB(("textHtmlToDoc: charset '%s' doc charset '%s',"
|
||||||
@ -117,7 +121,7 @@ MimeHandlerHtml::mkDoc(RclConfig *conf, const string &,
|
|||||||
|
|
||||||
docout.origcharset = charset;
|
docout.origcharset = charset;
|
||||||
docout.text = pres.dump;
|
docout.text = pres.dump;
|
||||||
// LOGDEB(("textHtmlToDoc: dump : %s\n", pres.dump.c_str()));
|
//LOGDEB(("textHtmlToDoc: dump : %s\n", pres.dump.c_str()));
|
||||||
docout.title = pres.title;
|
docout.title = pres.title;
|
||||||
docout.keywords = pres.keywords;
|
docout.keywords = pres.keywords;
|
||||||
docout.abstract = pres.sample;
|
docout.abstract = pres.sample;
|
||||||
|
|||||||
@ -156,7 +156,8 @@ MyHtmlParser::opening_tag(const string &tag, const map<string,string> &p)
|
|||||||
if (val.find("none") != string::npos ||
|
if (val.find("none") != string::npos ||
|
||||||
val.find("noindex") != string::npos) {
|
val.find("noindex") != string::npos) {
|
||||||
indexing_allowed = false;
|
indexing_allowed = false;
|
||||||
throw true;
|
LOGDEB1(("myhtmlparse: robots/noindex\n"));
|
||||||
|
throw false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if ((j = p.find("http-equiv")) != p.end()) {
|
} else if ((j = p.find("http-equiv")) != p.end()) {
|
||||||
@ -175,7 +176,7 @@ MyHtmlParser::opening_tag(const string &tag, const map<string,string> &p)
|
|||||||
"differs from announced '%s'\n",
|
"differs from announced '%s'\n",
|
||||||
doccharset.c_str(),
|
doccharset.c_str(),
|
||||||
ocharset.c_str()));
|
ocharset.c_str()));
|
||||||
throw true;
|
throw false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -232,6 +233,7 @@ MyHtmlParser::closing_tag(const string &tag)
|
|||||||
break;
|
break;
|
||||||
case 'b':
|
case 'b':
|
||||||
if (tag == "body") {
|
if (tag == "body") {
|
||||||
|
LOGDEB1(("Myhtmlparse: body close tag found\n"));
|
||||||
throw true;
|
throw true;
|
||||||
}
|
}
|
||||||
if (tag == "blockquote" || tag == "br") pending_space = true;
|
if (tag == "blockquote" || tag == "br") pending_space = true;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user