Don't throw away text even if the HTML is weird
This commit is contained in:
parent
507ee32fdb
commit
e3f89dca7e
@ -167,7 +167,9 @@ MyHtmlParser::opening_tag(const string &tag, const map<string,string> &p)
|
|||||||
sprintf(ascuxtime, "%ld", (long)mktime(&tm));
|
sprintf(ascuxtime, "%ld", (long)mktime(&tm));
|
||||||
dmtime = ascuxtime;
|
dmtime = ascuxtime;
|
||||||
}
|
}
|
||||||
} else if (name == "robots") {
|
}
|
||||||
|
#if 0 // We're not a robot, so we don't care about robots metainfo
|
||||||
|
else if (name == "robots") {
|
||||||
string val = i->second;
|
string val = i->second;
|
||||||
decode_entities(val);
|
decode_entities(val);
|
||||||
lowercase_term(val);
|
lowercase_term(val);
|
||||||
@ -178,6 +180,7 @@ MyHtmlParser::opening_tag(const string &tag, const map<string,string> &p)
|
|||||||
throw false;
|
throw false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif // 0
|
||||||
} else if ((j = p.find("http-equiv")) != p.end()) {
|
} else if ((j = p.find("http-equiv")) != p.end()) {
|
||||||
string hequiv = j->second;
|
string hequiv = j->second;
|
||||||
lowercase_term(hequiv);
|
lowercase_term(hequiv);
|
||||||
@ -332,13 +335,17 @@ MyHtmlParser::closing_tag(const string &tag)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// This gets called when hitting eof. If the <body> is open, do
|
// This gets called when hitting eof.
|
||||||
// something with the text (that is, don't throw up). Else, things are
|
// We used to do:
|
||||||
// too weird, throw an error. We don't get called if the parser finds
|
// > If the <body> is open, do
|
||||||
// a closing body tag (exception gets thrown by closing_tag())
|
// > something with the text (that is, don't throw up). Else, things are
|
||||||
|
// > too weird, throw an error. We don't get called if the parser finds
|
||||||
|
// > a closing body tag (exception gets thrown by closing_tag())
|
||||||
|
// But we don't throw any more. Whatever text we've extracted up to now is
|
||||||
|
// better than nothing.
|
||||||
void
|
void
|
||||||
MyHtmlParser::do_eof()
|
MyHtmlParser::do_eof()
|
||||||
{
|
{
|
||||||
if (!in_body_tag)
|
// if (!in_body_tag)
|
||||||
throw(false);
|
// throw(false);
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user