From e3f89dca7ebe9e8a6c3dc61bc2e22c28da786f8a Mon Sep 17 00:00:00 2001 From: dockes Date: Thu, 21 Sep 2006 05:59:59 +0000 Subject: [PATCH] dont throw away text even if html is weird --- src/internfile/myhtmlparse.cpp | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/internfile/myhtmlparse.cpp b/src/internfile/myhtmlparse.cpp index b5f68f1a..e6a6c9dc 100644 --- a/src/internfile/myhtmlparse.cpp +++ b/src/internfile/myhtmlparse.cpp @@ -167,7 +167,9 @@ MyHtmlParser::opening_tag(const string &tag, const map<string,string> &p) sprintf(ascuxtime, "%ld", (long)mktime(&tm)); dmtime = ascuxtime; } - } else if (name == "robots") { + } +#if 0 // We're not a robot, so we don't care about robots metainfo + else if (name == "robots") { string val = i->second; decode_entities(val); lowercase_term(val); @@ -178,6 +180,7 @@ MyHtmlParser::opening_tag(const string &tag, const map<string,string> &p) throw false; } } +#endif // 0 } else if ((j = p.find("http-equiv")) != p.end()) { string hequiv = j->second; lowercase_term(hequiv); @@ -332,13 +335,17 @@ MyHtmlParser::closing_tag(const string &tag) } } -// This gets called when hitting eof. If the <body> is open, do -// something with the text (that is, don't throw up). Else, things are -// too weird, throw an error. We don't get called if the parser finds -// a closing body tag (exception gets thrown by closing_tag()) +// This gets called when hitting eof. +// We used to do: +// > If the <body> is open, do +// > something with the text (that is, don't throw up). Else, things are +// > too weird, throw an error. We don't get called if the parser finds +// > a closing body tag (exception gets thrown by closing_tag()) +// But we don't throw any more. Whatever text we've extracted up to now is +// better than nothing. void MyHtmlParser::do_eof() { - if (!in_body_tag) - throw(false); + // if (!in_body_tag) + // throw(false); }