Re-enable stripping newlines (whitespace collapsing), except inside <pre> tags

2006-01-25 08:39:07 +00:00 · 2006-01-25 08:39:07 +00:00 · 65d00b9c74
commit 65d00b9c74
parent 7ad5f2d90d
2 changed files with 55 additions and 40 deletions
--- a/src/internfile/myhtmlparse.cpp
+++ b/src/internfile/myhtmlparse.cpp
@ -32,30 +32,34 @@
 // I can see no good reason to do this, and it actually helps preview to keep
 // whitespace, especially if the html comes from a filter that generated it 
 // from text (i.e. inside <pre> tags)
 //
 // On the other hand, doing it takes us closer to what the html rendering
 // would be. We should actually switch on/off according to pre tags
 void
 MyHtmlParser::process_text(const string &text)
 {
    if (!in_script_tag && !in_style_tag) {
-#if 0
+	if (!in_pre_tag) {
-	string::size_type b = 0;
+	    string::size_type b = 0;
-	while ((b = text.find_first_not_of(WHITESPACE, b)) != string::npos) {
+	    while ((b = text.find_first_not_of(WHITESPACE, b)) != string::npos) {
-	    if (pending_space || b != 0)
+		if (pending_space || b != 0)
-		if (!dump.empty()) dump += ' ';
+		    if (!dump.empty()) 
-	    pending_space = true;
+			dump += ' ';
-	    string::size_type e = text.find_first_of(WHITESPACE, b);
+		pending_space = true;
-	    if (e == string::npos) {
+		string::size_type e = text.find_first_of(WHITESPACE, b);
-		dump += text.substr(b);
+		if (e == string::npos) {
-		pending_space = false;
+		    dump += text.substr(b);
-		break;
+		    pending_space = false;
 		    break;
 		}
 		dump += text.substr(b, e - b);
 		b = e + 1;
 	    }
-	    dump += text.substr(b, e - b);
+	} else {
-	    b = e + 1;
+	    if (pending_space)
 		dump += ' ';
 	    dump += text;
 	}
 #else
 	if (pending_space)
 	    dump += ' ';
 	dump += text;
 #endif
    }
 }
@ -192,7 +196,11 @@ MyHtmlParser::opening_tag(const string &tag, const map<string,string> &p)
 	    if (tag == "ol" || tag == "option") pending_space = true;
 	    break;
 	case 'p':
-	    if (tag == "p" || tag == "pre" || tag == "plaintext") {
+	    if (tag == "p" || tag == "plaintext") {
 		dump += '\n';
 		pending_space = true;
 	    } else if (tag == "pre") {
 		in_pre_tag = true;
 		dump += '\n';
 		pending_space = true;
 	    }
@ -269,7 +277,12 @@ MyHtmlParser::closing_tag(const string &tag)
 	    if (tag == "ol" || tag == "option") pending_space = true;
 	    break;
 	case 'p':
-	    if (tag == "p" || tag == "pre") pending_space = true;
+	    if (tag == "p") {
 		pending_space = true;
 	    } else if  (tag == "pre") {
 		pending_space = true;
 		in_pre_tag = false;
 	    }
 	    break;
 	case 'q':
 	    if (tag == "q") pending_space = true;
--- a/src/internfile/myhtmlparse.h
+++ b/src/internfile/myhtmlparse.h
@ -29,24 +29,26 @@
 #define WHITESPACE " \t\n\r"
 class MyHtmlParser : public HtmlParser {
-    public:
+ public:
-	bool in_script_tag;
+    bool in_script_tag;
-	bool in_style_tag;
+    bool in_style_tag;
-	bool in_body_tag; 
+    bool in_body_tag; 
-	bool pending_space;
+    bool in_pre_tag;
-    	string title, sample, keywords, dump, dmtime;
+    bool pending_space;
-        string ocharset; // This is the charset our user thinks the doc was
+    string title, sample, keywords, dump, dmtime;
-        string charset; // This is the charset it was supposedly converted to
+    string ocharset; // This is the charset our user thinks the doc was
-        string doccharset; // Set this to value of charset parameter in header
+    string charset; // This is the charset it was supposedly converted to
-	bool indexing_allowed;
+    string doccharset; // Set this to value of charset parameter in header
-	void process_text(const string &text);
+    bool indexing_allowed;
-	void opening_tag(const string &tag, const map<string,string> &p);
+    void process_text(const string &text);
-	void closing_tag(const string &tag);
+    void opening_tag(const string &tag, const map<string,string> &p);
-	void do_eof();
+    void closing_tag(const string &tag);
-	MyHtmlParser() :
+    void do_eof();
-		in_script_tag(false),
+    MyHtmlParser() :
-		in_style_tag(false),
+	in_script_tag(false),
-		in_body_tag(false),
+	in_style_tag(false),
-		pending_space(false),
+	in_body_tag(false),
-		indexing_allowed(true) { }
+	in_pre_tag(false),
 	pending_space(false),
 	indexing_allowed(true) { }
 };