Let plaintorich do the chunking: this makes it easier to ensure we don't confuse textedit by cutting inside a tag

2007-10-18 10:39:41 +00:00 · 2007-10-18 10:39:41 +00:00 · 607da9bb5e
commit 607da9bb5e
parent df1817414f
4 changed files with 102 additions and 84 deletions
--- a/src/qtgui/plaintorich.cpp
+++ b/src/qtgui/plaintorich.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: plaintorich.cpp,v 1.28 2007-10-17 16:12:38 dockes Exp $ (C) 2005 J.F.Dockes";
+static char rcsid[] = "@(#$Id: plaintorich.cpp,v 1.29 2007-10-18 10:39:41 dockes Exp $ (C) 2005 J.F.Dockes";
 #endif
 /*
 *   This program is free software; you can redistribute it and/or modify
@ -301,7 +301,7 @@ bool myTextSplitCB::matchGroups()
 }
 // Setting searchable beacons in the text to walk the term list.
-static const char *termAnchorNameBase = "FIRSTTERM";
+static const char *termAnchorNameBase = "TRM";
 string termAnchorName(int i)
 {
    char acname[sizeof(termAnchorNameBase) + 20];
@ -314,8 +314,9 @@ string termAnchorName(int i)
 // search hit positions does not work well. So we mark the positions with
 // a special string which we then use with the find() function for positionning
 // We used to use some weird utf8 char for this, but this was displayed 
-// inconsistently depending of system, font, etc. We now use a good ole bel 
+// inconsistently depending of system, font, etc. We now use a good ole ctl
-// char which doesnt' seem to cause any trouble.
+// char which doesn't seem to cause any trouble. Wanted to use ^L, but can't
 // be searched, so ^G
 const char *firstTermBeacon = "\007";
 #endif
@ -339,12 +340,11 @@ static string termBeacon(int i)
 // Instead, we mark the search term positions either with html anchor
 // (qt currently has problems with them), or a special string, and the
 // caller will use the editor's find() function to position on it
-bool plaintorich(const string& in, string& out, 
+bool plaintorich(const string& in, list<string>& out, 
 		 const HiliteData& hdata,
-		 bool noHeader, bool needBeacons)
+		 bool noHeader, bool needBeacons, int chunksize)
 {
    Chrono chron;
    out.erase();
    const vector<string>& terms(hdata.terms);
    const vector<vector<string> >& groups(hdata.groups);
    const vector<int>& slacks(hdata.gslks);
@ -375,11 +375,15 @@ bool plaintorich(const string& in, string& out,
    cb.matchGroups();
    out.clear();
    out.push_back("");
    list<string>::iterator sit = out.begin();
    // Rich text output
    if (noHeader)
-	out = "";
+	*sit = "";
    else 
-	out = "<qt><head><title></title></head><body><p>";
+	*sit = "<qt><head><title></title></head><body><p>";
    // Iterator for the list of input term positions. We use it to
    // output highlight tags and to compute term positions in the
@ -413,47 +417,61 @@ bool plaintorich(const string& in, string& out,
 	    int ibyteidx = chariter.getBpos();
 	    if (ibyteidx == tPosIt->first) {
 		if (needBeacons)
-		    out += termBeacon(anchoridx++);
+		    *sit += termBeacon(anchoridx++);
-		out += "<termtag>";
+		*sit += "<termtag>";
 	    } else if (ibyteidx == tPosIt->second) {
 		// Output end tag, then skip all highlight areas that
 		// would overlap this one
-		out += "</termtag>";
+		*sit += "</termtag>";
 		int crend = tPosIt->second;
 		while (tPosIt != cb.tboffs.end() && tPosIt->first < crend)
 		    tPosIt++;
 		// Maybe end chunk
 		if (sit->size() > (unsigned int)chunksize) {
 		    out.push_back("");
 		    sit++;
 		}
 	    }
 	}
 	switch(*chariter) {
 	case '\n':
 	    if (ateol < 2) {
-		out += "<br>\n";
+		*sit += "<br>\n";
 		ateol++;
 	    }
 	    break;
 	case '\r': 
 	    break;
 	case '\007': // used as anchor char, strip other instances
 	    break;
 	case '<':
 	    ateol = 0;
-	    out += "&lt;";
+	    *sit += "&lt;";
 	    break;
 	case '&':
 	    ateol = 0;
-	    out += "&amp;";
+	    *sit += "&amp;";
 	    break;
 	default:
 	    // We don't change the eol status for whitespace, want a real line
 	    if (!(*chariter == ' ' || *chariter == '\t')) {
 		ateol = 0;
 	    }
-	    chariter.appendchartostring(out);
+	    chariter.appendchartostring(*sit);
 	}
    }
-#if 1
+#if 0
    {
 	FILE *fp = fopen("/tmp/debugplaintorich", "a");
-	fprintf(fp, "%s\n", out.c_str());
+	fprintf(fp, "BEGINOFPLAINTORICHOUTPUT\n");
 	for (list<string>::iterator it = out.begin();
 	     it != out.end(); it++) {
 	    fprintf(fp, "BEGINOFPLAINTORICHCHUNK\n");
 	    fprintf(fp, "%s", it->c_str());
 	    fprintf(fp, "ENDOFPLAINTORICHCHUNK\n");
 	}
 	fprintf(fp, "ENDOFPLAINTORICHOUTPUT\n");
 	fclose(fp);
    }
 #endif
--- a/src/qtgui/plaintorich.h
+++ b/src/qtgui/plaintorich.h
@ -16,9 +16,12 @@
 */
 #ifndef _PLAINTORICH_H_INCLUDED_
 #define _PLAINTORICH_H_INCLUDED_
-/* @(#$Id: plaintorich.h,v 1.14 2007-06-25 10:13:40 dockes Exp $  (C) 2004 J.F.Dockes */
+/* @(#$Id: plaintorich.h,v 1.15 2007-10-18 10:39:41 dockes Exp $  (C) 2004 J.F.Dockes */
 #include <string>
 #include <list>
 using std::list;
 using std::string;
 // A data struct to hold words and groups of words to be highlighted
 struct HiliteData {
@ -35,23 +38,26 @@ struct HiliteData {
 * of phrase/near searches. We treat all such searches as "near", not "phrase"
 * 
 * @param in          raw text out of internfile.
- * @param out         rich text output
+ * @param out         rich text output, divided in chunks (to help our caller
 *          avoid inserting half tags into textedit which doesn't like it)
 * @param hdata       terms and groups to be highlighted. These are
 *                     lowercase and unaccented.
 * @param noHeader    if true don't output header (<qt><title>...)
 * @param needBeacons Need to navigate highlighted terms, mark them.
 */
-extern bool plaintorich(const string &in, string &out,
+extern bool plaintorich(const string &in, list<string> &out,
 			const HiliteData& hdata,
-			bool noHeader = false,
+			bool noHeader,
-			bool needBeacons = true);
+			bool needBeacons,
 			int chunksize = 50000
 			);
 extern string termAnchorName(int i);
 #define QT_SCROLL_TO_ANCHOR_BUG
 #ifdef QT_SCROLL_TO_ANCHOR_BUG
-// For some reason, can't get scrollToAnchor() to work. We use a string made
+// For some reason, can't get scrollToAnchor() to work. We use a special 
-// of a few rare utf8 chars as a beacon for the match area.
+// string as a beacon for the match area.
 extern const char *firstTermBeacon;
 #endif
--- a/src/qtgui/preview_w.cpp
+++ b/src/qtgui/preview_w.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: preview_w.cpp,v 1.27 2007-09-08 17:25:49 dockes Exp $ (C) 2005 J.F.Dockes";
+static char rcsid[] = "@(#$Id: preview_w.cpp,v 1.28 2007-10-18 10:39:41 dockes Exp $ (C) 2005 J.F.Dockes";
 #endif
 /*
 *   This program is free software; you can redistribute it and/or modify
@ -35,10 +35,12 @@ using std::pair;
 #if (QT_VERSION < 0x040000)
 #include <qtextedit.h>
 #include <qprogressdialog.h>
 #define THRFINISHED finished
 #else
 #include <q3textedit.h>
 #include <q3progressdialog.h>
 #include <q3stylesheet.h>
 #define THRFINISHED isFinished
 #endif
 #include <qevent.h>
 #include <qlabel.h>
@ -581,10 +583,10 @@ class LoadThread : public QThread {
 class ToRichThread : public QThread {
    string &in;
    const HiliteData &hdata;
-    QString &out;
+    list<string> &out;
    int loglevel;
 public:
-    ToRichThread(string &i, const HiliteData& hd, QString &o) 
+    ToRichThread(string &i, const HiliteData& hd, list<string> &o) 
 	: in(i), hdata(hd), out(o)
    {
 	    loglevel = DebugLog::getdbl()->getlevel();
@ -592,12 +594,10 @@ class ToRichThread : public QThread {
    virtual void run()
    {
 	DebugLog::getdbl()->setloglevel(loglevel);
 	string rich;
 	try {
-	    plaintorich(in, rich, hdata, false, true);
+	    plaintorich(in, out, hdata, false, true);
 	} catch (CancelExcept) {
 	}
 	out = QString::fromUtf8(rich.c_str(), rich.length());
    }
 };
@ -665,13 +665,8 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc,
    for (prog = 1;;prog++) {
 	waiter.start();
 	waiter.wait();
-#if (QT_VERSION < 0x040000)
+	if (lthr.THRFINISHED ())
 	if (lthr.finished())
 	    break;
 #else
 	if (lthr.isFinished())
 	    break;
 #endif
 	progress.setProgress(prog , prog <= nsteps-1 ? nsteps : prog+1);
 	qApp->processEvents();
 	if (progress.wasCanceled()) {
@ -703,29 +698,27 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc,
    // Reset config just in case.
    rclconfig->setKeyDir("");
-    // Create preview text: highlight search terms (if not too big):
+    // Create preview text: highlight search terms
    QString richTxt;
    // We don't do the highlighting for very big texts: too long. We
    // should at least do special char escaping, in case a '&' or '<'
    // somehow slipped through previous processing.
-    bool highlightTerms = fdoc.text.length() < (unsigned long)prefs.maxhltextmbs * 1024 * 1024;
+    bool highlightTerms = fdoc.text.length() < 
-    int beaconPos = -1;
+	(unsigned long)prefs.maxhltextmbs * 1024 * 1024;
    // Final text is produced in chunks so that we can display the top
    // while still inserting at bottom
    list<QString> qrichlst;
    if (highlightTerms) {
 	progress.setLabelText(tr("Creating preview text"));
 	qApp->processEvents();
-	ToRichThread rthr(fdoc.text, m_hData, richTxt);
+	list<string> richlst;
 	ToRichThread rthr(fdoc.text, m_hData, richlst);
 	rthr.start();
 	for (;;prog++) {
 	    waiter.start();	waiter.wait();
-#if (QT_VERSION < 0x040000)
+	    if (rthr.THRFINISHED ())
-	if (rthr.finished())
+		break;
 	    break;
 #else
 	if (rthr.isFinished())
 	    break;
 #endif
 	    progress.setProgress(prog , prog <= nsteps-1 ? nsteps : prog+1);
 	    qApp->processEvents();
 	    if (progress.wasCanceled()) {
@ -737,32 +730,36 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc,
 	// Conversion to rich text done
 	if (CancelCheck::instance().cancelState()) {
-	    if (richTxt.length() == 0) {
+	    if (richlst.size() == 0 || richlst.front().length() == 0) {
 		// We cant call closeCurrentTab here as it might delete
 		// the object which would be a nasty surprise to our
 		// caller.
 		return false;
 	    } else {
-		richTxt += "<b>Cancelled !</b>";
+		richlst.back() += "<b>Cancelled !</b>";
 	    }
 	}
-	beaconPos = richTxt.find(QString::fromUtf8(firstTermBeacon));
+	// Convert to QString list
 	for (list<string>::iterator it = richlst.begin(); 
 	     it != richlst.end(); it++) {
 	    qrichlst.push_back(QString::fromUtf8(it->c_str(), it->length()));
 	}
    } else {
-	// Note that in the case were we don't call plaintorich, the
+	// No plaintorich() call.
-	// text will no be identified as richtxt/html (no <html> or
+	// In this case, the text will not be identified as
-	// <qt> etc. at the beginning), and there is no need to escape
+	// richtxt/html (no <html> or <qt> etc. at the beginning), and
-	// special characters
+	// there is no need to escape special characters.
-	richTxt = QString::fromUtf8(fdoc.text.c_str(), fdoc.text.length());
+	// Also we need to split in chunks (so that the top is displayed faster),
 	// and we must do it on a QString (to avoid utf8 issues).
 	QString qr = QString::fromUtf8(fdoc.text.c_str(), fdoc.text.length());
 	int l = 0;
 	for (int pos = 0; pos < (int)qr.length(); pos += l) {
 	    l = MIN(CHUNKL, qr.length() - pos);
 	    qrichlst.push_back(qr.mid(pos, l));
 	}
    }
-
+	    
    m_haveAnchors = (beaconPos != -1);
    LOGDEB(("LoadFileInCurrentTab: rich: cancel %d txtln %d, hasAnchors %d "
 	    "(beaconPos %d)\n", 
 	    CancelCheck::instance().cancelState(), richTxt.length(), 
 	    m_haveAnchors, beaconPos));
    // Load into editor
    // Do it in several chunks 
    QTextEdit *editor = getCurrentEditor();
    editor->setText("");
    if (highlightTerms) {
@ -775,24 +772,18 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc,
    prog = 2 * nsteps / 3;
    progress.setLabelText(tr("Loading preview text into editor"));
    qApp->processEvents();
-    int l = 0;
+    int instep = 0;
-    for (int pos = 0; pos < (int)richTxt.length(); pos += l, prog++) {
+    for (list<QString>::iterator it = qrichlst.begin(); 
 	 it != qrichlst.end(); it++, prog++, instep++) {
 	progress.setProgress(prog , prog <= nsteps-1 ? nsteps : prog+1);
 	qApp->processEvents();
-	
+	if (it->find(QString::fromUtf8(firstTermBeacon)) != -1) 
-	l = MIN(CHUNKL, richTxt.length() - pos);
+	    m_haveAnchors = true;
-	// Avoid breaking inside a tag. Our tags are short (ie: <br>)
+
-	if (pos + l != (int)richTxt.length()) {
+	editor->append(*it);
-	    for (int i = -15; i < 0; i++) {
+
 		if (richTxt[pos+l+i] == '<') {
 		    l = l+i;
 		    break;
 		}
 	    }
 	}
 	editor->append(richTxt.mid(pos, l));
 	// Stay at top
-	if (pos < 5) {
+	if (instep < 5) {
 	    editor->setCursorPosition(0,0);
 	    editor->ensureCursorVisible();
 	}
@ -803,6 +794,8 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc,
 	    break;
 	}
    }
    progress.close();
    if (searchTextLine->text().length() != 0) {
--- a/src/qtgui/reslist.cpp
+++ b/src/qtgui/reslist.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: reslist.cpp,v 1.34 2007-08-07 08:42:47 dockes Exp $ (C) 2005 J.F.Dockes";
+static char rcsid[] = "@(#$Id: reslist.cpp,v 1.35 2007-10-18 10:39:41 dockes Exp $ (C) 2005 J.F.Dockes";
 #endif
 #include <time.h>
@ -478,8 +478,9 @@ void ResList::resultPageNext()
 	    abstract = doc.meta["abstract"];
 	}
 	// No need to call escapeHtml(), plaintorich handles it
-	string richabst;
+	list<string> lr;
-	plaintorich(abstract, richabst, hdata, true, false);
+	plaintorich(abstract, lr, hdata, true, false, 100000);
 	string richabst = lr.front();
 	// Links;
 	string linksbuf;