diff --git a/src/qtgui/plaintorich.cpp b/src/qtgui/plaintorich.cpp index c85b91c0..1473d669 100644 --- a/src/qtgui/plaintorich.cpp +++ b/src/qtgui/plaintorich.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: plaintorich.cpp,v 1.28 2007-10-17 16:12:38 dockes Exp $ (C) 2005 J.F.Dockes"; +static char rcsid[] = "@(#$Id: plaintorich.cpp,v 1.29 2007-10-18 10:39:41 dockes Exp $ (C) 2005 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -301,7 +301,7 @@ bool myTextSplitCB::matchGroups() } // Setting searchable beacons in the text to walk the term list. -static const char *termAnchorNameBase = "FIRSTTERM"; +static const char *termAnchorNameBase = "TRM"; string termAnchorName(int i) { char acname[sizeof(termAnchorNameBase) + 20]; @@ -314,8 +314,9 @@ string termAnchorName(int i) // search hit positions does not work well. So we mark the positions with // a special string which we then use with the find() function for positionning // We used to use some weird utf8 char for this, but this was displayed -// inconsistently depending of system, font, etc. We now use a good ole bel -// char which doesnt' seem to cause any trouble. +// inconsistently depending of system, font, etc. We now use a good ole ctl +// char which doesnt' seem to cause any trouble. 
Wanted to use ^L, but can't +// be searched, so ^G const char *firstTermBeacon = "\007"; #endif @@ -339,12 +340,11 @@ static string termBeacon(int i) // Instead, we mark the search term positions either with html anchor // (qt currently has problems with them), or a special string, and the // caller will use the editor's find() function to position on it -bool plaintorich(const string& in, string& out, +bool plaintorich(const string& in, list<string>& out, const HiliteData& hdata, - bool noHeader, bool needBeacons) + bool noHeader, bool needBeacons, int chunksize) { Chrono chron; - out.erase(); const vector<string>& terms(hdata.terms); const vector<vector<string> >& groups(hdata.groups); const vector<int>& slacks(hdata.gslks); @@ -375,11 +375,15 @@ bool plaintorich(const string& in, string& out, cb.matchGroups(); + out.clear(); + out.push_back(""); + list<string>::iterator sit = out.begin(); + // Rich text output if (noHeader) - out = ""; + *sit = ""; else - out = "<qt><head><title></title></head><body><p>"; + *sit = "<qt><head><title></title></head><body><p>"; // Iterator for the list of input term positions. We use it to // output highlight tags and to compute term positions in the @@ -413,47 +417,61 @@ bool plaintorich(const string& in, string& out, int ibyteidx = chariter.getBpos(); if (ibyteidx == tPosIt->first) { if (needBeacons) - out += termBeacon(anchoridx++); - out += "<termtag>"; + *sit += termBeacon(anchoridx++); + *sit += "<termtag>"; } else if (ibyteidx == tPosIt->second) { // Output end tag, then skip all highlight areas that // would overlap this one - out += "</termtag>"; + *sit += "</termtag>"; int crend = tPosIt->second; while (tPosIt != cb.tboffs.end() && tPosIt->first < crend) tPosIt++; + // Maybe end chunk + if (sit->size() > (unsigned int)chunksize) { + out.push_back(""); + sit++; + } } } switch(*chariter) { case '\n': if (ateol < 2) { - out += "<br>\n"; + *sit += "<br>\n"; ateol++; } break; case '\r': break; + case '\007': // used as anchor char, strip other instances + break; case '<': ateol = 0; - out += "&lt;"; + *sit += "&lt;"; break; case '&': ateol = 0; - out += "&amp;"; + *sit += "&amp;"; break; default: // We don't change the eol status for whitespace, want a real line if (!(*chariter == ' ' || *chariter == '\t')) { ateol = 0; } - chariter.appendchartostring(out); + chariter.appendchartostring(*sit); } } -#if 1 +#if 0 { FILE *fp = fopen("/tmp/debugplaintorich", "a"); - fprintf(fp, "%s\n", out.c_str()); + fprintf(fp, "BEGINOFPLAINTORICHOUTPUT\n"); + for (list<string>::iterator it = out.begin(); + it != out.end(); it++) { + fprintf(fp, "BEGINOFPLAINTORICHCHUNK\n"); + fprintf(fp, "%s", it->c_str()); + fprintf(fp, "ENDOFPLAINTORICHCHUNK\n"); + } + fprintf(fp, "ENDOFPLAINTORICHOUTPUT\n"); fclose(fp); } #endif diff --git a/src/qtgui/plaintorich.h b/src/qtgui/plaintorich.h index 141f1de3..62e19261 100644 --- a/src/qtgui/plaintorich.h +++ b/src/qtgui/plaintorich.h @@ -16,9 +16,12 @@ */ #ifndef _PLAINTORICH_H_INCLUDED_ #define _PLAINTORICH_H_INCLUDED_ -/* @(#$Id: plaintorich.h,v 1.14 2007-06-25 10:13:40 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: plaintorich.h,v 1.15 2007-10-18 10:39:41 dockes Exp $ (C) 2004 J.F.Dockes */ #include <string> +#include <list> +using std::list; +using std::string; // A data struct to hold words and groups of words to be highlighted struct HiliteData { @@ -35,23 +38,26 @@ struct HiliteData { * of phrase/near searches. We treat all such searches as "near", not "phrase" * * @param in raw text out of internfile. - * @param out rich text output + * @param out rich text output, divided in chunks (to help our caller + * avoid inserting half tags into textedit which doesnt like it) * @param hdata terms and groups to be highlighted. These are * lowercase and unaccented. * @param noHeader if true don't output header (...) * @param needBeacons Need to navigate highlighted terms, mark them. 
*/ -extern bool plaintorich(const string &in, string &out, +extern bool plaintorich(const string &in, list<string> &out, const HiliteData& hdata, - bool noHeader = false, - bool needBeacons = true); + bool noHeader, + bool needBeacons, + int chunksize = 50000 + ); extern string termAnchorName(int i); #define QT_SCROLL_TO_ANCHOR_BUG #ifdef QT_SCROLL_TO_ANCHOR_BUG -// For some reason, can't get scrollToAnchor() to work. We use a string made -// of a few rare utf8 chars as a beacon for the match area. +// For some reason, can't get scrollToAnchor() to work. We use a special +// string as a beacon for the match area. extern const char *firstTermBeacon; #endif diff --git a/src/qtgui/preview_w.cpp b/src/qtgui/preview_w.cpp index ba660837..4bc6c3f1 100644 --- a/src/qtgui/preview_w.cpp +++ b/src/qtgui/preview_w.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: preview_w.cpp,v 1.27 2007-09-08 17:25:49 dockes Exp $ (C) 2005 J.F.Dockes"; +static char rcsid[] = "@(#$Id: preview_w.cpp,v 1.28 2007-10-18 10:39:41 dockes Exp $ (C) 2005 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -35,10 +35,12 @@ using std::pair; #if (QT_VERSION < 0x040000) #include <qtextedit.h> #include <qprogressdialog.h> +#define THRFINISHED finished #else #include <q3textedit.h> #include <q3progressdialog.h> #include <q3stylesheet.h> +#define THRFINISHED isFinished #endif #include <qevent.h> #include <qlabel.h> @@ -581,10 +583,10 @@ class LoadThread : public QThread { class ToRichThread : public QThread { string &in; const HiliteData &hdata; - QString &out; + list<string> &out; int loglevel; public: - ToRichThread(string &i, const HiliteData& hd, QString &o) + ToRichThread(string &i, const HiliteData& hd, list<string> &o) : in(i), hdata(hd), out(o) { loglevel = DebugLog::getdbl()->getlevel(); @@ -592,12 +594,10 @@ class ToRichThread : public QThread { virtual void run() { DebugLog::getdbl()->setloglevel(loglevel); - string rich; try { - 
plaintorich(in, rich, hdata, false, true); + plaintorich(in, out, hdata, false, true); } catch (CancelExcept) { } - out = QString::fromUtf8(rich.c_str(), rich.length()); } }; @@ -665,13 +665,8 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc, for (prog = 1;;prog++) { waiter.start(); waiter.wait(); -#if (QT_VERSION < 0x040000) - if (lthr.finished()) + if (lthr.THRFINISHED ()) break; -#else - if (lthr.isFinished()) - break; -#endif progress.setProgress(prog , prog <= nsteps-1 ? nsteps : prog+1); qApp->processEvents(); if (progress.wasCanceled()) { @@ -703,29 +698,27 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc, // Reset config just in case. rclconfig->setKeyDir(""); - // Create preview text: highlight search terms (if not too big): - QString richTxt; - + // Create preview text: highlight search terms // We don't do the highlighting for very big texts: too long. We // should at least do special char escaping, in case a '&' or '<' // somehow slipped through previous processing. - bool highlightTerms = fdoc.text.length() < (unsigned long)prefs.maxhltextmbs * 1024 * 1024; - int beaconPos = -1; + bool highlightTerms = fdoc.text.length() < + (unsigned long)prefs.maxhltextmbs * 1024 * 1024; + // Final text is produced in chunks so that we can display the top + // while still inserting at bottom + list<QString> qrichlst; + if (highlightTerms) { progress.setLabelText(tr("Creating preview text")); qApp->processEvents(); - ToRichThread rthr(fdoc.text, m_hData, richTxt); + list<string> richlst; + ToRichThread rthr(fdoc.text, m_hData, richlst); rthr.start(); for (;;prog++) { waiter.start(); waiter.wait(); -#if (QT_VERSION < 0x040000) - if (rthr.finished()) - break; -#else - if (rthr.isFinished()) - break; -#endif + if (rthr.THRFINISHED ()) + break; progress.setProgress(prog , prog <= nsteps-1 ? 
nsteps : prog+1); qApp->processEvents(); if (progress.wasCanceled()) { @@ -737,32 +730,36 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc, // Conversion to rich text done if (CancelCheck::instance().cancelState()) { - if (richTxt.length() == 0) { + if (richlst.size() == 0 || richlst.front().length() == 0) { // We cant call closeCurrentTab here as it might delete // the object which would be a nasty surprise to our // caller. return false; } else { - richTxt += "<b>Cancelled !</b>"; + richlst.back() += "<b>Cancelled !</b>"; } } - beaconPos = richTxt.find(QString::fromUtf8(firstTermBeacon)); + // Convert to QString list + for (list<string>::iterator it = richlst.begin(); + it != richlst.end(); it++) { + qrichlst.push_back(QString::fromUtf8(it->c_str(), it->length())); + } } else { - // Note that in the case were we don't call plaintorich, the - // text will no be identified as richtxt/html (no <html> or - // <qt> etc. at the beginning), and there is no need to escape - // special characters - richTxt = QString::fromUtf8(fdoc.text.c_str(), fdoc.text.length()); + // No plaintorich() call. + // In this case, the text will no be identified as + // richtxt/html (no <html> or <qt> etc. at the beginning), and + // there is no need to escape special characters. + // Also we need to split in chunks (so that the top is displayed faster), + // and we must do it on a QString (to avoid utf8 issues). 
+ QString qr = QString::fromUtf8(fdoc.text.c_str(), fdoc.text.length()); + int l = 0; + for (int pos = 0; pos < (int)qr.length(); pos += l) { + l = MIN(CHUNKL, qr.length() - pos); + qrichlst.push_back(qr.mid(pos, l)); + } } - - m_haveAnchors = (beaconPos != -1); - LOGDEB(("LoadFileInCurrentTab: rich: cancel %d txtln %d, hasAnchors %d " - "(beaconPos %d)\n", - CancelCheck::instance().cancelState(), richTxt.length(), - m_haveAnchors, beaconPos)); - + // Load into editor - // Do it in several chunks QTextEdit *editor = getCurrentEditor(); editor->setText(""); if (highlightTerms) { @@ -775,24 +772,18 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc, prog = 2 * nsteps / 3; progress.setLabelText(tr("Loading preview text into editor")); qApp->processEvents(); - int l = 0; - for (int pos = 0; pos < (int)richTxt.length(); pos += l, prog++) { + int instep = 0; + for (list<QString>::iterator it = qrichlst.begin(); + it != qrichlst.end(); it++, prog++, instep++) { progress.setProgress(prog , prog <= nsteps-1 ? nsteps : prog+1); qApp->processEvents(); - - l = MIN(CHUNKL, richTxt.length() - pos); - // Avoid breaking inside a tag. 
Our tags are short (ie: <br>) - if (pos + l != (int)richTxt.length()) { - for (int i = -15; i < 0; i++) { - if (richTxt[pos+l+i] == '<') { - l = l+i; - break; - } - } - } - editor->append(richTxt.mid(pos, l)); + if (it->find(QString::fromUtf8(firstTermBeacon)) != -1) + m_haveAnchors = true; + + editor->append(*it); + // Stay at top - if (pos < 5) { + if (instep < 5) { editor->setCursorPosition(0,0); editor->ensureCursorVisible(); } @@ -803,6 +794,8 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc, break; } } + + progress.close(); if (searchTextLine->text().length() != 0) { diff --git a/src/qtgui/reslist.cpp b/src/qtgui/reslist.cpp index 2d3548bd..4cffe571 100644 --- a/src/qtgui/reslist.cpp +++ b/src/qtgui/reslist.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: reslist.cpp,v 1.34 2007-08-07 08:42:47 dockes Exp $ (C) 2005 J.F.Dockes"; +static char rcsid[] = "@(#$Id: reslist.cpp,v 1.35 2007-10-18 10:39:41 dockes Exp $ (C) 2005 J.F.Dockes"; #endif #include <time.h> @@ -478,8 +478,9 @@ void ResList::resultPageNext() abstract = doc.meta["abstract"]; } // No need to call escapeHtml(), plaintorich handles it - string richabst; - plaintorich(abstract, richabst, hdata, true, false); + list<string> lr; + plaintorich(abstract, lr, hdata, true, false, 100000); + string richabst = lr.front(); // Links; string linksbuf;