let plaintorich do the chunking, easier to make sure we don't confuse textedit by cutting inside a tag
This commit is contained in:
parent
df1817414f
commit
607da9bb5e
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: plaintorich.cpp,v 1.28 2007-10-17 16:12:38 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: plaintorich.cpp,v 1.29 2007-10-18 10:39:41 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -301,7 +301,7 @@ bool myTextSplitCB::matchGroups()
|
||||
}
|
||||
|
||||
// Setting searchable beacons in the text to walk the term list.
|
||||
static const char *termAnchorNameBase = "FIRSTTERM";
|
||||
static const char *termAnchorNameBase = "TRM";
|
||||
string termAnchorName(int i)
|
||||
{
|
||||
char acname[sizeof(termAnchorNameBase) + 20];
|
||||
@ -314,8 +314,9 @@ string termAnchorName(int i)
|
||||
// search hit positions does not work well. So we mark the positions with
|
||||
// a special string which we then use with the find() function for positioning
|
||||
// We used to use some weird utf8 char for this, but this was displayed
|
||||
// inconsistently depending on system, font, etc. We now use a good ole bel
|
||||
// char which doesn't seem to cause any trouble.
|
||||
// inconsistently depending on system, font, etc. We now use a good ole ctl
|
||||
// char which doesn't seem to cause any trouble. Wanted to use ^L, but can't
|
||||
// be searched, so ^G
|
||||
const char *firstTermBeacon = "\007";
|
||||
#endif
|
||||
|
||||
@ -339,12 +340,11 @@ static string termBeacon(int i)
|
||||
// Instead, we mark the search term positions either with html anchor
|
||||
// (qt currently has problems with them), or a special string, and the
|
||||
// caller will use the editor's find() function to position on it
|
||||
bool plaintorich(const string& in, string& out,
|
||||
bool plaintorich(const string& in, list<string>& out,
|
||||
const HiliteData& hdata,
|
||||
bool noHeader, bool needBeacons)
|
||||
bool noHeader, bool needBeacons, int chunksize)
|
||||
{
|
||||
Chrono chron;
|
||||
out.erase();
|
||||
const vector<string>& terms(hdata.terms);
|
||||
const vector<vector<string> >& groups(hdata.groups);
|
||||
const vector<int>& slacks(hdata.gslks);
|
||||
@ -375,11 +375,15 @@ bool plaintorich(const string& in, string& out,
|
||||
|
||||
cb.matchGroups();
|
||||
|
||||
out.clear();
|
||||
out.push_back("");
|
||||
list<string>::iterator sit = out.begin();
|
||||
|
||||
// Rich text output
|
||||
if (noHeader)
|
||||
out = "";
|
||||
*sit = "";
|
||||
else
|
||||
out = "<qt><head><title></title></head><body><p>";
|
||||
*sit = "<qt><head><title></title></head><body><p>";
|
||||
|
||||
// Iterator for the list of input term positions. We use it to
|
||||
// output highlight tags and to compute term positions in the
|
||||
@ -413,47 +417,61 @@ bool plaintorich(const string& in, string& out,
|
||||
int ibyteidx = chariter.getBpos();
|
||||
if (ibyteidx == tPosIt->first) {
|
||||
if (needBeacons)
|
||||
out += termBeacon(anchoridx++);
|
||||
out += "<termtag>";
|
||||
*sit += termBeacon(anchoridx++);
|
||||
*sit += "<termtag>";
|
||||
} else if (ibyteidx == tPosIt->second) {
|
||||
// Output end tag, then skip all highlight areas that
|
||||
// would overlap this one
|
||||
out += "</termtag>";
|
||||
*sit += "</termtag>";
|
||||
int crend = tPosIt->second;
|
||||
while (tPosIt != cb.tboffs.end() && tPosIt->first < crend)
|
||||
tPosIt++;
|
||||
// Maybe end chunk
|
||||
if (sit->size() > (unsigned int)chunksize) {
|
||||
out.push_back("");
|
||||
sit++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
switch(*chariter) {
|
||||
case '\n':
|
||||
if (ateol < 2) {
|
||||
out += "<br>\n";
|
||||
*sit += "<br>\n";
|
||||
ateol++;
|
||||
}
|
||||
break;
|
||||
case '\r':
|
||||
break;
|
||||
case '\007': // used as anchor char, strip other instances
|
||||
break;
|
||||
case '<':
|
||||
ateol = 0;
|
||||
out += "&lt;";
|
||||
*sit += "&lt;";
|
||||
break;
|
||||
case '&':
|
||||
ateol = 0;
|
||||
out += "&amp;";
|
||||
*sit += "&amp;";
|
||||
break;
|
||||
default:
|
||||
// We don't change the eol status for whitespace, want a real line
|
||||
if (!(*chariter == ' ' || *chariter == '\t')) {
|
||||
ateol = 0;
|
||||
}
|
||||
chariter.appendchartostring(out);
|
||||
chariter.appendchartostring(*sit);
|
||||
}
|
||||
}
|
||||
#if 1
|
||||
#if 0
|
||||
{
|
||||
FILE *fp = fopen("/tmp/debugplaintorich", "a");
|
||||
fprintf(fp, "%s\n", out.c_str());
|
||||
fprintf(fp, "BEGINOFPLAINTORICHOUTPUT\n");
|
||||
for (list<string>::iterator it = out.begin();
|
||||
it != out.end(); it++) {
|
||||
fprintf(fp, "BEGINOFPLAINTORICHCHUNK\n");
|
||||
fprintf(fp, "%s", it->c_str());
|
||||
fprintf(fp, "ENDOFPLAINTORICHCHUNK\n");
|
||||
}
|
||||
fprintf(fp, "ENDOFPLAINTORICHOUTPUT\n");
|
||||
fclose(fp);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -16,9 +16,12 @@
|
||||
*/
|
||||
#ifndef _PLAINTORICH_H_INCLUDED_
|
||||
#define _PLAINTORICH_H_INCLUDED_
|
||||
/* @(#$Id: plaintorich.h,v 1.14 2007-06-25 10:13:40 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: plaintorich.h,v 1.15 2007-10-18 10:39:41 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include <string>
|
||||
#include <list>
|
||||
using std::list;
|
||||
using std::string;
|
||||
|
||||
// A data struct to hold words and groups of words to be highlighted
|
||||
struct HiliteData {
|
||||
@ -35,23 +38,26 @@ struct HiliteData {
|
||||
* of phrase/near searches. We treat all such searches as "near", not "phrase"
|
||||
*
|
||||
* @param in raw text out of internfile.
|
||||
* @param out rich text output
|
||||
* @param out rich text output, divided in chunks (to help our caller
|
||||
* avoid inserting half tags into textedit which doesn't like it)
|
||||
* @param hdata terms and groups to be highlighted. These are
|
||||
* lowercase and unaccented.
|
||||
* @param noHeader if true don't output header (<qt><title>...)
|
||||
* @param needBeacons Need to navigate highlighted terms, mark them.
|
||||
*/
|
||||
extern bool plaintorich(const string &in, string &out,
|
||||
extern bool plaintorich(const string &in, list<string> &out,
|
||||
const HiliteData& hdata,
|
||||
bool noHeader = false,
|
||||
bool needBeacons = true);
|
||||
bool noHeader,
|
||||
bool needBeacons,
|
||||
int chunksize = 50000
|
||||
);
|
||||
|
||||
extern string termAnchorName(int i);
|
||||
|
||||
#define QT_SCROLL_TO_ANCHOR_BUG
|
||||
#ifdef QT_SCROLL_TO_ANCHOR_BUG
|
||||
// For some reason, can't get scrollToAnchor() to work. We use a string made
|
||||
// of a few rare utf8 chars as a beacon for the match area.
|
||||
// For some reason, can't get scrollToAnchor() to work. We use a special
|
||||
// string as a beacon for the match area.
|
||||
extern const char *firstTermBeacon;
|
||||
#endif
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: preview_w.cpp,v 1.27 2007-09-08 17:25:49 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: preview_w.cpp,v 1.28 2007-10-18 10:39:41 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -35,10 +35,12 @@ using std::pair;
|
||||
#if (QT_VERSION < 0x040000)
|
||||
#include <qtextedit.h>
|
||||
#include <qprogressdialog.h>
|
||||
#define THRFINISHED finished
|
||||
#else
|
||||
#include <q3textedit.h>
|
||||
#include <q3progressdialog.h>
|
||||
#include <q3stylesheet.h>
|
||||
#define THRFINISHED isFinished
|
||||
#endif
|
||||
#include <qevent.h>
|
||||
#include <qlabel.h>
|
||||
@ -581,10 +583,10 @@ class LoadThread : public QThread {
|
||||
class ToRichThread : public QThread {
|
||||
string &in;
|
||||
const HiliteData &hdata;
|
||||
QString &out;
|
||||
list<string> &out;
|
||||
int loglevel;
|
||||
public:
|
||||
ToRichThread(string &i, const HiliteData& hd, QString &o)
|
||||
ToRichThread(string &i, const HiliteData& hd, list<string> &o)
|
||||
: in(i), hdata(hd), out(o)
|
||||
{
|
||||
loglevel = DebugLog::getdbl()->getlevel();
|
||||
@ -592,12 +594,10 @@ class ToRichThread : public QThread {
|
||||
virtual void run()
|
||||
{
|
||||
DebugLog::getdbl()->setloglevel(loglevel);
|
||||
string rich;
|
||||
try {
|
||||
plaintorich(in, rich, hdata, false, true);
|
||||
plaintorich(in, out, hdata, false, true);
|
||||
} catch (CancelExcept) {
|
||||
}
|
||||
out = QString::fromUtf8(rich.c_str(), rich.length());
|
||||
}
|
||||
};
|
||||
|
||||
@ -665,13 +665,8 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc,
|
||||
for (prog = 1;;prog++) {
|
||||
waiter.start();
|
||||
waiter.wait();
|
||||
#if (QT_VERSION < 0x040000)
|
||||
if (lthr.finished())
|
||||
if (lthr.THRFINISHED ())
|
||||
break;
|
||||
#else
|
||||
if (lthr.isFinished())
|
||||
break;
|
||||
#endif
|
||||
progress.setProgress(prog , prog <= nsteps-1 ? nsteps : prog+1);
|
||||
qApp->processEvents();
|
||||
if (progress.wasCanceled()) {
|
||||
@ -703,29 +698,27 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc,
|
||||
// Reset config just in case.
|
||||
rclconfig->setKeyDir("");
|
||||
|
||||
// Create preview text: highlight search terms (if not too big):
|
||||
QString richTxt;
|
||||
|
||||
// Create preview text: highlight search terms
|
||||
// We don't do the highlighting for very big texts: too long. We
|
||||
// should at least do special char escaping, in case a '&' or '<'
|
||||
// somehow slipped through previous processing.
|
||||
bool highlightTerms = fdoc.text.length() < (unsigned long)prefs.maxhltextmbs * 1024 * 1024;
|
||||
int beaconPos = -1;
|
||||
bool highlightTerms = fdoc.text.length() <
|
||||
(unsigned long)prefs.maxhltextmbs * 1024 * 1024;
|
||||
// Final text is produced in chunks so that we can display the top
|
||||
// while still inserting at bottom
|
||||
list<QString> qrichlst;
|
||||
|
||||
if (highlightTerms) {
|
||||
progress.setLabelText(tr("Creating preview text"));
|
||||
qApp->processEvents();
|
||||
ToRichThread rthr(fdoc.text, m_hData, richTxt);
|
||||
list<string> richlst;
|
||||
ToRichThread rthr(fdoc.text, m_hData, richlst);
|
||||
rthr.start();
|
||||
|
||||
for (;;prog++) {
|
||||
waiter.start(); waiter.wait();
|
||||
#if (QT_VERSION < 0x040000)
|
||||
if (rthr.finished())
|
||||
break;
|
||||
#else
|
||||
if (rthr.isFinished())
|
||||
break;
|
||||
#endif
|
||||
if (rthr.THRFINISHED ())
|
||||
break;
|
||||
progress.setProgress(prog , prog <= nsteps-1 ? nsteps : prog+1);
|
||||
qApp->processEvents();
|
||||
if (progress.wasCanceled()) {
|
||||
@ -737,32 +730,36 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc,
|
||||
|
||||
// Conversion to rich text done
|
||||
if (CancelCheck::instance().cancelState()) {
|
||||
if (richTxt.length() == 0) {
|
||||
if (richlst.size() == 0 || richlst.front().length() == 0) {
|
||||
// We can't call closeCurrentTab here as it might delete
|
||||
// the object which would be a nasty surprise to our
|
||||
// caller.
|
||||
return false;
|
||||
} else {
|
||||
richTxt += "<b>Cancelled !</b>";
|
||||
richlst.back() += "<b>Cancelled !</b>";
|
||||
}
|
||||
}
|
||||
beaconPos = richTxt.find(QString::fromUtf8(firstTermBeacon));
|
||||
// Convert to QString list
|
||||
for (list<string>::iterator it = richlst.begin();
|
||||
it != richlst.end(); it++) {
|
||||
qrichlst.push_back(QString::fromUtf8(it->c_str(), it->length()));
|
||||
}
|
||||
} else {
|
||||
// Note that in the case where we don't call plaintorich, the
|
||||
// text will not be identified as richtxt/html (no <html> or
|
||||
// <qt> etc. at the beginning), and there is no need to escape
|
||||
// special characters
|
||||
richTxt = QString::fromUtf8(fdoc.text.c_str(), fdoc.text.length());
|
||||
// No plaintorich() call.
|
||||
// In this case, the text will not be identified as
|
||||
// richtxt/html (no <html> or <qt> etc. at the beginning), and
|
||||
// there is no need to escape special characters.
|
||||
// Also we need to split in chunks (so that the top is displayed faster),
|
||||
// and we must do it on a QString (to avoid utf8 issues).
|
||||
QString qr = QString::fromUtf8(fdoc.text.c_str(), fdoc.text.length());
|
||||
int l = 0;
|
||||
for (int pos = 0; pos < (int)qr.length(); pos += l) {
|
||||
l = MIN(CHUNKL, qr.length() - pos);
|
||||
qrichlst.push_back(qr.mid(pos, l));
|
||||
}
|
||||
}
|
||||
|
||||
m_haveAnchors = (beaconPos != -1);
|
||||
LOGDEB(("LoadFileInCurrentTab: rich: cancel %d txtln %d, hasAnchors %d "
|
||||
"(beaconPos %d)\n",
|
||||
CancelCheck::instance().cancelState(), richTxt.length(),
|
||||
m_haveAnchors, beaconPos));
|
||||
|
||||
|
||||
// Load into editor
|
||||
// Do it in several chunks
|
||||
QTextEdit *editor = getCurrentEditor();
|
||||
editor->setText("");
|
||||
if (highlightTerms) {
|
||||
@ -775,24 +772,18 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc,
|
||||
prog = 2 * nsteps / 3;
|
||||
progress.setLabelText(tr("Loading preview text into editor"));
|
||||
qApp->processEvents();
|
||||
int l = 0;
|
||||
for (int pos = 0; pos < (int)richTxt.length(); pos += l, prog++) {
|
||||
int instep = 0;
|
||||
for (list<QString>::iterator it = qrichlst.begin();
|
||||
it != qrichlst.end(); it++, prog++, instep++) {
|
||||
progress.setProgress(prog , prog <= nsteps-1 ? nsteps : prog+1);
|
||||
qApp->processEvents();
|
||||
|
||||
l = MIN(CHUNKL, richTxt.length() - pos);
|
||||
// Avoid breaking inside a tag. Our tags are short (ie: <br>)
|
||||
if (pos + l != (int)richTxt.length()) {
|
||||
for (int i = -15; i < 0; i++) {
|
||||
if (richTxt[pos+l+i] == '<') {
|
||||
l = l+i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
editor->append(richTxt.mid(pos, l));
|
||||
if (it->find(QString::fromUtf8(firstTermBeacon)) != -1)
|
||||
m_haveAnchors = true;
|
||||
|
||||
editor->append(*it);
|
||||
|
||||
// Stay at top
|
||||
if (pos < 5) {
|
||||
if (instep < 5) {
|
||||
editor->setCursorPosition(0,0);
|
||||
editor->ensureCursorVisible();
|
||||
}
|
||||
@ -803,6 +794,8 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc,
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
progress.close();
|
||||
|
||||
if (searchTextLine->text().length() != 0) {
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: reslist.cpp,v 1.34 2007-08-07 08:42:47 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: reslist.cpp,v 1.35 2007-10-18 10:39:41 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
#include <time.h>
|
||||
@ -478,8 +478,9 @@ void ResList::resultPageNext()
|
||||
abstract = doc.meta["abstract"];
|
||||
}
|
||||
// No need to call escapeHtml(), plaintorich handles it
|
||||
string richabst;
|
||||
plaintorich(abstract, richabst, hdata, true, false);
|
||||
list<string> lr;
|
||||
plaintorich(abstract, lr, hdata, true, false, 100000);
|
||||
string richabst = lr.front();
|
||||
|
||||
// Links;
|
||||
string linksbuf;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user