replace computation of term positions in editor text with search for 1st query term

This commit is contained in:
dockes 2006-02-07 09:44:33 +00:00
parent d788625849
commit cf07573b57
4 changed files with 49 additions and 64 deletions

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: plaintorich.cpp,v 1.9 2006-01-27 13:42:02 dockes Exp $ (C) 2005 J.F.Dockes";
static char rcsid[] = "@(#$Id: plaintorich.cpp,v 1.10 2006-02-07 09:44:33 dockes Exp $ (C) 2005 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -43,6 +43,7 @@ using std::set;
// inside the result text. This is then used to post highlight tags.
class myTextSplitCB : public TextSplitCB {
public:
string firstTerm;
set<string> terms; // in: user query terms
list<pair<int, int> > tboffs; // out: begin and end positions of
// query terms in text
@ -62,8 +63,11 @@ class myTextSplitCB : public TextSplitCB {
Rcl::dumb_string(term, dumb);
//LOGDEB(("Input dumbbed term: '%s' %d %d %d\n", dumb.c_str(),
// pos, bts, bte));
if (terms.find(dumb) != terms.end())
if (terms.find(dumb) != terms.end()) {
tboffs.push_back(pair<int, int>(bts, bte));
if (firstTerm.empty())
firstTerm = term;
}
CancelCheck::instance().checkCancel();
return true;
}
@ -71,19 +75,20 @@ class myTextSplitCB : public TextSplitCB {
// Fix result text for display inside the gui text window.
//
// To compute the term character positions in the output text, we have
// To compute the term character positions in the output text, we used
// to emulate how qt's textedit counts chars (ignoring tags and
// duplicate whitespace etc...). This is tricky business and it might
// be better to insert the text char by char, taking note of where qt
// thinks it is at each term.
// duplicate whitespace etc...). This was tricky business, dependent
// on QTextEdit internals, and we don't do it any more, so we no longer
// know the term paragraph/character positions in the editor text.
// Instead, we return the first term encountered, and the caller will
// use the editor's find() function to position on it.
bool plaintorich(const string& in, string& out, const list<string>& terms,
list<pair<int, int> >&termoffsets)
string *firstTerm)
{
Chrono chron;
LOGDEB(("plaintorich: terms: %s\n",
stringlistdisp(terms).c_str()));
out.erase();
termoffsets.erase(termoffsets.begin(), termoffsets.end());
// We first use the text splitter to break the text into words,
// and compare the words to the search terms, which yields the
@ -94,6 +99,8 @@ bool plaintorich(const string& in, string& out, const list<string>& terms,
// character offset
splitter.text_to_words(in);
if (firstTerm)
*firstTerm = cb.firstTerm;
LOGDEB(("plaintorich: split done %d mS\n", chron.millis()));
// Rich text output
@ -102,12 +109,8 @@ bool plaintorich(const string& in, string& out, const list<string>& terms,
// Iterator for the list of input term positions. We use it to
// output highlight tags (term positions in the output text are no
// longer computed here).
list<pair<int, int> >::iterator it = cb.tboffs.begin();
list<pair<int, int> >::iterator tPosIt = cb.tboffs.begin();
// Storage for the current term _character_ position in output.
pair<int, int> otermcpos;
// Current char position in output, excluding tags
int outcpos=0;
// Input character iterator
Utf8Iter chariter(in);
// State variable used to limit the number of consecutive empty lines
@ -120,16 +123,13 @@ bool plaintorich(const string& in, string& out, const list<string>& terms,
CancelCheck::instance().checkCancel();
}
// If we still have term positions, check (byte) position
if (it != cb.tboffs.end()) {
if (tPosIt != cb.tboffs.end()) {
int ibyteidx = chariter.getBpos();
if (ibyteidx == it->first) {
if (ibyteidx == tPosIt->first) {
out += "<termtag>";
otermcpos.first = outcpos;
} else if (ibyteidx == it->second) {
if (it != cb.tboffs.end())
it++;
otermcpos.second = outcpos;
termoffsets.push_back(otermcpos);
} else if (ibyteidx == tPosIt->second) {
if (tPosIt != cb.tboffs.end())
tPosIt++;
out += "</termtag>";
}
}
@ -138,7 +138,6 @@ bool plaintorich(const string& in, string& out, const list<string>& terms,
if (ateol < 2) {
out += "<br>\n";
ateol++;
outcpos++;
}
break;
case '\r':
@ -146,23 +145,18 @@ bool plaintorich(const string& in, string& out, const list<string>& terms,
case '<':
ateol = 0;
out += "&lt;";
outcpos++;
break;
case '&':
ateol = 0;
out += "&amp;";
outcpos++;
break;
default:
// We don't change the eol status for whitespace, want a real line
if (*chariter == ' ' || *chariter == '\t') {
if (!atblank)
outcpos++;
atblank = 1;
} else {
ateol = 0;
atblank = 0;
outcpos++;
}
chariter.appendchartostring(out);
}

View File

@ -16,7 +16,7 @@
*/
#ifndef _PLAINTORICH_H_INCLUDED_
#define _PLAINTORICH_H_INCLUDED_
/* @(#$Id: plaintorich.h,v 1.5 2006-01-30 11:15:28 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: plaintorich.h,v 1.6 2006-02-07 09:44:33 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string>
@ -32,6 +32,6 @@
*/
extern bool plaintorich(const string &in, string &out,
const list<string>& terms,
list<pair<int, int> >& termoffsets);
string* firstTerm);
#endif /* _PLAINTORICH_H_INCLUDED_ */

View File

@ -202,7 +202,7 @@
</signals>
<slots>
<slot>searchTextLine_textChanged( const QString &amp; text )</slot>
<slot>doSearch( bool next, bool reverse )</slot>
<slot>doSearch( const QString &amp;str, bool next, bool reverse )</slot>
<slot>nextPressed()</slot>
<slot>prevPressed()</slot>
<slot>currentChanged( QWidget * tw )</slot>

View File

@ -100,7 +100,7 @@ bool Preview::eventFilter(QObject *target, QEvent *event)
return true;
} else if (dynSearchActive) {
if (keyEvent->key() == Key_F3) {
doSearch(true, false);
doSearch(searchTextLine->text(), true, false);
return true;
}
if (target != searchTextLine)
@ -134,7 +134,7 @@ void Preview::searchTextLine_textChanged(const QString & text)
nextButton->setEnabled(true);
prevButton->setEnabled(true);
clearPB->setEnabled(true);
doSearch(false, false);
doSearch(text, false, false);
}
}
@ -152,7 +152,7 @@ QTextEdit * Preview::getCurrentEditor()
// current search, trying to advance and possibly wrapping around. If next is
// false, the search string has been modified, we search for the new string,
// starting from the current position
void Preview::doSearch(bool next, bool reverse)
void Preview::doSearch(const QString &text, bool next, bool reverse)
{
LOGDEB1(("Preview::doSearch: next %d rev %d\n", int(next), int(reverse)));
QTextEdit *edit = getCurrentEditor();
@ -184,8 +184,9 @@ void Preview::doSearch(bool next, bool reverse)
}
}
bool found = edit->find(searchTextLine->text(), matchCase, false,
bool found = edit->find(text, matchCase, false,
!reverse, &mspara, &msindex);
LOGDEB(("Found at para: %d index %d\n", mspara, msindex));
if (!found && next && true) { // need a 'canwrap' test here
if (reverse) {
@ -194,8 +195,7 @@ void Preview::doSearch(bool next, bool reverse)
} else {
mspara = msindex = 0;
}
found = edit->find(searchTextLine->text(), matchCase, false,
!reverse, &mspara, &msindex);
found = edit->find(text, matchCase, false, !reverse, &mspara, &msindex);
}
if (found) {
@ -210,13 +210,13 @@ void Preview::doSearch(bool next, bool reverse)
void Preview::nextPressed()
{
doSearch(true, false);
doSearch(searchTextLine->text(), true, false);
}
void Preview::prevPressed()
{
doSearch(true, true);
doSearch(searchTextLine->text(), true, true);
}
@ -387,19 +387,19 @@ class LoadThread : public QThread {
class ToRichThread : public QThread {
string &in;
list<string> &terms;
list<pair<int, int> > &termoffsets;
string& firstTerm;
QString &out;
public:
ToRichThread(string &i, list<string> &trms,
list<pair<int, int> > &toffs, QString &o)
: in(i), terms(trms), termoffsets(toffs), out(o)
string& ft, QString &o)
: in(i), terms(trms), firstTerm(ft), out(o)
{}
virtual void run()
{
DebugLog::getdbl()->setloglevel(DEBDEB1);
string rich;
try {
plaintorich(in, rich, terms, termoffsets);
plaintorich(in, rich, terms, &firstTerm);
} catch (CancelExcept) {
}
out = QString::fromUtf8(rich.c_str(), rich.length());
@ -478,14 +478,13 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc)
// Create preview text: highlight search terms (if not too big):
QString richTxt;
list<pair<int, int> > termoffsets;
bool highlightTerms = fdoc.text.length() < 1000 *1024;
string firstTerm;
list<string> terms;
rcldb->getQueryTerms(terms);
if (highlightTerms) {
progress.setLabelText(tr("Creating preview text"));
list<string> terms;
rcldb->getQueryTerms(terms);
ToRichThread rthr(fdoc.text, terms, termoffsets, richTxt);
ToRichThread rthr(fdoc.text, terms, firstTerm, richTxt);
rthr.start();
for (;;prog++) {
@ -516,6 +515,7 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc)
}
// Load into editor
// Do it in several chunks
QTextEdit *editor = getCurrentEditor();
if (highlightTerms) {
QStyleSheetItem *item =
@ -523,11 +523,10 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc)
item->setColor("blue");
item->setFontWeight(QFont::Bold);
}
prog = 2 * nsteps / 3;
progress.setLabelText(tr("Loading preview text into editor"));
qApp->processEvents();
// Do it in several chunks
int l = 0;
for (unsigned int pos = 0; pos < richTxt.length(); pos += l, prog++) {
progress.setProgress(prog , prog <= nsteps-1 ? nsteps : prog+1);
@ -535,14 +534,14 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc)
l = MIN(CHUNKL, richTxt.length() - pos);
// Avoid breaking inside a tag. Our tags are short (e.g. <br>)
if (pos + l != richTxt.length())
if (pos + l != richTxt.length()) {
for (int i = -15; i < 0; i++) {
if (richTxt[pos+l+i] == '<') {
l = l+i;
break;
}
}
}
editor->append(richTxt.mid(pos, l));
// Stay at top
if (pos < 5) {
@ -557,19 +556,11 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc)
}
}
if (highlightTerms) {
int para = 0, index = 1;
if (!termoffsets.empty()) {
index = (termoffsets.begin())->first;
LOGDEB(("Set cursor position: para %d, character index %d\n",
para,index));
editor->setCursorPosition(0, index);
}
editor->ensureCursorVisible();
editor->getCursorPosition(&para, &index);
LOGDEB(("PREVIEW len %d paragraphs: %d. Cpos: %d %d\n",
editor->length(), editor->paragraphs(), para, index));
if (!firstTerm.empty()) {
bool wasC = matchCheck->isChecked();
matchCheck->setChecked(false);
doSearch(QString::fromUtf8(terms.begin()->c_str()), true, false);
matchCheck->setChecked(wasC);
}
return true;
}