diff --git a/src/qtgui/plaintorich.cpp b/src/qtgui/plaintorich.cpp index ea232395..5381e5a5 100644 --- a/src/qtgui/plaintorich.cpp +++ b/src/qtgui/plaintorich.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: plaintorich.cpp,v 1.15 2006-11-17 12:32:40 dockes Exp $ (C) 2005 J.F.Dockes"; +static char rcsid[] = "@(#$Id: plaintorich.cpp,v 1.16 2006-11-18 12:31:16 dockes Exp $ (C) 2005 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -61,6 +61,7 @@ class myTextSplitCB : public TextSplitCB { // Out: first query term found in text string firstTerm; + int firstTermOcc; // Out: begin and end byte positions of query terms/groups in text vector<pair<int, int> > tboffs; @@ -190,30 +191,49 @@ bool myTextSplitCB::matchGroup(const vector<string>& terms, int window) { LOGDEB0(("myTextSplitCB::matchGroup:d %d: %s\n", window, vecStringToString(terms).c_str())); + + // The position lists we are going to work with. We extract them from the + // (string->plist) map vector<vector<int>* > plists; - // Check that each of the group terms has a position list - for (vector<string>::const_iterator it = terms.begin(); it != terms.end(); - it++) { - map<string, vector<int> >::iterator pl; - if ((pl = m_plists.find(*it)) == m_plists.end()) { - LOGDEB(("myTextSplitCB::matchGroup: [%s] not found in m_plists\n", + // A revert plist->term map. This is so that we can find who is who after + // sorting the plists by length. + map<vector<int>*, string> plistToTerm; + // For traces + vector<string> realgroup; + + // Find the position list for each term in the group. 
Not all + // necessarily exist (esp for NEAR where terms have been + // stem-expanded: we don't know which matched) + for (vector<string>::const_iterator it = terms.begin(); + it != terms.end(); it++) { + map<string, vector<int> >::iterator pl = m_plists.find(*it); + if (pl == m_plists.end()) { + LOGDEB1(("myTextSplitCB::matchGroup: [%s] not found in m_plists\n", (*it).c_str())); - return false; + continue; } plists.push_back(&(pl->second)); + plistToTerm[&(pl->second)] = *it; + realgroup.push_back(*it); } - + LOGDEB0(("myTextSplitCB::matchGroup:d %d:real group %s\n", window, + vecStringToString(realgroup).c_str())); + if (plists.size() < 2) + return false; // Sort the positions lists so that the shorter is first std::sort(plists.begin(), plists.end(), VecIntCmpShorter()); // Walk the shortest plist and look for matches int sta = int(10E9), sto = 0; int pos; + // Occurrences are from 1->N + firstTermOcc = 0; vector<int>::iterator it = plists[0]->begin(); do { if (it == plists[0]->end()) return false; pos = *it++; + firstTermOcc++; } while (!do_proximity_test(window, plists, 1, pos, pos, &sta, &sto)); SETMINMAX(pos, sta, sto); @@ -221,22 +241,20 @@ bool myTextSplitCB::matchGroup(const vector<string>& terms, int window) if (firstTerm.empty() || m_firstTermPos > sta) { // firsTerm is used to try an position the preview window over - // the match. As it's difficult to divine byte/word positions, - // we use a string search. Try to use the shortest plist for - // this, which hopefully gives a better chance for the group - // to be found (it's hopeless to try and match the whole - // group) - unsigned int minl = (unsigned int)10E9; - for (vector<string>::const_iterator it = terms.begin(); - it != terms.end(); it++) { - map<string, vector<int> >::iterator pl = m_plists.find(*it); - if (pl != m_plists.end() && pl->second.size() < minl) { - firstTerm = *it; - minl = pl->second.size(); - } - } + // the match. As it's difficult to divine byte/word positions + // in qtextedit, we use a string search. 
Use the + // shortest plist for this, which hopefully gives a better + // chance for the group to be found (it's hopeless to try and + // match the whole group) + map<vector<int>*, string>::iterator it = + plistToTerm.find(plists.front()); + if (it != plistToTerm.end()) + firstTerm = it->second; + LOGDEB(("myTextSplitCB:: best group term %s, firstTermOcc %d\n", + firstTerm.c_str(), firstTermOcc)); } + // Translate the position window into a byte offset window map<int, pair<int, int> >::iterator i1 = m_gpostobytes.find(sta); map<int, pair<int, int> >::iterator i2 = m_gpostobytes.find(sto); if (i1 != m_gpostobytes.end() && i2 != m_gpostobytes.end()) { @@ -247,6 +265,7 @@ bool myTextSplitCB::matchGroup(const vector<string>& terms, int window) LOGDEB(("myTextSplitCB::matchGroup: no bpos found for %d or %d\n", sta, sto)); } + return true; } @@ -281,7 +300,9 @@ bool myTextSplitCB::matchGroups() // editor's find() function to position on it bool plaintorich(const string& in, string& out, RefCntr<Rcl::SearchData> sdata, - string *firstTerm, bool noHeader) + string *firstTerm, + int *firstTermOcc, + bool noHeader) { Chrono chron; out.erase(); @@ -319,6 +340,8 @@ bool plaintorich(const string& in, string& out, if (firstTerm) *firstTerm = cb.firstTerm; + if (firstTermOcc) + *firstTermOcc = cb.firstTermOcc; // Rich text output if (noHeader) diff --git a/src/qtgui/plaintorich.h b/src/qtgui/plaintorich.h index 2034d674..c3891c86 100644 --- a/src/qtgui/plaintorich.h +++ b/src/qtgui/plaintorich.h @@ -16,7 +16,7 @@ */ #ifndef _PLAINTORICH_H_INCLUDED_ #define _PLAINTORICH_H_INCLUDED_ -/* @(#$Id: plaintorich.h,v 1.9 2006-11-17 12:31:50 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: plaintorich.h,v 1.10 2006-11-18 12:31:16 dockes Exp $ (C) 2004 J.F.Dockes */ #include <string> @@ -33,10 +33,12 @@ * @param out rich text output * @param terms list of query terms. These are out of Rcl::Db and dumb * @param firstTerm out: value of the first search term in text. + * @param frsttocc out: occurrence of 1st term to look for * @param noHeader if true don't output header (...) 
*/ extern bool plaintorich(const string &in, string &out, RefCntr<Rcl::SearchData> sdata, - string* firstTerm, bool noHeader = false); + string* firstTerm, int *frsttocc, + bool noHeader = false); #endif /* _PLAINTORICH_H_INCLUDED_ */ diff --git a/src/qtgui/preview_w.cpp b/src/qtgui/preview_w.cpp index 16ddca42..b3f6b6f4 100644 --- a/src/qtgui/preview_w.cpp +++ b/src/qtgui/preview_w.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: preview_w.cpp,v 1.6 2006-11-17 10:09:07 dockes Exp $ (C) 2005 J.F.Dockes"; +static char rcsid[] = "@(#$Id: preview_w.cpp,v 1.7 2006-11-18 12:31:16 dockes Exp $ (C) 2005 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -172,7 +172,7 @@ QTextEdit *Preview::getCurrentEditor() // false, the search string has been modified, we search for the new string, // starting from the current position void Preview::doSearch(const QString &text, bool next, bool reverse, - bool wo) + bool wordOnly) { LOGDEB1(("Preview::doSearch: next %d rev %d\n", int(next), int(reverse))); QTextEdit *edit = getCurrentEditor(); @@ -204,7 +204,7 @@ void Preview::doSearch(const QString &text, bool next, bool reverse, } } - bool found = edit->find(text, matchCase, wo, + bool found = edit->find(text, matchCase, wordOnly, !reverse, &mspara, &msindex); LOGDEB(("Found at para: %d index %d\n", mspara, msindex)); @@ -451,12 +451,14 @@ class ToRichThread : public QThread { string &in; RefCntr<Rcl::SearchData> m_searchData; string& firstTerm; + int& firstTermOcc; QString &out; int loglevel; public: ToRichThread(string &i, RefCntr<Rcl::SearchData> searchData, - string& ft, QString &o) - : in(i), m_searchData(searchData), firstTerm(ft), out(o) + string& ft, int& fto, QString &o) + : in(i), m_searchData(searchData), firstTerm(ft), firstTermOcc(fto), + out(o) { loglevel = DebugLog::getdbl()->getlevel(); } @@ -465,7 +467,7 @@ class ToRichThread : public QThread { DebugLog::getdbl()->setloglevel(loglevel); string rich; try { - 
plaintorich(in, rich, m_searchData, &firstTerm); + plaintorich(in, rich, m_searchData, &firstTerm, &firstTermOcc); } catch (CancelExcept) { } out = QString::fromUtf8(rich.c_str(), rich.length()); @@ -547,9 +549,11 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc, QString richTxt; bool highlightTerms = fdoc.text.length() < 1000 *1024; string firstTerm; + int firstTermOcc; if (highlightTerms) { progress.setLabelText(tr("Creating preview text")); - ToRichThread rthr(fdoc.text, m_searchData, firstTerm, richTxt); + ToRichThread rthr(fdoc.text, m_searchData, firstTerm, firstTermOcc, + richTxt); rthr.start(); for (;;prog++) { @@ -629,7 +633,10 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc, if (!firstTerm.empty()) { bool wasC = matchCheck->isChecked(); matchCheck->setChecked(false); - doSearch(QString::fromUtf8(firstTerm.c_str()), true, false, true); + for (int i = 0; i < firstTermOcc; i++) { + doSearch(QString::fromUtf8(firstTerm.c_str()), i, + false, true); + } matchCheck->setChecked(wasC); } } diff --git a/src/qtgui/reslist.cpp b/src/qtgui/reslist.cpp index b131edad..95f1f5b6 100644 --- a/src/qtgui/reslist.cpp +++ b/src/qtgui/reslist.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: reslist.cpp,v 1.11 2006-11-17 12:55:59 dockes Exp $ (C) 2005 J.F.Dockes"; +static char rcsid[] = "@(#$Id: reslist.cpp,v 1.12 2006-11-18 12:31:16 dockes Exp $ (C) 2005 J.F.Dockes"; #endif #include <time.h> @@ -381,7 +381,7 @@ void ResList::resultPageNext() // Abstract string abst; - plaintorich(doc.abstract, abst, m_searchData, 0, true); + plaintorich(doc.abstract, abst, m_searchData, 0, 0, true); // Links; string linksbuf;