diff --git a/src/lib/Makefile b/src/lib/Makefile index 212b301d..c256efad 100644 --- a/src/lib/Makefile +++ b/src/lib/Makefile @@ -8,8 +8,8 @@ LIBS = librcl.a all: $(LIBS) -OBJS = rclaspell.o rclconfig.o rclinit.o textsplit.o unacpp.o csguess.o indexer.o mimetype.o htmlparse.o myhtmlparse.o mimehandler.o internfile.o mh_exec.o mh_html.o mh_mail.o mh_mbox.o mh_text.o docseq.o history.o sortseq.o wasastringtoquery.o wasatorcl.o pathhash.o rcldb.o searchdata.o stemdb.o base64.o conftree.o copyfile.o debuglog.o execmd.o fstreewalk.o idfile.o md5.o mimeparse.o pathut.o readfile.o smallut.o transcode.o wipedir.o x11mon.o -DEPS = rclaspell.dep.stamp rclconfig.dep.stamp rclinit.dep.stamp textsplit.dep.stamp unacpp.dep.stamp csguess.dep.stamp indexer.dep.stamp mimetype.dep.stamp htmlparse.dep.stamp myhtmlparse.dep.stamp mimehandler.dep.stamp internfile.dep.stamp mh_exec.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp mh_mbox.dep.stamp mh_text.dep.stamp docseq.dep.stamp history.dep.stamp sortseq.dep.stamp wasastringtoquery.dep.stamp wasatorcl.dep.stamp pathhash.dep.stamp rcldb.dep.stamp searchdata.dep.stamp stemdb.dep.stamp base64.dep.stamp conftree.dep.stamp copyfile.dep.stamp debuglog.dep.stamp execmd.dep.stamp fstreewalk.dep.stamp idfile.dep.stamp md5.dep.stamp mimeparse.dep.stamp pathut.dep.stamp readfile.dep.stamp smallut.dep.stamp transcode.dep.stamp wipedir.dep.stamp x11mon.dep.stamp +OBJS = rclaspell.o rclconfig.o rclinit.o textsplit.o unacpp.o csguess.o indexer.o mimetype.o htmlparse.o myhtmlparse.o mimehandler.o internfile.o mh_exec.o mh_html.o mh_mail.o mh_mbox.o mh_text.o docseq.o docseqdb.o docseqhist.o history.o sortseq.o wasastringtoquery.o wasatorcl.o pathhash.o rcldb.o searchdata.o stemdb.o base64.o conftree.o copyfile.o debuglog.o execmd.o fstreewalk.o idfile.o md5.o mimeparse.o pathut.o readfile.o smallut.o transcode.o wipedir.o x11mon.o +DEPS = rclaspell.dep.stamp rclconfig.dep.stamp rclinit.dep.stamp textsplit.dep.stamp unacpp.dep.stamp csguess.dep.stamp 
indexer.dep.stamp mimetype.dep.stamp htmlparse.dep.stamp myhtmlparse.dep.stamp mimehandler.dep.stamp internfile.dep.stamp mh_exec.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp mh_mbox.dep.stamp mh_text.dep.stamp docseq.dep.stamp docseqdb.dep.stamp docseqhist.dep.stamp history.dep.stamp sortseq.dep.stamp wasastringtoquery.dep.stamp wasatorcl.dep.stamp pathhash.dep.stamp rcldb.dep.stamp searchdata.dep.stamp stemdb.dep.stamp base64.dep.stamp conftree.dep.stamp copyfile.dep.stamp debuglog.dep.stamp execmd.dep.stamp fstreewalk.dep.stamp idfile.dep.stamp md5.dep.stamp mimeparse.dep.stamp pathut.dep.stamp readfile.dep.stamp smallut.dep.stamp transcode.dep.stamp wipedir.dep.stamp x11mon.dep.stamp librcl.a : $(DEPS) $(OBJS) unac.o ar ru librcl.a $(OBJS) unac.o @@ -53,6 +53,10 @@ mh_text.o : ../internfile/mh_text.cpp $(CXX) $(ALL_CXXFLAGS) -c ../internfile/mh_text.cpp docseq.o : ../query/docseq.cpp $(CXX) $(ALL_CXXFLAGS) -c ../query/docseq.cpp +docseqdb.o : ../query/docseqdb.cpp + $(CXX) $(ALL_CXXFLAGS) -c ../query/docseqdb.cpp +docseqhist.o : ../query/docseqhist.cpp + $(CXX) $(ALL_CXXFLAGS) -c ../query/docseqhist.cpp history.o : ../query/history.cpp $(CXX) $(ALL_CXXFLAGS) -c ../query/history.cpp sortseq.o : ../query/sortseq.cpp @@ -159,6 +163,12 @@ mh_text.dep.stamp : ../internfile/mh_text.cpp docseq.dep.stamp : ../query/docseq.cpp $(CXX) -M $(ALL_CXXFLAGS) ../query/docseq.cpp > docseq.dep touch docseq.dep.stamp +docseqdb.dep.stamp : ../query/docseqdb.cpp + $(CXX) -M $(ALL_CXXFLAGS) ../query/docseqdb.cpp > docseqdb.dep + touch docseqdb.dep.stamp +docseqhist.dep.stamp : ../query/docseqhist.cpp + $(CXX) -M $(ALL_CXXFLAGS) ../query/docseqhist.cpp > docseqhist.dep + touch docseqhist.dep.stamp history.dep.stamp : ../query/history.cpp $(CXX) -M $(ALL_CXXFLAGS) ../query/history.cpp > history.dep touch history.dep.stamp @@ -246,6 +256,8 @@ include mh_mail.dep include mh_mbox.dep include mh_text.dep include docseq.dep +include docseqdb.dep +include docseqhist.dep include history.dep 
include sortseq.dep include wasastringtoquery.dep diff --git a/src/lib/mkMake b/src/lib/mkMake index 51e989d4..315a2f56 100755 --- a/src/lib/mkMake +++ b/src/lib/mkMake @@ -22,6 +22,8 @@ ${depth}/internfile/mh_mail.cpp \ ${depth}/internfile/mh_mbox.cpp \ ${depth}/internfile/mh_text.cpp \ ${depth}/query/docseq.cpp \ +${depth}/query/docseqdb.cpp \ +${depth}/query/docseqhist.cpp \ ${depth}/query/history.cpp \ ${depth}/query/sortseq.cpp \ ${depth}/query/wasastringtoquery.cpp \ diff --git a/src/qtgui/rclmain_w.cpp b/src/qtgui/rclmain_w.cpp index 205d3afb..48de7704 100644 --- a/src/qtgui/rclmain_w.cpp +++ b/src/qtgui/rclmain_w.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: rclmain_w.cpp,v 1.22 2007-01-13 15:21:41 dockes Exp $ (C) 2005 J.F.Dockes"; +static char rcsid[] = "@(#$Id: rclmain_w.cpp,v 1.23 2007-01-19 10:32:39 dockes Exp $ (C) 2005 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -67,6 +67,8 @@ using std::pair; #include "ssearch_w.h" #include "execmd.h" #include "internfile.h" +#include "docseqdb.h" +#include "docseqhist.h" #include "rclmain_w.h" #include "moc_rclmain_w.cpp" @@ -825,11 +827,15 @@ void RclMain::startManual() // Search for document 'like' the selected one. void RclMain::docExpand(int docnum) { + if (!rcldb) + return; Rcl::Doc doc; if (!resList->getDoc(docnum, doc)) return; list terms; terms = rcldb->expand(doc); + if (terms.empty()) + return; // Do we keep the original query. I think we'd better not. // rcldb->expand is set to keep the original query terms instead. 
QString text;// = sSearch->queryText->currentText(); diff --git a/src/qtgui/reslist.cpp b/src/qtgui/reslist.cpp index 7befae8f..7ab355d3 100644 --- a/src/qtgui/reslist.cpp +++ b/src/qtgui/reslist.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: reslist.cpp,v 1.19 2007-01-12 09:01:11 dockes Exp $ (C) 2005 J.F.Dockes"; +static char rcsid[] = "@(#$Id: reslist.cpp,v 1.20 2007-01-19 10:32:39 dockes Exp $ (C) 2005 J.F.Dockes"; #endif #include @@ -125,16 +125,20 @@ int ResList::parnumfromdocnum(int docnum) return -1; } -// Return doc from current or adjacent result pages +// Return doc from current or adjacent result pages. We can get called +// for a document not in the current page if the user browses through +// results inside a result window (with shift-arrow). This can only +// result in a one-page change. bool ResList::getDoc(int docnum, Rcl::Doc &doc) { + LOGDEB(("ResList::getDoc: docnum %d m_winfirst %d\n", docnum, m_winfirst)); if (docnum < 0) return false; // Is docnum in current page ? Then all Ok if (docnum >= int(m_winfirst) && docnum < int(m_winfirst + m_curDocs.size())) { doc = m_curDocs[docnum - m_winfirst]; - goto found; + return true; } // Else we accept to page down or up but not further @@ -148,12 +152,9 @@ bool ResList::getDoc(int docnum, Rcl::Doc &doc) if (docnum >= int(m_winfirst) && docnum < int(m_winfirst + m_curDocs.size())) { doc = m_curDocs[docnum - m_winfirst]; - goto found; + return true; } return false; - - found: - return true; } void ResList::keyPressEvent(QKeyEvent * e) @@ -249,6 +250,7 @@ static string displayableBytes(long size) // Fill up result list window with next screen of hits void ResList::resultPageNext() { + // This checks that the RefCntr pseudo-pointer is not holding a null. 
if (m_docSource.getcnt() == 0) return; @@ -258,91 +260,118 @@ void ResList::resultPageNext() LOGDEB(("resultPageNext: rescnt %d, winfirst %d\n", resCnt, m_winfirst)); - // If we are already on the last page, nothing to do: - if (m_winfirst >= 0 && - (m_winfirst + prefs.respagesize > resCnt)) { - emit nextPageAvailable(false); - return; - } - bool hasPrev = false; if (m_winfirst < 0) { m_winfirst = 0; } else { - hasPrev = true; m_winfirst += prefs.respagesize; } + if (m_winfirst) + hasPrev = true; emit prevPageAvailable(hasPrev); - bool gotone = false; + // Get the next page of results. + vector respage; + int pagelen = m_docSource->getSeqSlice(m_winfirst, + prefs.respagesize, respage); + + // If page was truncated, there is no next + bool hasNext = pagelen == prefs.respagesize; + emit nextPageAvailable(hasNext); + + if (pagelen <= 0) { + // No results ? This can only happen on the first page or if the + // actual result list size is a multiple of the page pref (else + // there would have been no Next on the last page) + if (m_winfirst) { + // Have already results. Let them show, just disable the + // Next button. We'd need to remove the Next link from the page + // too. + // Restore the m_winfirst value + m_winfirst -= prefs.respagesize; + return; + } + clear(); + QString chunk = "

"; + chunk += "

"; + chunk += QString::fromUtf8(m_docSource->title().c_str()); + chunk += "
"; + chunk += ""; + chunk += tr("Show query details"); + chunk += "
"; + append(chunk); + append(tr("

No results found
")); + if (m_winfirst < 0) + m_winfirst = -1; + return; + } + clear(); - - int last = MIN(resCnt - m_winfirst, prefs.respagesize); - m_curDocs.clear(); // Query term colorization - QStyleSheetItem *item = - new QStyleSheetItem(styleSheet(), "termtag" ); + QStyleSheetItem *item = new QStyleSheetItem(styleSheet(), "termtag" ); item->setColor("blue"); - // item->setFontWeight(QFont::Bold); // Result paragraph format string sformat = string(prefs.reslistformat.utf8()); LOGDEB(("resultPageNext: format: [%s]\n", sformat.c_str())); - // Insert results if any in result list window. We have to send - // the text to the widgets, because we need the paragraph number - // each time we add a result paragraph (its diffult and - // error-prone to compute the paragraph numbers in parallel). We - // would like to disable updates while we're doing this, but - // couldn't find a way to make it work, the widget seems to become - // confused if appended while updates are disabled + // Display list header + // We could use a but the textedit doesnt display + // it prominently + // Note: have to append text in chunks that make sense + // html-wise. If we break things up to much, the editor + // gets confused. Hence the use of the 'chunk' text + // accumulator + // Also note that there can be results beyond the estimated resCnt. + QString chunk = "<qt><head></head><body><p>"; + + chunk += "<font size=+1><b>"; + chunk += QString::fromUtf8(m_docSource->title().c_str()); + chunk += ".</b></font>"; + + chunk += "   "; + + if (m_winfirst + pagelen < resCnt) { + chunk += + tr("Documents <b>%1-%2</b> out of at least <b>%3</b> for ") + .arg(m_winfirst+1) + .arg(m_winfirst+pagelen) + .arg(resCnt); + } else { + chunk += tr("Documents <b>%1-%2</b> for ") + .arg(m_winfirst+1) + .arg(m_winfirst+pagelen); + } + + chunk += "<a href=\"H-1\">"; + chunk += tr("(show query)"); + chunk += "</a>"; + + append(chunk); + + // Insert results in result list window. 
We have to actually send + // the text to the widget (instead of setting the whole at the + // end), because we need the paragraph number each time we add a + // result paragraph (its diffult and error-prone to compute the + // paragraph numbers in parallel). We would like to disable + // updates while we're doing this, but couldn't find a way to make + // it work, the widget seems to become confused if appended while + // updates are disabled // setUpdatesEnabled(false); - for (int i = 0; i < last; i++) { - string sh; - Rcl::Doc doc; - int percent; - if (!m_docSource->getDoc(m_winfirst + i, doc, &percent, &sh)) { - // Error or end of docs, stop. - break; - } + for (int i = 0; i < pagelen; i++) { + + int &percent(respage[i].percent); + Rcl::Doc &doc(respage[i].doc); + string& sh(respage[i].subHeader); + if (percent == -1) { percent = 0; // Document not available, maybe other further, will go on. doc.abstract = string(tr("Unavailable document").utf8()); } - if (i == 0) { - // Display list header - // We could use a <title> but the textedit doesnt display - // it prominently - // Note: have to append text in chunks that make sense - // html-wise. If we break things up to much, the editor - // gets confused. 
Hence the use of the 'chunk' text - // accumulator - QString chunk = "<qt><head></head><body><p>"; - - chunk += "<font size=+1><b>"; - chunk += QString::fromUtf8(m_docSource->title().c_str()); - chunk += ".</b></font>"; - - chunk += "   "; - - chunk += tr("Documents <b>%1-%2</b> out of <b>%3</b> for ") - .arg(m_winfirst+1) - .arg(m_winfirst+last) - .arg(resCnt); - - chunk += "<a href=\"H-1\">"; - chunk += tr("(show query)"); - chunk += "</a>"; - - append(chunk); - } - - gotone = true; - // Determine icon to display if any string img_name; if (prefs.showicons) { @@ -373,7 +402,7 @@ void ResList::resultPageNext() // Result number char numbuf[20]; - int docnumforlinks = m_winfirst+1+i; + int docnumforlinks = m_winfirst + 1 + i; sprintf(numbuf, "%d", docnumforlinks); // Document date: either doc or file modification time @@ -405,26 +434,16 @@ void ResList::resultPageNext() sizebuf = displayableBytes(fsize); } - // Abstract. The docsequence should deal with this as we don't - // know if a query is open or if we're displaying - // history. 
OTOH, if the docsequence does it, we're going to - // generate a lot of unneeded abstracts for sorted sequences - // (for all the queried for but undisplayed entries) - string richabst; string abstract; - LOGDEB2(("Abstract: clcnt %d prfs.build %d syntabs %d prfs.repl %d\n", - m_searchData->clauseCount(), prefs.queryBuildAbstract, - doc.syntabs, prefs.queryReplaceAbstract)); - if (m_searchData->clauseCount() > 0 && prefs.queryBuildAbstract && + if (prefs.queryBuildAbstract && (doc.syntabs || prefs.queryReplaceAbstract)) { - rcldb->makeDocAbstract(doc, abstract); - if (abstract.empty()) - abstract = doc.abstract; + abstract = m_docSource->getAbstract(doc); } else { abstract = doc.abstract; } - plaintorich(abstract, richabst, m_searchData, true); + string richabst; + plaintorich(abstract, richabst, m_searchData, true); // Links; string linksbuf; @@ -439,7 +458,7 @@ void ResList::resultPageNext() linksbuf += string("<a href=") + vlbuf + ">" + "Edit" + "</a>"; } - // Concatenate chunks to build the result list paragraph: + // Build the result list paragraph: string result; // Subheader: this is used by history @@ -481,47 +500,27 @@ void ResList::resultPageNext() m_curDocs.push_back(doc); } - bool hasNext = false; - if (m_winfirst >= 0 && m_winfirst + prefs.respagesize < resCnt) { - hasNext = true; - } - - if (gotone) { - QString chunk = "<p align=\"center\">"; - if (hasPrev || hasNext) { - if (hasPrev) { - chunk += "<a href=\"p-1\"><b>"; - chunk += tr("Previous"); - chunk += "</b></a>   "; - } - if (hasNext) { - chunk += "<a href=\"n-1\"><b>"; - chunk += tr("Next"); - chunk += "</b></a>"; - } - chunk += "</p>\n"; - append(chunk); + // Footer + chunk = "<p align=\"center\">"; + if (hasPrev || hasNext) { + if (hasPrev) { + chunk += "<a href=\"p-1\"><b>"; + chunk += tr("Previous"); + chunk += "</b></a>   "; } - append("</body></qt>"); - ensureCursorVisible(); - } else { - // Restore first in win parameter that we shouln't have incremented - QString chunk = "<p><font 
size=+1><b>"; - chunk += QString::fromUtf8(m_docSource->title().c_str()); - chunk += "</b></font><br>"; - chunk += "<a href=\"H-1\">"; - chunk += tr("Show query details"); - chunk += "</a><br>"; + if (hasNext) { + chunk += "<a href=\"n-1\"><b>"; + chunk += tr("Next"); + chunk += "</b></a>"; + } + chunk += "</p>\n"; append(chunk); - append(tr("<p><b>No results found</b><br>")); - m_winfirst -= prefs.respagesize; - if (m_winfirst < 0) - m_winfirst = -1; - hasNext = false; } + append("</body></qt>"); + // Possibly color paragraph of current preview if any previewExposed(m_curPvDoc); - emit nextPageAvailable(hasNext); + ensureCursorVisible(); } // Single click in result list use this for document selection, if no @@ -678,10 +677,7 @@ void ResList::menuCopyURL() } void ResList::menuExpand() { - Rcl::Doc doc; - if (rcldb && getDoc(m_popDoc, doc)) { - emit docExpand(m_popDoc); - } + emit docExpand(m_popDoc); } QString ResList::getDescription() diff --git a/src/qtgui/ssearch_w.cpp b/src/qtgui/ssearch_w.cpp index e44eb3fb..6f30e06a 100644 --- a/src/qtgui/ssearch_w.cpp +++ b/src/qtgui/ssearch_w.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: ssearch_w.cpp,v 1.17 2006-12-19 12:11:21 dockes Exp $ (C) 2006 J.F.Dockes"; +static char rcsid[] = "@(#$Id: ssearch_w.cpp,v 1.18 2007-01-19 10:32:39 dockes Exp $ (C) 2006 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -34,14 +34,18 @@ static char rcsid[] = "@(#$Id: ssearch_w.cpp,v 1.17 2006-12-19 12:11:21 dockes E #include "ssearch_w.h" #include "refcntr.h" #include "textsplit.h" +#include "wasatorcl.h" -enum SSearchType {SST_ANY = 0, SST_ALL = 1, SST_FNM = 2}; +enum SSearchType {SST_ANY = 0, SST_ALL = 1, SST_FNM = 2, SST_LANG}; void SSearch::init() { + // See enum above and keep in order ! 
searchTypCMB->insertItem(tr("Any term")); searchTypCMB->insertItem(tr("All terms")); searchTypCMB->insertItem(tr("File name")); + searchTypCMB->insertItem(tr("Query language")); + queryText->insertStringList(prefs.ssearchHistory); queryText->setEditText(""); connect(queryText->lineEdit(), SIGNAL(returnPressed()), @@ -76,27 +80,44 @@ void SSearch::startSimpleSearch() string u8 = (const char *)queryText->currentText().utf8(); LOGDEB(("SSearch::startSimpleSearch: [%s]\n", u8.c_str())); - RefCntr<Rcl::SearchData> sdata(new Rcl::SearchData(Rcl::SCLT_OR)); SSearchType tp = (SSearchType)searchTypCMB->currentItem(); + Rcl::SearchData *sdata = 0; - if (prefs.ssearchAutoPhrase && (tp == SST_ANY || tp == SST_ALL) && - u8.find_first_of("\"") == string::npos && - TextSplit::countWords(u8) > 1) { - sdata->addClause(new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE, - u8, 0)); - } + if (tp == SST_LANG) { + string reason; + sdata = wasaStringToRcl(u8, reason); + if (sdata == 0) { + QMessageBox::warning(0, "Recoll", tr("Bad query string") + + QString::fromAscii(reason.c_str())); + return; + } + } else { + sdata = new Rcl::SearchData(Rcl::SCLT_OR); + if (sdata == 0) { + QMessageBox::warning(0, "Recoll", tr("Out of memory")); + return; + } - switch (tp) { - case SST_ANY: - default: - sdata->addClause(new Rcl::SearchDataClauseSimple(Rcl::SCLT_OR, u8)); - break; - case SST_ALL: - sdata->addClause(new Rcl::SearchDataClauseSimple(Rcl::SCLT_AND, u8)); - break; - case SST_FNM: - sdata->addClause(new Rcl::SearchDataClauseFilename(u8)); - break; + // Maybe add automatic phrase? 
+ if (prefs.ssearchAutoPhrase && (tp == SST_ANY || tp == SST_ALL) && + u8.find_first_of("\"") == string::npos && + TextSplit::countWords(u8) > 1) { + sdata->addClause(new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE, + u8, 0)); + } + + switch (tp) { + case SST_ANY: + default: + sdata->addClause(new Rcl::SearchDataClauseSimple(Rcl::SCLT_OR,u8)); + break; + case SST_ALL: + sdata->addClause(new Rcl::SearchDataClauseSimple(Rcl::SCLT_AND,u8)); + break; + case SST_FNM: + sdata->addClause(new Rcl::SearchDataClauseFilename(u8)); + break; + } } // Search terms history @@ -135,7 +156,10 @@ void SSearch::startSimpleSearch() for (int index = 0; index < queryText->count(); index++) { prefs.ssearchHistory.push_back(queryText->text(index)); } - emit startSearch(sdata); + + + RefCntr<Rcl::SearchData> rsdata(sdata); + emit startSearch(rsdata); } void SSearch::setAnyTermMode() diff --git a/src/query/docseq.cpp b/src/query/docseq.cpp index 1f73f9cf..40456272 100644 --- a/src/query/docseq.cpp +++ b/src/query/docseq.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: docseq.cpp,v 1.9 2006-09-13 14:57:56 dockes Exp $ (C) 2005 J.F.Dockes"; +static char rcsid[] = "@(#$Id: docseq.cpp,v 1.10 2007-01-19 10:32:39 dockes Exp $ (C) 2005 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -22,70 +22,17 @@ static char rcsid[] = "@(#$Id: docseq.cpp,v 1.9 2006-09-13 14:57:56 dockes Exp $ #include "docseq.h" -bool DocSequenceDb::getDoc(int num, Rcl::Doc &doc, int *percent, string *sh) +int DocSequence::getSeqSlice(int offs, int cnt, vector<ResListEntry>& result) { - if (sh) sh->erase(); - return m_db ? 
m_db->getDoc(num, doc, percent) : false; -} - -int DocSequenceDb::getResCnt() -{ - if (!m_db) - return -1; - if (m_rescnt < 0) { - m_rescnt= m_db->getResCnt(); + int ret = 0; + for (int num = offs; num < offs + cnt; num++, ret++) { + result.push_back(ResListEntry()); + if (!getDoc(num, result.back().doc, &result.back().percent, + &result.back().subHeader)) { + result.pop_back(); + return ret; + } } - return m_rescnt; -} - -void DocSequenceDb::getTerms(list<string> &terms) -{ - if (!m_db) - return; - m_db->getQueryTerms(terms); -} - -bool DocSequenceHistory::getDoc(int num, Rcl::Doc &doc, int *percent, - string *sh) -{ - // Retrieve history list - if (!m_hist) - return false; - if (m_hlist.empty()) - m_hlist = m_hist->getDocHistory(); - - if (num < 0 || num >= (int)m_hlist.size()) - return false; - int skip; - if (m_prevnum >= 0 && num >= m_prevnum) { - skip = num - m_prevnum; - } else { - skip = num; - m_it = m_hlist.begin(); - m_prevtime = -1; - } - m_prevnum = num; - while (skip--) - m_it++; - if (percent) - *percent = 100; - if (sh) { - if (m_prevtime < 0 || abs(m_prevtime - m_it->unixtime) > 86400) { - m_prevtime = m_it->unixtime; - time_t t = (time_t)(m_it->unixtime); - *sh = string(ctime(&t)); - // Get rid of the final \n in ctime - sh->erase(sh->length()-1); - } else - sh->erase(); - } - return m_db->getDoc(m_it->fn, m_it->ipath, doc, percent); -} - -int DocSequenceHistory::getResCnt() -{ - if (m_hlist.empty()) - m_hlist = m_hist->getDocHistory(); - return m_hlist.size(); + return ret; } diff --git a/src/query/docseq.h b/src/query/docseq.h index d7469da9..fb1468d9 100644 --- a/src/query/docseq.h +++ b/src/query/docseq.h @@ -16,28 +16,37 @@ */ #ifndef _DOCSEQ_H_INCLUDED_ #define _DOCSEQ_H_INCLUDED_ -/* @(#$Id: docseq.h,v 1.9 2006-09-13 14:57:56 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: docseq.h,v 1.10 2007-01-19 10:32:39 dockes Exp $ (C) 2004 J.F.Dockes */ #include <string> #include <list> - +#include <vector> #ifndef NO_NAMESPACES using std::string; 
using std::list; +using std::vector; #endif -#include "rcldb.h" -#include "history.h" +#include "rcldoc.h" + +// A result list entry. +struct ResListEntry { + Rcl::Doc doc; + int percent; + string subHeader; + ResListEntry() : percent(0) {} +}; /** Interface for a list of documents coming from some source. The result list display data may come from different sources (ie: - history or Db query). We have an interface to make things cleaner. + history or Db query), and be post-processed (DocSeqSorted). */ class DocSequence { public: DocSequence(const string &t) : m_title(t) {} virtual ~DocSequence() {} - /** Get document at given rank + + /** Get document at given rank. * * @param num document rank in sequence * @param doc return data @@ -52,49 +61,30 @@ class DocSequence { */ virtual bool getDoc(int num, Rcl::Doc &doc, int *percent, string *sh = 0) = 0; + + /** Get next page of documents. This accumulates entries into the result + * list (doesn't reset it). */ + virtual int getSeqSlice(int offs, int cnt, vector<ResListEntry>& result); + + /** Get abstract for document. This is special because it may take time. + * The default is to return the input doc's abstract fields, but some + * sequences can compute a better value (ie: docseqdb) */ + virtual string getAbstract(Rcl::Doc& doc) { + return doc.abstract; + } + + /** Get estimated total count in results */ virtual int getResCnt() = 0; + + /** Get title for result list */ virtual string title() {return m_title;} + + /** Get search terms (for highlighting abstracts). Some sequences + * may have no associated search terms. Implement this for them. 
*/ virtual void getTerms(list<string>& t) {t.clear();} private: string m_title; }; - -/** A DocSequence from a Db query (there should be one active for this - to make sense */ -class DocSequenceDb : public DocSequence { - public: - DocSequenceDb(Rcl::Db *d, const string &t) : - DocSequence(t), m_db(d), m_rescnt(-1) - {} - virtual ~DocSequenceDb() {} - virtual bool getDoc(int num, Rcl::Doc &doc, int *percent, string * = 0); - virtual int getResCnt(); - virtual void getTerms(list<string>&); - - private: - Rcl::Db *m_db; - int m_rescnt; -}; - -/** A DocSequence coming from the history file */ -class DocSequenceHistory : public DocSequence { - public: - DocSequenceHistory(Rcl::Db *d, RclHistory *h, const string &t) - : DocSequence(t), m_db(d), m_hist(h), m_prevnum(-1), m_prevtime(-1) {} - virtual ~DocSequenceHistory() {} - - virtual bool getDoc(int num, Rcl::Doc &doc, int *percent, string *sh = 0); - virtual int getResCnt(); - private: - Rcl::Db *m_db; - RclHistory *m_hist; - int m_prevnum; - long m_prevtime; - - list<RclDHistoryEntry> m_hlist; - list<RclDHistoryEntry>::const_iterator m_it; -}; - #endif /* _DOCSEQ_H_INCLUDED_ */ diff --git a/src/query/docseqdb.cpp b/src/query/docseqdb.cpp new file mode 100644 index 00000000..4741f294 --- /dev/null +++ b/src/query/docseqdb.cpp @@ -0,0 +1,57 @@ +#ifndef lint +static char rcsid[] = "@(#$Id: docseqdb.cpp,v 1.1 2007-01-19 10:32:39 dockes Exp $ (C) 2005 J.F.Dockes"; +#endif +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+#include <math.h>
+#include <time.h>
+
+#include "docseqdb.h"
+#include "rcldb.h"
+
+// Fetch the document at rank 'num' by forwarding to the database object.
+// The subheader string, when one is supplied, is always emptied here.
+// Returns false when no database is attached.
+bool DocSequenceDb::getDoc(int num, Rcl::Doc &doc, int *percent, string *sh)
+{
+    if (sh) sh->erase();
+    return m_db ? m_db->getDoc(num, doc, percent) : false;
+}
+
+// Result count as reported by the database, or -1 when no database is
+// attached. The database is only asked while the stored value is
+// negative; afterwards the stored value is returned.
+int DocSequenceDb::getResCnt()
+{
+    if (!m_db)
+        return -1;
+    if (m_rescnt < 0) {
+        m_rescnt= m_db->getResCnt();
+    }
+    return m_rescnt;
+}
+
+// Hand 'terms' to the database's getQueryTerms(). 'terms' is left
+// untouched when no database is attached.
+void DocSequenceDb::getTerms(list<string> &terms)
+{
+    if (!m_db)
+        return;
+    m_db->getQueryTerms(terms);
+}
+
+// Ask the database to build an abstract for 'doc' (makeDocAbstract).
+// Falls back to the document's own abstract field when no database is
+// attached or when the generated text is empty.
+string DocSequenceDb::getAbstract(Rcl::Doc &doc)
+{
+    if (!m_db)
+        return doc.abstract;
+    string abstract;
+    m_db->makeDocAbstract(doc, abstract);
+    return abstract.empty() ? doc.abstract : abstract;
+}
+
diff --git a/src/query/docseqdb.h b/src/query/docseqdb.h
new file mode 100644
index 00000000..8b9f9dc3
--- /dev/null
+++ b/src/query/docseqdb.h
@@ -0,0 +1,50 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+#ifndef _DOCSEQDB_H_INCLUDED_
+#define _DOCSEQDB_H_INCLUDED_
+/* @(#$Id: docseqdb.h,v 1.1 2007-01-19 10:32:39 dockes Exp $ (C) 2004 J.F.Dockes */
+#include <string>
+#include <list>
+#ifndef NO_NAMESPACES
+using std::string;
+using std::list;
+#endif
+
+#include "docseq.h"
+namespace Rcl {
+class Db;
+}
+
+/** A DocSequence from a Db query (there should be one active for this
+ *  to make sense).
+ *
+ *  Only a pointer to the database is stored, so Rcl::Db can stay a
+ *  forward declaration in this header. The destructor is empty: the
+ *  database object is not released by this class.
+ */
+class DocSequenceDb : public DocSequence {
+ public:
+    /** @param d database the documents are read from (may be null)
+     *  @param t title, passed on to the DocSequence base class */
+    DocSequenceDb(Rcl::Db *d, const string &t) :
+        DocSequence(t), m_db(d), m_rescnt(-1)
+    {}
+    virtual ~DocSequenceDb() {}
+    /** Get the document at the given rank; the last argument is the
+     *  optional subheader output string. */
+    virtual bool getDoc(int num, Rcl::Doc &doc, int *percent, string * = 0);
+    /** Get the result count. */
+    virtual int getResCnt();
+    /** Get the search terms of the query. */
+    virtual void getTerms(list<string>&);
+    /** Get an abstract for the document. */
+    virtual string getAbstract(Rcl::Doc &doc);
+
+ private:
+    /* Database handle as received by the constructor */
+    Rcl::Db *m_db;
+    /* Result count storage, initialised to -1 by the constructor */
+    int m_rescnt;
+};
+
+#endif /* _DOCSEQDB_H_INCLUDED_ */
diff --git a/src/query/docseqhist.cpp b/src/query/docseqhist.cpp
new file mode 100644
index 00000000..872ebe70
--- /dev/null
+++ b/src/query/docseqhist.cpp
@@ -0,0 +1,68 @@
+#ifndef lint
+static char rcsid[] = "@(#$Id: docseqhist.cpp,v 1.1 2007-01-19 10:32:39 dockes Exp $ (C) 2005 J.F.Dockes";
+#endif
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */ +#include <math.h> +#include <time.h> + +#include "docseqhist.h" +#include "rcldb.h" + +bool DocSequenceHistory::getDoc(int num, Rcl::Doc &doc, int *percent, + string *sh) +{ + // Retrieve history list + if (!m_hist) + return false; + if (m_hlist.empty()) + m_hlist = m_hist->getDocHistory(); + + if (num < 0 || num >= (int)m_hlist.size()) + return false; + int skip; + if (m_prevnum >= 0 && num >= m_prevnum) { + skip = num - m_prevnum; + } else { + skip = num; + m_it = m_hlist.begin(); + m_prevtime = -1; + } + m_prevnum = num; + while (skip--) + m_it++; + if (percent) + *percent = 100; + if (sh) { + if (m_prevtime < 0 || abs(m_prevtime - m_it->unixtime) > 86400) { + m_prevtime = m_it->unixtime; + time_t t = (time_t)(m_it->unixtime); + *sh = string(ctime(&t)); + // Get rid of the final \n in ctime + sh->erase(sh->length()-1); + } else + sh->erase(); + } + return m_db->getDoc(m_it->fn, m_it->ipath, doc, percent); +} + +int DocSequenceHistory::getResCnt() +{ + if (m_hlist.empty()) + m_hlist = m_hist->getDocHistory(); + return m_hlist.size(); +} diff --git a/src/query/docseqhist.h b/src/query/docseqhist.h new file mode 100644 index 00000000..b179f5bc --- /dev/null +++ b/src/query/docseqhist.h @@ -0,0 +1,55 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the + * Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ */
+#ifndef _DOCSEQHIST_H_INCLUDED_
+#define _DOCSEQHIST_H_INCLUDED_
+/* @(#$Id: docseqhist.h,v 1.1 2007-01-19 10:32:39 dockes Exp $ (C) 2004 J.F.Dockes */
+#include <string>
+#include <list>
+#ifndef NO_NAMESPACES
+using std::string;
+using std::list;
+#endif
+
+#include "docseq.h"
+#include "history.h"
+namespace Rcl {
+class Db;
+}
+class RclHistory;
+
+/** A DocSequence coming from the history file.
+ * History is kept as a list of urls. This queries the db to fetch
+ * metadata for an url key.
+ *
+ * Both pointers are stored as received; the destructor is empty, so
+ * neither object is released by this class. */
+class DocSequenceHistory : public DocSequence {
+ public:
+    /** @param d database used to fetch the document for a history entry
+     *  @param h history object supplying the entry list
+     *  @param t title, passed on to the DocSequence base class */
+    DocSequenceHistory(Rcl::Db *d, RclHistory *h, const string &t)
+        : DocSequence(t), m_db(d), m_hist(h), m_prevnum(-1), m_prevtime(-1) {}
+    virtual ~DocSequenceHistory() {}
+
+    /** Get the document for the history entry at rank num. When sh is
+     *  given it receives a date string if the entry is more than a day
+     *  away from the one that last produced a date string, and is
+     *  emptied otherwise. */
+    virtual bool getDoc(int num, Rcl::Doc &doc, int *percent, string *sh = 0);
+    /** Get the number of entries in the history list. */
+    virtual int getResCnt();
+ private:
+    /* Database and history objects as received by the constructor */
+    Rcl::Db *m_db;
+    RclHistory *m_hist;
+    /* Rank asked for by the previous getDoc() call, -1 before any call */
+    int m_prevnum;
+    /* Time of the entry that last produced a date string, -1 if none */
+    long m_prevtime;
+
+    /* History entries, loaded from m_hist on first use */
+    list<RclDHistoryEntry> m_hlist;
+    /* Position inside m_hlist reached by the previous getDoc() call */
+    list<RclDHistoryEntry>::const_iterator m_it;
+};
+
+#endif /* _DOCSEQHIST_H_INCLUDED_ */
diff --git a/src/query/sortseq.cpp b/src/query/sortseq.cpp
index 4c53f460..195ad9bd 100644
--- a/src/query/sortseq.cpp
+++ b/src/query/sortseq.cpp
@@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: sortseq.cpp,v 1.9 2006-12-05 15:18:48 dockes Exp $ (C) 2005 J.F.Dockes";
+static char rcsid[] = "@(#$Id: sortseq.cpp,v 1.10 2007-01-19 10:32:39 dockes Exp $ (C) 2005 J.F.Dockes";
 #endif
 /*
  * This program is free software; you can redistribute it and/or modify
@@ -84,7 +84,7 @@ public:
 DocSeqSorted::DocSeqSorted(RefCntr<DocSequence> iseq, DocSeqSortSpec &sortspec,
                            const std::string &t)
-    : DocSequence(t)
+    : DocSequence(t), m_seq(iseq)
 {
     m_spec = sortspec;
     LOGDEB(("DocSeqSorted:: count %d\n", m_spec.sortwidth));
@@ -120,3 +120,8 @@ bool DocSeqSorted::getDoc(int num, Rcl::Doc &doc, int *percent, string *)
     doc = *m_docsp[num];
     return true;
 }
+
+// Abstracts are produced by the sequence this one was built from.
+string DocSeqSorted::getAbstract(Rcl::Doc& doc)
+{
+    return m_seq->getAbstract(doc);
+}
diff --git a/src/query/sortseq.h b/src/query/sortseq.h
index 16e8bd02..7f055bca 100644
--- a/src/query/sortseq.h
+++ b/src/query/sortseq.h
@@ -16,7 +16,7 @@
  */
 #ifndef _SORTSEQ_H_INCLUDED_
 #define _SORTSEQ_H_INCLUDED_
-/* @(#$Id: sortseq.h,v 1.8 2006-12-05 15:18:48 dockes Exp $ (C) 2004 J.F.Dockes */
+/* @(#$Id: sortseq.h,v 1.9 2007-01-19 10:32:39 dockes Exp $ (C) 2004 J.F.Dockes */
 #include <vector>
 #include <string>
 
@@ -48,10 +48,12 @@ class DocSeqSorted : public DocSequence {
     virtual ~DocSeqSorted() {}
     virtual bool getDoc(int num, Rcl::Doc &doc, int *percent, string *sh = 0);
     virtual int getResCnt() {return m_spec.sortwidth;}
+    virtual string getAbstract(Rcl::Doc& doc);
 
  private:
-    DocSeqSortSpec m_spec;
-    std::vector<Rcl::Doc> m_docs;
+    // Sequence received by the constructor; getAbstract() forwards to it
+    RefCntr<DocSequence>    m_seq;
+    DocSeqSortSpec          m_spec;
+    std::vector<Rcl::Doc>   m_docs;
     std::vector<Rcl::Doc *> m_docsp;
 };