From a9200df71a93e17beda9048bdc42ed5c20561172 Mon Sep 17 00:00:00 2001 From: dockes Date: Sat, 29 Jan 2005 15:41:11 +0000 Subject: [PATCH] external viewer+ deleted doc purging --- src/index/recollindex.cpp | 131 ++++++++++++++----- src/internfile/mimehandler.cpp | 23 +++- src/internfile/mimehandler.h | 11 +- src/lib/Makefile | 5 +- src/qtgui/main.cpp | 23 ++-- src/qtgui/recollmain.ui | 23 ++-- src/qtgui/recollmain.ui.h | 224 ++++++++++++++++++++------------- src/rcldb/rcldb.cpp | 35 ++++-- src/rcldb/rcldb.h | 3 +- src/utils/Makefile | 9 +- 10 files changed, 329 insertions(+), 158 deletions(-) diff --git a/src/index/recollindex.cpp b/src/index/recollindex.cpp index a9746a8a..d515bb32 100644 --- a/src/index/recollindex.cpp +++ b/src/index/recollindex.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.6 2005-01-26 13:03:02 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.7 2005-01-29 15:41:11 dockes Exp $ (C) 2004 J.F.Dockes"; #endif #include @@ -7,6 +7,8 @@ static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.6 2005-01-26 13:03:02 dockes #include #include +#include +#include #include "pathut.h" #include "conftree.h" @@ -30,39 +32,51 @@ using namespace std; class DirIndexer { FsTreeWalker walker; RclConfig *config; - string topdir; + list *topdirs; string dbdir; Rcl::Db db; public: - DirIndexer(RclConfig *cnf, const string &dbd, const string &top) - : config(cnf), topdir(top), dbdir(dbd) + DirIndexer(RclConfig *cnf, const string &dbd, list *top) + : config(cnf), topdirs(top), dbdir(dbd) { } friend FsTreeWalker::Status indexfile(void *, const std::string &, const struct stat *, FsTreeWalker::CbFlag); - void index(); + bool index(); }; -void DirIndexer::index() +bool DirIndexer::index() { if (!db.open(dbdir, Rcl::Db::DbUpd)) { - cerr << "Error opening database in " << dbdir << " for " << - topdir << endl; - return; + LOGERR(("DirIndexer::index: error opening database in %s\n", + dbdir.c_str())); + return false; } - walker.walk(topdir, indexfile, this); + for (list::const_iterator it = topdirs->begin(); + it != topdirs->end(); it++) { + LOGDEB(("DirIndexer::index: Indexing %s into %s\n", it->c_str(), + dbdir.c_str())); + if (walker.walk(*it, indexfile, this) != FsTreeWalker::FtwOk) { + LOGERR(("DirIndexer::index: error while indexing %s\n", + it->c_str())); + db.close(); + return false; + } + } + db.purge(); if (!db.close()) { - cerr << "Error closing database in " << dbdir << " for " << - topdir << endl; - return; + LOGERR(("DirIndexer::index: error closing database in %s\n", + dbdir.c_str())); + return false; } + return true; } /** * This function gets called for every file and directory found by the - * tree walker. It checks with the db is the file has changed and needs to + * tree walker. It checks with the db if the file has changed and needs to * be reindexed. If so, it calls an appropriate handler depending on the mime * type, which is responsible for populating an Rcl::Doc. * Accent and majuscule handling are performed by the db module when doing @@ -119,34 +133,89 @@ indexfile(void *cdata, const std::string &fn, const struct stat *stp, return FsTreeWalker::FtwOk; } +DirIndexer *indexer; + +static void cleanup() +{ + delete indexer; + indexer = 0; +} + +static void sigcleanup(int sig) +{ + fprintf(stderr, "sigcleanup\n"); + cleanup(); + exit(1); +} int main(int argc, const char **argv) { - RclConfig *config = new RclConfig; + atexit(cleanup); + if (signal(SIGHUP, SIG_IGN) != SIG_IGN) + signal(SIGHUP, sigcleanup); + if (signal(SIGINT, SIG_IGN) != SIG_IGN) + signal(SIGINT, sigcleanup); + if (signal(SIGQUIT, SIG_IGN) != SIG_IGN) + signal(SIGQUIT, sigcleanup); + if (signal(SIGTERM, SIG_IGN) != SIG_IGN) + signal(SIGTERM, sigcleanup); - if (!config->ok()) + RclConfig config; + if (!config.ok()) cerr << "Config could not be built" << endl; - ConfTree *conf = config->getConfig(); - + ConfTree *conf = config.getConfig(); + + // Retrieve the list of directories to be indexed. string topdirs; if (conf->get("topdirs", topdirs, "") == 0) { cerr << "No top directories in configuration" << endl; exit(1); } - vector tdl; - if (ConfTree::stringToStrings(topdirs, tdl)) { - for (unsigned int i = 0; i < tdl.size(); i++) { - string topdir = tdl[i]; - cout << topdir << endl; - string dbdir; - if (conf->get("dbdir", dbdir, topdir) == 0) { - cerr << "No database directory in configuration for " - << topdir << endl; - exit(1); - } - DirIndexer indexer(config, dbdir, topdir); - indexer.index(); + + // Group the directories by database: it is important that all + // directories for a database be indexed at once so that deleted + // file cleanup works + vector tdl; // List of directories to be indexed + if (!ConfTree::stringToStrings(topdirs, tdl)) { + cerr << "Parse error for directory list" << endl; + exit(1); + } + + vector::iterator dirit; + map > dbmap; + map >::iterator dbit; + for (dirit = tdl.begin(); dirit != tdl.end(); dirit++) { + string db; + if (conf->get("dbdir", db, *dirit) == 0) { + cerr << "No database directory in configuration for " + << *dirit << endl; + exit(1); + } + dbit = dbmap.find(db); + if (dbit == dbmap.end()) { + list l; + l.push_back(*dirit); + dbmap[db] = l; + } else { + dbit->second.push_back(*dirit); } } + + for (dbit = dbmap.begin(); dbit != dbmap.end(); dbit++) { + cout << dbit->first << " -> "; + list::const_iterator dit; + for (dit = dbit->second.begin(); dit != dbit->second.end(); dit++) { + cout << *dit << " "; + } + cout << endl; + indexer = new DirIndexer(&config, dbit->first, &dbit->second); + if (!indexer->index()) { + delete indexer; + indexer = 0; + exit(1); + } + delete indexer; + indexer = 0; + } } diff --git a/src/internfile/mimehandler.cpp b/src/internfile/mimehandler.cpp index 53180539..96ff9579 100644 --- a/src/internfile/mimehandler.cpp +++ b/src/internfile/mimehandler.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: mimehandler.cpp,v 1.3 2005-01-26 13:03:02 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: mimehandler.cpp,v 1.4 2005-01-29 15:41:11 dockes Exp $ (C) 2004 J.F.Dockes"; #endif #include @@ -11,6 +11,7 @@ using namespace std; #include "csguess.h" #include "transcode.h" #include "debuglog.h" +#include "smallut.h" bool textPlainToDoc(RclConfig *conf, const string &fn, const string &mtype, Rcl::Doc &docout) @@ -66,8 +67,10 @@ MimeHandlerFunc getMimeHandler(const std::string &mtype, ConfTree *mhandlers) { // Return handler definition for mime type string hs; - if (!mhandlers->get(mtype, hs, "")) + if (!mhandlers->get(mtype, hs, "index")) { + LOGDEB(("getMimeHandler: no handler for %s\n", mtype.c_str())); return 0; + } // Break definition into type and name vector toks; @@ -78,7 +81,7 @@ MimeHandlerFunc getMimeHandler(const std::string &mtype, ConfTree *mhandlers) } // Retrieve handler function according to type - if (!strcasecmp(toks[0].c_str(), "internal")) { + if (!stringlowercmp("internal", toks[0])) { map::const_iterator it = ihandlers.find(mtype); if (it == ihandlers.end()) { @@ -87,11 +90,11 @@ MimeHandlerFunc getMimeHandler(const std::string &mtype, ConfTree *mhandlers) return 0; } return it->second; - } else if (!strcasecmp(toks[0].c_str(), "dll")) { + } else if (!stringlowercmp("dll", toks[0])) { if (toks.size() != 2) return 0; return 0; - } else if (!strcasecmp(toks[0].c_str(), "exec")) { + } else if (!stringlowercmp("exec", toks[0])) { if (toks.size() != 2) return 0; return 0; @@ -99,3 +102,13 @@ MimeHandlerFunc getMimeHandler(const std::string &mtype, ConfTree *mhandlers) return 0; } } + +/** + * Return external viewer exec string for given mime type + */ +string getMimeViewer(const std::string &mtype, ConfTree *mhandlers) +{ + string hs; + mhandlers->get(mtype, hs, "view"); + return hs; +} diff --git a/src/internfile/mimehandler.h b/src/internfile/mimehandler.h index 4cedb41c..9542ef95 100644 --- a/src/internfile/mimehandler.h +++ b/src/internfile/mimehandler.h @@ -1,6 +1,6 @@ #ifndef _MIMEHANDLER_H_INCLUDED_ #define _MIMEHANDLER_H_INCLUDED_ -/* @(#$Id: mimehandler.h,v 1.2 2005-01-26 11:47:27 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: mimehandler.h,v 1.3 2005-01-29 15:41:11 dockes Exp $ (C) 2004 J.F.Dockes */ #include @@ -11,9 +11,18 @@ typedef bool (*MimeHandlerFunc)(RclConfig *, const std::string &, const std::string &, Rcl::Doc&); +/** + * Return indexing handler function for given mime type + */ extern MimeHandlerFunc getMimeHandler(const std::string &mtype, ConfTree *mhandlers); +/** + * Return external viewer exec string for given mime type + */ +extern string getMimeViewer(const std::string &mtype, + ConfTree *mhandlers); + extern bool textHtmlToDoc(RclConfig *conf, const string &fn, const string &mtype, Rcl::Doc &docout); diff --git a/src/lib/Makefile b/src/lib/Makefile index e1a437e2..055ab798 100644 --- a/src/lib/Makefile +++ b/src/lib/Makefile @@ -10,7 +10,7 @@ all: $(LIBS) OBJS = conftree.o csguess.o debuglog.o \ fstreewalk.o html.o htmlparse.o \ mimehandler.o mimeparse.o mimetype.o myhtmlparse.o pathut.o \ - rclconfig.o rcldb.o readfile.o \ + rclconfig.o rcldb.o readfile.o smallut.o \ textsplit.o transcode.o \ unacpp.o unac.o SRCS = ../utils/conftree.cpp ../index/csguess.cpp ../utils/debuglog.cpp \ @@ -18,6 +18,7 @@ SRCS = ../utils/conftree.cpp ../index/csguess.cpp ../utils/debuglog.cpp \ ../common/mimehandler.cpp ../utils/mimeparse.cpp ../index/mimetype.cpp \ ../common/myhtmlparse.cpp ../utils/pathut.cpp \ ../common/rclconfig.cpp ../common/rcldb.cpp ../utils/readfile.cpp \ + ../utils/smallut.cpp \ ../common/textsplit.cpp ../utils/transcode.cpp \ ../common/unacpp.cpp ../unac/unac.c @@ -56,6 +57,8 @@ rcldb.o : ../common/rcldb.cpp $(CXX) $(CXXFLAGS) -c $< readfile.o : ../utils/readfile.cpp $(CXX) $(CXXFLAGS) -c $< +smallut.o : ../utils/smallut.cpp + $(CXX) $(CXXFLAGS) -c $< textsplit.o : ../common/textsplit.cpp $(CXX) $(CXXFLAGS) -c $< transcode.o : ../utils/transcode.cpp diff --git a/src/qtgui/main.cpp b/src/qtgui/main.cpp index 1c41a359..dfbef209 100644 --- a/src/qtgui/main.cpp +++ b/src/qtgui/main.cpp @@ -29,6 +29,17 @@ int main( int argc, char ** argv ) w.show(); a.connect( &a, SIGNAL( lastWindowClosed() ), &a, SLOT( quit() ) ); + atexit(cleanup); + if (signal(SIGHUP, SIG_IGN) != SIG_IGN) + signal(SIGHUP, sigcleanup); + if (signal(SIGINT, SIG_IGN) != SIG_IGN) + signal(SIGINT, sigcleanup); + if (signal(SIGQUIT, SIG_IGN) != SIG_IGN) + signal(SIGQUIT, sigcleanup); + if (signal(SIGTERM, SIG_IGN) != SIG_IGN) + signal(SIGTERM, sigcleanup); + + rclconfig = new RclConfig; if (!rclconfig || !rclconfig->ok()) { QMessageBox::critical(0, "Recoll", @@ -52,17 +63,5 @@ int main( int argc, char ** argv ) QString(dbdir)); exit(1); } - atexit(cleanup); - if (signal(SIGHUP, SIG_IGN) != SIG_IGN) - signal(SIGHUP, sigcleanup); - if (signal(SIGINT, SIG_IGN) != SIG_IGN) - signal(SIGINT, sigcleanup); - if (signal(SIGQUIT, SIG_IGN) != SIG_IGN) - signal(SIGQUIT, sigcleanup); - if (signal(SIGTERM, SIG_IGN) != SIG_IGN) - signal(SIGTERM, sigcleanup); - - - return a.exec(); } diff --git a/src/qtgui/recollmain.ui b/src/qtgui/recollmain.ui index 47efb043..63e3957b 100644 --- a/src/qtgui/recollmain.ui +++ b/src/qtgui/recollmain.ui @@ -106,7 +106,7 @@ - resTextEdit + reslistTE @@ -220,12 +220,6 @@ RecollMain fileExit() - - resTextEdit - clicked(int,int) - RecollMain - resTextEdit_clicked(int,int) - fileExitAction activated() @@ -256,6 +250,18 @@ RecollMain listNextPB_clicked() + + reslistTE + doubleClicked(int,int) + RecollMain + reslistTE_doubleClicked(int,int) + + + reslistTE + clicked(int,int) + RecollMain + reslistTE_clicked(int,int) + recollmain.ui.h @@ -266,7 +272,8 @@ fileExit() - resTextEdit_clicked( int par, int car ) + reslistTE_doubleClicked( int par, int car ) + reslistTE_clicked( int par, int car ) queryText_returnPressed() Search_clicked() listPrevPB_clicked() diff --git a/src/qtgui/recollmain.ui.h b/src/qtgui/recollmain.ui.h index 7333648d..4130b230 100644 --- a/src/qtgui/recollmain.ui.h +++ b/src/qtgui/recollmain.ui.h @@ -10,22 +10,30 @@ ** destructor. *****************************************************************************/ -void RecollMain::fileExit() -{ - exit(0); -} - +#include +#include +#include +#include #include +#include #include "rcldb.h" #include "rclconfig.h" #include "debuglog.h" #include "mimehandler.h" +#include "pathut.h" extern RclConfig *rclconfig; extern Rcl::Db *rcldb; + +void RecollMain::fileExit() +{ + exit(0); +} + + static string plaintorich(const string &in) { string out = "

"; @@ -35,95 +43,144 @@ static string plaintorich(const string &in) } else { out += in[i]; } - if (i == 10) { - out += ""; - } - if (i == 20) { - out += ""; - } - } return out; } -// Click in the result list window: display preview for selected document, -// and highlight entry. The paragraph number is doc number in window + 1 -void RecollMain::resTextEdit_clicked(int par, int car) +static string urltolocalpath(string url) { - LOGDEB(("RecollMain::resTextEdi_clicked: par %d, char %d\n", par, car)); + return url.substr(7, string::npos); +} + +// Use external viewer to display file +void RecollMain::reslistTE_doubleClicked(int par, int car) +{ + // restlistTE_clicked(par, car); + Rcl::Doc doc; + int reldocnum = par - 1; + if (!rcldb->getDoc(reslist_winfirst + reldocnum, doc, 0)) + return; + + // Look for appropriate viewer + string cmd = getMimeViewer(doc.mimetype, rclconfig->getMimeConf()); + if (cmd.length() == 0) { + QMessageBox::warning(0, "Recoll", QString("No viewer for mime type ") + + doc.mimetype.c_str()); + return; + } + + string fn = urltolocalpath(doc.url); + // substitute + string ncmd; + string::const_iterator it1; + for (it1 = cmd.begin(); it1 != cmd.end();it1++) { + if (*it1 == '%') { + if (++it1 == cmd.end()) { + ncmd += '%'; + break; + } + if (*it1 == '%') + ncmd += '%'; + if (*it1 == 'u') + ncmd += doc.url; + if (*it1 == 'f') + ncmd += fn; + } else { + ncmd += *it1; + } + } + + ncmd += " &"; + LOGDEB(("Executing: '%s'\n", ncmd.c_str())); + system(ncmd.c_str()); +} + +// Display preview for the selected document, and highlight entry. The +// paragraph number is doc number in window + 1 +void RecollMain::reslistTE_clicked(int par, int car) +{ + LOGDEB(("RecollMain::reslistTE_clicked: par %d, char %d\n", par, car)); if (reslist_winfirst == -1) return; + + // If same doc, don't bother redisplaying + if (reslist_current == par - 1) + return; + Rcl::Doc doc; - doc.erase(); if (reslist_current != -1) { QColor color("white"); - resTextEdit->setParagraphBackgroundColor(reslist_current+1, color); + reslistTE->setParagraphBackgroundColor(reslist_current+1, color); } QColor color("lightblue"); - resTextEdit->setParagraphBackgroundColor(par, color); + reslistTE->setParagraphBackgroundColor(par, color); - int reldocnum = par-1; + int reldocnum = par - 1; reslist_current = reldocnum; previewTextEdit->clear(); - if (rcldb->getDoc(reslist_winfirst + reldocnum, doc, 0)) { + if (!rcldb->getDoc(reslist_winfirst + reldocnum, doc, 0)) { + QMessageBox::warning(0, "Recoll", + QString("Can't retrieve document from database")); + return; + } - // Go to the file system to retrieve / convert the document text - // for preview: + // Go to the file system to retrieve / convert the document text + // for preview: - // Look for appropriate handler - MimeHandlerFunc fun = - getMimeHandler(doc.mimetype, rclconfig->getMimeConf()); - if (!fun) { - QMessageBox::warning(0, "Recoll", - QString("No mime handler for mime type ") + - doc.mimetype.c_str()); - return; - } + // Look for appropriate handler + MimeHandlerFunc fun = + getMimeHandler(doc.mimetype, rclconfig->getMimeConf()); + if (!fun) { + QMessageBox::warning(0, "Recoll", + QString("No mime handler for mime type ") + + doc.mimetype.c_str()); + return; + } - string fn = doc.url.substr(6, string::npos); - Rcl::Doc fdoc; - if (!fun(rclconfig, fn, doc.mimetype, fdoc)) { - QMessageBox::warning(0, "Recoll", - QString("Failed to convert document for preview!\n") + - fn.c_str() + " mimetype " + - doc.mimetype.c_str()); - return; - } + string fn = urltolocalpath(doc.url); + Rcl::Doc fdoc; + if (!fun(rclconfig, fn, doc.mimetype, fdoc)) { + QMessageBox::warning(0, "Recoll", + QString("Failed to convert document for preview!\n") + + fn.c_str() + " mimetype " + + doc.mimetype.c_str()); + return; + } - string rich = plaintorich(fdoc.text); + string rich = plaintorich(fdoc.text); #if 0 - //Highlighting; pass a list of (search term, style name) to plaintorich - // and create the corresponding styles with different colors here - // We need to : - // - Break the query into terms : wait for the query analyzer - // - Break the text into words. This should use a version of - // textsplit with an option to keep the punctuation (see how to do - // this). We do want the same splitter code to be used here and - // when indexing. - QStyleSheetItem *item = - new QStyleSheetItem( previewTextEdit->styleSheet(), "mytag" ); - item->setColor("red"); - item->setFontWeight(QFont::Bold); + //Highlighting; pass a list of (search term, style name) to plaintorich + // and create the corresponding styles with different colors here + // We need to : + // - Break the query into terms : wait for the query analyzer + // - Break the text into words. This should use a version of + // textsplit with an option to keep the punctuation (see how to do + // this). We do want the same splitter code to be used here and + // when indexing. + QStyleSheetItem *item = + new QStyleSheetItem( previewTextEdit->styleSheet(), "mytag" ); + item->setColor("red"); + item->setFontWeight(QFont::Bold); #endif - QString str = QString::fromUtf8(rich.c_str(), rich.length()); - previewTextEdit->setTextFormat(RichText); - previewTextEdit->setText(str); - } + QString str = QString::fromUtf8(rich.c_str(), rich.length()); + previewTextEdit->setTextFormat(RichText); + previewTextEdit->setText(str); } -#include "pathut.h" +// User asked to start query void RecollMain::queryText_returnPressed() { LOGDEB(("RecollMain::queryText_returnPressed()\n")); reslist_current = -1; reslist_winfirst = -1; - string rawq = queryText->text(); - rcldb->setQuery(rawq); + QCString u8 = queryText->text().utf8(); + + rcldb->setQuery(string((const char *)u8)); listNextPB_clicked(); } @@ -145,6 +202,7 @@ void RecollMain::listPrevPB_clicked() #define MIN(A,B) ((A) < (B) ? (A) : (B)) #endif +// Fill up result list window with next screen of hits void RecollMain::listNextPB_clicked() { LOGDEB(("listNextPB_clicked: winfirst %d\n", reslist_winfirst)); @@ -165,34 +223,22 @@ void RecollMain::listNextPB_clicked() int resCnt = rcldb->getResCnt(); int last = MIN(resCnt, reslist_winfirst+respagesize); if (i == 0) { - resTextEdit->clear(); + reslistTE->clear(); previewTextEdit->clear(); - resTextEdit->append("

"); + reslistTE->append("

"); char line[80]; sprintf(line, "

Displaying results %d-%d out of %d
", reslist_winfirst+1, last, resCnt); - resTextEdit->append(line); + reslistTE->append(line); } gotone = true; - LOGDEB1(("Url: %s\n", doc.url.c_str())); - LOGDEB1(("Mimetype: %s\n", doc.mimetype.c_str())); - LOGDEB1(("Mtime: %s\n", doc.mtime.c_str())); - LOGDEB1(("Origcharset: %s\n", doc.origcharset.c_str())); - LOGDEB1(("Title: %s\n", doc.title.c_str())); - LOGDEB1(("Text: %s\n", doc.text.c_str())); - LOGDEB1(("Keywords: %s\n", doc.keywords.c_str())); - LOGDEB1(("Abstract: %s\n", doc.abstract.c_str())); - - // Result list display. Standard Omega includes: - // - title or simple file name or url - // - abstract and keywords - // - url - // - relevancy percentage + keywords matched - // - date de modification - // - langue - // - taille + // Result list display: TOBEDONE + // - move abstract/keywords to Detail window ? + // - keywords matched + // - language + // - size char perbuf[10]; sprintf(perbuf, "%3d%%", percent); if (doc.title.empty()) @@ -202,27 +248,27 @@ void RecollMain::listNextPB_clicked() if (!doc.mtime.empty()) { time_t mtime = atol(doc.mtime.c_str()); struct tm *tm = localtime(&mtime); - strftime(datebuf, 99, "Modified: %F %T", tm); + strftime(datebuf, 99, "Modified: %F %T", tm); } - string result = "

" + string(perbuf) + " " + doc.title + "
" + + doc.mimetype + " " + (!doc.mtime.empty() ? string(datebuf) + "
" : string("")) + (!doc.abstract.empty() ? doc.abstract + "
" : string("")) + (!doc.keywords.empty() ? doc.keywords + "
" : string("")) + "" + doc.url + +"
" + "

"; - QString str = QString::fromUtf8(result.c_str(), result.length()); - resTextEdit->append(str); + QString str = QString::fromUtf8(result.c_str(), result.length()); + reslistTE->append(str); } if (gotone) { - resTextEdit->append("
"); - resTextEdit->setCursorPosition(0,0); - resTextEdit->ensureCursorVisible(); + reslistTE->append(""); + reslistTE->setCursorPosition(0,0); + reslistTE->ensureCursorVisible(); // Display preview for 1st doc in list - resTextEdit_clicked(1, 0); + reslistTE_clicked(1, 0); } else { // Restore first in win parameter that we shouln't have incremented reslist_winfirst -= respagesize; diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index 0b30ad05..db501287 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.12 2005-01-28 15:25:40 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.13 2005-01-29 15:41:11 dockes Exp $ (C) 2004 J.F.Dockes"; #endif #include @@ -263,7 +263,7 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &doc) // - sample // - caption (title limited to 100 chars) // - mime type - string record = "url=file:/" + fn; + string record = "url=file://" + fn; record += "\nmtype=" + doc.mimetype; record += "\nmtime=" + doc.mtime; record += "\norigcharset=" + doc.origcharset; @@ -277,18 +277,14 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &doc) // If this document has already been indexed, update the existing // entry. try { -#if 0 Xapian::docid did = -#endif ndb->wdb.replace_document(pathterm, newdocument); -#if 0 if (did < ndb->updated.size()) { ndb->updated[did] = true; LOGDEB(("%s updated\n", fnc)); } else { LOGDEB(("%s added\n", fnc)); } -#endif } catch (...) { // FIXME: is this ever actually needed? ndb->wdb.add_document(newdocument); @@ -313,9 +309,8 @@ bool Rcl::Db::needUpdate(const string &filename, const struct stat *stp) if (did == ndb->wdb.postlist_end(pathterm)) return true; Xapian::Document doc = ndb->wdb.get_document(*did); -#if 0 - ndb->updated[*did] = true; -#endif + if (*did < ndb->updated.size()) + ndb->updated[*did] = true; string data = doc.get_data(); //cerr << "DOCUMENT EXISTS " << data << endl; const char *cp = strstr(data.c_str(), "mtime="); @@ -332,6 +327,27 @@ bool Rcl::Db::needUpdate(const string &filename, const struct stat *stp) return true; } +bool Rcl::Db::purge() +{ + if (pdata == 0) + return false; + Native *ndb = (Native *)pdata; + if (ndb->isopen == false || ndb->iswritable == false) + return false; + + for (Xapian::docid did = 1; did < ndb->updated.size(); ++did) { + if (!ndb->updated[did]) { + try { + ndb->wdb.delete_document(did); + LOGDEB(("Rcl::Db::purge: deleted document #%d\n", did)); + } catch (const Xapian::DocNotFoundError &) { + } + } + } + return true; +} + + #include class wsQData { @@ -369,6 +385,7 @@ bool Rcl::Db::setQuery(const std::string &querystring) ndb->mset = Xapian::MSet(); return true; } + int Rcl::Db::getResCnt() { Native *ndb = (Native *)pdata; diff --git a/src/rcldb/rcldb.h b/src/rcldb/rcldb.h index 335edb04..5af9eaf4 100644 --- a/src/rcldb/rcldb.h +++ b/src/rcldb/rcldb.h @@ -1,6 +1,6 @@ #ifndef _DB_H_INCLUDED_ #define _DB_H_INCLUDED_ -/* @(#$Id: rcldb.h,v 1.6 2005-01-28 15:25:40 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: rcldb.h,v 1.7 2005-01-29 15:41:11 dockes Exp $ (C) 2004 J.F.Dockes */ #include @@ -65,6 +65,7 @@ class Db { // Update-related functions bool add(const std::string &filename, const Doc &doc); bool needUpdate(const std::string &filename, const struct stat *stp); + bool purge(); // Query-related functions diff --git a/src/utils/Makefile b/src/utils/Makefile index 0697fd95..476c104a 100644 --- a/src/utils/Makefile +++ b/src/utils/Makefile @@ -3,7 +3,7 @@ CXXFLAGS = -I. BIGLIB = ../lib/librcl.a -PROGS = trfstreewalk trpathut execmd transcode trmimeparse +PROGS = smallut trfstreewalk trpathut execmd transcode trmimeparse all: $(PROGS) FSTREEWALK_OBJS= trfstreewalk.o fstreewalk.o pathut.o @@ -38,5 +38,12 @@ mimeparse : $(MIMEPARSE_OBJS) trmimeparse.o : ../utils/mimeparse.cpp $(CXX) $(CXXFLAGS) -DTEST_MIMEPARSE -c -o trmimeparse.o \ mimeparse.cpp +SMALLUT_OBJS= trsmallut.o $(BIGLIB) +smallut : $(SMALLUT_OBJS) + $(CXX) $(CXXFLAGS) -o smallut $(SMALLUT_OBJS) \ + -L/usr/local/lib -liconv +trsmallut.o : ../utils/smallut.cpp + $(CXX) $(CXXFLAGS) -DTEST_SMALLUT -c -o trsmallut.o \ + smallut.cpp clean: rm -f *.o $(PROGS)