external viewer+ deleted doc purging

This commit is contained in:
dockes 2005-01-29 15:41:11 +00:00
parent 82334f2957
commit a9200df71a
10 changed files with 329 additions and 158 deletions

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.6 2005-01-26 13:03:02 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.7 2005-01-29 15:41:11 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
#include <sys/stat.h>
@ -7,6 +7,8 @@ static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.6 2005-01-26 13:03:02 dockes
#include <strings.h>
#include <iostream>
#include <list>
#include <map>
#include "pathut.h"
#include "conftree.h"
@ -30,39 +32,51 @@ using namespace std;
class DirIndexer {
FsTreeWalker walker;
RclConfig *config;
string topdir;
list<string> *topdirs;
string dbdir;
Rcl::Db db;
public:
DirIndexer(RclConfig *cnf, const string &dbd, const string &top)
: config(cnf), topdir(top), dbdir(dbd)
DirIndexer(RclConfig *cnf, const string &dbd, list<string> *top)
: config(cnf), topdirs(top), dbdir(dbd)
{ }
friend FsTreeWalker::Status
indexfile(void *, const std::string &, const struct stat *,
FsTreeWalker::CbFlag);
void index();
bool index();
};
void DirIndexer::index()
bool DirIndexer::index()
{
if (!db.open(dbdir, Rcl::Db::DbUpd)) {
cerr << "Error opening database in " << dbdir << " for " <<
topdir << endl;
return;
LOGERR(("DirIndexer::index: error opening database in %s\n",
dbdir.c_str()));
return false;
}
walker.walk(topdir, indexfile, this);
for (list<string>::const_iterator it = topdirs->begin();
it != topdirs->end(); it++) {
LOGDEB(("DirIndexer::index: Indexing %s into %s\n", it->c_str(),
dbdir.c_str()));
if (walker.walk(*it, indexfile, this) != FsTreeWalker::FtwOk) {
LOGERR(("DirIndexer::index: error while indexing %s\n",
it->c_str()));
db.close();
return false;
}
}
db.purge();
if (!db.close()) {
cerr << "Error closing database in " << dbdir << " for " <<
topdir << endl;
return;
LOGERR(("DirIndexer::index: error closing database in %s\n",
dbdir.c_str()));
return false;
}
return true;
}
/**
* This function gets called for every file and directory found by the
* tree walker. It checks with the db is the file has changed and needs to
* tree walker. It checks with the db if the file has changed and needs to
* be reindexed. If so, it calls an appropriate handler depending on the mime
* type, which is responsible for populating an Rcl::Doc.
* Accent and majuscule handling are performed by the db module when doing
@ -119,34 +133,89 @@ indexfile(void *cdata, const std::string &fn, const struct stat *stp,
return FsTreeWalker::FtwOk;
}
DirIndexer *indexer;
static void cleanup()
{
delete indexer;
indexer = 0;
}
// Signal handler: report on stderr, release the indexer through cleanup(),
// then terminate the process with exit status 1.
//
// NOTE(review): stdio output, delete and exit() are not async-signal-safe;
// this handler relies on them behaving well enough in practice.
static void sigcleanup(int)
{
    fputs("sigcleanup\n", stderr);
    cleanup();
    exit(1);
}
int main(int argc, const char **argv)
{
RclConfig *config = new RclConfig;
atexit(cleanup);
if (signal(SIGHUP, SIG_IGN) != SIG_IGN)
signal(SIGHUP, sigcleanup);
if (signal(SIGINT, SIG_IGN) != SIG_IGN)
signal(SIGINT, sigcleanup);
if (signal(SIGQUIT, SIG_IGN) != SIG_IGN)
signal(SIGQUIT, sigcleanup);
if (signal(SIGTERM, SIG_IGN) != SIG_IGN)
signal(SIGTERM, sigcleanup);
if (!config->ok())
RclConfig config;
if (!config.ok())
cerr << "Config could not be built" << endl;
ConfTree *conf = config->getConfig();
ConfTree *conf = config.getConfig();
// Retrieve the list of directories to be indexed.
string topdirs;
if (conf->get("topdirs", topdirs, "") == 0) {
cerr << "No top directories in configuration" << endl;
exit(1);
}
vector<string> tdl;
if (ConfTree::stringToStrings(topdirs, tdl)) {
for (unsigned int i = 0; i < tdl.size(); i++) {
string topdir = tdl[i];
cout << topdir << endl;
string dbdir;
if (conf->get("dbdir", dbdir, topdir) == 0) {
cerr << "No database directory in configuration for "
<< topdir << endl;
exit(1);
}
DirIndexer indexer(config, dbdir, topdir);
indexer.index();
// Group the directories by database: it is important that all
// directories for a database be indexed at once so that deleted
// file cleanup works
vector<string> tdl; // List of directories to be indexed
if (!ConfTree::stringToStrings(topdirs, tdl)) {
cerr << "Parse error for directory list" << endl;
exit(1);
}
vector<string>::iterator dirit;
map<string, list<string> > dbmap;
map<string, list<string> >::iterator dbit;
for (dirit = tdl.begin(); dirit != tdl.end(); dirit++) {
string db;
if (conf->get("dbdir", db, *dirit) == 0) {
cerr << "No database directory in configuration for "
<< *dirit << endl;
exit(1);
}
dbit = dbmap.find(db);
if (dbit == dbmap.end()) {
list<string> l;
l.push_back(*dirit);
dbmap[db] = l;
} else {
dbit->second.push_back(*dirit);
}
}
for (dbit = dbmap.begin(); dbit != dbmap.end(); dbit++) {
cout << dbit->first << " -> ";
list<string>::const_iterator dit;
for (dit = dbit->second.begin(); dit != dbit->second.end(); dit++) {
cout << *dit << " ";
}
cout << endl;
indexer = new DirIndexer(&config, dbit->first, &dbit->second);
if (!indexer->index()) {
delete indexer;
indexer = 0;
exit(1);
}
delete indexer;
indexer = 0;
}
}

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: mimehandler.cpp,v 1.3 2005-01-26 13:03:02 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: mimehandler.cpp,v 1.4 2005-01-29 15:41:11 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
#include <iostream>
@ -11,6 +11,7 @@ using namespace std;
#include "csguess.h"
#include "transcode.h"
#include "debuglog.h"
#include "smallut.h"
bool textPlainToDoc(RclConfig *conf, const string &fn,
const string &mtype, Rcl::Doc &docout)
@ -66,8 +67,10 @@ MimeHandlerFunc getMimeHandler(const std::string &mtype, ConfTree *mhandlers)
{
// Return handler definition for mime type
string hs;
if (!mhandlers->get(mtype, hs, ""))
if (!mhandlers->get(mtype, hs, "index")) {
LOGDEB(("getMimeHandler: no handler for %s\n", mtype.c_str()));
return 0;
}
// Break definition into type and name
vector<string> toks;
@ -78,7 +81,7 @@ MimeHandlerFunc getMimeHandler(const std::string &mtype, ConfTree *mhandlers)
}
// Retrieve handler function according to type
if (!strcasecmp(toks[0].c_str(), "internal")) {
if (!stringlowercmp("internal", toks[0])) {
map<string, MimeHandlerFunc>::const_iterator it =
ihandlers.find(mtype);
if (it == ihandlers.end()) {
@ -87,11 +90,11 @@ MimeHandlerFunc getMimeHandler(const std::string &mtype, ConfTree *mhandlers)
return 0;
}
return it->second;
} else if (!strcasecmp(toks[0].c_str(), "dll")) {
} else if (!stringlowercmp("dll", toks[0])) {
if (toks.size() != 2)
return 0;
return 0;
} else if (!strcasecmp(toks[0].c_str(), "exec")) {
} else if (!stringlowercmp("exec", toks[0])) {
if (toks.size() != 2)
return 0;
return 0;
@ -99,3 +102,13 @@ MimeHandlerFunc getMimeHandler(const std::string &mtype, ConfTree *mhandlers)
return 0;
}
}
/**
 * Return external viewer exec string for given mime type.
 *
 * Looks up mtype in mhandlers, passing "view" as the third argument to
 * get() (presumably the configuration section name - confirm against
 * ConfTree). The status returned by get() is not checked: the result
 * string starts out empty and is returned as get() left it.
 */
string getMimeViewer(const std::string &mtype, ConfTree *mhandlers)
{
    string viewcmd;
    (void)mhandlers->get(mtype, viewcmd, "view");
    return viewcmd;
}

View File

@ -1,6 +1,6 @@
#ifndef _MIMEHANDLER_H_INCLUDED_
#define _MIMEHANDLER_H_INCLUDED_
/* @(#$Id: mimehandler.h,v 1.2 2005-01-26 11:47:27 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: mimehandler.h,v 1.3 2005-01-29 15:41:11 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string>
@ -11,9 +11,18 @@
typedef bool (*MimeHandlerFunc)(RclConfig *, const std::string &,
const std::string &, Rcl::Doc&);
/**
* Return indexing handler function for given mime type
*/
extern MimeHandlerFunc getMimeHandler(const std::string &mtype,
ConfTree *mhandlers);
/**
* Return external viewer exec string for given mime type
*/
extern string getMimeViewer(const std::string &mtype,
ConfTree *mhandlers);
extern bool textHtmlToDoc(RclConfig *conf, const string &fn,
const string &mtype, Rcl::Doc &docout);

View File

@ -10,7 +10,7 @@ all: $(LIBS)
OBJS = conftree.o csguess.o debuglog.o \
fstreewalk.o html.o htmlparse.o \
mimehandler.o mimeparse.o mimetype.o myhtmlparse.o pathut.o \
rclconfig.o rcldb.o readfile.o \
rclconfig.o rcldb.o readfile.o smallut.o \
textsplit.o transcode.o \
unacpp.o unac.o
SRCS = ../utils/conftree.cpp ../index/csguess.cpp ../utils/debuglog.cpp \
@ -18,6 +18,7 @@ SRCS = ../utils/conftree.cpp ../index/csguess.cpp ../utils/debuglog.cpp \
../common/mimehandler.cpp ../utils/mimeparse.cpp ../index/mimetype.cpp \
../common/myhtmlparse.cpp ../utils/pathut.cpp \
../common/rclconfig.cpp ../common/rcldb.cpp ../utils/readfile.cpp \
../utils/smallut.cpp \
../common/textsplit.cpp ../utils/transcode.cpp \
../common/unacpp.cpp ../unac/unac.c
@ -56,6 +57,8 @@ rcldb.o : ../common/rcldb.cpp
$(CXX) $(CXXFLAGS) -c $<
readfile.o : ../utils/readfile.cpp
$(CXX) $(CXXFLAGS) -c $<
smallut.o : ../utils/smallut.cpp
$(CXX) $(CXXFLAGS) -c $<
textsplit.o : ../common/textsplit.cpp
$(CXX) $(CXXFLAGS) -c $<
transcode.o : ../utils/transcode.cpp

View File

@ -29,6 +29,17 @@ int main( int argc, char ** argv )
w.show();
a.connect( &a, SIGNAL( lastWindowClosed() ), &a, SLOT( quit() ) );
atexit(cleanup);
if (signal(SIGHUP, SIG_IGN) != SIG_IGN)
signal(SIGHUP, sigcleanup);
if (signal(SIGINT, SIG_IGN) != SIG_IGN)
signal(SIGINT, sigcleanup);
if (signal(SIGQUIT, SIG_IGN) != SIG_IGN)
signal(SIGQUIT, sigcleanup);
if (signal(SIGTERM, SIG_IGN) != SIG_IGN)
signal(SIGTERM, sigcleanup);
rclconfig = new RclConfig;
if (!rclconfig || !rclconfig->ok()) {
QMessageBox::critical(0, "Recoll",
@ -52,17 +63,5 @@ int main( int argc, char ** argv )
QString(dbdir));
exit(1);
}
atexit(cleanup);
if (signal(SIGHUP, SIG_IGN) != SIG_IGN)
signal(SIGHUP, sigcleanup);
if (signal(SIGINT, SIG_IGN) != SIG_IGN)
signal(SIGINT, sigcleanup);
if (signal(SIGQUIT, SIG_IGN) != SIG_IGN)
signal(SIGQUIT, sigcleanup);
if (signal(SIGTERM, SIG_IGN) != SIG_IGN)
signal(SIGTERM, sigcleanup);
return a.exec();
}

View File

@ -106,7 +106,7 @@
</property>
<widget class="QTextEdit">
<property name="name">
<cstring>resTextEdit</cstring>
<cstring>reslistTE</cstring>
</property>
<property name="sizePolicy">
<sizepolicy>
@ -220,12 +220,6 @@
<receiver>RecollMain</receiver>
<slot>fileExit()</slot>
</connection>
<connection>
<sender>resTextEdit</sender>
<signal>clicked(int,int)</signal>
<receiver>RecollMain</receiver>
<slot>resTextEdit_clicked(int,int)</slot>
</connection>
<connection>
<sender>fileExitAction</sender>
<signal>activated()</signal>
@ -256,6 +250,18 @@
<receiver>RecollMain</receiver>
<slot>listNextPB_clicked()</slot>
</connection>
<connection>
<sender>reslistTE</sender>
<signal>doubleClicked(int,int)</signal>
<receiver>RecollMain</receiver>
<slot>reslistTE_doubleClicked(int,int)</slot>
</connection>
<connection>
<sender>reslistTE</sender>
<signal>clicked(int,int)</signal>
<receiver>RecollMain</receiver>
<slot>reslistTE_clicked(int,int)</slot>
</connection>
</connections>
<includes>
<include location="local" impldecl="in implementation">recollmain.ui.h</include>
@ -266,7 +272,8 @@
</variables>
<slots>
<slot>fileExit()</slot>
<slot>resTextEdit_clicked( int par, int car )</slot>
<slot>reslistTE_doubleClicked( int par, int car )</slot>
<slot>reslistTE_clicked( int par, int car )</slot>
<slot>queryText_returnPressed()</slot>
<slot>Search_clicked()</slot>
<slot>listPrevPB_clicked()</slot>

View File

@ -10,22 +10,30 @@
** destructor.
*****************************************************************************/
void RecollMain::fileExit()
{
exit(0);
}
#include <regex.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <qmessagebox.h>
#include <qcstring.h>
#include "rcldb.h"
#include "rclconfig.h"
#include "debuglog.h"
#include "mimehandler.h"
#include "pathut.h"
extern RclConfig *rclconfig;
extern Rcl::Db *rcldb;
// Slot for the File/Exit action: terminate the application at once with a
// success status. Handlers registered with atexit() still run.
void RecollMain::fileExit()
{
    exit(EXIT_SUCCESS);
}
static string plaintorich(const string &in)
{
string out = "<qt><head><title></title></head><body><p>";
@ -35,95 +43,144 @@ static string plaintorich(const string &in)
} else {
out += in[i];
}
if (i == 10) {
out += "<mytag>";
}
if (i == 20) {
out += "</mytag>";
}
}
return out;
}
// Click in the result list window: display preview for selected document,
// and highlight entry. The paragraph number is doc number in window + 1
void RecollMain::resTextEdit_clicked(int par, int car)
// Translate a "file://" url into a local file system path.
//
// The "file://" scheme prefix is stripped when present. Any other input
// (including an empty or very short string) is returned unchanged instead
// of being blindly cut at a fixed offset, which used to throw
// std::out_of_range for inputs shorter than the prefix.
static std::string urltolocalpath(std::string url)
{
    static const char prefix[] = "file://";
    const std::string::size_type plen = sizeof(prefix) - 1;

    // compare() clamps the compared range to the string length, so this is
    // safe for inputs shorter than the prefix.
    if (url.compare(0, plen, prefix) == 0)
        return url.substr(plen);
    return url;
}
// Use external viewer to display file
void RecollMain::reslistTE_doubleClicked(int par, int car)
{
// restlistTE_clicked(par, car);
Rcl::Doc doc;
int reldocnum = par - 1;
if (!rcldb->getDoc(reslist_winfirst + reldocnum, doc, 0))
return;
// Look for appropriate viewer
string cmd = getMimeViewer(doc.mimetype, rclconfig->getMimeConf());
if (cmd.length() == 0) {
QMessageBox::warning(0, "Recoll", QString("No viewer for mime type ") +
doc.mimetype.c_str());
return;
}
string fn = urltolocalpath(doc.url);
// substitute
string ncmd;
string::const_iterator it1;
for (it1 = cmd.begin(); it1 != cmd.end();it1++) {
if (*it1 == '%') {
if (++it1 == cmd.end()) {
ncmd += '%';
break;
}
if (*it1 == '%')
ncmd += '%';
if (*it1 == 'u')
ncmd += doc.url;
if (*it1 == 'f')
ncmd += fn;
} else {
ncmd += *it1;
}
}
ncmd += " &";
LOGDEB(("Executing: '%s'\n", ncmd.c_str()));
system(ncmd.c_str());
}
// Display preview for the selected document, and highlight entry. The
// paragraph number is doc number in window + 1
void RecollMain::reslistTE_clicked(int par, int car)
{
LOGDEB(("RecollMain::reslistTE_clicked: par %d, char %d\n", par, car));
if (reslist_winfirst == -1)
return;
// If same doc, don't bother redisplaying
if (reslist_current == par - 1)
return;
Rcl::Doc doc;
doc.erase();
if (reslist_current != -1) {
QColor color("white");
resTextEdit->setParagraphBackgroundColor(reslist_current+1, color);
reslistTE->setParagraphBackgroundColor(reslist_current+1, color);
}
QColor color("lightblue");
resTextEdit->setParagraphBackgroundColor(par, color);
reslistTE->setParagraphBackgroundColor(par, color);
int reldocnum = par-1;
int reldocnum = par - 1;
reslist_current = reldocnum;
previewTextEdit->clear();
if (rcldb->getDoc(reslist_winfirst + reldocnum, doc, 0)) {
if (!rcldb->getDoc(reslist_winfirst + reldocnum, doc, 0)) {
QMessageBox::warning(0, "Recoll",
QString("Can't retrieve document from database"));
return;
}
// Go to the file system to retrieve / convert the document text
// for preview:
// Go to the file system to retrieve / convert the document text
// for preview:
// Look for appropriate handler
MimeHandlerFunc fun =
getMimeHandler(doc.mimetype, rclconfig->getMimeConf());
if (!fun) {
QMessageBox::warning(0, "Recoll",
QString("No mime handler for mime type ") +
doc.mimetype.c_str());
return;
}
// Look for appropriate handler
MimeHandlerFunc fun =
getMimeHandler(doc.mimetype, rclconfig->getMimeConf());
if (!fun) {
QMessageBox::warning(0, "Recoll",
QString("No mime handler for mime type ") +
doc.mimetype.c_str());
return;
}
string fn = doc.url.substr(6, string::npos);
Rcl::Doc fdoc;
if (!fun(rclconfig, fn, doc.mimetype, fdoc)) {
QMessageBox::warning(0, "Recoll",
QString("Failed to convert document for preview!\n") +
fn.c_str() + " mimetype " +
doc.mimetype.c_str());
return;
}
string fn = urltolocalpath(doc.url);
Rcl::Doc fdoc;
if (!fun(rclconfig, fn, doc.mimetype, fdoc)) {
QMessageBox::warning(0, "Recoll",
QString("Failed to convert document for preview!\n") +
fn.c_str() + " mimetype " +
doc.mimetype.c_str());
return;
}
string rich = plaintorich(fdoc.text);
string rich = plaintorich(fdoc.text);
#if 0
//Highlighting; pass a list of (search term, style name) to plaintorich
// and create the corresponding styles with different colors here
// We need to :
// - Break the query into terms : wait for the query analyzer
// - Break the text into words. This should use a version of
// textsplit with an option to keep the punctuation (see how to do
// this). We do want the same splitter code to be used here and
// when indexing.
QStyleSheetItem *item =
new QStyleSheetItem( previewTextEdit->styleSheet(), "mytag" );
item->setColor("red");
item->setFontWeight(QFont::Bold);
//Highlighting; pass a list of (search term, style name) to plaintorich
// and create the corresponding styles with different colors here
// We need to :
// - Break the query into terms : wait for the query analyzer
// - Break the text into words. This should use a version of
// textsplit with an option to keep the punctuation (see how to do
// this). We do want the same splitter code to be used here and
// when indexing.
QStyleSheetItem *item =
new QStyleSheetItem( previewTextEdit->styleSheet(), "mytag" );
item->setColor("red");
item->setFontWeight(QFont::Bold);
#endif
QString str = QString::fromUtf8(rich.c_str(), rich.length());
previewTextEdit->setTextFormat(RichText);
previewTextEdit->setText(str);
}
QString str = QString::fromUtf8(rich.c_str(), rich.length());
previewTextEdit->setTextFormat(RichText);
previewTextEdit->setText(str);
}
#include "pathut.h"
// User asked to start query
void RecollMain::queryText_returnPressed()
{
LOGDEB(("RecollMain::queryText_returnPressed()\n"));
reslist_current = -1;
reslist_winfirst = -1;
string rawq = queryText->text();
rcldb->setQuery(rawq);
QCString u8 = queryText->text().utf8();
rcldb->setQuery(string((const char *)u8));
listNextPB_clicked();
}
@ -145,6 +202,7 @@ void RecollMain::listPrevPB_clicked()
#define MIN(A,B) ((A) < (B) ? (A) : (B))
#endif
// Fill up result list window with next screen of hits
void RecollMain::listNextPB_clicked()
{
LOGDEB(("listNextPB_clicked: winfirst %d\n", reslist_winfirst));
@ -165,34 +223,22 @@ void RecollMain::listNextPB_clicked()
int resCnt = rcldb->getResCnt();
int last = MIN(resCnt, reslist_winfirst+respagesize);
if (i == 0) {
resTextEdit->clear();
reslistTE->clear();
previewTextEdit->clear();
resTextEdit->append("<qt><head></head><body><p>");
reslistTE->append("<qt><head></head><body><p>");
char line[80];
sprintf(line, "<p><b>Displaying results %d-%d out of %d</b><br>",
reslist_winfirst+1, last, resCnt);
resTextEdit->append(line);
reslistTE->append(line);
}
gotone = true;
LOGDEB1(("Url: %s\n", doc.url.c_str()));
LOGDEB1(("Mimetype: %s\n", doc.mimetype.c_str()));
LOGDEB1(("Mtime: %s\n", doc.mtime.c_str()));
LOGDEB1(("Origcharset: %s\n", doc.origcharset.c_str()));
LOGDEB1(("Title: %s\n", doc.title.c_str()));
LOGDEB1(("Text: %s\n", doc.text.c_str()));
LOGDEB1(("Keywords: %s\n", doc.keywords.c_str()));
LOGDEB1(("Abstract: %s\n", doc.abstract.c_str()));
// Result list display. Standard Omega includes:
// - title or simple file name or url
// - abstract and keywords
// - url
// - relevancy percentage + keywords matched
// - date de modification
// - langue
// - taille
// Result list display: TOBEDONE
// - move abstract/keywords to Detail window ?
// - keywords matched
// - language
// - size
char perbuf[10];
sprintf(perbuf, "%3d%%", percent);
if (doc.title.empty())
@ -202,27 +248,27 @@ void RecollMain::listNextPB_clicked()
if (!doc.mtime.empty()) {
time_t mtime = atol(doc.mtime.c_str());
struct tm *tm = localtime(&mtime);
strftime(datebuf, 99, "<i>Modified:</i> %F %T", tm);
strftime(datebuf, 99, "<i>Modified:</i>&nbsp;%F&nbsp;%T", tm);
}
string result = "<p>" +
string(perbuf) + " <b>" + doc.title + "</b><br>" +
doc.mimetype + "&nbsp;" +
(!doc.mtime.empty() ? string(datebuf) + "<br>" : string("")) +
(!doc.abstract.empty() ? doc.abstract + "<br>" : string("")) +
(!doc.keywords.empty() ? doc.keywords + "<br>" : string("")) +
"<i>" + doc.url + +"</i><br>" +
"</p>";
QString str = QString::fromUtf8(result.c_str(), result.length());
resTextEdit->append(str);
QString str = QString::fromUtf8(result.c_str(), result.length());
reslistTE->append(str);
}
if (gotone) {
resTextEdit->append("</body></qt>");
resTextEdit->setCursorPosition(0,0);
resTextEdit->ensureCursorVisible();
reslistTE->append("</body></qt>");
reslistTE->setCursorPosition(0,0);
reslistTE->ensureCursorVisible();
// Display preview for 1st doc in list
resTextEdit_clicked(1, 0);
reslistTE_clicked(1, 0);
} else {
// Restore first in win parameter that we shouldn't have incremented
reslist_winfirst -= respagesize;

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.12 2005-01-28 15:25:40 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.13 2005-01-29 15:41:11 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
#include <sys/stat.h>
@ -263,7 +263,7 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &doc)
// - sample
// - caption (title limited to 100 chars)
// - mime type
string record = "url=file:/" + fn;
string record = "url=file://" + fn;
record += "\nmtype=" + doc.mimetype;
record += "\nmtime=" + doc.mtime;
record += "\norigcharset=" + doc.origcharset;
@ -277,18 +277,14 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &doc)
// If this document has already been indexed, update the existing
// entry.
try {
#if 0
Xapian::docid did =
#endif
ndb->wdb.replace_document(pathterm, newdocument);
#if 0
if (did < ndb->updated.size()) {
ndb->updated[did] = true;
LOGDEB(("%s updated\n", fnc));
} else {
LOGDEB(("%s added\n", fnc));
}
#endif
} catch (...) {
// FIXME: is this ever actually needed?
ndb->wdb.add_document(newdocument);
@ -313,9 +309,8 @@ bool Rcl::Db::needUpdate(const string &filename, const struct stat *stp)
if (did == ndb->wdb.postlist_end(pathterm))
return true;
Xapian::Document doc = ndb->wdb.get_document(*did);
#if 0
ndb->updated[*did] = true;
#endif
if (*did < ndb->updated.size())
ndb->updated[*did] = true;
string data = doc.get_data();
//cerr << "DOCUMENT EXISTS " << data << endl;
const char *cp = strstr(data.c_str(), "mtime=");
@ -332,6 +327,27 @@ bool Rcl::Db::needUpdate(const string &filename, const struct stat *stp)
return true;
}
// Delete from the index every document whose slot in the 'updated' array
// was not set during the current pass (needUpdate() and add() set it for
// the documents they see). Document ids are scanned from 1 up to the size
// of the array.
//
// Returns false without touching anything if the database is not open for
// writing, true otherwise. Ids for which Xapian reports no document are
// silently skipped.
bool Rcl::Db::purge()
{
    Native *ndb = (Native *)pdata;
    if (ndb == 0 || !ndb->isopen || !ndb->iswritable)
        return false;

    for (Xapian::docid did = 1; did < ndb->updated.size(); ++did) {
        if (ndb->updated[did])
            continue;
        try {
            ndb->wdb.delete_document(did);
            LOGDEB(("Rcl::Db::purge: deleted document #%d\n", did));
        } catch (const Xapian::DocNotFoundError &) {
            // No document with this id: nothing to purge
        }
    }
    return true;
}
#include <vector>
class wsQData {
@ -369,6 +385,7 @@ bool Rcl::Db::setQuery(const std::string &querystring)
ndb->mset = Xapian::MSet();
return true;
}
int Rcl::Db::getResCnt()
{
Native *ndb = (Native *)pdata;

View File

@ -1,6 +1,6 @@
#ifndef _DB_H_INCLUDED_
#define _DB_H_INCLUDED_
/* @(#$Id: rcldb.h,v 1.6 2005-01-28 15:25:40 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: rcldb.h,v 1.7 2005-01-29 15:41:11 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string>
@ -65,6 +65,7 @@ class Db {
// Update-related functions
bool add(const std::string &filename, const Doc &doc);
bool needUpdate(const std::string &filename, const struct stat *stp);
bool purge();
// Query-related functions

View File

@ -3,7 +3,7 @@ CXXFLAGS = -I.
BIGLIB = ../lib/librcl.a
PROGS = trfstreewalk trpathut execmd transcode trmimeparse
PROGS = smallut trfstreewalk trpathut execmd transcode trmimeparse
all: $(PROGS)
FSTREEWALK_OBJS= trfstreewalk.o fstreewalk.o pathut.o
@ -38,5 +38,12 @@ mimeparse : $(MIMEPARSE_OBJS)
trmimeparse.o : ../utils/mimeparse.cpp
$(CXX) $(CXXFLAGS) -DTEST_MIMEPARSE -c -o trmimeparse.o \
mimeparse.cpp
SMALLUT_OBJS= trsmallut.o $(BIGLIB)
smallut : $(SMALLUT_OBJS)
$(CXX) $(CXXFLAGS) -o smallut $(SMALLUT_OBJS) \
-L/usr/local/lib -liconv
trsmallut.o : ../utils/smallut.cpp
$(CXX) $(CXXFLAGS) -DTEST_SMALLUT -c -o trsmallut.o \
smallut.cpp
clean:
rm -f *.o $(PROGS)