monitor: purge docs for deleted files from db

This commit is contained in:
dockes 2006-10-22 14:47:14 +00:00
parent 4269a149b2
commit 4e0d1e2483
11 changed files with 314 additions and 120 deletions

18
src/configure vendored
View File

@ -791,9 +791,9 @@ Optional Packages:
--with-aspell Use aspell spelling package to provide term
expansion to other spellings
--with-fam Use File Alteration Monitor for almost real time
indexing of modified files. Give directory where fam
library lives as argument if this is not found by
configure.
indexing of modified files. Give the fam/gamin
library as argument (ie: /usr/lib/libfam.so) if
configure does not find the right one.
Some influential environment variables:
CC C compiler command
@ -1286,11 +1286,11 @@ case $withFam in
no);;
yes)
for dir in /usr/local/lib /usr/lib;do
if test -f $dir/libfam.so ; then famLibDir=$dir;break;fi
if test -f $dir/libfam.so ; then famLib=$dir/libfam.so;break;fi
done
;;
*) # The argument should be the path to the fam library
famLibDir=$withFam
famLib=$withFam
;;
esac
@ -1305,14 +1305,18 @@ cat >>confdefs.h <<\_ACEOF
#define RCL_USE_FAM 1
_ACEOF
if test X$famLibDir != X ; then
if test X$famLib != X ; then
famLibDir=`dirname $famLib`
famBase=`dirname $famLibDir`
famBLib=`basename $famLib .so | sed -e s/lib//`
if test ! -f $famBase/include/fam.h ; then
{ { echo "$as_me:$LINENO: error: fam.h not found in $famBase/include. Specify --with-fam=no to disable fam support" >&5
echo "$as_me: error: fam.h not found in $famBase/include. Specify --with-fam=no to disable fam support" >&2;}
{ (exit 1); exit 1; }; }
fi
LIBFAM="-L$famLibDir -lfam"
LIBFAM="-L$famLibDir -l$famBLib"
{ echo "$as_me:$LINENO: fam library directive: $LIBFAM" >&5
echo "$as_me: fam library directive: $LIBFAM" >&6;}
cat >>confdefs.h <<_ACEOF
#define FAM_INCLUDE "$famBase/include/fam.h"

View File

@ -55,29 +55,32 @@ fi
# Real time monitoring with FAM
AC_ARG_WITH(fam,
AC_HELP_STRING([--with-fam],
[Use File Alteration Monitor for almost real time indexing of modified files. Give directory where fam library lives as argument if this is not found by configure.]),
[Use File Alteration Monitor for almost real time indexing of modified files. Give the fam/gamin library as argument (ie: /usr/lib/libfam.so) if configure does not find the right one.]),
withFam=$withval, withFam=no)
case $withFam in
no);;
yes)
for dir in /usr/local/lib /usr/lib;do
if test -f $dir/libfam.so ; then famLibDir=$dir;break;fi
if test -f $dir/libfam.so ; then famLib=$dir/libfam.so;break;fi
done
;;
*) # The argument should be the path to the fam library
famLibDir=$withFam
famLib=$withFam
;;
esac
if test X$withFam != Xno ; then
AC_DEFINE(RCL_MONITOR, 1, [Real time monitoring option])
AC_DEFINE(RCL_USE_FAM, 1, [Compile the fam interface])
if test X$famLibDir != X ; then
if test X$famLib != X ; then
famLibDir=`dirname $famLib`
famBase=`dirname $famLibDir`
famBLib=`basename $famLib .so | sed -e s/lib//`
if test ! -f $famBase/include/fam.h ; then
AC_MSG_ERROR([fam.h not found in $famBase/include. Specify --with-fam=no to disable fam support])
fi
LIBFAM="-L$famLibDir -lfam"
LIBFAM="-L$famLibDir -l$famBLib"
AC_MSG_NOTICE([fam library directive: $LIBFAM])
AC_DEFINE_UNQUOTED(FAM_INCLUDE, "$famBase/include/fam.h",
[Path to the fam api include file])
else

View File

@ -9,7 +9,8 @@ all: depend $(PROGS) $(BIGLIB)
RECOLLINDEX_OBJS= recollindex.o rclmonrcv.o rclmonprc.o $(BIGLIB) $(MIMELIB)
recollindex : $(RECOLLINDEX_OBJS)
$(CXX) $(ALL_CXXFLAGS) -o recollindex $(RECOLLINDEX_OBJS) \
$(BSTATIC) $(LIBXAPIAN) $(LIBICONV) $(BDYNAMIC) -lfam $(LIBSYS)
$(BSTATIC) $(LIBXAPIAN) $(LIBICONV) $(BDYNAMIC) \
$(LIBFAM) $(LIBSYS)
recollindex.o : recollindex.cpp
$(CXX) $(ALL_CXXFLAGS) -c -o recollindex.o $<
rclmonrcv.o : rclmonrcv.cpp

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.38 2006-10-16 15:33:08 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.39 2006-10-22 14:47:13 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -223,13 +223,13 @@ bool DbIndexer::indexFiles(const list<string> &filenames)
if (m_config->getConfParam("idxabsmlen", &abslen))
m_db.setAbstractParams(abslen, -1, -1);
struct stat stb;
if (stat(it->c_str(), &stb) != 0) {
LOGERR(("DbIndexer::indexFiles: stat(%s): %s", it->c_str(),
if (lstat(it->c_str(), &stb) != 0) {
LOGERR(("DbIndexer::indexFiles: lstat(%s): %s", it->c_str(),
strerror(errno)));
continue;
}
if (!S_ISREG(stb.st_mode)) {
LOGERR(("DbIndexer::indexFiles: %s: not a regular file\n",
LOGDEB2(("DbIndexer::indexFiles: %s: not a regular file\n",
it->c_str()));
continue;
}
@ -257,7 +257,7 @@ bool DbIndexer::indexFiles(const list<string> &filenames)
if (processone(*it, &stb, FsTreeWalker::FtwRegular) !=
FsTreeWalker::FtwOk) {
LOGERR(("DbIndexer::indexFiles: Database error\n"));
LOGERR(("DbIndexer::indexFiles: processone failed\n"));
return false;
}
skipped:
@ -273,6 +273,31 @@ bool DbIndexer::indexFiles(const list<string> &filenames)
return true;
}
/** Purge docs for given files out of the database */
bool DbIndexer::purgeFiles(const list<string> &filenames)
{
if (!init())
return false;
list<string>::const_iterator it;
for (it = filenames.begin(); it != filenames.end(); it++) {
if (!m_db.purgeFile(*it)) {
LOGERR(("DbIndexer::purgeFiles: Database error\n"));
return false;
}
}
// The close would be done in our destructor, but we want status here
if (!m_db.close()) {
LOGERR(("DbIndexer::purgefiles: error closing database in %s\n",
m_dbdir.c_str()));
return false;
}
return true;
}
/// This method gets called for every file and directory found by the
/// tree walker.
///
@ -308,7 +333,7 @@ DbIndexer::processone(const std::string &fn, const struct stat *stp,
// without mime type will not be purged from the db, resulting
// in possible 'cannot intern file' messages at query time...
if (!m_db.needUpdate(fn, stp)) {
LOGDEB(("indexfile: up to date: %s\n", fn.c_str()));
LOGDEB(("processone: up to date: %s\n", fn.c_str()));
if (m_updater) {
m_updater->status.fn = fn;
if (!m_updater->update()) {

View File

@ -16,7 +16,7 @@
*/
#ifndef _INDEXER_H_INCLUDED_
#define _INDEXER_H_INCLUDED_
/* @(#$Id: indexer.h,v 1.19 2006-10-16 15:33:08 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: indexer.h,v 1.20 2006-10-22 14:47:13 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string>
#include <list>
@ -116,6 +116,9 @@ class DbIndexer : public FsTreeWalkerCB {
/** Index a list of files. No db cleaning or stemdb updating */
bool indexFiles(const std::list<string> &files);
/** Purge a list of files. */
bool purgeFiles(const std::list<string> &files);
/** Create stem database for given language */
bool createStemDb(const string &lang);
@ -141,8 +144,9 @@ class DbIndexer : public FsTreeWalkerCB {
bool init(bool rst = false);
};
/** Helper method in recollindex.cpp for initial checks/setup to index
/** Helper methods in recollindex.cpp for initial checks/setup to index
* a list of files (either from the monitor or the command line) */
extern bool indexfiles(RclConfig *config, const list<string> &filenames);
extern bool purgefiles(RclConfig *config, const list<string> &filenames);
#endif /* _INDEXER_H_INCLUDED_ */

View File

@ -2,7 +2,7 @@
#ifdef RCL_MONITOR
#ifndef lint
static char rcsid[] = "@(#$Id: rclmonprc.cpp,v 1.2 2006-10-17 14:41:59 dockes Exp $ (C) 2006 J.F.Dockes";
static char rcsid[] = "@(#$Id: rclmonprc.cpp,v 1.3 2006-10-22 14:47:13 dockes Exp $ (C) 2006 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -117,11 +117,13 @@ RclConfig *RclMonEventQueue::getConfig()
return m_data->m_config;
}
extern int stopindexing;
bool RclMonEventQueue::ok()
{
if (m_data == 0)
return false;
return m_data->m_ok;
return !stopindexing && m_data->m_ok;
}
void RclMonEventQueue::setTerminate()
@ -143,11 +145,9 @@ bool RclMonEventQueue::pushEvent(const RclMonEvent &ev)
return true;
}
pthread_t rcv_thrid;
void *rcv_result;
extern void *rclMonRcvRun(void *);
extern int stopindexing;
bool startMonitor(RclConfig *conf, bool nofork)
{
@ -163,7 +163,7 @@ bool startMonitor(RclConfig *conf, bool nofork)
LOGDEB(("start_monitoring: entering main loop\n"));
while (rclEQ.wait()) {
LOGDEB2(("startMonitor: wait returned\n"));
if (stopindexing || !rclEQ.ok())
if (!rclEQ.ok())
break;
list<string> modified;
list<string> deleted;
@ -191,11 +191,13 @@ bool startMonitor(RclConfig *conf, bool nofork)
// Unlock queue before processing lists
rclEQ.unlock();
// Process
indexfiles(conf, modified);
if (!indexfiles(conf, modified))
break;
if (!purgefiles(conf, deleted))
break;
// Lock queue before waiting again
rclEQ.lock();
}
LOGERR(("start_monitoring: rclEQ::wait() failed\n"));
return false;
return true;
}
#endif // RCL_MONITOR

View File

@ -1,7 +1,7 @@
#include "autoconfig.h"
#ifdef RCL_MONITOR
#ifndef lint
static char rcsid[] = "@(#$Id: rclmonrcv.cpp,v 1.2 2006-10-17 14:41:59 dockes Exp $ (C) 2006 J.F.Dockes";
static char rcsid[] = "@(#$Id: rclmonrcv.cpp,v 1.3 2006-10-22 14:47:13 dockes Exp $ (C) 2006 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -35,71 +35,92 @@ static char rcsid[] = "@(#$Id: rclmonrcv.cpp,v 1.2 2006-10-17 14:41:59 dockes Ex
*/
/** A small virtual interface for monitors. Suitable to let either of
fam/gamin/ or raw imonitor hide behind */
/** A small virtual interface for monitors. Probably suitable to let
either of fam/gamin or raw imonitor hide behind */
class RclMonitor {
public:
RclMonitor(){}
virtual ~RclMonitor() {}
virtual bool addWatch(const string& path, const struct stat&) = 0;
virtual bool getEvent(RclMonEvent& ev) = 0;
virtual bool getEvent(RclMonEvent& ev, int secs = -1) = 0;
virtual bool ok() = 0;
};
// Monitor factory
// Monitor factory. We only have one compiled-in kind at a time, no
// need for a 'kind' parameter
static RclMonitor *makeMonitor();
/** Class used to create the directory watches */
/** This class is a callback for the file system tree walker
class. The callback method alternatively creates the directory
watches and flushes the event queue (to avoid a possible overflow
while we create the watches)*/
class WalkCB : public FsTreeWalkerCB {
public:
    /**
     * @param conf  configuration object (stored, not used by this class)
     * @param mon   monitor on which the directory watches are created
     * @param queue event queue receiving the events drained while walking
     */
    WalkCB(RclConfig *conf, RclMonitor *mon, RclMonEventQueue *queue)
        : m_conf(conf), m_mon(mon), m_queue(queue)
    {}
    virtual ~WalkCB()
    {}

    /**
     * Tree walker callback. When entering a directory, first move any
     * pending monitor events to the queue, then add a watch on the
     * directory. Other entry kinds are ignored.
     *
     * @return FtwError if the monitor is missing or not ok, or if the
     *         watch can't be added; FtwOk otherwise.
     */
    virtual FsTreeWalker::Status
    processone(const string &fn, const struct stat *st, FsTreeWalker::CbFlag flg)
    {
        LOGDEB2(("rclMonRcvRun: processone %s m_mon %p m_mon->ok %d\n",
                 fn.c_str(), m_mon, m_mon?m_mon->ok():0));

        // Watches are only created when entering a directory
        if (flg != FsTreeWalker::FtwDirEnter)
            return FsTreeWalker::FtwOk;

        // Validate the monitor before anything dereferences it
        if (!m_mon || !m_mon->ok())
            return FsTreeWalker::FtwError;

        // Empty whatever events we may already have on queue. The 0
        // timeout makes getEvent() return at once if nothing is pending.
        while (m_queue && m_queue->ok() && m_mon->ok()) {
            RclMonEvent ev;
            if (!m_mon->getEvent(ev, 0))
                break;
            m_queue->pushEvent(ev);
        }

        // The monitor may have failed while we were draining events
        if (!m_mon->ok() || !m_mon->addWatch(fn, *st))
            return FsTreeWalker::FtwError;
        return FsTreeWalker::FtwOk;
    }

private:
    RclConfig *m_conf;
    RclMonitor *m_mon;
    RclMonEventQueue *m_queue;
};
/** Main thread routine: create watches, then wait for events an queue them */
/** Main thread routine: create watches, then forever wait for and queue events */
void *rclMonRcvRun(void *q)
{
RclMonEventQueue *queue = (RclMonEventQueue *)q;
RclMonitor *mon;
LOGDEB(("rclMonRcvRun: running\n"));
// Create the fam/whatever interface object
RclMonitor *mon;
if ((mon = makeMonitor()) == 0) {
LOGERR(("rclMonRcvRun: makeMonitor failed\n"));
rclEQ.setTerminate();
queue->setTerminate();
return 0;
}
// Get top directories from config and walk trees to add watches
FsTreeWalker walker;
WalkCB walkcb(queue->getConfig(), mon);
// Get top directories from config
list<string> tdl = queue->getConfig()->getTopdirs();
if (tdl.empty()) {
LOGERR(("rclMonRcvRun:: top directory list (topdirs param.) not"
"found in config or Directory list parse error"));
rclEQ.setTerminate();
queue->setTerminate();
return 0;
}
// Walk the directory trees to add watches
FsTreeWalker walker;
WalkCB walkcb(queue->getConfig(), mon, queue);
for (list<string>::iterator it = tdl.begin(); it != tdl.end(); it++) {
queue->getConfig()->setKeyDir(*it);
// Adjust the skipped names according to config
walker.clearSkippedNames();
string skipped;
if (queue->getConfig()->getConfParam("skippedNames", skipped)) {
@ -112,19 +133,16 @@ void *rclMonRcvRun(void *q)
}
// Forever wait for monitoring events and add them to queue:
LOGDEB2(("rclMonRcvRun: waiting for events. rclEQ.ok() %d\n", rclEQ.ok()));
while (rclEQ.ok()) {
if (!mon->ok())
break;
LOGDEB2(("rclMonRcvRun: waiting for events. queue->ok() %d\n", queue->ok()));
while (queue->ok() && mon->ok()) {
RclMonEvent ev;
if (mon->getEvent(ev)) {
rclEQ.pushEvent(ev);
queue->pushEvent(ev);
}
if (!mon->ok())
break;
}
LOGDEB(("rclMonRcvRun: exiting\n"));
rclEQ.setTerminate();
queue->setTerminate();
return 0;
}
@ -133,6 +151,7 @@ void *rclMonRcvRun(void *q)
#include <fam.h>
#include <sys/select.h>
// Translate event code to string (debug)
static const char *event_name(int code)
{
static const char *famevent[] = {
@ -149,21 +168,22 @@ static const char *event_name(int code)
};
static char unknown_event[20];
if (code < FAMChanged || code > FAMEndExist)
{
if (code < FAMChanged || code > FAMEndExist) {
sprintf(unknown_event, "unknown (%d)", code);
return unknown_event;
}
return famevent[code];
}
// FAM based monitor class
/** FAM based monitor class. We have to keep a map from FAM watch
request numbers to directory names, as events only contain the
request number and file name, not the full path */
class RclFAM : public RclMonitor {
public:
RclFAM();
virtual ~RclFAM();
virtual bool addWatch(const string& path, const struct stat& st);
virtual bool getEvent(RclMonEvent& ev);
virtual bool getEvent(RclMonEvent& ev, int secs = -1);
bool ok() {return m_ok;}
private:
@ -213,7 +233,7 @@ bool RclFAM::addWatch(const string& path, const struct stat& st)
return true;
}
bool RclFAM::getEvent(RclMonEvent& ev)
bool RclFAM::getEvent(RclMonEvent& ev, int secs)
{
if (!ok())
return false;
@ -224,16 +244,22 @@ bool RclFAM::getEvent(RclMonEvent& ev)
FD_ZERO(&readfds);
FD_SET(fam_fd, &readfds);
// Note: can't see a reason to set a timeout. Only reason we might
// want out is signal which will break the select call anyway (I
// don't think that there is any system still using the old bsd-type
// syscall re-entrance after signal).
LOGDEB(("RclFAM::getEvent: select\n"));
if (select(fam_fd + 1, &readfds, 0, 0, 0) < 0) {
struct timeval timeout;
if (secs >= 0) {
memset(&timeout, 0, sizeof(timeout));
timeout.tv_sec = secs;
}
int ret;
if ((ret=select(fam_fd + 1, &readfds, 0, 0, secs >= 0 ? &timeout : 0)) < 0) {
LOGERR(("RclFAM::getEvent: select failed, errno %d\n", errno));
close();
return false;
} else if (ret == 0) {
// timeout
return false;
}
if (!FD_ISSET(fam_fd, &readfds))
return false;
@ -243,8 +269,10 @@ bool RclFAM::getEvent(RclMonEvent& ev)
close();
return false;
}
map<int,string>::const_iterator it;
if ((it = m_reqtodir.find(fe.fr.reqnum)) != m_reqtodir.end()) {
if ((fe.filename[0] != '/') &&
(it = m_reqtodir.find(fe.fr.reqnum)) != m_reqtodir.end()) {
ev.m_path = path_cat(it->second, fe.filename);
} else {
ev.m_path = fe.filename;
@ -279,7 +307,7 @@ bool RclFAM::getEvent(RclMonEvent& ev)
return true;
}
// The monitor factory
// The monitor 'factory'
static RclMonitor *makeMonitor()
{
return new RclFAM;

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.24 2006-10-17 14:41:59 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.25 2006-10-22 14:47:14 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -42,6 +42,27 @@ using namespace std;
ConfIndexer *confindexer;
DbIndexer *dbindexer;
int stopindexing;
// Status updater whose main purpose is to relay an indexing stop
// request. The current file name is not used for now.
class MyUpdater : public DbIxStatusUpdater {
 public:
    // Returns false once stopindexing has been set, true until then
    virtual bool update() {
        return stopindexing == 0;
    }
};
MyUpdater updater;
// Presumably installed as a signal handler (the installation is not
// visible here): prints a trace on stderr and in the debug log, then
// sets stopindexing to request that indexing stops. The sig argument
// is not used.
// NOTE(review): fprintf() is not async-signal-safe and LOGDEB probably
// is not either; stopindexing is a plain int rather than a
// volatile sig_atomic_t. Confirm whether this matters for the signals
// actually handled.
static void sigcleanup(int sig)
{
fprintf(stderr, "sigcleanup\n");
LOGDEB(("sigcleanup\n"));
stopindexing = 1;
}
static bool makeDbIndexer(RclConfig *config)
{
string dbdir = config->getDbDir();
@ -57,7 +78,7 @@ static bool makeDbIndexer(RclConfig *config)
}
if (!dbindexer)
dbindexer = new DbIndexer(config, dbdir);
dbindexer = new DbIndexer(config, dbdir, &updater);
return true;
}
@ -116,6 +137,40 @@ bool indexfiles(RclConfig *config, const list<string> &filenames)
return dbindexer->indexFiles(myfiles);
}
// Purge the index data for a list of files. Returns true at once for
// an empty list; returns false if the top directory list can't be
// obtained or the indexer can't be created; else returns the status of
// DbIndexer::purgeFiles().
bool purgefiles(RclConfig *config, const list<string> &filenames)
{
    if (filenames.empty())
        return true;

    // Fetch the top directories from the config if we don't have them yet
    if (o_tdl.empty())
        o_tdl = config->getTopdirs();
    if (o_tdl.empty()) {
        fprintf(stderr, "Top directory list (topdirs param.) "
                "not found in config or Directory list parse error");
        return false;
    }

    // Work on canonic paths
    list<string> canonical;
    list<string>::const_iterator fn = filenames.begin();
    while (fn != filenames.end()) {
        canonical.push_back(path_canon(*fn));
        ++fn;
    }

    // Note: the file names should be sorted against the topdirs here,
    // with a check for different databases. As there can only be one
    // database per config for now, the keydir is just set from the
    // first file (which is not really needed...) before creating the
    // indexer/db.
    config->setKeyDir(path_getfather(canonical.front()));

    if (makeDbIndexer(config) && dbindexer)
        return dbindexer->purgeFiles(canonical);
    return false;
}
// Create additional stem database
static bool createstemdb(RclConfig *config, const string &lang)
{
@ -134,26 +189,6 @@ static void cleanup()
dbindexer = 0;
}
int stopindexing;
// Mainly used to request indexing stop, we currently do not use the
// current file name
class MyUpdater : public DbIxStatusUpdater {
public:
virtual bool update() {
if (stopindexing) {
return false;
}
return true;
}
};
MyUpdater updater;
static void sigcleanup(int sig)
{
fprintf(stderr, "sigcleanup\n");
stopindexing = 1;
}
static const char *thisprog;
static int op_flags;
#define OPT_MOINS 0x1

View File

@ -5,6 +5,8 @@ XAPIANCXXFLAGS=@XAPIANCXXFLAGS@
LIBICONV=@LIBICONV@
INCICONV=@INCICONV@
LIBFAM = @LIBFAM@
prefix = @prefix@
datadir = @datadir@
RECOLL_DATADIR = ${datadir}/recoll

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.80 2006-10-09 16:37:08 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.81 2006-10-22 14:47:13 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -56,18 +56,21 @@ using namespace std;
#ifndef NO_NAMESPACES
namespace Rcl {
#endif
// Truncate longer path and uniquize with hash . The goal for this is
// to avoid xapian max term length limitations, not to gain space (we
// gain very little even with very short maxlens like 30)
// Max length for path terms stored for each document. Truncate
// longer path and uniquize with hash. The goal for this is to avoid
// xapian max term length limitations, not to gain space (we gain very
// little even with very short maxlens like 30)
#define PATHHASHLEN 150
// Synthetic abstract marker (to discriminate from abstract actually
// found in doc)
const static string rclSyntAbs = "?!#@";
// Data for a xapian database. There could actually be 2 different
// ones for indexing or query as there is not much in common.
// A class for data and methods that would have to expose
// Xapian-specific stuff if they were in Rcl::Db. There could actually be
// 2 different ones for indexing or query as there is not much in
// common.
class Native {
public:
Db *m_db;
@ -96,6 +99,35 @@ class Native {
Xapian::docid docid,
const list<string>& terms);
/** Compute list of subdocuments for a given path (given by hash) */
bool subDocs(const string &hash, vector<Xapian::docid>& docids) {
docids.clear();
string qterm = "Q"+ hash + "|";
Xapian::Database db = m_iswritable ? wdb: db;
Xapian::TermIterator it = db.allterms_begin();
it.skip_to(qterm);
string ermsg;
try {
for (;it != db.allterms_end(); it++) {
// If current term does not begin with qterm or has
// another |, not the same file
if ((*it).find(qterm) != 0 ||
(*it).find_last_of("|") != qterm.length() -1)
break;
docids.push_back(*(db.postlist_begin(*it)));
}
return true;
} catch (const Xapian::Error &e) {
ermsg = e.get_msg().c_str();
} catch (...) {
ermsg= "Unknown error";
}
LOGERR(("Rcl::Db::subDocs: %s\n", ermsg.c_str()));
return false;
}
Native(Db *db)
: m_db(db),
m_isopen(false), m_iswritable(false), m_mode(Db::DbRO), enquire(0)
@ -655,7 +687,6 @@ bool Db::needUpdate(const string &filename, const struct stat *stp)
pathHash(filename, hash, PATHHASHLEN);
string pterm = "P" + hash;
const char *ermsg;
string qterm = "Q"+ hash + "|";
// Look for all documents with this path. We need to look at all
// to set their existence flag. We check the update time on the
@ -697,20 +728,16 @@ bool Db::needUpdate(const string &filename, const struct stat *stp)
m_ndb->updated[*docid] = true;
// Set the existence flag for all the subdocs (if any)
Xapian::TermIterator it = m_ndb->wdb.allterms_begin();
it.skip_to(qterm);
LOGDEB2(("First qterm: [%s]\n", (*it).c_str()));
for (;it != m_ndb->wdb.allterms_end(); it++) {
// If current term does not begin with qterm or has another |, not
// the same file
if ((*it).find(qterm) != 0 ||
(*it).find_last_of("|") != qterm.length() -1)
break;
docid = m_ndb->wdb.postlist_begin(*it);
if (*docid < m_ndb->updated.size()) {
LOGDEB2(("Db::needUpdate: set exist flag for docid %d [%s]\n",
*docid, (*it).c_str()));
m_ndb->updated[*docid] = true;
vector<Xapian::docid> docids;
if (!m_ndb->subDocs(hash, docids)) {
LOGERR(("Rcl::Db::needUpdate: can't get subdocs list\n"));
return true;
}
for (vector<Xapian::docid>::iterator it = docids.begin();
it != docids.end(); it++) {
if (*it < m_ndb->updated.size()) {
LOGDEB2(("Db::needUpdate: set flag for docid %d\n", *it));
m_ndb->updated[*it] = true;
}
}
return false;
@ -764,7 +791,9 @@ bool Db::createStemDb(const string& lang)
/**
* This is called at the end of an indexing session, to delete the
* documents for files that are no longer there.
* documents for files that are no longer there. This can ONLY be called
* after a full file-system tree walk, else the file existence flags will
* be wrong.
*/
bool Db::purge()
{
@ -806,6 +835,47 @@ bool Db::purge()
return true;
}
/** Delete document(s) for given filename */
bool Db::purgeFile(const string &fn)
{
LOGDEB(("Db:purgeFile: [%s]\n", fn.c_str()));
if (m_ndb == 0)
return false;
Xapian::WritableDatabase db = m_ndb->wdb;
string hash;
pathHash(fn, hash, PATHHASHLEN);
string pterm = "P" + hash;
const char *ermsg = "";
try {
Xapian::PostingIterator docid = db.postlist_begin(pterm);
if (docid == db.postlist_end(pterm))
return true;
LOGDEB(("purgeFile: delete docid %d\n", *docid));
db.delete_document(*docid);
vector<Xapian::docid> docids;
m_ndb->subDocs(hash, docids);
LOGDEB(("purgeFile: subdocs cnt %d\n", docids.size()));
for (vector<Xapian::docid>::iterator it = docids.begin();
it != docids.end(); it++) {
LOGDEB2(("Db::purgeFile: delete subdoc %d\n", *it));
db.delete_document(*it);
}
return true;
} catch (const Xapian::Error &e) {
ermsg = e.get_msg().c_str();
} catch (const string &s) {
ermsg = s.c_str();
} catch (const char *s) {
ermsg = s;
} catch (...) {
ermsg = "Caught unknown exception";
}
if (*ermsg) {
LOGERR(("Db::purgeFile: %s\n", ermsg));
}
return false;
}
// Splitter callback for breaking query into terms
class wsQData : public TextSplitCB {
public:
@ -1378,6 +1448,7 @@ bool Db::getDoc(int exti, Doc &doc, int *percent)
return m_ndb->dbDataToRclDoc(data, doc, m_qOpts, docid, terms);
}
// Retrieve document defined by file name and internal path.
bool Db::getDoc(const string &fn, const string &ipath, Doc &doc, int *pc)
{

View File

@ -16,7 +16,7 @@
*/
#ifndef _DB_H_INCLUDED_
#define _DB_H_INCLUDED_
/* @(#$Id: rcldb.h,v 1.37 2006-10-09 16:37:08 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: rcldb.h,v 1.38 2006-10-22 14:47:13 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string>
#include <list>
@ -123,16 +123,35 @@ class Db {
bool close();
bool isopen();
int docCnt(); /// Return total docs in db
/** Return total docs in db */
int docCnt();
// Update-related functions
/* Update-related functions */
/** Add document. The Doc class should have been filled as much as
possible depending on the document type */
bool add(const string &filename, const Doc &doc, const struct stat *stp);
/** Test if the db entry for the given filename/stat is up to date */
bool needUpdate(const string &filename, const struct stat *stp);
/** Remove documents that no longer exist in the file system. This
depends on the update map, which is built during
indexation. This should only be called after a full walk of
the file system, else the update map will not be complete, and
many documents will be deleted that shouldn't */
bool purge();
/** Delete document(s) for given filename */
bool purgeFile(const string &filename);
/** Create stem expansion database for given language. */
bool createStemDb(const string &lang);
/** Delete stem expansion database for given language. */
bool deleteStemDb(const string &lang);
// Query-related functions
/* Query-related functions */
// Parse query string and initialize query
bool setQuery(AdvSearchData &q, int opts = QO_NONE,
@ -144,11 +163,11 @@ class Db {
// Stem expansion is performed if lang is not empty
list<string> completions(const string &s, const string &lang, int max=20);
/// Add extra database for querying
/** Add extra database for querying */
bool addQueryDb(const string &dir);
/// Remove extra database. if dir == "", remove all.
/** Remove extra database. if dir == "", remove all. */
bool rmQueryDb(const string &dir);
/// Tell if directory seems to hold xapian db
/** Tell if directory seems to hold xapian db */
static bool testDbDir(const string &dir);
/** Get document at rank i in current query.