monitor: purge docs for deleted files from db

This commit is contained in:
dockes 2006-10-22 14:47:14 +00:00
parent 4269a149b2
commit 4e0d1e2483
11 changed files with 314 additions and 120 deletions

18
src/configure vendored
View File

@ -791,9 +791,9 @@ Optional Packages:
--with-aspell Use aspell spelling package to provide term --with-aspell Use aspell spelling package to provide term
expansion to other spellings expansion to other spellings
--with-fam Use File Alteration Monitor for almost real time --with-fam Use File Alteration Monitor for almost real time
indexing of modified files. Give directory where fam indexing of modified files. Give the fam/gamin
library lives as argument if this is not found by library as argument (ie: /usr/lib/libfam.so) if
configure. configure does not find the right one.
Some influential environment variables: Some influential environment variables:
CC C compiler command CC C compiler command
@ -1286,11 +1286,11 @@ case $withFam in
no);; no);;
yes) yes)
for dir in /usr/local/lib /usr/lib;do for dir in /usr/local/lib /usr/lib;do
if test -f $dir/libfam.so ; then famLibDir=$dir;break;fi if test -f $dir/libfam.so ; then famLib=$dir/libfam.so;break;fi
done done
;; ;;
*) # The argument should be the path to the fam library *) # The argument should be the path to the fam library
famLibDir=$withFam famLib=$withFam
;; ;;
esac esac
@ -1305,14 +1305,18 @@ cat >>confdefs.h <<\_ACEOF
#define RCL_USE_FAM 1 #define RCL_USE_FAM 1
_ACEOF _ACEOF
if test X$famLibDir != X ; then if test X$famLib != X ; then
famLibDir=`dirname $famLib`
famBase=`dirname $famLibDir` famBase=`dirname $famLibDir`
famBLib=`basename $famLib .so | sed -e s/lib//`
if test ! -f $famBase/include/fam.h ; then if test ! -f $famBase/include/fam.h ; then
{ { echo "$as_me:$LINENO: error: fam.h not found in $famBase/include. Specify --with-fam=no to disable fam support" >&5 { { echo "$as_me:$LINENO: error: fam.h not found in $famBase/include. Specify --with-fam=no to disable fam support" >&5
echo "$as_me: error: fam.h not found in $famBase/include. Specify --with-fam=no to disable fam support" >&2;} echo "$as_me: error: fam.h not found in $famBase/include. Specify --with-fam=no to disable fam support" >&2;}
{ (exit 1); exit 1; }; } { (exit 1); exit 1; }; }
fi fi
LIBFAM="-L$famLibDir -lfam" LIBFAM="-L$famLibDir -l$famBLib"
{ echo "$as_me:$LINENO: fam library directive: $LIBFAM" >&5
echo "$as_me: fam library directive: $LIBFAM" >&6;}
cat >>confdefs.h <<_ACEOF cat >>confdefs.h <<_ACEOF
#define FAM_INCLUDE "$famBase/include/fam.h" #define FAM_INCLUDE "$famBase/include/fam.h"

View File

@ -55,29 +55,32 @@ fi
# Real time monitoring with FAM # Real time monitoring with FAM
AC_ARG_WITH(fam, AC_ARG_WITH(fam,
AC_HELP_STRING([--with-fam], AC_HELP_STRING([--with-fam],
[Use File Alteration Monitor for almost real time indexing of modified files. Give directory where fam library lives as argument if this is not found by configure.]), [Use File Alteration Monitor for almost real time indexing of modified files. Give the fam/gamin library as argument (ie: /usr/lib/libfam.so) if configure does not find the right one.]),
withFam=$withval, withFam=no) withFam=$withval, withFam=no)
case $withFam in case $withFam in
no);; no);;
yes) yes)
for dir in /usr/local/lib /usr/lib;do for dir in /usr/local/lib /usr/lib;do
if test -f $dir/libfam.so ; then famLibDir=$dir;break;fi if test -f $dir/libfam.so ; then famLib=$dir/libfam.so;break;fi
done done
;; ;;
*) # The argument should be the path to the fam library *) # The argument should be the path to the fam library
famLibDir=$withFam famLib=$withFam
;; ;;
esac esac
if test X$withFam != Xno ; then if test X$withFam != Xno ; then
AC_DEFINE(RCL_MONITOR, 1, [Real time monitoring option]) AC_DEFINE(RCL_MONITOR, 1, [Real time monitoring option])
AC_DEFINE(RCL_USE_FAM, 1, [Compile the fam interface]) AC_DEFINE(RCL_USE_FAM, 1, [Compile the fam interface])
if test X$famLibDir != X ; then if test X$famLib != X ; then
famLibDir=`dirname $famLib`
famBase=`dirname $famLibDir` famBase=`dirname $famLibDir`
famBLib=`basename $famLib .so | sed -e s/lib//`
if test ! -f $famBase/include/fam.h ; then if test ! -f $famBase/include/fam.h ; then
AC_MSG_ERROR([fam.h not found in $famBase/include. Specify --with-fam=no to disable fam support]) AC_MSG_ERROR([fam.h not found in $famBase/include. Specify --with-fam=no to disable fam support])
fi fi
LIBFAM="-L$famLibDir -lfam" LIBFAM="-L$famLibDir -l$famBLib"
AC_MSG_NOTICE([fam library directive: $LIBFAM])
AC_DEFINE_UNQUOTED(FAM_INCLUDE, "$famBase/include/fam.h", AC_DEFINE_UNQUOTED(FAM_INCLUDE, "$famBase/include/fam.h",
[Path to the fam api include file]) [Path to the fam api include file])
else else

View File

@ -9,7 +9,8 @@ all: depend $(PROGS) $(BIGLIB)
RECOLLINDEX_OBJS= recollindex.o rclmonrcv.o rclmonprc.o $(BIGLIB) $(MIMELIB) RECOLLINDEX_OBJS= recollindex.o rclmonrcv.o rclmonprc.o $(BIGLIB) $(MIMELIB)
recollindex : $(RECOLLINDEX_OBJS) recollindex : $(RECOLLINDEX_OBJS)
$(CXX) $(ALL_CXXFLAGS) -o recollindex $(RECOLLINDEX_OBJS) \ $(CXX) $(ALL_CXXFLAGS) -o recollindex $(RECOLLINDEX_OBJS) \
$(BSTATIC) $(LIBXAPIAN) $(LIBICONV) $(BDYNAMIC) -lfam $(LIBSYS) $(BSTATIC) $(LIBXAPIAN) $(LIBICONV) $(BDYNAMIC) \
$(LIBFAM) $(LIBSYS)
recollindex.o : recollindex.cpp recollindex.o : recollindex.cpp
$(CXX) $(ALL_CXXFLAGS) -c -o recollindex.o $< $(CXX) $(ALL_CXXFLAGS) -c -o recollindex.o $<
rclmonrcv.o : rclmonrcv.cpp rclmonrcv.o : rclmonrcv.cpp

View File

@ -1,5 +1,5 @@
#ifndef lint #ifndef lint
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.38 2006-10-16 15:33:08 dockes Exp $ (C) 2004 J.F.Dockes"; static char rcsid[] = "@(#$Id: indexer.cpp,v 1.39 2006-10-22 14:47:13 dockes Exp $ (C) 2004 J.F.Dockes";
#endif #endif
/* /*
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
@ -223,13 +223,13 @@ bool DbIndexer::indexFiles(const list<string> &filenames)
if (m_config->getConfParam("idxabsmlen", &abslen)) if (m_config->getConfParam("idxabsmlen", &abslen))
m_db.setAbstractParams(abslen, -1, -1); m_db.setAbstractParams(abslen, -1, -1);
struct stat stb; struct stat stb;
if (stat(it->c_str(), &stb) != 0) { if (lstat(it->c_str(), &stb) != 0) {
LOGERR(("DbIndexer::indexFiles: stat(%s): %s", it->c_str(), LOGERR(("DbIndexer::indexFiles: lstat(%s): %s", it->c_str(),
strerror(errno))); strerror(errno)));
continue; continue;
} }
if (!S_ISREG(stb.st_mode)) { if (!S_ISREG(stb.st_mode)) {
LOGERR(("DbIndexer::indexFiles: %s: not a regular file\n", LOGDEB2(("DbIndexer::indexFiles: %s: not a regular file\n",
it->c_str())); it->c_str()));
continue; continue;
} }
@ -257,7 +257,7 @@ bool DbIndexer::indexFiles(const list<string> &filenames)
if (processone(*it, &stb, FsTreeWalker::FtwRegular) != if (processone(*it, &stb, FsTreeWalker::FtwRegular) !=
FsTreeWalker::FtwOk) { FsTreeWalker::FtwOk) {
LOGERR(("DbIndexer::indexFiles: Database error\n")); LOGERR(("DbIndexer::indexFiles: processone failed\n"));
return false; return false;
} }
skipped: skipped:
@ -273,6 +273,31 @@ bool DbIndexer::indexFiles(const list<string> &filenames)
return true; return true;
} }
/** Purge docs for given files out of the database */
bool DbIndexer::purgeFiles(const list<string> &filenames)
{
if (!init())
return false;
list<string>::const_iterator it;
for (it = filenames.begin(); it != filenames.end(); it++) {
if (!m_db.purgeFile(*it)) {
LOGERR(("DbIndexer::purgeFiles: Database error\n"));
return false;
}
}
// The close would be done in our destructor, but we want status here
if (!m_db.close()) {
LOGERR(("DbIndexer::purgefiles: error closing database in %s\n",
m_dbdir.c_str()));
return false;
}
return true;
}
/// This method gets called for every file and directory found by the /// This method gets called for every file and directory found by the
/// tree walker. /// tree walker.
/// ///
@ -308,7 +333,7 @@ DbIndexer::processone(const std::string &fn, const struct stat *stp,
// without mime type will not be purged from the db, resulting // without mime type will not be purged from the db, resulting
// in possible 'cannot intern file' messages at query time... // in possible 'cannot intern file' messages at query time...
if (!m_db.needUpdate(fn, stp)) { if (!m_db.needUpdate(fn, stp)) {
LOGDEB(("indexfile: up to date: %s\n", fn.c_str())); LOGDEB(("processone: up to date: %s\n", fn.c_str()));
if (m_updater) { if (m_updater) {
m_updater->status.fn = fn; m_updater->status.fn = fn;
if (!m_updater->update()) { if (!m_updater->update()) {

View File

@ -16,7 +16,7 @@
*/ */
#ifndef _INDEXER_H_INCLUDED_ #ifndef _INDEXER_H_INCLUDED_
#define _INDEXER_H_INCLUDED_ #define _INDEXER_H_INCLUDED_
/* @(#$Id: indexer.h,v 1.19 2006-10-16 15:33:08 dockes Exp $ (C) 2004 J.F.Dockes */ /* @(#$Id: indexer.h,v 1.20 2006-10-22 14:47:13 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string> #include <string>
#include <list> #include <list>
@ -116,6 +116,9 @@ class DbIndexer : public FsTreeWalkerCB {
/** Index a list of files. No db cleaning or stemdb updating */ /** Index a list of files. No db cleaning or stemdb updating */
bool indexFiles(const std::list<string> &files); bool indexFiles(const std::list<string> &files);
/** Purge a list of files. */
bool purgeFiles(const std::list<string> &files);
/** Create stem database for given language */ /** Create stem database for given language */
bool createStemDb(const string &lang); bool createStemDb(const string &lang);
@ -141,8 +144,9 @@ class DbIndexer : public FsTreeWalkerCB {
bool init(bool rst = false); bool init(bool rst = false);
}; };
/** Helper method in recollindex.cpp for initial checks/setup to index /** Helper methods in recollindex.cpp for initial checks/setup to index
* a list of files (either from the monitor or the command line) */ * a list of files (either from the monitor or the command line) */
extern bool indexfiles(RclConfig *config, const list<string> &filenames); extern bool indexfiles(RclConfig *config, const list<string> &filenames);
extern bool purgefiles(RclConfig *config, const list<string> &filenames);
#endif /* _INDEXER_H_INCLUDED_ */ #endif /* _INDEXER_H_INCLUDED_ */

View File

@ -2,7 +2,7 @@
#ifdef RCL_MONITOR #ifdef RCL_MONITOR
#ifndef lint #ifndef lint
static char rcsid[] = "@(#$Id: rclmonprc.cpp,v 1.2 2006-10-17 14:41:59 dockes Exp $ (C) 2006 J.F.Dockes"; static char rcsid[] = "@(#$Id: rclmonprc.cpp,v 1.3 2006-10-22 14:47:13 dockes Exp $ (C) 2006 J.F.Dockes";
#endif #endif
/* /*
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
@ -117,11 +117,13 @@ RclConfig *RclMonEventQueue::getConfig()
return m_data->m_config; return m_data->m_config;
} }
extern int stopindexing;
bool RclMonEventQueue::ok() bool RclMonEventQueue::ok()
{ {
if (m_data == 0) if (m_data == 0)
return false; return false;
return m_data->m_ok; return !stopindexing && m_data->m_ok;
} }
void RclMonEventQueue::setTerminate() void RclMonEventQueue::setTerminate()
@ -143,11 +145,9 @@ bool RclMonEventQueue::pushEvent(const RclMonEvent &ev)
return true; return true;
} }
pthread_t rcv_thrid; pthread_t rcv_thrid;
void *rcv_result; void *rcv_result;
extern void *rclMonRcvRun(void *); extern void *rclMonRcvRun(void *);
extern int stopindexing;
bool startMonitor(RclConfig *conf, bool nofork) bool startMonitor(RclConfig *conf, bool nofork)
{ {
@ -163,7 +163,7 @@ bool startMonitor(RclConfig *conf, bool nofork)
LOGDEB(("start_monitoring: entering main loop\n")); LOGDEB(("start_monitoring: entering main loop\n"));
while (rclEQ.wait()) { while (rclEQ.wait()) {
LOGDEB2(("startMonitor: wait returned\n")); LOGDEB2(("startMonitor: wait returned\n"));
if (stopindexing || !rclEQ.ok()) if (!rclEQ.ok())
break; break;
list<string> modified; list<string> modified;
list<string> deleted; list<string> deleted;
@ -191,11 +191,13 @@ bool startMonitor(RclConfig *conf, bool nofork)
// Unlock queue before processing lists // Unlock queue before processing lists
rclEQ.unlock(); rclEQ.unlock();
// Process // Process
indexfiles(conf, modified); if (!indexfiles(conf, modified))
break;
if (!purgefiles(conf, deleted))
break;
// Lock queue before waiting again // Lock queue before waiting again
rclEQ.lock(); rclEQ.lock();
} }
LOGERR(("start_monitoring: rclEQ::wait() failed\n")); return true;
return false;
} }
#endif // RCL_MONITOR #endif // RCL_MONITOR

View File

@ -1,7 +1,7 @@
#include "autoconfig.h" #include "autoconfig.h"
#ifdef RCL_MONITOR #ifdef RCL_MONITOR
#ifndef lint #ifndef lint
static char rcsid[] = "@(#$Id: rclmonrcv.cpp,v 1.2 2006-10-17 14:41:59 dockes Exp $ (C) 2006 J.F.Dockes"; static char rcsid[] = "@(#$Id: rclmonrcv.cpp,v 1.3 2006-10-22 14:47:13 dockes Exp $ (C) 2006 J.F.Dockes";
#endif #endif
/* /*
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
@ -35,71 +35,92 @@ static char rcsid[] = "@(#$Id: rclmonrcv.cpp,v 1.2 2006-10-17 14:41:59 dockes Ex
*/ */
/** A small virtual interface for monitors. Suitable to let either of
fam/gamin/ or raw imonitor hide behind */ /** A small virtual interface for monitors. Probably suitable to let
either of fam/gamin or raw imonitor hide behind */
class RclMonitor { class RclMonitor {
public: public:
RclMonitor(){} RclMonitor(){}
virtual ~RclMonitor() {} virtual ~RclMonitor() {}
virtual bool addWatch(const string& path, const struct stat&) = 0; virtual bool addWatch(const string& path, const struct stat&) = 0;
virtual bool getEvent(RclMonEvent& ev) = 0; virtual bool getEvent(RclMonEvent& ev, int secs = -1) = 0;
virtual bool ok() = 0; virtual bool ok() = 0;
}; };
// Monitor factory
// Monitor factory. We only have one compiled-in kind at a time, no
// need for a 'kind' parameter
static RclMonitor *makeMonitor(); static RclMonitor *makeMonitor();
/** Class used to create the directory watches */ /** This class is a callback for the file system tree walker
class. The callback method alternatively creates the directory
watches and flushes the event queue (to avoid a possible overflow
while we create the watches)*/
class WalkCB : public FsTreeWalkerCB { class WalkCB : public FsTreeWalkerCB {
public: public:
WalkCB(RclConfig *conf, RclMonitor *mon) WalkCB(RclConfig *conf, RclMonitor *mon, RclMonEventQueue *queue)
: m_conf(conf), m_mon(mon) : m_conf(conf), m_mon(mon), m_queue(queue)
{} {}
virtual ~WalkCB() virtual ~WalkCB()
{} {}
virtual FsTreeWalker::Status virtual FsTreeWalker::Status
processone(const string &fn, const struct stat *st, processone(const string &fn, const struct stat *st, FsTreeWalker::CbFlag flg)
FsTreeWalker::CbFlag flg)
{ {
LOGDEB2(("rclMonRcvRun: processone %s m_mon %p m_mon->ok %d\n", LOGDEB2(("rclMonRcvRun: processone %s m_mon %p m_mon->ok %d\n",
fn.c_str(), m_mon, m_mon?m_mon->ok():0)); fn.c_str(), m_mon, m_mon?m_mon->ok():0));
// Create watch when entering directory
if (flg == FsTreeWalker::FtwDirEnter) { if (flg == FsTreeWalker::FtwDirEnter) {
// Empty whatever events we may already have on queue
while (m_queue->ok() && m_mon->ok()) {
RclMonEvent ev;
if (m_mon->getEvent(ev, 0)) {
m_queue->pushEvent(ev);
} else {
break;
}
}
if (!m_mon || !m_mon->ok() || !m_mon->addWatch(fn, *st)) if (!m_mon || !m_mon->ok() || !m_mon->addWatch(fn, *st))
return FsTreeWalker::FtwError; return FsTreeWalker::FtwError;
} }
return FsTreeWalker::FtwOk; return FsTreeWalker::FtwOk;
} }
private: private:
RclConfig *m_conf; RclConfig *m_conf;
RclMonitor *m_mon; RclMonitor *m_mon;
RclMonEventQueue *m_queue;
}; };
/** Main thread routine: create watches, then wait for events an queue them */ /** Main thread routine: create watches, then forever wait for and queue events */
void *rclMonRcvRun(void *q) void *rclMonRcvRun(void *q)
{ {
RclMonEventQueue *queue = (RclMonEventQueue *)q; RclMonEventQueue *queue = (RclMonEventQueue *)q;
RclMonitor *mon;
LOGDEB(("rclMonRcvRun: running\n")); LOGDEB(("rclMonRcvRun: running\n"));
// Create the fam/whatever interface object
RclMonitor *mon;
if ((mon = makeMonitor()) == 0) { if ((mon = makeMonitor()) == 0) {
LOGERR(("rclMonRcvRun: makeMonitor failed\n")); LOGERR(("rclMonRcvRun: makeMonitor failed\n"));
rclEQ.setTerminate(); queue->setTerminate();
return 0; return 0;
} }
// Get top directories from config and walk trees to add watches // Get top directories from config
FsTreeWalker walker;
WalkCB walkcb(queue->getConfig(), mon);
list<string> tdl = queue->getConfig()->getTopdirs(); list<string> tdl = queue->getConfig()->getTopdirs();
if (tdl.empty()) { if (tdl.empty()) {
LOGERR(("rclMonRcvRun:: top directory list (topdirs param.) not" LOGERR(("rclMonRcvRun:: top directory list (topdirs param.) not"
"found in config or Directory list parse error")); "found in config or Directory list parse error"));
rclEQ.setTerminate(); queue->setTerminate();
return 0; return 0;
} }
// Walk the directory trees to add watches
FsTreeWalker walker;
WalkCB walkcb(queue->getConfig(), mon, queue);
for (list<string>::iterator it = tdl.begin(); it != tdl.end(); it++) { for (list<string>::iterator it = tdl.begin(); it != tdl.end(); it++) {
queue->getConfig()->setKeyDir(*it); queue->getConfig()->setKeyDir(*it);
// Adjust the skipped names according to config
walker.clearSkippedNames(); walker.clearSkippedNames();
string skipped; string skipped;
if (queue->getConfig()->getConfParam("skippedNames", skipped)) { if (queue->getConfig()->getConfParam("skippedNames", skipped)) {
@ -112,19 +133,16 @@ void *rclMonRcvRun(void *q)
} }
// Forever wait for monitoring events and add them to queue: // Forever wait for monitoring events and add them to queue:
LOGDEB2(("rclMonRcvRun: waiting for events. rclEQ.ok() %d\n", rclEQ.ok())); LOGDEB2(("rclMonRcvRun: waiting for events. queue->ok() %d\n", queue->ok()));
while (rclEQ.ok()) { while (queue->ok() && mon->ok()) {
if (!mon->ok())
break;
RclMonEvent ev; RclMonEvent ev;
if (mon->getEvent(ev)) { if (mon->getEvent(ev)) {
rclEQ.pushEvent(ev); queue->pushEvent(ev);
} }
if (!mon->ok())
break;
} }
LOGDEB(("rclMonRcvRun: exiting\n")); LOGDEB(("rclMonRcvRun: exiting\n"));
rclEQ.setTerminate(); queue->setTerminate();
return 0; return 0;
} }
@ -133,6 +151,7 @@ void *rclMonRcvRun(void *q)
#include <fam.h> #include <fam.h>
#include <sys/select.h> #include <sys/select.h>
// Translate event code to string (debug)
static const char *event_name(int code) static const char *event_name(int code)
{ {
static const char *famevent[] = { static const char *famevent[] = {
@ -149,21 +168,22 @@ static const char *event_name(int code)
}; };
static char unknown_event[20]; static char unknown_event[20];
if (code < FAMChanged || code > FAMEndExist) if (code < FAMChanged || code > FAMEndExist) {
{
sprintf(unknown_event, "unknown (%d)", code); sprintf(unknown_event, "unknown (%d)", code);
return unknown_event; return unknown_event;
} }
return famevent[code]; return famevent[code];
} }
// FAM based monitor class /** FAM based monitor class. We have to keep a record of FAM watch
request numbers to directory names as events only contain the
request number and file name, not the full path */
class RclFAM : public RclMonitor { class RclFAM : public RclMonitor {
public: public:
RclFAM(); RclFAM();
virtual ~RclFAM(); virtual ~RclFAM();
virtual bool addWatch(const string& path, const struct stat& st); virtual bool addWatch(const string& path, const struct stat& st);
virtual bool getEvent(RclMonEvent& ev); virtual bool getEvent(RclMonEvent& ev, int secs = -1);
bool ok() {return m_ok;} bool ok() {return m_ok;}
private: private:
@ -213,7 +233,7 @@ bool RclFAM::addWatch(const string& path, const struct stat& st)
return true; return true;
} }
bool RclFAM::getEvent(RclMonEvent& ev) bool RclFAM::getEvent(RclMonEvent& ev, int secs)
{ {
if (!ok()) if (!ok())
return false; return false;
@ -224,16 +244,22 @@ bool RclFAM::getEvent(RclMonEvent& ev)
FD_ZERO(&readfds); FD_ZERO(&readfds);
FD_SET(fam_fd, &readfds); FD_SET(fam_fd, &readfds);
// Note: can't see a reason to set a timeout. Only reason we might
// want out is signal which will break the select call anyway (I
// don't think that there is any system still using the old bsd-type
// syscall re-entrance after signal).
LOGDEB(("RclFAM::getEvent: select\n")); LOGDEB(("RclFAM::getEvent: select\n"));
if (select(fam_fd + 1, &readfds, 0, 0, 0) < 0) { struct timeval timeout;
if (secs >= 0) {
memset(&timeout, 0, sizeof(timeout));
timeout.tv_sec = secs;
}
int ret;
if ((ret=select(fam_fd + 1, &readfds, 0, 0, secs >= 0 ? &timeout : 0)) < 0) {
LOGERR(("RclFAM::getEvent: select failed, errno %d\n", errno)); LOGERR(("RclFAM::getEvent: select failed, errno %d\n", errno));
close(); close();
return false; return false;
} else if (ret == 0) {
// timeout
return false;
} }
if (!FD_ISSET(fam_fd, &readfds)) if (!FD_ISSET(fam_fd, &readfds))
return false; return false;
@ -243,8 +269,10 @@ bool RclFAM::getEvent(RclMonEvent& ev)
close(); close();
return false; return false;
} }
map<int,string>::const_iterator it; map<int,string>::const_iterator it;
if ((it = m_reqtodir.find(fe.fr.reqnum)) != m_reqtodir.end()) { if ((fe.filename[0] != '/') &&
(it = m_reqtodir.find(fe.fr.reqnum)) != m_reqtodir.end()) {
ev.m_path = path_cat(it->second, fe.filename); ev.m_path = path_cat(it->second, fe.filename);
} else { } else {
ev.m_path = fe.filename; ev.m_path = fe.filename;
@ -279,7 +307,7 @@ bool RclFAM::getEvent(RclMonEvent& ev)
return true; return true;
} }
// The monitor factory // The monitor 'factory'
static RclMonitor *makeMonitor() static RclMonitor *makeMonitor()
{ {
return new RclFAM; return new RclFAM;

View File

@ -1,5 +1,5 @@
#ifndef lint #ifndef lint
static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.24 2006-10-17 14:41:59 dockes Exp $ (C) 2004 J.F.Dockes"; static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.25 2006-10-22 14:47:14 dockes Exp $ (C) 2004 J.F.Dockes";
#endif #endif
/* /*
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
@ -42,6 +42,27 @@ using namespace std;
ConfIndexer *confindexer; ConfIndexer *confindexer;
DbIndexer *dbindexer; DbIndexer *dbindexer;
// Global stop request flag. It is raised by sigcleanup() and polled
// through MyUpdater::update().
int stopindexing;

// Status updater mainly used to request that indexing stop. The
// current file name made available by the base class is not used.
class MyUpdater : public DbIxStatusUpdater {
 public:
    // Returns false once a stop has been requested, true otherwise.
    virtual bool update() {
        return stopindexing == 0;
    }
};
MyUpdater updater;
// Stop-request callback: traces the call on stderr and in the debug log,
// then raises the global stopindexing flag. The 'sig' argument is not used.
// NOTE(review): the registration of this function is not visible here. If
// it is installed as a signal handler, fprintf() (and presumably the
// logging macro) are not async-signal-safe -- confirm this is acceptable.
static void sigcleanup(int sig)
{
fprintf(stderr, "sigcleanup\n");
LOGDEB(("sigcleanup\n"));
// Polled by MyUpdater::update(), which then returns false.
stopindexing = 1;
}
static bool makeDbIndexer(RclConfig *config) static bool makeDbIndexer(RclConfig *config)
{ {
string dbdir = config->getDbDir(); string dbdir = config->getDbDir();
@ -57,7 +78,7 @@ static bool makeDbIndexer(RclConfig *config)
} }
if (!dbindexer) if (!dbindexer)
dbindexer = new DbIndexer(config, dbdir); dbindexer = new DbIndexer(config, dbdir, &updater);
return true; return true;
} }
@ -116,6 +137,40 @@ bool indexfiles(RclConfig *config, const list<string> &filenames)
return dbindexer->indexFiles(myfiles); return dbindexer->indexFiles(myfiles);
} }
// Purge the database entries for a list of files.
//
// Returns true right away when the list is empty. Otherwise the file
// names are canonicalized, the indexer is created if needed, and the
// actual work is delegated to DbIndexer::purgeFiles(). Returns false if
// the top directory list cannot be obtained from the configuration or
// if the indexer cannot be set up.
bool purgefiles(RclConfig *config, const list<string> &filenames)
{
    if (filenames.empty()) {
        return true;
    }

    // Fetch the top directory list from the configuration the first
    // time it is needed.
    if (o_tdl.empty()) {
        o_tdl = config->getTopdirs();
    }
    if (o_tdl.empty()) {
        fprintf(stderr, "Top directory list (topdirs param.) "
                "not found in config or Directory list parse error");
        return false;
    }

    // Work on canonical paths.
    list<string> canonical;
    list<string>::const_iterator pos = filenames.begin();
    while (pos != filenames.end()) {
        canonical.push_back(path_canon(*pos));
        ++pos;
    }

    // Note: the file names should really be sorted against the topdirs
    // here, checking for different databases. As there can currently be
    // only one database per configuration, the key directory is simply
    // set from the first file (which is not really needed...) before
    // creating the indexer/db.
    config->setKeyDir(path_getfather(canonical.front()));

    if (makeDbIndexer(config) && dbindexer) {
        return dbindexer->purgeFiles(canonical);
    }
    return false;
}
// Create additional stem database // Create additional stem database
static bool createstemdb(RclConfig *config, const string &lang) static bool createstemdb(RclConfig *config, const string &lang)
{ {
@ -134,26 +189,6 @@ static void cleanup()
dbindexer = 0; dbindexer = 0;
} }
int stopindexing;
// Mainly used to request indexing stop, we currently do not use the
// current file name
class MyUpdater : public DbIxStatusUpdater {
public:
virtual bool update() {
if (stopindexing) {
return false;
}
return true;
}
};
MyUpdater updater;
static void sigcleanup(int sig)
{
fprintf(stderr, "sigcleanup\n");
stopindexing = 1;
}
static const char *thisprog; static const char *thisprog;
static int op_flags; static int op_flags;
#define OPT_MOINS 0x1 #define OPT_MOINS 0x1

View File

@ -5,6 +5,8 @@ XAPIANCXXFLAGS=@XAPIANCXXFLAGS@
LIBICONV=@LIBICONV@ LIBICONV=@LIBICONV@
INCICONV=@INCICONV@ INCICONV=@INCICONV@
LIBFAM = @LIBFAM@
prefix = @prefix@ prefix = @prefix@
datadir = @datadir@ datadir = @datadir@
RECOLL_DATADIR = ${datadir}/recoll RECOLL_DATADIR = ${datadir}/recoll

View File

@ -1,5 +1,5 @@
#ifndef lint #ifndef lint
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.80 2006-10-09 16:37:08 dockes Exp $ (C) 2004 J.F.Dockes"; static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.81 2006-10-22 14:47:13 dockes Exp $ (C) 2004 J.F.Dockes";
#endif #endif
/* /*
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
@ -56,18 +56,21 @@ using namespace std;
#ifndef NO_NAMESPACES #ifndef NO_NAMESPACES
namespace Rcl { namespace Rcl {
#endif #endif
// Truncate longer path and uniquize with hash . The goal for this is // Max length for path terms stored for each document. Truncate
// to avoid xapian max term length limitations, not to gain space (we // longer path and uniquize with hash. The goal for this is to avoid
// gain very little even with very short maxlens like 30) // xapian max term length limitations, not to gain space (we gain very
// little even with very short maxlens like 30)
#define PATHHASHLEN 150 #define PATHHASHLEN 150
// Synthetic abstract marker (to discriminate from abstract actually // Synthetic abstract marker (to discriminate from abstract actually
// found in doc) // found in doc)
const static string rclSyntAbs = "?!#@"; const static string rclSyntAbs = "?!#@";
// Data for a xapian database. There could actually be 2 different // A class for data and methods that would have to expose
// ones for indexing or query as there is not much in common. // Xapian-specific stuff if they were in Rcl::Db. There could actually be
// 2 different ones for indexing or query as there is not much in
// common.
class Native { class Native {
public: public:
Db *m_db; Db *m_db;
@ -96,6 +99,35 @@ class Native {
Xapian::docid docid, Xapian::docid docid,
const list<string>& terms); const list<string>& terms);
/** Compute list of subdocuments for a given path (given by hash) */
bool subDocs(const string &hash, vector<Xapian::docid>& docids) {
docids.clear();
string qterm = "Q"+ hash + "|";
Xapian::Database db = m_iswritable ? wdb: db;
Xapian::TermIterator it = db.allterms_begin();
it.skip_to(qterm);
string ermsg;
try {
for (;it != db.allterms_end(); it++) {
// If current term does not begin with qterm or has
// another |, not the same file
if ((*it).find(qterm) != 0 ||
(*it).find_last_of("|") != qterm.length() -1)
break;
docids.push_back(*(db.postlist_begin(*it)));
}
return true;
} catch (const Xapian::Error &e) {
ermsg = e.get_msg().c_str();
} catch (...) {
ermsg= "Unknown error";
}
LOGERR(("Rcl::Db::subDocs: %s\n", ermsg.c_str()));
return false;
}
Native(Db *db) Native(Db *db)
: m_db(db), : m_db(db),
m_isopen(false), m_iswritable(false), m_mode(Db::DbRO), enquire(0) m_isopen(false), m_iswritable(false), m_mode(Db::DbRO), enquire(0)
@ -655,7 +687,6 @@ bool Db::needUpdate(const string &filename, const struct stat *stp)
pathHash(filename, hash, PATHHASHLEN); pathHash(filename, hash, PATHHASHLEN);
string pterm = "P" + hash; string pterm = "P" + hash;
const char *ermsg; const char *ermsg;
string qterm = "Q"+ hash + "|";
// Look for all documents with this path. We need to look at all // Look for all documents with this path. We need to look at all
// to set their existence flag. We check the update time on the // to set their existence flag. We check the update time on the
@ -697,20 +728,16 @@ bool Db::needUpdate(const string &filename, const struct stat *stp)
m_ndb->updated[*docid] = true; m_ndb->updated[*docid] = true;
// Set the existence flag for all the subdocs (if any) // Set the existence flag for all the subdocs (if any)
Xapian::TermIterator it = m_ndb->wdb.allterms_begin(); vector<Xapian::docid> docids;
it.skip_to(qterm); if (!m_ndb->subDocs(hash, docids)) {
LOGDEB2(("First qterm: [%s]\n", (*it).c_str())); LOGERR(("Rcl::Db::needUpdate: can't get subdocs list\n"));
for (;it != m_ndb->wdb.allterms_end(); it++) { return true;
// If current term does not begin with qterm or has another |, not }
// the same file for (vector<Xapian::docid>::iterator it = docids.begin();
if ((*it).find(qterm) != 0 || it != docids.end(); it++) {
(*it).find_last_of("|") != qterm.length() -1) if (*it < m_ndb->updated.size()) {
break; LOGDEB2(("Db::needUpdate: set flag for docid %d\n", *it));
docid = m_ndb->wdb.postlist_begin(*it); m_ndb->updated[*it] = true;
if (*docid < m_ndb->updated.size()) {
LOGDEB2(("Db::needUpdate: set exist flag for docid %d [%s]\n",
*docid, (*it).c_str()));
m_ndb->updated[*docid] = true;
} }
} }
return false; return false;
@ -764,7 +791,9 @@ bool Db::createStemDb(const string& lang)
/** /**
* This is called at the end of an indexing session, to delete the * This is called at the end of an indexing session, to delete the
* documents for files that are no longer there. * documents for files that are no longer there. This can ONLY be called
* after a full file-system tree walk, else the file existence flags will
* be wrong.
*/ */
bool Db::purge() bool Db::purge()
{ {
@ -806,6 +835,47 @@ bool Db::purge()
return true; return true;
} }
/** Delete document(s) for given filename */
bool Db::purgeFile(const string &fn)
{
LOGDEB(("Db:purgeFile: [%s]\n", fn.c_str()));
if (m_ndb == 0)
return false;
Xapian::WritableDatabase db = m_ndb->wdb;
string hash;
pathHash(fn, hash, PATHHASHLEN);
string pterm = "P" + hash;
const char *ermsg = "";
try {
Xapian::PostingIterator docid = db.postlist_begin(pterm);
if (docid == db.postlist_end(pterm))
return true;
LOGDEB(("purgeFile: delete docid %d\n", *docid));
db.delete_document(*docid);
vector<Xapian::docid> docids;
m_ndb->subDocs(hash, docids);
LOGDEB(("purgeFile: subdocs cnt %d\n", docids.size()));
for (vector<Xapian::docid>::iterator it = docids.begin();
it != docids.end(); it++) {
LOGDEB2(("Db::purgeFile: delete subdoc %d\n", *it));
db.delete_document(*it);
}
return true;
} catch (const Xapian::Error &e) {
ermsg = e.get_msg().c_str();
} catch (const string &s) {
ermsg = s.c_str();
} catch (const char *s) {
ermsg = s;
} catch (...) {
ermsg = "Caught unknown exception";
}
if (*ermsg) {
LOGERR(("Db::purgeFile: %s\n", ermsg));
}
return false;
}
// Splitter callback for breaking query into terms // Splitter callback for breaking query into terms
class wsQData : public TextSplitCB { class wsQData : public TextSplitCB {
public: public:
@ -1378,6 +1448,7 @@ bool Db::getDoc(int exti, Doc &doc, int *percent)
return m_ndb->dbDataToRclDoc(data, doc, m_qOpts, docid, terms); return m_ndb->dbDataToRclDoc(data, doc, m_qOpts, docid, terms);
} }
// Retrieve document defined by file name and internal path. // Retrieve document defined by file name and internal path.
bool Db::getDoc(const string &fn, const string &ipath, Doc &doc, int *pc) bool Db::getDoc(const string &fn, const string &ipath, Doc &doc, int *pc)
{ {

View File

@ -16,7 +16,7 @@
*/ */
#ifndef _DB_H_INCLUDED_ #ifndef _DB_H_INCLUDED_
#define _DB_H_INCLUDED_ #define _DB_H_INCLUDED_
/* @(#$Id: rcldb.h,v 1.37 2006-10-09 16:37:08 dockes Exp $ (C) 2004 J.F.Dockes */ /* @(#$Id: rcldb.h,v 1.38 2006-10-22 14:47:13 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string> #include <string>
#include <list> #include <list>
@ -123,16 +123,35 @@ class Db {
bool close(); bool close();
bool isopen(); bool isopen();
int docCnt(); /// Return total docs in db /** Return total docs in db */
int docCnt();
// Update-related functions
/* Update-related functions */
/** Add document. The Doc class should have been filled as much as
possible depending on the document type */
bool add(const string &filename, const Doc &doc, const struct stat *stp); bool add(const string &filename, const Doc &doc, const struct stat *stp);
/** Test if the db entry for the given filename/stat is up to date */
bool needUpdate(const string &filename, const struct stat *stp); bool needUpdate(const string &filename, const struct stat *stp);
/** Remove documents that no longer exist in the file system. This
depends on the update map, which is built during
indexation. This should only be called after a full walk of
the file system, else the update map will not be complete, and
many documents will be deleted that shouldn't */
bool purge(); bool purge();
/** Delete document(s) for given filename */
bool purgeFile(const string &filename);
/** Create stem expansion database for given language. */
bool createStemDb(const string &lang); bool createStemDb(const string &lang);
/** Delete stem expansion database for given language. */
bool deleteStemDb(const string &lang); bool deleteStemDb(const string &lang);
// Query-related functions /* Query-related functions */
// Parse query string and initialize query // Parse query string and initialize query
bool setQuery(AdvSearchData &q, int opts = QO_NONE, bool setQuery(AdvSearchData &q, int opts = QO_NONE,
@ -144,11 +163,11 @@ class Db {
// Stem expansion is performed if lang is not empty // Stem expansion is performed if lang is not empty
list<string> completions(const string &s, const string &lang, int max=20); list<string> completions(const string &s, const string &lang, int max=20);
/// Add extra database for querying /** Add extra database for querying */
bool addQueryDb(const string &dir); bool addQueryDb(const string &dir);
/// Remove extra database. if dir == "", remove all. /** Remove extra database. if dir == "", remove all. */
bool rmQueryDb(const string &dir); bool rmQueryDb(const string &dir);
/// Tell if directory seems to hold xapian db /** Tell if directory seems to hold xapian db */
static bool testDbDir(const string &dir); static bool testDbDir(const string &dir);
/** Get document at rank i in current query. /** Get document at rank i in current query.