monitor: purge docs for deleted files from db
This commit is contained in:
parent
4269a149b2
commit
4e0d1e2483
18
src/configure
vendored
18
src/configure
vendored
@ -791,9 +791,9 @@ Optional Packages:
|
||||
--with-aspell Use aspell spelling package to provide term
|
||||
expansion to other spellings
|
||||
--with-fam Use File Alteration Monitor for almost real time
|
||||
indexing of modified files. Give directory where fam
|
||||
library lives as argument if this is not found by
|
||||
configure.
|
||||
indexing of modified files. Give the fam/gamin
|
||||
library as argument (ie: /usr/lib/libfam.so) if
|
||||
configure does not find the right one.
|
||||
|
||||
Some influential environment variables:
|
||||
CC C compiler command
|
||||
@ -1286,11 +1286,11 @@ case $withFam in
|
||||
no);;
|
||||
yes)
|
||||
for dir in /usr/local/lib /usr/lib;do
|
||||
if test -f $dir/libfam.so ; then famLibDir=$dir;break;fi
|
||||
if test -f $dir/libfam.so ; then famLib=$dir/libfam.so;break;fi
|
||||
done
|
||||
;;
|
||||
*) # The argument should be the path to the fam library
|
||||
famLibDir=$withFam
|
||||
famLib=$withFam
|
||||
;;
|
||||
esac
|
||||
|
||||
@ -1305,14 +1305,18 @@ cat >>confdefs.h <<\_ACEOF
|
||||
#define RCL_USE_FAM 1
|
||||
_ACEOF
|
||||
|
||||
if test X$famLibDir != X ; then
|
||||
if test X$famLib != X ; then
|
||||
famLibDir=`dirname $famLib`
|
||||
famBase=`dirname $famLibDir`
|
||||
famBLib=`basename $famLib .so | sed -e s/lib//`
|
||||
if test ! -f $famBase/include/fam.h ; then
|
||||
{ { echo "$as_me:$LINENO: error: fam.h not found in $famBase/include. Specify --with-fam=no to disable fam support" >&5
|
||||
echo "$as_me: error: fam.h not found in $famBase/include. Specify --with-fam=no to disable fam support" >&2;}
|
||||
{ (exit 1); exit 1; }; }
|
||||
fi
|
||||
LIBFAM="-L$famLibDir -lfam"
|
||||
LIBFAM="-L$famLibDir -l$famBLib"
|
||||
{ echo "$as_me:$LINENO: fam library directive: $LIBFAM" >&5
|
||||
echo "$as_me: fam library directive: $LIBFAM" >&6;}
|
||||
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
#define FAM_INCLUDE "$famBase/include/fam.h"
|
||||
|
||||
@ -55,29 +55,32 @@ fi
|
||||
# Real time monitoring with FAM
|
||||
AC_ARG_WITH(fam,
|
||||
AC_HELP_STRING([--with-fam],
|
||||
[Use File Alteration Monitor for almost real time indexing of modified files. Give directory where fam library lives as argument if this is not found by configure.]),
|
||||
[Use File Alteration Monitor for almost real time indexing of modified files. Give the fam/gamin library as argument (ie: /usr/lib/libfam.so) if configure does not find the right one.]),
|
||||
withFam=$withval, withFam=no)
|
||||
case $withFam in
|
||||
no);;
|
||||
yes)
|
||||
for dir in /usr/local/lib /usr/lib;do
|
||||
if test -f $dir/libfam.so ; then famLibDir=$dir;break;fi
|
||||
if test -f $dir/libfam.so ; then famLib=$dir/libfam.so;break;fi
|
||||
done
|
||||
;;
|
||||
*) # The argument should be the path to the fam library
|
||||
famLibDir=$withFam
|
||||
famLib=$withFam
|
||||
;;
|
||||
esac
|
||||
|
||||
if test X$withFam != Xno ; then
|
||||
AC_DEFINE(RCL_MONITOR, 1, [Real time monitoring option])
|
||||
AC_DEFINE(RCL_USE_FAM, 1, [Compile the fam interface])
|
||||
if test X$famLibDir != X ; then
|
||||
if test X$famLib != X ; then
|
||||
famLibDir=`dirname $famLib`
|
||||
famBase=`dirname $famLibDir`
|
||||
famBLib=`basename $famLib .so | sed -e s/lib//`
|
||||
if test ! -f $famBase/include/fam.h ; then
|
||||
AC_MSG_ERROR([fam.h not found in $famBase/include. Specify --with-fam=no to disable fam support])
|
||||
fi
|
||||
LIBFAM="-L$famLibDir -lfam"
|
||||
LIBFAM="-L$famLibDir -l$famBLib"
|
||||
AC_MSG_NOTICE([fam library directive: $LIBFAM])
|
||||
AC_DEFINE_UNQUOTED(FAM_INCLUDE, "$famBase/include/fam.h",
|
||||
[Path to the fam api include file])
|
||||
else
|
||||
|
||||
@ -9,7 +9,8 @@ all: depend $(PROGS) $(BIGLIB)
|
||||
RECOLLINDEX_OBJS= recollindex.o rclmonrcv.o rclmonprc.o $(BIGLIB) $(MIMELIB)
|
||||
recollindex : $(RECOLLINDEX_OBJS)
|
||||
$(CXX) $(ALL_CXXFLAGS) -o recollindex $(RECOLLINDEX_OBJS) \
|
||||
$(BSTATIC) $(LIBXAPIAN) $(LIBICONV) $(BDYNAMIC) -lfam $(LIBSYS)
|
||||
$(BSTATIC) $(LIBXAPIAN) $(LIBICONV) $(BDYNAMIC) \
|
||||
$(LIBFAM) $(LIBSYS)
|
||||
recollindex.o : recollindex.cpp
|
||||
$(CXX) $(ALL_CXXFLAGS) -c -o recollindex.o $<
|
||||
rclmonrcv.o : rclmonrcv.cpp
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.38 2006-10-16 15:33:08 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.39 2006-10-22 14:47:13 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -223,13 +223,13 @@ bool DbIndexer::indexFiles(const list<string> &filenames)
|
||||
if (m_config->getConfParam("idxabsmlen", &abslen))
|
||||
m_db.setAbstractParams(abslen, -1, -1);
|
||||
struct stat stb;
|
||||
if (stat(it->c_str(), &stb) != 0) {
|
||||
LOGERR(("DbIndexer::indexFiles: stat(%s): %s", it->c_str(),
|
||||
if (lstat(it->c_str(), &stb) != 0) {
|
||||
LOGERR(("DbIndexer::indexFiles: lstat(%s): %s", it->c_str(),
|
||||
strerror(errno)));
|
||||
continue;
|
||||
}
|
||||
if (!S_ISREG(stb.st_mode)) {
|
||||
LOGERR(("DbIndexer::indexFiles: %s: not a regular file\n",
|
||||
LOGDEB2(("DbIndexer::indexFiles: %s: not a regular file\n",
|
||||
it->c_str()));
|
||||
continue;
|
||||
}
|
||||
@ -257,7 +257,7 @@ bool DbIndexer::indexFiles(const list<string> &filenames)
|
||||
|
||||
if (processone(*it, &stb, FsTreeWalker::FtwRegular) !=
|
||||
FsTreeWalker::FtwOk) {
|
||||
LOGERR(("DbIndexer::indexFiles: Database error\n"));
|
||||
LOGERR(("DbIndexer::indexFiles: processone failed\n"));
|
||||
return false;
|
||||
}
|
||||
skipped:
|
||||
@ -273,6 +273,31 @@ bool DbIndexer::indexFiles(const list<string> &filenames)
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/** Purge docs for given files out of the database */
|
||||
bool DbIndexer::purgeFiles(const list<string> &filenames)
|
||||
{
|
||||
if (!init())
|
||||
return false;
|
||||
|
||||
list<string>::const_iterator it;
|
||||
for (it = filenames.begin(); it != filenames.end(); it++) {
|
||||
if (!m_db.purgeFile(*it)) {
|
||||
LOGERR(("DbIndexer::purgeFiles: Database error\n"));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// The close would be done in our destructor, but we want status here
|
||||
if (!m_db.close()) {
|
||||
LOGERR(("DbIndexer::purgefiles: error closing database in %s\n",
|
||||
m_dbdir.c_str()));
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/// This method gets called for every file and directory found by the
|
||||
/// tree walker.
|
||||
///
|
||||
@ -308,7 +333,7 @@ DbIndexer::processone(const std::string &fn, const struct stat *stp,
|
||||
// without mime type will not be purged from the db, resulting
|
||||
// in possible 'cannot intern file' messages at query time...
|
||||
if (!m_db.needUpdate(fn, stp)) {
|
||||
LOGDEB(("indexfile: up to date: %s\n", fn.c_str()));
|
||||
LOGDEB(("processone: up to date: %s\n", fn.c_str()));
|
||||
if (m_updater) {
|
||||
m_updater->status.fn = fn;
|
||||
if (!m_updater->update()) {
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
*/
|
||||
#ifndef _INDEXER_H_INCLUDED_
|
||||
#define _INDEXER_H_INCLUDED_
|
||||
/* @(#$Id: indexer.h,v 1.19 2006-10-16 15:33:08 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: indexer.h,v 1.20 2006-10-22 14:47:13 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include <string>
|
||||
#include <list>
|
||||
@ -116,6 +116,9 @@ class DbIndexer : public FsTreeWalkerCB {
|
||||
/** Index a list of files. No db cleaning or stemdb updating */
|
||||
bool indexFiles(const std::list<string> &files);
|
||||
|
||||
/** Purge a list of files. */
|
||||
bool purgeFiles(const std::list<string> &files);
|
||||
|
||||
/** Create stem database for given language */
|
||||
bool createStemDb(const string &lang);
|
||||
|
||||
@ -141,8 +144,9 @@ class DbIndexer : public FsTreeWalkerCB {
|
||||
bool init(bool rst = false);
|
||||
};
|
||||
|
||||
/** Helper method in recollindex.cpp for initial checks/setup to index
|
||||
/** Helper methods in recollindex.cpp for initial checks/setup to index
|
||||
* a list of files (either from the monitor or the command line) */
|
||||
extern bool indexfiles(RclConfig *config, const list<string> &filenames);
|
||||
extern bool purgefiles(RclConfig *config, const list<string> &filenames);
|
||||
|
||||
#endif /* _INDEXER_H_INCLUDED_ */
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
|
||||
#ifdef RCL_MONITOR
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: rclmonprc.cpp,v 1.2 2006-10-17 14:41:59 dockes Exp $ (C) 2006 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: rclmonprc.cpp,v 1.3 2006-10-22 14:47:13 dockes Exp $ (C) 2006 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -117,11 +117,13 @@ RclConfig *RclMonEventQueue::getConfig()
|
||||
return m_data->m_config;
|
||||
}
|
||||
|
||||
extern int stopindexing;
|
||||
|
||||
bool RclMonEventQueue::ok()
|
||||
{
|
||||
if (m_data == 0)
|
||||
return false;
|
||||
return m_data->m_ok;
|
||||
return !stopindexing && m_data->m_ok;
|
||||
}
|
||||
|
||||
void RclMonEventQueue::setTerminate()
|
||||
@ -143,11 +145,9 @@ bool RclMonEventQueue::pushEvent(const RclMonEvent &ev)
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
pthread_t rcv_thrid;
|
||||
void *rcv_result;
|
||||
extern void *rclMonRcvRun(void *);
|
||||
extern int stopindexing;
|
||||
|
||||
bool startMonitor(RclConfig *conf, bool nofork)
|
||||
{
|
||||
@ -163,7 +163,7 @@ bool startMonitor(RclConfig *conf, bool nofork)
|
||||
LOGDEB(("start_monitoring: entering main loop\n"));
|
||||
while (rclEQ.wait()) {
|
||||
LOGDEB2(("startMonitor: wait returned\n"));
|
||||
if (stopindexing || !rclEQ.ok())
|
||||
if (!rclEQ.ok())
|
||||
break;
|
||||
list<string> modified;
|
||||
list<string> deleted;
|
||||
@ -191,11 +191,13 @@ bool startMonitor(RclConfig *conf, bool nofork)
|
||||
// Unlock queue before processing lists
|
||||
rclEQ.unlock();
|
||||
// Process
|
||||
indexfiles(conf, modified);
|
||||
if (!indexfiles(conf, modified))
|
||||
break;
|
||||
if (!purgefiles(conf, deleted))
|
||||
break;
|
||||
// Lock queue before waiting again
|
||||
rclEQ.lock();
|
||||
}
|
||||
LOGERR(("start_monitoring: rclEQ::wait() failed\n"));
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
#endif // RCL_MONITOR
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#include "autoconfig.h"
|
||||
#ifdef RCL_MONITOR
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: rclmonrcv.cpp,v 1.2 2006-10-17 14:41:59 dockes Exp $ (C) 2006 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: rclmonrcv.cpp,v 1.3 2006-10-22 14:47:13 dockes Exp $ (C) 2006 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -35,71 +35,92 @@ static char rcsid[] = "@(#$Id: rclmonrcv.cpp,v 1.2 2006-10-17 14:41:59 dockes Ex
|
||||
*/
|
||||
|
||||
|
||||
/** A small virtual interface for monitors. Suitable to let either of
|
||||
fam/gamin/ or raw imonitor hide behind */
|
||||
|
||||
/** A small virtual interface for monitors. Probably suitable to let
|
||||
either of fam/gamin or raw imonitor hide behind */
|
||||
class RclMonitor {
|
||||
public:
|
||||
RclMonitor(){}
|
||||
virtual ~RclMonitor() {}
|
||||
virtual bool addWatch(const string& path, const struct stat&) = 0;
|
||||
virtual bool getEvent(RclMonEvent& ev) = 0;
|
||||
virtual bool getEvent(RclMonEvent& ev, int secs = -1) = 0;
|
||||
virtual bool ok() = 0;
|
||||
};
|
||||
// Monitor factory
|
||||
|
||||
// Monitor factory. We only have one compiled-in kind at a time, no
|
||||
// need for a 'kind' parameter
|
||||
static RclMonitor *makeMonitor();
|
||||
|
||||
/** Class used to create the directory watches */
|
||||
/** This class is a callback for the file system tree walker
|
||||
class. The callback method alternatively creates the directory
|
||||
watches and flushes the event queue (to avoid a possible overflow
|
||||
while we create the watches)*/
|
||||
class WalkCB : public FsTreeWalkerCB {
|
||||
public:
|
||||
WalkCB(RclConfig *conf, RclMonitor *mon)
|
||||
: m_conf(conf), m_mon(mon)
|
||||
WalkCB(RclConfig *conf, RclMonitor *mon, RclMonEventQueue *queue)
|
||||
: m_conf(conf), m_mon(mon), m_queue(queue)
|
||||
{}
|
||||
virtual ~WalkCB()
|
||||
{}
|
||||
|
||||
virtual FsTreeWalker::Status
|
||||
processone(const string &fn, const struct stat *st,
|
||||
FsTreeWalker::CbFlag flg)
|
||||
processone(const string &fn, const struct stat *st, FsTreeWalker::CbFlag flg)
|
||||
{
|
||||
LOGDEB2(("rclMonRcvRun: processone %s m_mon %p m_mon->ok %d\n",
|
||||
fn.c_str(), m_mon, m_mon?m_mon->ok():0));
|
||||
// Create watch when entering directory
|
||||
if (flg == FsTreeWalker::FtwDirEnter) {
|
||||
// Empty whatever events we may already have on queue
|
||||
while (m_queue->ok() && m_mon->ok()) {
|
||||
RclMonEvent ev;
|
||||
if (m_mon->getEvent(ev, 0)) {
|
||||
m_queue->pushEvent(ev);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!m_mon || !m_mon->ok() || !m_mon->addWatch(fn, *st))
|
||||
return FsTreeWalker::FtwError;
|
||||
}
|
||||
return FsTreeWalker::FtwOk;
|
||||
}
|
||||
|
||||
private:
|
||||
RclConfig *m_conf;
|
||||
RclMonitor *m_mon;
|
||||
RclConfig *m_conf;
|
||||
RclMonitor *m_mon;
|
||||
RclMonEventQueue *m_queue;
|
||||
};
|
||||
|
||||
/** Main thread routine: create watches, then wait for events an queue them */
|
||||
/** Main thread routine: create watches, then forever wait for and queue events */
|
||||
void *rclMonRcvRun(void *q)
|
||||
{
|
||||
RclMonEventQueue *queue = (RclMonEventQueue *)q;
|
||||
RclMonitor *mon;
|
||||
|
||||
LOGDEB(("rclMonRcvRun: running\n"));
|
||||
|
||||
// Create the fam/whatever interface object
|
||||
RclMonitor *mon;
|
||||
if ((mon = makeMonitor()) == 0) {
|
||||
LOGERR(("rclMonRcvRun: makeMonitor failed\n"));
|
||||
rclEQ.setTerminate();
|
||||
queue->setTerminate();
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Get top directories from config and walk trees to add watches
|
||||
FsTreeWalker walker;
|
||||
WalkCB walkcb(queue->getConfig(), mon);
|
||||
// Get top directories from config
|
||||
list<string> tdl = queue->getConfig()->getTopdirs();
|
||||
if (tdl.empty()) {
|
||||
LOGERR(("rclMonRcvRun:: top directory list (topdirs param.) not"
|
||||
"found in config or Directory list parse error"));
|
||||
rclEQ.setTerminate();
|
||||
queue->setTerminate();
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Walk the directory trees to add watches
|
||||
FsTreeWalker walker;
|
||||
WalkCB walkcb(queue->getConfig(), mon, queue);
|
||||
for (list<string>::iterator it = tdl.begin(); it != tdl.end(); it++) {
|
||||
queue->getConfig()->setKeyDir(*it);
|
||||
// Adjust the skipped names according to config
|
||||
walker.clearSkippedNames();
|
||||
string skipped;
|
||||
if (queue->getConfig()->getConfParam("skippedNames", skipped)) {
|
||||
@ -112,19 +133,16 @@ void *rclMonRcvRun(void *q)
|
||||
}
|
||||
|
||||
// Forever wait for monitoring events and add them to queue:
|
||||
LOGDEB2(("rclMonRcvRun: waiting for events. rclEQ.ok() %d\n", rclEQ.ok()));
|
||||
while (rclEQ.ok()) {
|
||||
if (!mon->ok())
|
||||
break;
|
||||
LOGDEB2(("rclMonRcvRun: waiting for events. queue->ok() %d\n", queue->ok()));
|
||||
while (queue->ok() && mon->ok()) {
|
||||
RclMonEvent ev;
|
||||
if (mon->getEvent(ev)) {
|
||||
rclEQ.pushEvent(ev);
|
||||
queue->pushEvent(ev);
|
||||
}
|
||||
if (!mon->ok())
|
||||
break;
|
||||
}
|
||||
|
||||
LOGDEB(("rclMonRcvRun: exiting\n"));
|
||||
rclEQ.setTerminate();
|
||||
queue->setTerminate();
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -133,6 +151,7 @@ void *rclMonRcvRun(void *q)
|
||||
#include <fam.h>
|
||||
#include <sys/select.h>
|
||||
|
||||
// Translate event code to string (debug)
|
||||
static const char *event_name(int code)
|
||||
{
|
||||
static const char *famevent[] = {
|
||||
@ -149,21 +168,22 @@ static const char *event_name(int code)
|
||||
};
|
||||
static char unknown_event[20];
|
||||
|
||||
if (code < FAMChanged || code > FAMEndExist)
|
||||
{
|
||||
if (code < FAMChanged || code > FAMEndExist) {
|
||||
sprintf(unknown_event, "unknown (%d)", code);
|
||||
return unknown_event;
|
||||
}
|
||||
return famevent[code];
|
||||
}
|
||||
|
||||
// FAM based monitor class
|
||||
/** FAM based monitor class. We have to keep a record of FAM watch
|
||||
request numbers to directory names as the events only contain the
|
||||
request number and file name, not the full path */
|
||||
class RclFAM : public RclMonitor {
|
||||
public:
|
||||
RclFAM();
|
||||
virtual ~RclFAM();
|
||||
virtual bool addWatch(const string& path, const struct stat& st);
|
||||
virtual bool getEvent(RclMonEvent& ev);
|
||||
virtual bool getEvent(RclMonEvent& ev, int secs = -1);
|
||||
bool ok() {return m_ok;}
|
||||
|
||||
private:
|
||||
@ -213,7 +233,7 @@ bool RclFAM::addWatch(const string& path, const struct stat& st)
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RclFAM::getEvent(RclMonEvent& ev)
|
||||
bool RclFAM::getEvent(RclMonEvent& ev, int secs)
|
||||
{
|
||||
if (!ok())
|
||||
return false;
|
||||
@ -224,16 +244,22 @@ bool RclFAM::getEvent(RclMonEvent& ev)
|
||||
FD_ZERO(&readfds);
|
||||
FD_SET(fam_fd, &readfds);
|
||||
|
||||
// Note: can't see a reason to set a timeout. Only reason we might
|
||||
// want out is signal which will break the select call anyway (I
|
||||
// don't think that there is any system still using the old bsd-type
|
||||
// syscall re-entrance after signal).
|
||||
LOGDEB(("RclFAM::getEvent: select\n"));
|
||||
if (select(fam_fd + 1, &readfds, 0, 0, 0) < 0) {
|
||||
struct timeval timeout;
|
||||
if (secs >= 0) {
|
||||
memset(&timeout, 0, sizeof(timeout));
|
||||
timeout.tv_sec = secs;
|
||||
}
|
||||
int ret;
|
||||
if ((ret=select(fam_fd + 1, &readfds, 0, 0, secs >= 0 ? &timeout : 0)) < 0) {
|
||||
LOGERR(("RclFAM::getEvent: select failed, errno %d\n", errno));
|
||||
close();
|
||||
return false;
|
||||
} else if (ret == 0) {
|
||||
// timeout
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!FD_ISSET(fam_fd, &readfds))
|
||||
return false;
|
||||
|
||||
@ -243,8 +269,10 @@ bool RclFAM::getEvent(RclMonEvent& ev)
|
||||
close();
|
||||
return false;
|
||||
}
|
||||
|
||||
map<int,string>::const_iterator it;
|
||||
if ((it = m_reqtodir.find(fe.fr.reqnum)) != m_reqtodir.end()) {
|
||||
if ((fe.filename[0] != '/') &&
|
||||
(it = m_reqtodir.find(fe.fr.reqnum)) != m_reqtodir.end()) {
|
||||
ev.m_path = path_cat(it->second, fe.filename);
|
||||
} else {
|
||||
ev.m_path = fe.filename;
|
||||
@ -279,7 +307,7 @@ bool RclFAM::getEvent(RclMonEvent& ev)
|
||||
return true;
|
||||
}
|
||||
|
||||
// The monitor factory
|
||||
// The monitor 'factory'
|
||||
static RclMonitor *makeMonitor()
|
||||
{
|
||||
return new RclFAM;
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.24 2006-10-17 14:41:59 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.25 2006-10-22 14:47:14 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -42,6 +42,27 @@ using namespace std;
|
||||
ConfIndexer *confindexer;
|
||||
DbIndexer *dbindexer;
|
||||
|
||||
int stopindexing;
|
||||
// Mainly used to request indexing stop, we currently do not use the
|
||||
// current file name
|
||||
class MyUpdater : public DbIxStatusUpdater {
|
||||
public:
|
||||
virtual bool update() {
|
||||
if (stopindexing) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
MyUpdater updater;
|
||||
|
||||
// Cleanup signal handler: prints and logs a trace, then sets the
// stopindexing flag. The sig parameter is not used.
// NOTE(review): fprintf() is not on the POSIX list of async-signal-safe
// functions, and LOGDEB presumably is not safe either -- confirm, and
// consider only setting the flag here.
static void sigcleanup(int sig)
|
||||
{
|
||||
fprintf(stderr, "sigcleanup\n");
|
||||
LOGDEB(("sigcleanup\n"));
|
||||
stopindexing = 1;
|
||||
}
|
||||
|
||||
static bool makeDbIndexer(RclConfig *config)
|
||||
{
|
||||
string dbdir = config->getDbDir();
|
||||
@ -57,7 +78,7 @@ static bool makeDbIndexer(RclConfig *config)
|
||||
}
|
||||
|
||||
if (!dbindexer)
|
||||
dbindexer = new DbIndexer(config, dbdir);
|
||||
dbindexer = new DbIndexer(config, dbdir, &updater);
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -116,6 +137,40 @@ bool indexfiles(RclConfig *config, const list<string> &filenames)
|
||||
return dbindexer->indexFiles(myfiles);
|
||||
}
|
||||
|
||||
// Delete a list of files.
|
||||
bool purgefiles(RclConfig *config, const list<string> &filenames)
|
||||
{
|
||||
if (filenames.empty())
|
||||
return true;
|
||||
|
||||
if (o_tdl.empty()) {
|
||||
o_tdl = config->getTopdirs();
|
||||
if (o_tdl.empty()) {
|
||||
fprintf(stderr, "Top directory list (topdirs param.) "
|
||||
"not found in config or Directory list parse error");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
list<string> myfiles;
|
||||
for (list<string>::const_iterator it = filenames.begin();
|
||||
it != filenames.end(); it++) {
|
||||
myfiles.push_back(path_canon(*it));
|
||||
}
|
||||
|
||||
// Note: we should sort the file names against the topdirs here
|
||||
// and check for different databases. But we can for now only have
|
||||
// one database per config, so we set the keydir from the first
|
||||
// file (which is not really needed...), create the indexer/db and
|
||||
// go:
|
||||
config->setKeyDir(path_getfather(*myfiles.begin()));
|
||||
|
||||
if (!makeDbIndexer(config) || !dbindexer)
|
||||
return false;
|
||||
else
|
||||
return dbindexer->purgeFiles(myfiles);
|
||||
}
|
||||
|
||||
// Create additional stem database
|
||||
static bool createstemdb(RclConfig *config, const string &lang)
|
||||
{
|
||||
@ -134,26 +189,6 @@ static void cleanup()
|
||||
dbindexer = 0;
|
||||
}
|
||||
|
||||
int stopindexing;
|
||||
// Mainly used to request indexing stop, we currently do not use the
|
||||
// current file name
|
||||
class MyUpdater : public DbIxStatusUpdater {
|
||||
public:
|
||||
virtual bool update() {
|
||||
if (stopindexing) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
MyUpdater updater;
|
||||
|
||||
static void sigcleanup(int sig)
|
||||
{
|
||||
fprintf(stderr, "sigcleanup\n");
|
||||
stopindexing = 1;
|
||||
}
|
||||
|
||||
static const char *thisprog;
|
||||
static int op_flags;
|
||||
#define OPT_MOINS 0x1
|
||||
|
||||
@ -5,6 +5,8 @@ XAPIANCXXFLAGS=@XAPIANCXXFLAGS@
|
||||
LIBICONV=@LIBICONV@
|
||||
INCICONV=@INCICONV@
|
||||
|
||||
LIBFAM = @LIBFAM@
|
||||
|
||||
prefix = @prefix@
|
||||
datadir = @datadir@
|
||||
RECOLL_DATADIR = ${datadir}/recoll
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.80 2006-10-09 16:37:08 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.81 2006-10-22 14:47:13 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -56,18 +56,21 @@ using namespace std;
|
||||
#ifndef NO_NAMESPACES
|
||||
namespace Rcl {
|
||||
#endif
|
||||
|
||||
// Truncate longer path and uniquize with hash . The goal for this is
|
||||
// to avoid xapian max term length limitations, not to gain space (we
|
||||
// gain very little even with very short maxlens like 30)
|
||||
|
||||
// Max length for path terms stored for each document. Truncate
|
||||
// longer path and uniquize with hash. The goal for this is to avoid
|
||||
// xapian max term length limitations, not to gain space (we gain very
|
||||
// little even with very short maxlens like 30)
|
||||
#define PATHHASHLEN 150
|
||||
|
||||
// Synthetic abstract marker (to discriminate from abstract actually
|
||||
// found in doc)
|
||||
const static string rclSyntAbs = "?!#@";
|
||||
|
||||
// Data for a xapian database. There could actually be 2 different
|
||||
// ones for indexing or query as there is not much in common.
|
||||
// A class for data and methods that would have to expose
|
||||
// Xapian-specific stuff if they were in Rcl::Db. There could actually be
|
||||
// 2 different ones for indexing or query as there is not much in
|
||||
// common.
|
||||
class Native {
|
||||
public:
|
||||
Db *m_db;
|
||||
@ -96,6 +99,35 @@ class Native {
|
||||
Xapian::docid docid,
|
||||
const list<string>& terms);
|
||||
|
||||
/** Compute list of subdocuments for a given path (given by hash) */
|
||||
bool subDocs(const string &hash, vector<Xapian::docid>& docids) {
|
||||
|
||||
docids.clear();
|
||||
string qterm = "Q"+ hash + "|";
|
||||
Xapian::Database db = m_iswritable ? wdb: db;
|
||||
Xapian::TermIterator it = db.allterms_begin();
|
||||
it.skip_to(qterm);
|
||||
string ermsg;
|
||||
try {
|
||||
for (;it != db.allterms_end(); it++) {
|
||||
// If current term does not begin with qterm or has
|
||||
// another |, not the same file
|
||||
if ((*it).find(qterm) != 0 ||
|
||||
(*it).find_last_of("|") != qterm.length() -1)
|
||||
break;
|
||||
docids.push_back(*(db.postlist_begin(*it)));
|
||||
}
|
||||
return true;
|
||||
} catch (const Xapian::Error &e) {
|
||||
ermsg = e.get_msg().c_str();
|
||||
} catch (...) {
|
||||
ermsg= "Unknown error";
|
||||
}
|
||||
LOGERR(("Rcl::Db::subDocs: %s\n", ermsg.c_str()));
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
Native(Db *db)
|
||||
: m_db(db),
|
||||
m_isopen(false), m_iswritable(false), m_mode(Db::DbRO), enquire(0)
|
||||
@ -655,7 +687,6 @@ bool Db::needUpdate(const string &filename, const struct stat *stp)
|
||||
pathHash(filename, hash, PATHHASHLEN);
|
||||
string pterm = "P" + hash;
|
||||
const char *ermsg;
|
||||
string qterm = "Q"+ hash + "|";
|
||||
|
||||
// Look for all documents with this path. We need to look at all
|
||||
// to set their existence flag. We check the update time on the
|
||||
@ -697,20 +728,16 @@ bool Db::needUpdate(const string &filename, const struct stat *stp)
|
||||
m_ndb->updated[*docid] = true;
|
||||
|
||||
// Set the existence flag for all the subdocs (if any)
|
||||
Xapian::TermIterator it = m_ndb->wdb.allterms_begin();
|
||||
it.skip_to(qterm);
|
||||
LOGDEB2(("First qterm: [%s]\n", (*it).c_str()));
|
||||
for (;it != m_ndb->wdb.allterms_end(); it++) {
|
||||
// If current term does not begin with qterm or has another |, not
|
||||
// the same file
|
||||
if ((*it).find(qterm) != 0 ||
|
||||
(*it).find_last_of("|") != qterm.length() -1)
|
||||
break;
|
||||
docid = m_ndb->wdb.postlist_begin(*it);
|
||||
if (*docid < m_ndb->updated.size()) {
|
||||
LOGDEB2(("Db::needUpdate: set exist flag for docid %d [%s]\n",
|
||||
*docid, (*it).c_str()));
|
||||
m_ndb->updated[*docid] = true;
|
||||
vector<Xapian::docid> docids;
|
||||
if (!m_ndb->subDocs(hash, docids)) {
|
||||
LOGERR(("Rcl::Db::needUpdate: can't get subdocs list\n"));
|
||||
return true;
|
||||
}
|
||||
for (vector<Xapian::docid>::iterator it = docids.begin();
|
||||
it != docids.end(); it++) {
|
||||
if (*it < m_ndb->updated.size()) {
|
||||
LOGDEB2(("Db::needUpdate: set flag for docid %d\n", *it));
|
||||
m_ndb->updated[*it] = true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
@ -764,7 +791,9 @@ bool Db::createStemDb(const string& lang)
|
||||
|
||||
/**
|
||||
* This is called at the end of an indexing session, to delete the
|
||||
* documents for files that are no longer there.
|
||||
* documents for files that are no longer there. This can ONLY be called
|
||||
* after a full file-system tree walk, else the file existence flags will
|
||||
* be wrong.
|
||||
*/
|
||||
bool Db::purge()
|
||||
{
|
||||
@ -806,6 +835,47 @@ bool Db::purge()
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Delete document(s) for given filename */
|
||||
bool Db::purgeFile(const string &fn)
|
||||
{
|
||||
LOGDEB(("Db:purgeFile: [%s]\n", fn.c_str()));
|
||||
if (m_ndb == 0)
|
||||
return false;
|
||||
Xapian::WritableDatabase db = m_ndb->wdb;
|
||||
string hash;
|
||||
pathHash(fn, hash, PATHHASHLEN);
|
||||
string pterm = "P" + hash;
|
||||
const char *ermsg = "";
|
||||
try {
|
||||
Xapian::PostingIterator docid = db.postlist_begin(pterm);
|
||||
if (docid == db.postlist_end(pterm))
|
||||
return true;
|
||||
LOGDEB(("purgeFile: delete docid %d\n", *docid));
|
||||
db.delete_document(*docid);
|
||||
vector<Xapian::docid> docids;
|
||||
m_ndb->subDocs(hash, docids);
|
||||
LOGDEB(("purgeFile: subdocs cnt %d\n", docids.size()));
|
||||
for (vector<Xapian::docid>::iterator it = docids.begin();
|
||||
it != docids.end(); it++) {
|
||||
LOGDEB2(("Db::purgeFile: delete subdoc %d\n", *it));
|
||||
db.delete_document(*it);
|
||||
}
|
||||
return true;
|
||||
} catch (const Xapian::Error &e) {
|
||||
ermsg = e.get_msg().c_str();
|
||||
} catch (const string &s) {
|
||||
ermsg = s.c_str();
|
||||
} catch (const char *s) {
|
||||
ermsg = s;
|
||||
} catch (...) {
|
||||
ermsg = "Caught unknown exception";
|
||||
}
|
||||
if (*ermsg) {
|
||||
LOGERR(("Db::purgeFile: %s\n", ermsg));
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Splitter callback for breaking query into terms
|
||||
class wsQData : public TextSplitCB {
|
||||
public:
|
||||
@ -1378,6 +1448,7 @@ bool Db::getDoc(int exti, Doc &doc, int *percent)
|
||||
return m_ndb->dbDataToRclDoc(data, doc, m_qOpts, docid, terms);
|
||||
}
|
||||
|
||||
|
||||
// Retrieve document defined by file name and internal path.
|
||||
bool Db::getDoc(const string &fn, const string &ipath, Doc &doc, int *pc)
|
||||
{
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
*/
|
||||
#ifndef _DB_H_INCLUDED_
|
||||
#define _DB_H_INCLUDED_
|
||||
/* @(#$Id: rcldb.h,v 1.37 2006-10-09 16:37:08 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: rcldb.h,v 1.38 2006-10-22 14:47:13 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include <string>
|
||||
#include <list>
|
||||
@ -123,16 +123,35 @@ class Db {
|
||||
bool close();
|
||||
bool isopen();
|
||||
|
||||
int docCnt(); /// Return total docs in db
|
||||
/** Return total docs in db */
|
||||
int docCnt();
|
||||
|
||||
// Update-related functions
|
||||
|
||||
/* Update-related functions */
|
||||
|
||||
/** Add document. The Doc class should have been filled as much as
|
||||
possible depending on the document type */
|
||||
bool add(const string &filename, const Doc &doc, const struct stat *stp);
|
||||
|
||||
/** Test if the db entry for the given filename/stat is up to date */
|
||||
bool needUpdate(const string &filename, const struct stat *stp);
|
||||
|
||||
/** Remove documents that no longer exist in the file system. This
|
||||
depends on the update map, which is built during
|
||||
indexation. This should only be called after a full walk of
|
||||
the file system, else the update map will not be complete, and
|
||||
many documents will be deleted that shouldn't */
|
||||
bool purge();
|
||||
|
||||
/** Delete document(s) for given filename */
|
||||
bool purgeFile(const string &filename);
|
||||
|
||||
/** Create stem expansion database for given language. */
|
||||
bool createStemDb(const string &lang);
|
||||
/** Delete stem expansion database for given language. */
|
||||
bool deleteStemDb(const string &lang);
|
||||
|
||||
// Query-related functions
|
||||
/* Query-related functions */
|
||||
|
||||
// Parse query string and initialize query
|
||||
bool setQuery(AdvSearchData &q, int opts = QO_NONE,
|
||||
@ -144,11 +163,11 @@ class Db {
|
||||
// Stem expansion is performed if lang is not empty
|
||||
list<string> completions(const string &s, const string &lang, int max=20);
|
||||
|
||||
/// Add extra database for querying
|
||||
/** Add extra database for querying */
|
||||
bool addQueryDb(const string &dir);
|
||||
/// Remove extra database. if dir == "", remove all.
|
||||
/** Remove extra database. if dir == "", remove all. */
|
||||
bool rmQueryDb(const string &dir);
|
||||
/// Tell if directory seems to hold xapian db
|
||||
/** Tell if directory seems to hold xapian db */
|
||||
static bool testDbDir(const string &dir);
|
||||
|
||||
/** Get document at rank i in current query.
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user