From 4503971dd0228607239211c20a82481b77e596d2 Mon Sep 17 00:00:00 2001 From: dockes Date: Fri, 13 Nov 2009 09:07:18 +0000 Subject: [PATCH] integrate beaglequeueindexer for indexing. Work remains for indexfiles() at least --- src/index/beaglequeue.cpp | 233 +++++++++++++++++++++++++++++++++----- src/index/beaglequeue.h | 19 +++- src/index/fsindexer.cpp | 3 +- src/index/fsindexer.h | 2 +- src/index/indexer.cpp | 119 +++++++++++++++---- src/index/indexer.h | 36 +++--- src/index/recollindex.cpp | 99 ++++------------ 7 files changed, 357 insertions(+), 154 deletions(-) diff --git a/src/index/beaglequeue.cpp b/src/index/beaglequeue.cpp index e323d59b..a4911876 100644 --- a/src/index/beaglequeue.cpp +++ b/src/index/beaglequeue.cpp @@ -17,6 +17,10 @@ #ifndef lint static char rcsid[] = "@(#$Id: $ (C) 2005 J.F.Dockes"; #endif +#include "autoconfig.h" + +#include + #include "autoconfig.h" #include "pathut.h" #include "debuglog.h" @@ -27,9 +31,14 @@ static char rcsid[] = "@(#$Id: $ (C) 2005 J.F.Dockes"; #include "internfile.h" #include "wipedir.h" #include "circache.h" +#include "indexer.h" +#include "readfile.h" +#include "conftree.h" +#include "transcode.h" #include #include +#include using namespace std; #include @@ -42,9 +51,7 @@ class BeagleDotFile { public: BeagleDotFile(RclConfig *conf, const string& fn) : m_conf(conf), m_fn(fn) - { - - } + { } bool readLine(string& line) { @@ -92,13 +99,20 @@ public: return false; doc.mimetype = line; - if (doc.mimetype.empty() && - !stringlowercmp("bookmark", doc.meta[keybght])) - doc.mimetype = "text/plain"; + // We set the bookmarks mtype as html, the text is empty + // anyway, so that the html viewer will be called on 'Open' + bool isbookmark = false; + if (!stringlowercmp("bookmark", doc.meta[keybght])) { + isbookmark = true; + doc.mimetype = "text/html"; + } string confstr; string ss(" "); - // Read the rest: fields and keywords + // Read the rest: fields and keywords. We do a little + // massaging of the input lines, then use a ConfSimple to + // parse, and finally insert the key/value pairs into the doc + // meta[] array for (;;) { if (!readLine(line)) { // Eof hopefully @@ -109,7 +123,6 @@ public: line = line.substr(2); confstr += line + "\n"; } - ConfSimple fields(confstr, 1); list names = fields.getNames(""); for (list::iterator it = names.begin(); @@ -118,23 +131,50 @@ public: fields.get(*it, value, ""); if (!value.compare("undefined") || !value.compare("null")) continue; + + string *valuep = &value; + string cvalue; + if (isbookmark) { + // It appears that bookmarks are stored in the users' + // locale charset (not too sure). No idea what to do + // for other types, would have to check the plugin. + string charset = m_conf->getDefCharset(true); + transcode(value, cvalue, charset, "UTF-8"); + valuep = &cvalue; + } + string caname = m_conf->fieldCanon(*it); - doc.meta[caname].append(ss + value); + doc.meta[caname].append(ss + *valuep); } + + // Finally build the confsimple that we will save to the + // cache, out of document fields. This could also be done in + // parallel with the doc.meta build above, but simpler this way. + for (map::const_iterator it = doc.meta.begin(); + it != doc.meta.end(); it++) { + m_fields.set((*it).first, (*it).second, ""); + } + m_fields.set("url", doc.url, ""); + m_fields.set("mimetype", doc.mimetype, ""); + return true; } RclConfig *m_conf; + ConfSimple m_fields; string m_fn; ifstream m_input; }; const string badtmpdirname = "/no/such/dir/really/can/exist"; -BeagleQueueIndexer::BeagleQueueIndexer(RclConfig *cnf) - : m_config(cnf), m_db(cnf) +BeagleQueueIndexer::BeagleQueueIndexer(RclConfig *cnf, Rcl::Db *db, + DbIxStatusUpdater *updfunc) + : m_config(cnf), m_db(db), m_cache(0), m_updater(updfunc) { + if (!m_config->getConfParam("beaglequeuedir", m_queuedir)) m_queuedir = path_tildexpand("~/.beagle/ToIndex"); + if (m_tmpdir.empty() || access(m_tmpdir.c_str(), 0) < 0) { string reason; if (!maketmpdir(m_tmpdir, reason)) { @@ -143,12 +183,20 @@ BeagleQueueIndexer::BeagleQueueIndexer(RclConfig *cnf) m_tmpdir = badtmpdirname; } } - Rcl::Db::OpenMode mode = Rcl::Db::DbUpd; - if (!m_db.open(mode)) { - LOGERR(("BeagleQueueIndexer: error opening database %s\n", - m_config->getDbDir().c_str())); - return; - } + + string ccdir; + m_config->getConfParam("webcachedir", ccdir); + if (ccdir.empty()) + ccdir = "webcache"; + ccdir = path_tildexpand(ccdir); + // If not an absolute path, compute relative to config dir + if (ccdir.at(0) != '/') + ccdir = path_cat(m_config->getConfDir(), ccdir); + + int maxmbs = 20; + m_config->getConfParam("webcachemaxmbs", &maxmbs); + m_cache = new CirCache(ccdir); + m_cache->create(off_t(maxmbs)*1000*1024, true); } BeagleQueueIndexer::~BeagleQueueIndexer() @@ -161,13 +209,106 @@ BeagleQueueIndexer::~BeagleQueueIndexer() m_tmpdir.c_str())); } } - m_db.close(); + deleteZ(m_cache); } -bool BeagleQueueIndexer::processqueue() +bool BeagleQueueIndexer::indexFromCache(const string& udi) +{ + string dict, data; + + // This is horribly inefficient and needs fixing either by saving + // the offsets during the forward scan, or using an auxiliary isam + // map + if (!m_cache->get(udi, dict, data)) + return false; + + ConfSimple cf(dict, 1); + + string hittype; + if (!cf.get(keybght, hittype, "")) { + LOGERR(("BeagleIndexer::index: cc entry has no hit type\n")); + return false; + } + + // Build a doc from saved metadata + Rcl::Doc dotdoc; + cf.get("url", dotdoc.url, ""); + cf.get("mimetype", dotdoc.mimetype, ""); + cf.get("fmtime", dotdoc.fmtime, ""); + cf.get("fbytes", dotdoc.fbytes, ""); + dotdoc.sig = ""; + list names = cf.getNames(""); + for (list::const_iterator it = names.begin(); + it != names.end(); it++) { + cf.get(*it, dotdoc.meta[*it], ""); + } + + if (!stringlowercmp("bookmark", hittype)) { + // Just index the dotdoc + return m_db->addOrUpdate(udi, "", dotdoc); + } else if (stringlowercmp("webhistory", dotdoc.meta[keybght]) || + (dotdoc.mimetype.compare("text/html") && + dotdoc.mimetype.compare("text/plain"))) { + LOGDEB(("BeagleQueueIndexer: skipping: hittype %s mimetype %s\n", + dotdoc.meta[keybght].c_str(), dotdoc.mimetype.c_str())); + return true; + } else { + Rcl::Doc doc; + FileInterner interner(data, m_config, m_tmpdir, + FileInterner::FIF_doUseInputMimetype, + dotdoc.mimetype); + string ipath; + FileInterner::Status fis = interner.internfile(doc, ipath); + if (fis != FileInterner::FIDone) { + LOGERR(("BeagleQueueIndexer: bad status from internfile\n")); + return false; + } + + doc.mimetype = dotdoc.mimetype; + doc.fmtime = dotdoc.fmtime; + doc.url = dotdoc.url; + doc.fbytes = dotdoc.fbytes; + doc.sig = ""; + + return m_db->addOrUpdate(udi, "", doc); + } +} + +bool BeagleQueueIndexer::index() { LOGDEB(("BeagleQueueIndexer::processqueue: dir: [%s]\n", m_queuedir.c_str())); + m_config->setKeyDir(m_queuedir); + + // First walk the cache to set the existence flags. We do not + // actually check uptodateness because all files in the cache are + // supposedly already indexed. + //TBD: change this as the cache needs reindexing after an index reset! + // Also, we need to read the cache backwards so that the newest + // version of each file gets indexed? Or find a way to index + // multiple versions ? + bool eof; + if (!m_cache->rewind(eof)) { + if (!eof) + return false; + } + vector alludis; + alludis.reserve(20000); + while (m_cache->next(eof)) { + string dict; + m_cache->getcurrentdict(dict); + ConfSimple cf(dict, 1); + string udi; + if (!cf.get("udi", udi, "")) + continue; + alludis.push_back(udi); + } + for (vector::reverse_iterator it = alludis.rbegin(); + it != alludis.rend(); it++) { + if (m_db->needUpdate(*it, "")) { + indexFromCache(*it); + } + } FsTreeWalker walker(FsTreeWalker::FtwNoRecurse); walker.addSkippedName(".*"); @@ -181,12 +322,15 @@ BeagleQueueIndexer::processone(const string &path, const struct stat *stp, FsTreeWalker::CbFlag flg) { + bool dounlink = false; + if (flg != FsTreeWalker::FtwRegular) return FsTreeWalker::FtwOk; string dotpath = path_cat(path_getfather(path), string(".") + path_getsimple(path)); LOGDEB(("BeagleQueueIndexer: prc1: [%s]\n", path.c_str())); + BeagleDotFile dotfile(m_config, dotpath); Rcl::Doc dotdoc; string udi, udipath; @@ -205,12 +349,32 @@ BeagleQueueIndexer::processone(const string &path, // We only process bookmarks or text/html and text/plain files. if (!stringlowercmp("bookmark", dotdoc.meta[keybght])) { + // For bookmarks, we just index the doc that was built from the + // metadata. + if (dotdoc.fmtime.empty()) + dotdoc.fmtime = ascdate; + + char cbuf[100]; + sprintf(cbuf, "%ld", (long)stp->st_size); + dotdoc.fbytes = cbuf; + + // Document signature for up to date checks: none. + dotdoc.sig = ""; + + // doc fields not in meta, needing saving to the cache + dotfile.m_fields.set("fmtime", dotdoc.fmtime, ""); + dotfile.m_fields.set("fbytes", dotdoc.fbytes, ""); + + if (!m_db->addOrUpdate(udi, "", dotdoc)) + return FsTreeWalker::FtwError; } else if (stringlowercmp("webhistory", dotdoc.meta[keybght]) || (dotdoc.mimetype.compare("text/html") && dotdoc.mimetype.compare("text/plain"))) { LOGDEB(("BeagleQueueIndexer: skipping: hittype %s mimetype %s\n", dotdoc.meta[keybght].c_str(), dotdoc.mimetype.c_str())); + // Unlink them anyway + dounlink = true; goto out; } else { Rcl::Doc doc; @@ -230,17 +394,34 @@ BeagleQueueIndexer::processone(const string &path, char cbuf[100]; sprintf(cbuf, "%ld", (long)stp->st_size); doc.fbytes = cbuf; - // Document signature for up to date checks: none. The file is - // going to be deleted anyway. We always reindex what comes in - // the queue. It would probably be possible to extract some - // http data to avoid this. + // Document signature for up to date checks: none. doc.sig = ""; doc.url = dotdoc.url; - if (!m_db.addOrUpdate(udi, "", doc)) + + // doc fields not in meta, needing saving to the cache + dotfile.m_fields.set("fmtime", dotdoc.fmtime, ""); + dotfile.m_fields.set("fbytes", dotdoc.fbytes, ""); + + if (!m_db->addOrUpdate(udi, "", doc)) return FsTreeWalker::FtwError; + } + + // Copy to cache + { + stringstream o; + dotfile.m_fields.write(o); + string fdata; + file_to_string(path, fdata); + if (!m_cache->put(udi, o.str(), fdata)) + goto out; + } + + dounlink = true; out: -// unlink(path.c_str()); -// unlink(dotpath.c_str()); + if (dounlink) { + unlink(path.c_str()); + unlink(dotpath.c_str()); + } return FsTreeWalker::FtwOk; } diff --git a/src/index/beaglequeue.h b/src/index/beaglequeue.h index 55640eb5..31447344 100644 --- a/src/index/beaglequeue.h +++ b/src/index/beaglequeue.h @@ -28,21 +28,30 @@ #include "fstreewalk.h" #include "rcldb.h" +class DbIxStatusUpdater; +class CirCache; + class BeagleQueueIndexer : public FsTreeWalkerCB { public: - BeagleQueueIndexer(RclConfig *cnf); + BeagleQueueIndexer(RclConfig *cnf, Rcl::Db *db, + DbIxStatusUpdater *updfunc = 0); ~BeagleQueueIndexer(); - bool processqueue(); + bool index(); FsTreeWalker::Status processone(const string &, const struct stat *, FsTreeWalker::CbFlag); private: RclConfig *m_config; - Rcl::Db m_db; - string m_queuedir; - string m_tmpdir; + Rcl::Db *m_db; + CirCache *m_cache; + string m_queuedir; + string m_tmpdir; + DbIxStatusUpdater *m_updater; + + bool indexFromCache(const string& udi); + }; #endif /* _beaglequeue_h_included_ */ diff --git a/src/index/fsindexer.cpp b/src/index/fsindexer.cpp index f4e97e63..bdffe09f 100644 --- a/src/index/fsindexer.cpp +++ b/src/index/fsindexer.cpp @@ -90,7 +90,7 @@ bool FsIndexer::init() } // Recursively index each directory in the topdirs: -bool FsIndexer::index(bool resetbefore) +bool FsIndexer::index() { list topdirs = m_config->getTopdirs(); if (topdirs.empty()) { @@ -376,7 +376,6 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp, // We'll change the signature to ensure that the indexing will // be retried every time. - // Internal access path for multi-document files if (ipath.empty()) hadNullIpath = true; diff --git a/src/index/fsindexer.h b/src/index/fsindexer.h index c9d94688..b72fd3e4 100644 --- a/src/index/fsindexer.h +++ b/src/index/fsindexer.h @@ -55,7 +55,7 @@ class FsIndexer : public FsTreeWalkerCB { * We create the temporary directory, open the database, * then call a file system walk for each top-level directory. */ - bool index(bool resetbefore); + bool index(); /** Index a list of files. No db cleaning or stemdb updating */ bool indexFiles(const std::list &files); diff --git a/src/index/indexer.cpp b/src/index/indexer.cpp index 2fb42b5a..281aa781 100644 --- a/src/index/indexer.cpp +++ b/src/index/indexer.cpp @@ -28,37 +28,63 @@ static char rcsid[] = "@(#$Id: indexer.cpp,v 1.71 2008-12-17 08:01:40 dockes Exp #include "debuglog.h" #include "indexer.h" +#include "fsindexer.h" +#include "beaglequeue.h" + #ifdef RCL_USE_ASPELL #include "rclaspell.h" #endif +ConfIndexer::ConfIndexer(RclConfig *cnf, DbIxStatusUpdater *updfunc) + : m_config(cnf), m_db(cnf), m_fsindexer(0), + m_dobeagle(false), m_beagler(0), + m_updater(updfunc) +{ + m_config->getConfParam("processbeaglequeue", &m_dobeagle); +} + ConfIndexer::~ConfIndexer() { deleteZ(m_fsindexer); + deleteZ(m_beagler); } -bool ConfIndexer::index(bool resetbefore) +bool ConfIndexer::index(bool resetbefore, ixType typestorun) { Rcl::Db::OpenMode mode = resetbefore ? Rcl::Db::DbTrunc : Rcl::Db::DbUpd; if (!m_db.open(mode)) { - LOGERR(("ConfIndexer: error opening database %s\n", - m_config->getDbDir().c_str())); + LOGERR(("ConfIndexer: error opening database %s : %s\n", + m_config->getDbDir().c_str(), m_db.getReason().c_str())); return false; } m_config->setKeyDir(""); - m_fsindexer = new FsIndexer(m_config, &m_db, m_updater); - bool ret = m_fsindexer->index(resetbefore); - deleteZ(m_fsindexer); - - if (m_updater) { - m_updater->status.fn.erase(); - m_updater->status.phase = DbIxStatus::DBIXS_PURGE; - m_updater->update(); + if (typestorun & IxTFs) { + deleteZ(m_fsindexer); + m_fsindexer = new FsIndexer(m_config, &m_db, m_updater); + if (!m_fsindexer || !m_fsindexer->index()) { + return false; + } + } + + if (m_dobeagle && (typestorun & IxTBeagleQueue)) { + deleteZ(m_beagler); + m_beagler = new BeagleQueueIndexer(m_config, &m_db, m_updater); + if (!m_beagler || !m_beagler->index()) { + return false; + } + } + + if (typestorun == IxTAll) { + // Get rid of all database entries that don't exist in the + // filesystem anymore. Only if all *configured* indexers ran. + if (m_updater) { + m_updater->status.fn.erase(); + m_updater->status.phase = DbIxStatus::DBIXS_PURGE; + m_updater->update(); + } + m_db.purge(); } - // Get rid of all database entries that don't exist in the - // filesystem anymore. - m_db.purge(); if (m_updater) { m_updater->status.phase = DbIxStatus::DBIXS_CLOSING; @@ -78,17 +104,55 @@ bool ConfIndexer::index(bool resetbefore) return true; } +bool ConfIndexer::initTopDirs() +{ + if (m_tdl.empty()) { + m_tdl = m_config->getTopdirs(); + if (m_tdl.empty()) { + m_reason = "Top directory list (topdirs param.) " + "not found in config or Directory list parse error"; + return false; + } + } + return true; +} + bool ConfIndexer::indexFiles(const std::list &files) { + if (!initTopDirs()) + return false; + + list myfiles; + for (list::const_iterator it = files.begin(); + it != files.end(); it++) { + string fn = path_canon(*it); + bool ok = false; + // Check that this file name belongs to one of our subtrees + for (list::iterator dit = m_tdl.begin(); + dit != m_tdl.end(); dit++) { + if (fn.find(*dit) == 0) { + myfiles.push_back(fn); + ok = true; + break; + } + } + if (!ok) { + m_reason += string("File ") + fn + string(" not in indexed area\n"); + } + } + if (myfiles.empty()) + return true; + if (!m_db.open(Rcl::Db::DbUpd)) { LOGERR(("ConfIndexer: indexFiles error opening database %s\n", m_config->getDbDir().c_str())); return false; } m_config->setKeyDir(""); - m_fsindexer = new FsIndexer(m_config, &m_db, m_updater); + if (!m_fsindexer) + m_fsindexer = new FsIndexer(m_config, &m_db, m_updater); bool ret = m_fsindexer->indexFiles(files); - deleteZ(m_fsindexer); + // The close would be done in our destructor, but we want status here if (!m_db.close()) { LOGERR(("ConfIndexer::index: error closing database in %s\n", @@ -100,15 +164,26 @@ bool ConfIndexer::indexFiles(const std::list &files) bool ConfIndexer::purgeFiles(const std::list &files) { + if (!initTopDirs()) + return false; + + list myfiles; + for (list::const_iterator it = files.begin(); + it != files.end(); it++) { + myfiles.push_back(path_canon(*it)); + } + if (!m_db.open(Rcl::Db::DbUpd)) { LOGERR(("ConfIndexer: purgeFiles error opening database %s\n", m_config->getDbDir().c_str())); return false; } + m_config->setKeyDir(""); - m_fsindexer = new FsIndexer(m_config, &m_db, m_updater); + if (!m_fsindexer) + m_fsindexer = new FsIndexer(m_config, &m_db, m_updater); bool ret = m_fsindexer->purgeFiles(files); - deleteZ(m_fsindexer); + // The close would be done in our destructor, but we want status here if (!m_db.close()) { LOGERR(("ConfIndexer::index: error closing database in %s\n", @@ -159,7 +234,7 @@ bool ConfIndexer::createStemDb(const string &lang) // module, either from a configuration variable or the NLS environment. bool ConfIndexer::createAspellDict() { - LOGDEB2(("FsIndexer::createAspellDict()\n")); + LOGDEB2(("ConfIndexer::createAspellDict()\n")); #ifdef RCL_USE_ASPELL // For the benefit of the real-time indexer, we only initialize // noaspell from the configuration once. It can then be set to @@ -180,14 +255,14 @@ bool ConfIndexer::createAspellDict() Aspell aspell(m_config); string reason; if (!aspell.init(reason)) { - LOGERR(("FsIndexer::createAspellDict: aspell init failed: %s\n", + LOGERR(("ConfIndexer::createAspellDict: aspell init failed: %s\n", reason.c_str())); noaspell = true; return false; } - LOGDEB(("FsIndexer::createAspellDict: creating dictionary\n")); + LOGDEB(("ConfIndexer::createAspellDict: creating dictionary\n")); if (!aspell.buildDict(m_db, reason)) { - LOGERR(("FsIndexer::createAspellDict: aspell buildDict failed: %s\n", + LOGERR(("ConfIndexer::createAspellDict: aspell buildDict failed: %s\n", reason.c_str())); noaspell = true; return false; diff --git a/src/index/indexer.h b/src/index/indexer.h index 2f09abfe..fc37036b 100644 --- a/src/index/indexer.h +++ b/src/index/indexer.h @@ -29,10 +29,10 @@ using std::map; #endif #include "rclconfig.h" -#include "fsindexer.h" +#include "rcldb.h" -/* Forward decl for lower level indexing object */ -class DbIndexer; +class FsIndexer; +class BeagleQueueIndexer; class DbIxStatus { public: @@ -55,27 +55,20 @@ class DbIxStatusUpdater { }; /** - The top level indexing object. Processes the configuration, then invokes - file system walking to populate/update the database(s). - - Fiction: - Multiple top-level directories can be listed in the - configuration. Each can be indexed to a different - database. Directories are first grouped by database, then an - internal class (DbIndexer) is used to process each group. - Fact: we've had one db per config forever. The multidb/config code has been - kept around for no good reason, this fiction only affects indexer.cpp + * The top level indexing object. Processes the configuration, then invokes + * file system walking or other to populate/update the database(s). */ class ConfIndexer { public: enum runStatus {IndexerOk, IndexerError}; - ConfIndexer(RclConfig *cnf, DbIxStatusUpdater *updfunc = 0) - : m_config(cnf), m_db(cnf), m_fsindexer(0), m_updater(updfunc) - {} + ConfIndexer(RclConfig *cnf, DbIxStatusUpdater *updfunc = 0); virtual ~ConfIndexer(); - /** Worker function: doe the actual indexing */ - bool index(bool resetbefore = false); + // Indexer types. Maybe we'll have something more dynamic one day + enum ixType {IxTNone, IxTFs=1, IxTBeagleQueue=2, + IxTAll = IxTFs | IxTBeagleQueue}; + /** Run indexers */ + bool index(bool resetbefore, ixType typestorun); const string &getReason() {return m_reason;} @@ -101,8 +94,13 @@ class ConfIndexer { RclConfig *m_config; Rcl::Db m_db; FsIndexer *m_fsindexer; - DbIxStatusUpdater *m_updater; + bool m_dobeagle; + BeagleQueueIndexer *m_beagler; + DbIxStatusUpdater *m_updater; string m_reason; + list m_tdl; + + bool initTopDirs(); }; #endif /* _INDEXER_H_INCLUDED_ */ diff --git a/src/index/recollindex.cpp b/src/index/recollindex.cpp index 950b5a27..3ffae840 100644 --- a/src/index/recollindex.cpp +++ b/src/index/recollindex.cpp @@ -83,66 +83,27 @@ static bool makeIndexer(RclConfig *config) { if (!confindexer) confindexer = new ConfIndexer(config, &updater); - return confindexer ? true : false; + if (!confindexer) { + cerr << "Cannot create indexer" << endl; + exit(1); + } + return true; } -// The list of top directories/files wont change during program run, -// let's cache it: -static list o_tdl; - // Index a list of files. We just check that they belong to one of the // topdirs subtrees, and call the indexer method. // // This is called either from the command line or from the monitor. In // this case we're called repeatedly in the same process, and the -// confindexer is only created once by makeIndexer (but the db is -// flushed anyway) +// confindexer is only created once by makeIndexer (but the db closed and +// flushed every time) bool indexfiles(RclConfig *config, const list &filenames) { if (filenames.empty()) return true; - - if (o_tdl.empty()) { - o_tdl = config->getTopdirs(); - if (o_tdl.empty()) { - fprintf(stderr, "Top directory list (topdirs param.) " - "not found in config or Directory list parse error"); - return false; - } - } - - list myfiles; - for (list::const_iterator it = filenames.begin(); - it != filenames.end(); it++) { - string fn = path_canon(*it); - bool ok = false; - // Check that this file name belongs to one of our subtrees - for (list::iterator dit = o_tdl.begin(); - dit != o_tdl.end(); dit++) { - if (fn.find(*dit) == 0) { - myfiles.push_back(fn); - ok = true; - break; - } - } - if (!ok) { - fprintf(stderr, "File %s not in indexed area\n", fn.c_str()); - } - } - if (myfiles.empty()) - return true; - - // Note: we should sort the file names against the topdirs here - // and check for different databases. But we can for now only have - // one database per config, so we set the keydir from the first - // file (which is not really needed...), create the indexer/db and - // go: - config->setKeyDir(path_getfather(*myfiles.begin())); - if (!makeIndexer(config)) return false; - - return confindexer->indexFiles(myfiles); + return confindexer->indexFiles(filenames); } // Delete a list of files. Same comments about call contexts as indexfiles. @@ -150,32 +111,9 @@ bool purgefiles(RclConfig *config, const list &filenames) { if (filenames.empty()) return true; - - if (o_tdl.empty()) { - o_tdl = config->getTopdirs(); - if (o_tdl.empty()) { - fprintf(stderr, "Top directory list (topdirs param.) " - "not found in config or Directory list parse error"); - return false; - } - } - - list myfiles; - for (list::const_iterator it = filenames.begin(); - it != filenames.end(); it++) { - myfiles.push_back(path_canon(*it)); - } - - // Note: we should sort the file names against the topdirs here - // and check for different databases. But we can for now only have - // one database per config, so we set the keydir from the first - // file (which is not really needed...), create the indexer/db and - // go: - config->setKeyDir(path_getfather(*myfiles.begin())); - if (!makeIndexer(config)) return false; - return confindexer->purgeFiles(myfiles); + return confindexer->purgeFiles(filenames); } // Create stemming and spelling databases @@ -343,12 +281,14 @@ int main(int argc, const char **argv) filenames.push_back(*argv++); } } - + bool status; if (op_flags & OPT_i) - exit(!indexfiles(config, filenames)); + status = indexfiles(config, filenames); else - exit(!purgefiles(config, filenames)); - + status = purgefiles(config, filenames); + if (!confindexer->getReason().empty()) + cerr << confindexer->getReason() << endl; + exit(status ? 0 : 1); } else if (op_flags & OPT_l) { if (argc != 0) Usage(); @@ -400,14 +340,15 @@ int main(int argc, const char **argv) exit(!confindexer->createAspellDict()); #endif // ASPELL } else if (op_flags & OPT_b) { - BeagleQueueIndexer beagler(config); - bool status = beagler.processqueue(); - return !status; + cerr << "Not yet" << endl; + return 1; } else { confindexer = new ConfIndexer(config, &updater); - bool status = confindexer->index(rezero); + bool status = confindexer->index(rezero, ConfIndexer::IxTAll); if (!status) cerr << "Indexing failed" << endl; + if (!confindexer->getReason().empty()) + cerr << confindexer->getReason() << endl; return !status; } }