diff --git a/src/index/beaglequeue.cpp b/src/index/beaglequeue.cpp index ee85be9d..79c8414a 100644 --- a/src/index/beaglequeue.cpp +++ b/src/index/beaglequeue.cpp @@ -47,16 +47,20 @@ using namespace std; const string keybght("beagleHitType"); -#define LL 2048 +// Beagle creates a file named .xxx (where xxx is the name for the main file +// in the queue), to hold external metadata (http or created by Beagle). +// This class reads the .xxx, dotfile, and turns it into an Rcl::Doc holder class BeagleDotFile { public: BeagleDotFile(RclConfig *conf, const string& fn) : m_conf(conf), m_fn(fn) - { } + {} + // Read input line, strip it of eol and return as c++ string bool readLine(string& line) { + static const int LL = 2048; char cline[LL]; cline[0] = 0; m_input.getline(cline, LL-1); @@ -101,8 +105,8 @@ public: return false; doc.mimetype = line; - // We set the bookmarks mtype as html, the text is empty - // anyway, so that the html viewer will be called on 'Open' + // We set the bookmarks mtype as html (the text is empty + // anyway), so that the html viewer will be called on 'Open' bool isbookmark = false; if (!stringlowercmp("bookmark", doc.meta[keybght])) { isbookmark = true; @@ -150,8 +154,11 @@ public: } // Finally build the confsimple that we will save to the - // cache, out of document fields. This could also be done in - // parallel with the doc.meta build above, but simpler this way. + // cache, from the doc fields. This could also be done in + // parallel with the doc.meta build above, but simpler this + // way. We need it because not all interesting doc fields are + // in the meta array (ie: mimetype, url), and we want + // something homogenous and easy to save. for (map::const_iterator it = doc.meta.begin(); it != doc.meta.end(); it++) { m_fields.set((*it).first, (*it).second, ""); @@ -169,6 +176,9 @@ public: }; const string badtmpdirname = "/no/such/dir/really/can/exist"; + +// Initialize. Compute paths and create a temporary directory that will be +// used by internfile() BeagleQueueIndexer::BeagleQueueIndexer(RclConfig *cnf, Rcl::Db *db, DbIxStatusUpdater *updfunc) : m_config(cnf), m_db(db), m_cache(0), m_updater(updfunc), @@ -216,6 +226,8 @@ BeagleQueueIndexer::~BeagleQueueIndexer() deleteZ(m_cache); } +// Read document from cache. Return the metadata as an Rcl::Doc +// @param htt Beagle Hit Type bool BeagleQueueIndexer::getFromCache(const string& udi, Rcl::Doc &dotdoc, string& data, string *htt) { @@ -243,6 +255,7 @@ bool BeagleQueueIndexer::getFromCache(const string& udi, Rcl::Doc &dotdoc, return true; } +// Index document stored in the cache. bool BeagleQueueIndexer::indexFromCache(const string& udi) { if (!m_db) @@ -304,18 +317,31 @@ bool BeagleQueueIndexer::index() { if (!m_db) return false; - LOGDEB(("BeagleQueueIndexer::processqueue: dir: [%s]\n", - m_queuedir.c_str())); + LOGDEB(("BeagleQueueIndexer::processqueue: [%s]\n", m_queuedir.c_str())); m_config->setKeyDir(m_queuedir); - // First check that files in the cache are in the index, in case this - // has been reset. We don't do this when called from indexFiles + // First check/index files found in the cache. If the index was reset, + // this actually does work, else it sets the existence flags (avoid + // purging). We don't do this when called from indexFiles if (!m_nocacheindex) { bool eof; if (!m_cache->rewind(eof)) { + // rewind can return eof if the cache is empty if (!eof) return false; } + + // The cache is walked in chronogical order, but we want to + // index the newest files first (there can be several versions + // of a given file in the cache). Have to revert the + // list. This would be a problem with a big cache, because the + // udis can be big (ie 150 chars), and would be more + // efficiently performed by the cache, which could use the + // smaller offsets. + // + // Another approach would be to just walk chronogical and + // reindex all versions: would waste processing but save + // memory vector alludis; alludis.reserve(20000); while (m_cache->next(eof)) { @@ -340,6 +366,7 @@ bool BeagleQueueIndexer::index() } } + // Finally index the queue FsTreeWalker walker(FsTreeWalker::FtwNoRecurse); walker.addSkippedName(".*"); FsTreeWalker::Status status =walker.walk(m_queuedir, *this); @@ -347,6 +374,7 @@ bool BeagleQueueIndexer::index() return true; } +// Index a list of files (sent by the real time monitor) bool BeagleQueueIndexer::indexFiles(list& files) { LOGDEB(("BeagleQueueIndexer::indexFiles\n")); @@ -489,7 +517,6 @@ BeagleQueueIndexer::processone(const string &path, return FsTreeWalker::FtwError; } - // Copy to cache { // doc fields not in meta, needing saving to the cache diff --git a/src/mk/FreeBSD b/src/mk/FreeBSD index 480d5d90..e0f0c720 100644 --- a/src/mk/FreeBSD +++ b/src/mk/FreeBSD @@ -2,4 +2,4 @@ include $(depth)/mk/commondefs include $(depth)/mk/localdefs ALL_CXXFLAGS = $(CXXFLAGS) $(COMMONCXXFLAGS) $(LOCALCXXFLAGS) -pthread -LIBSYS = +LIBSYS = -lz