beaglequeue fully functional, small fixes remaining?

This commit is contained in:
dockes 2009-11-18 08:24:06 +00:00
parent 0a9537fef6
commit f5637445d6
2 changed files with 39 additions and 12 deletions

View File

@ -47,16 +47,20 @@ using namespace std;
const string keybght("beagleHitType"); const string keybght("beagleHitType");
#define LL 2048
// Beagle creates a file named .xxx (where xxx is the name for the main file
// in the queue), to hold external metadata (http or created by Beagle).
// This class reads the .xxx dotfile and turns it into an Rcl::Doc holder
class BeagleDotFile { class BeagleDotFile {
public: public:
BeagleDotFile(RclConfig *conf, const string& fn) BeagleDotFile(RclConfig *conf, const string& fn)
: m_conf(conf), m_fn(fn) : m_conf(conf), m_fn(fn)
{ } {}
// Read input line, strip it of eol and return as c++ string
bool readLine(string& line) bool readLine(string& line)
{ {
static const int LL = 2048;
char cline[LL]; char cline[LL];
cline[0] = 0; cline[0] = 0;
m_input.getline(cline, LL-1); m_input.getline(cline, LL-1);
@ -101,8 +105,8 @@ public:
return false; return false;
doc.mimetype = line; doc.mimetype = line;
// We set the bookmarks mtype as html, the text is empty // We set the bookmarks mtype as html (the text is empty
// anyway, so that the html viewer will be called on 'Open' // anyway), so that the html viewer will be called on 'Open'
bool isbookmark = false; bool isbookmark = false;
if (!stringlowercmp("bookmark", doc.meta[keybght])) { if (!stringlowercmp("bookmark", doc.meta[keybght])) {
isbookmark = true; isbookmark = true;
@ -150,8 +154,11 @@ public:
} }
// Finally build the confsimple that we will save to the // Finally build the confsimple that we will save to the
// cache, out of document fields. This could also be done in // cache, from the doc fields. This could also be done in
// parallel with the doc.meta build above, but simpler this way. // parallel with the doc.meta build above, but simpler this
// way. We need it because not all interesting doc fields are
// in the meta array (e.g. mimetype, url), and we want
// something homogeneous and easy to save.
for (map<string,string>::const_iterator it = doc.meta.begin(); for (map<string,string>::const_iterator it = doc.meta.begin();
it != doc.meta.end(); it++) { it != doc.meta.end(); it++) {
m_fields.set((*it).first, (*it).second, ""); m_fields.set((*it).first, (*it).second, "");
@ -169,6 +176,9 @@ public:
}; };
const string badtmpdirname = "/no/such/dir/really/can/exist"; const string badtmpdirname = "/no/such/dir/really/can/exist";
// Initialize. Compute paths and create a temporary directory that will be
// used by internfile()
BeagleQueueIndexer::BeagleQueueIndexer(RclConfig *cnf, Rcl::Db *db, BeagleQueueIndexer::BeagleQueueIndexer(RclConfig *cnf, Rcl::Db *db,
DbIxStatusUpdater *updfunc) DbIxStatusUpdater *updfunc)
: m_config(cnf), m_db(db), m_cache(0), m_updater(updfunc), : m_config(cnf), m_db(db), m_cache(0), m_updater(updfunc),
@ -216,6 +226,8 @@ BeagleQueueIndexer::~BeagleQueueIndexer()
deleteZ(m_cache); deleteZ(m_cache);
} }
// Read document from cache. Return the metadata as an Rcl::Doc
// @param htt Beagle Hit Type
bool BeagleQueueIndexer::getFromCache(const string& udi, Rcl::Doc &dotdoc, bool BeagleQueueIndexer::getFromCache(const string& udi, Rcl::Doc &dotdoc,
string& data, string *htt) string& data, string *htt)
{ {
@ -243,6 +255,7 @@ bool BeagleQueueIndexer::getFromCache(const string& udi, Rcl::Doc &dotdoc,
return true; return true;
} }
// Index document stored in the cache.
bool BeagleQueueIndexer::indexFromCache(const string& udi) bool BeagleQueueIndexer::indexFromCache(const string& udi)
{ {
if (!m_db) if (!m_db)
@ -304,18 +317,31 @@ bool BeagleQueueIndexer::index()
{ {
if (!m_db) if (!m_db)
return false; return false;
LOGDEB(("BeagleQueueIndexer::processqueue: dir: [%s]\n", LOGDEB(("BeagleQueueIndexer::processqueue: [%s]\n", m_queuedir.c_str()));
m_queuedir.c_str()));
m_config->setKeyDir(m_queuedir); m_config->setKeyDir(m_queuedir);
// First check that files in the cache are in the index, in case this // First check/index files found in the cache. If the index was reset,
// has been reset. We don't do this when called from indexFiles // this actually does work, else it sets the existence flags (avoid
// purging). We don't do this when called from indexFiles
if (!m_nocacheindex) { if (!m_nocacheindex) {
bool eof; bool eof;
if (!m_cache->rewind(eof)) { if (!m_cache->rewind(eof)) {
// rewind can return eof if the cache is empty
if (!eof) if (!eof)
return false; return false;
} }
// The cache is walked in chronological order, but we want to
// index the newest files first (there can be several versions
// of a given file in the cache), so we have to reverse the
// list. This could be a problem with a big cache, because the
// udis can be big (e.g. 150 chars). The reversal would be more
// efficiently performed by the cache itself, which could use
// the smaller offsets.
//
// Another approach would be to just walk in chronological order
// and reindex all versions: this would waste processing but
// save memory
vector<string> alludis; vector<string> alludis;
alludis.reserve(20000); alludis.reserve(20000);
while (m_cache->next(eof)) { while (m_cache->next(eof)) {
@ -340,6 +366,7 @@ bool BeagleQueueIndexer::index()
} }
} }
// Finally index the queue
FsTreeWalker walker(FsTreeWalker::FtwNoRecurse); FsTreeWalker walker(FsTreeWalker::FtwNoRecurse);
walker.addSkippedName(".*"); walker.addSkippedName(".*");
FsTreeWalker::Status status =walker.walk(m_queuedir, *this); FsTreeWalker::Status status =walker.walk(m_queuedir, *this);
@ -347,6 +374,7 @@ bool BeagleQueueIndexer::index()
return true; return true;
} }
// Index a list of files (sent by the real time monitor)
bool BeagleQueueIndexer::indexFiles(list<string>& files) bool BeagleQueueIndexer::indexFiles(list<string>& files)
{ {
LOGDEB(("BeagleQueueIndexer::indexFiles\n")); LOGDEB(("BeagleQueueIndexer::indexFiles\n"));
@ -489,7 +517,6 @@ BeagleQueueIndexer::processone(const string &path,
return FsTreeWalker::FtwError; return FsTreeWalker::FtwError;
} }
// Copy to cache // Copy to cache
{ {
// doc fields not in meta, needing saving to the cache // doc fields not in meta, needing saving to the cache

View File

@ -2,4 +2,4 @@ include $(depth)/mk/commondefs
include $(depth)/mk/localdefs include $(depth)/mk/localdefs
ALL_CXXFLAGS = $(CXXFLAGS) $(COMMONCXXFLAGS) $(LOCALCXXFLAGS) -pthread ALL_CXXFLAGS = $(CXXFLAGS) $(COMMONCXXFLAGS) $(LOCALCXXFLAGS) -pthread
LIBSYS = LIBSYS = -lz