From 64d219e047cd1d89f7a938e15636d58443f22804 Mon Sep 17 00:00:00 2001 From: dockes Date: Fri, 13 Nov 2009 09:08:00 +0000 Subject: [PATCH] integrate beaglequeueindexer for indexing. Work remains for indexfiles() at least --- src/utils/circache.cpp | 178 ++++++++++++++++++++++++++++++----------- src/utils/circache.h | 13 ++- 2 files changed, 145 insertions(+), 46 deletions(-) diff --git a/src/utils/circache.cpp b/src/utils/circache.cpp index 2f127580..1a2f9fbe 100644 --- a/src/utils/circache.cpp +++ b/src/utils/circache.cpp @@ -34,6 +34,7 @@ static char rcsid[] = "@(#$Id: $ (C) 2009 J.F.Dockes"; #include "circache.h" #include "conftree.h" #include "debuglog.h" +#include "smallut.h" using namespace std; @@ -85,6 +86,7 @@ public: class CirCacheInternal { public: int m_fd; + ////// These are cache persistent state and written to the first block: // Maximum file size, after which we begin reusing old space off_t m_maxsize; // Offset of the oldest header. @@ -93,12 +95,18 @@ public: off_t m_nheadoffs; // Pad size for newest entry. int m_npadsize; + ///////////////////// End header entries + // A place to hold data when reading char *m_buffer; size_t m_bufsiz; // Error messages ostringstream m_reason; + // State for rewind/next/getcurrent operation + off_t m_itoffs; + EntryHeaderData m_ithd; + CirCacheInternal() : m_fd(-1), m_maxsize(-1), m_oheadoffs(-1), m_nheadoffs(0), m_npadsize(0), m_buffer(0), m_bufsiz(0) @@ -194,6 +202,25 @@ public: return true; } + bool writeentryheader(off_t offset, const EntryHeaderData& d) + { + char *bf = buf(CIRCACHE_HEADER_SIZE); + if (bf == 0) + return false; + memset(bf, 0, CIRCACHE_HEADER_SIZE); + sprintf(bf, headerformat, d.dicsize, d.datasize, d.padsize); + if (lseek(m_fd, offset, 0) != offset) { + m_reason << "CirCache::weh: lseek(" << offset << + ") failed: errno " << errno; + return false; + } + if (write(m_fd, bf, CIRCACHE_HEADER_SIZE) != CIRCACHE_HEADER_SIZE) { + m_reason << "CirCache::weh: write failed. errno " << errno; + return false; + } + return true; + } + CCScanHook::status readentryheader(off_t offset, EntryHeaderData& d) { assert(m_fd >= 0); @@ -226,25 +253,6 @@ public: return CCScanHook::Continue; } - bool writeentryheader(off_t offset, const EntryHeaderData& d) - { - char *bf = buf(CIRCACHE_HEADER_SIZE); - if (bf == 0) - return false; - memset(bf, 0, CIRCACHE_HEADER_SIZE); - sprintf(bf, headerformat, d.dicsize, d.datasize, d.padsize); - if (lseek(m_fd, offset, 0) != offset) { - m_reason << "CirCache::weh: lseek(" << offset << - ") failed: errno " << errno; - return false; - } - if (write(m_fd, bf, CIRCACHE_HEADER_SIZE) != CIRCACHE_HEADER_SIZE) { - m_reason << "CirCache::weh: write failed. errno " << errno; - return false; - } - return true; - } - CCScanHook::status scan(off_t startoffset, CCScanHook *user, bool fold = false) { @@ -304,12 +312,46 @@ public: d.datasize + d.padsize; } } + + bool readDicData(off_t hoffs, unsigned int dicsize, string& dict, + unsigned int datasize, string* data) + { + off_t offs = hoffs + CIRCACHE_HEADER_SIZE; + if (lseek(m_fd, offs, 0) != offs) { + m_reason << "CirCache::get: lseek(" << offs << ") failed: " << + errno; + return false; + } + char *bf = buf(dicsize); + if (bf == 0) + return false; + if (read(m_fd, bf, dicsize) != int(dicsize)) { + m_reason << "CirCache::get: read() failed: errno " << errno; + return false; + } + dict.assign(bf, dicsize); + if (data == 0) + return true; + + bf = buf(datasize); + if (bf == 0) + return false; + if (read(m_fd, bf, datasize) != int(datasize)){ + m_reason << "CirCache::get: read() failed: errno " << errno; + return false; + } + data->assign(bf, datasize); + + return true; + } + }; CirCache::CirCache(const string& dir) : m_dir(dir) { m_d = new CirCacheInternal; + LOGDEB(("CirCache: [%s]\n", m_dir.c_str())); } CirCache::~CirCache() @@ -323,8 +365,9 @@ string CirCache::getReason() return m_d ? m_d->m_reason.str() : "Not initialized"; } -bool CirCache::create(off_t m_maxsize) +bool CirCache::create(off_t m_maxsize, bool onlyifnotexists) { + LOGDEB(("CirCache::create: [%s]\n", m_dir.c_str())); assert(m_d != 0); struct stat st; if (stat(m_dir.c_str(), &st) < 0) { @@ -333,6 +376,9 @@ bool CirCache::create(off_t m_maxsize) ") failed" << " errno " << errno; return false; } + } else { + if (onlyifnotexists) + return open(CC_OPWRITE); } if ((m_d->m_fd = ::open(m_d->datafn(m_dir).c_str(), @@ -460,33 +506,11 @@ bool CirCache::get(const string& udi, string& dict, string& data, int instance) } else if (ret != CCScanHook::Stop) { return false; } - off_t offs = getter.m_offs + CIRCACHE_HEADER_SIZE; - if (lseek(m_d->m_fd, offs, 0) != offs) { - m_d->m_reason << "CirCache::get: lseek(" << offs << ") failed: " << - errno; - return false; - } - char *bf = m_d->buf(getter.m_hd.dicsize); - if (bf == 0) - return false; - if (read(m_d->m_fd, bf, getter.m_hd.dicsize) != int(getter.m_hd.dicsize)) { - m_d->m_reason << "CirCache::get: read() failed: errno " << errno; - return false; - } - dict.assign(bf, getter.m_hd.dicsize); - - bf = m_d->buf(getter.m_hd.datasize); - if (bf == 0) - return false; - if (read(m_d->m_fd, bf, getter.m_hd.datasize) != int(getter.m_hd.datasize)){ - m_d->m_reason << "CirCache::get: read() failed: errno " << errno; - return false; - } - data.assign(bf, getter.m_hd.datasize); - - return true; + return m_d->readDicData(getter.m_offs, getter.m_hd.dicsize, dict, + getter.m_hd.datasize, &data); } + class CCScanHookSpacer : public CCScanHook { public: unsigned int sizewanted; @@ -641,6 +665,70 @@ bool CirCache::put(const string& udi, const string& idic, const string& data) return true; } +bool CirCache::rewind(bool& eof) +{ + assert(m_d != 0); + eof = false; + m_d->m_itoffs = m_d->m_oheadoffs; + CCScanHook::status st = m_d->readentryheader(m_d->m_itoffs, m_d->m_ithd); + switch(st) { + case CCScanHook::Eof: + eof = true; + return false; + case CCScanHook::Continue: + return true; + default: + return false; + } +} + +bool CirCache::next(bool& eof) +{ + assert(m_d != 0); + + eof = false; + + m_d->m_itoffs += CIRCACHE_HEADER_SIZE + m_d->m_ithd.dicsize + + m_d->m_ithd.datasize + m_d->m_ithd.padsize; + if (m_d->m_itoffs == m_d->m_oheadoffs) { + eof = true; + return false; + } + + CCScanHook::status st = m_d->readentryheader(m_d->m_itoffs, m_d->m_ithd); + if (st == CCScanHook::Eof) { + m_d->m_itoffs = CIRCACHE_FIRSTBLOCK_SIZE; + if (m_d->m_itoffs == m_d->m_oheadoffs) { + eof = true; + return false; + } + st = m_d->readentryheader(m_d->m_itoffs, m_d->m_ithd); + } + if (st == CCScanHook::Continue) + return true; + return false; +} + +bool CirCache::getcurrentdict(string& dict) +{ + assert(m_d != 0); + if (!m_d->readDicData(m_d->m_itoffs, m_d->m_ithd.dicsize, dict, 0, 0)) + return false; + return true; +} + +bool CirCache::getcurrent(string& udi, string& dict, string& data) +{ + assert(m_d != 0); + if (!m_d->readDicData(m_d->m_itoffs, m_d->m_ithd.dicsize, dict, + m_d->m_ithd.datasize, &data)) + return false; + + ConfSimple conf(dict, 1); + conf.get("udi", udi, ""); + return true; +} + #else // TEST -> #include diff --git a/src/utils/circache.h b/src/utils/circache.h index 2d802b38..5cccc973 100644 --- a/src/utils/circache.h +++ b/src/utils/circache.h @@ -49,7 +49,7 @@ public: virtual string getReason(); - virtual bool create(off_t maxsize); + virtual bool create(off_t maxsize, bool onlyifnotexists = true); enum OpMode {CC_OPREAD, CC_OPWRITE}; virtual bool open(OpMode mode); @@ -59,12 +59,23 @@ public: virtual bool put(const string& udi, const string& dic, const string& data); + /* Maybe we'll have separate iterators one day, but this is good enough for + * now. No put() operations should be performed while using these. + */ + virtual bool rewind(bool& eof); + virtual bool next(bool& eof); + virtual bool getcurrent(string& udi, string& dic, string& data); + virtual bool getcurrentdict(string& dict); + /* Debug */ virtual bool dump(); protected: CirCacheInternal *m_d; string m_dir; +private: + CirCache(const CirCache&) {} + CirCache& operator=(const CirCache&) {return *this;} }; #endif /* _circache_h_included_ */