circache ok

This commit is contained in:
dockes 2009-11-17 14:52:01 +00:00
parent 120cf441ba
commit 1a767213a0
2 changed files with 291 additions and 56 deletions

View File

@ -32,11 +32,13 @@ static char rcsid[] = "@(#$Id: $ (C) 2009 J.F.Dockes";
#include <sstream> #include <sstream>
#include <iostream> #include <iostream>
#include <map>
#include "circache.h" #include "circache.h"
#include "conftree.h" #include "conftree.h"
#include "debuglog.h" #include "debuglog.h"
#include "smallut.h" #include "smallut.h"
#include "md5.h"
using namespace std; using namespace std;
@ -53,13 +55,19 @@ using namespace std;
* *
* There is a write position, which can be at eof while * There is a write position, which can be at eof while
* the file is growing, or inside the file if we are recycling. This is stored * the file is growing, or inside the file if we are recycling. This is stored
* in the header, together with the maximum size * in the header (oheadoffs), together with the maximum size
* *
* If we are recycling, we have to take care to compute the size of the * If we are recycling, we have to take care to compute the size of the
* possible remaining area from the last object invalidated by the write, * possible remaining area from the last object invalidated by the write,
* pad it with neutral data and store the size in the new header. * pad it with neutral data and store the size in the new header. To help with
* this, the address for the last object written is also kept in the header
* (nheadoffs, npadsize)
*
*/ */
typedef unsigned long ULONG;
typedef unsigned int UINT;
// First block size // First block size
#define CIRCACHE_FIRSTBLOCK_SIZE 1024 #define CIRCACHE_FIRSTBLOCK_SIZE 1024
@ -71,9 +79,9 @@ const char *headerformat = "circacheSizes = %x %x %x";
class EntryHeaderData { class EntryHeaderData {
public: public:
EntryHeaderData() : dicsize(0), datasize(0), padsize(0) {} EntryHeaderData() : dicsize(0), datasize(0), padsize(0) {}
unsigned int dicsize; UINT dicsize;
unsigned int datasize; UINT datasize;
unsigned int padsize; UINT padsize;
}; };
// A callback class for the header-hopping function. // A callback class for the header-hopping function.
@ -81,10 +89,61 @@ class CCScanHook {
public: public:
virtual ~CCScanHook() {} virtual ~CCScanHook() {}
enum status {Stop, Continue, Error, Eof}; enum status {Stop, Continue, Error, Eof};
virtual status takeone(off_t offs, const string& udi, unsigned int dicsize, virtual status takeone(off_t offs, const string& udi, UINT dicsize,
unsigned int datasize, unsigned int padsize) = 0; UINT datasize, UINT padsize) = 0;
}; };
// We have an auxiliary in-memory multimap of hashed-udi -> offset to
// speed things up. This is created the first time the file is scanned
// (on the first get), and not saved to disk.
// The map key: hashed udi. As a very short hash seems sufficient,
// maybe we could find something faster/simpler than md5?
#define UDIHLEN 4
class UdiH {
public:
unsigned char h[UDIHLEN];
UdiH(const string& udi)
{
MD5_CTX ctx;
MD5Init(&ctx);
MD5Update(&ctx, (const unsigned char*)udi.c_str(), udi.length());
unsigned char md[16];
MD5Final(md, &ctx);
memcpy(h, md, UDIHLEN);
}
string asHexString() const {
static const char hex[]="0123456789abcdef";
string out;
for (int i = 0; i < UDIHLEN; i++) {
out.append(1, hex[h[i] >> 4]);
out.append(1, hex[h[i] & 0x0f]);
}
return out;
}
bool operator==(const UdiH& r) const
{
for (int i = 0; i < UDIHLEN; i++)
if (h[i] != r.h[i])
return false;
return true;
}
bool operator<(const UdiH& r) const
{
for (int i = 0; i < UDIHLEN; i++) {
if (h[i] < r.h[i])
return true;
if (h[i] > r.h[i])
return false;
}
return false;
}
};
typedef multimap<UdiH, off_t> kh_type;
typedef multimap<UdiH, off_t>::value_type kh_value_type;
class CirCacheInternal { class CirCacheInternal {
public: public:
int m_fd; int m_fd;
@ -102,16 +161,87 @@ public:
// A place to hold data when reading // A place to hold data when reading
char *m_buffer; char *m_buffer;
size_t m_bufsiz; size_t m_bufsiz;
// Error messages // Error messages
ostringstream m_reason; ostringstream m_reason;
// State for rewind/next/getcurrent operation // State for rewind/next/getcurrent operation. This could/should
// be moved to a separate iterator.
off_t m_itoffs; off_t m_itoffs;
EntryHeaderData m_ithd; EntryHeaderData m_ithd;
// Offset cache
kh_type m_ofskh;
bool m_ofskhcplt; // Has cache been fully read since open?
// Add udi->offset translation to map
bool khEnter(const string& udi, off_t ofs)
{
UdiH h(udi);
LOGDEB2(("Circache::khEnter: h %s offs %lu udi [%s]\n",
h.asHexString().c_str(), (ULONG)ofs, udi.c_str()));
pair<kh_type::iterator, kh_type::iterator> p = m_ofskh.equal_range(h);
if (p.first != m_ofskh.end() && p.first->first == h) {
for (kh_type::iterator it = p.first; it != p.second; it++) {
LOGDEB2(("Circache::khEnter: col h %s, ofs %lu\n",
it->first.asHexString().c_str(),
(ULONG)it->second));
if (it->second == ofs) {
// (h,offs) already there. Happens
LOGDEB2(("Circache::khEnter: already there\n"));
return true;
}
}
}
m_ofskh.insert(kh_value_type(h, ofs));
LOGDEB2(("Circache::khEnter: inserted\n"));
return true;
}
void khDump()
{
    // Debug helper: log every (hash, offset) pair held in the offset map,
    // in map order.
    for (kh_type::const_iterator it = m_ofskh.begin();
         it != m_ofskh.end(); ++it) {
        // The offset is cast to unsigned long, so it has to be printed
        // with %lu (a %d conversion does not match the argument type).
        LOGDEB(("Circache::KHDUMP: %s %lu\n",
                it->first.asHexString().c_str(), (ULONG)it->second));
    }
}
bool khFind(const string& udi, vector<off_t>& ofss)
{
ofss.clear();
UdiH h(udi);
LOGDEB2(("Circache::khFind: h %s udi [%s]\n",
h.asHexString().c_str(), udi.c_str()));
pair<kh_type::iterator, kh_type::iterator> p =
m_ofskh.equal_range(h);
#if 0
if (p.first == m_ofskh.end()) LOGDEB(("KHFIND: FIRST END()\n"));
if (p.second == m_ofskh.end()) LOGDEB(("KHFIND: SECOND END()\n"));
if (!(p.first->first == h))
LOGDEB(("KHFIND: NOKEY: %s %s\n",
p.first->first.asHexString().c_str(),
p.second->first.asHexString().c_str()));
#endif
if (p.first == m_ofskh.end() || !(p.first->first == h))
return false;
for (kh_type::iterator it = p.first; it != p.second; it++) {
ofss.push_back(it->second);
}
return true;
}
CirCacheInternal() CirCacheInternal()
: m_fd(-1), m_maxsize(-1), m_oheadoffs(-1), : m_fd(-1), m_maxsize(-1), m_oheadoffs(-1),
m_nheadoffs(0), m_npadsize(0), m_buffer(0), m_bufsiz(0) m_nheadoffs(0), m_npadsize(0), m_buffer(0), m_bufsiz(0),
m_ofskhcplt(false)
{} {}
~CirCacheInternal() ~CirCacheInternal()
@ -153,6 +283,7 @@ public:
"npadsize = " << m_npadsize << "\n" << "npadsize = " << m_npadsize << "\n" <<
" " << " " <<
" " << " " <<
" " <<
"\0"; "\0";
int sz = int(s.str().size()); int sz = int(s.str().size());
@ -264,8 +395,10 @@ public:
bool already_folded = false; bool already_folded = false;
while (true) { while (true) {
if (already_folded && startoffset == so0) if (already_folded && startoffset == so0) {
m_ofskhcplt = true;
return CCScanHook::Eof; return CCScanHook::Eof;
}
EntryHeaderData d; EntryHeaderData d;
CCScanHook::status st; CCScanHook::status st;
@ -299,7 +432,8 @@ public:
m_reason << "scan: no udi in dic"; m_reason << "scan: no udi in dic";
return CCScanHook::Error; return CCScanHook::Error;
} }
khEnter(udi, startoffset);
// Call callback // Call callback
CCScanHook::status a = CCScanHook::status a =
user->takeone(startoffset, udi, d.dicsize, d.datasize, user->takeone(startoffset, udi, d.dicsize, d.datasize,
@ -315,10 +449,21 @@ public:
} }
} }
bool readDicData(off_t hoffs, unsigned int dicsize, string& dict, bool readHDicData(off_t hoffs, EntryHeaderData& d, string& dic,
unsigned int datasize, string* data) string* data)
{
if (readentryheader(hoffs, d) != CCScanHook::Continue)
return false;
return readDicData(hoffs, d.dicsize, dic, d.datasize, data);
}
bool readDicData(off_t hoffs, UINT dicsize, string& dic,
UINT datasize, string* data)
{ {
off_t offs = hoffs + CIRCACHE_HEADER_SIZE; off_t offs = hoffs + CIRCACHE_HEADER_SIZE;
// This syscall could be avoided in some cases if we saved the offset
// at each seek. In most cases, we just read the header and we are
// at the right position
if (lseek(m_fd, offs, 0) != offs) { if (lseek(m_fd, offs, 0) != offs) {
m_reason << "CirCache::get: lseek(" << offs << ") failed: " << m_reason << "CirCache::get: lseek(" << offs << ") failed: " <<
errno; errno;
@ -331,7 +476,8 @@ public:
m_reason << "CirCache::get: read() failed: errno " << errno; m_reason << "CirCache::get: read() failed: errno " << errno;
return false; return false;
} }
dict.assign(bf, dicsize); dic.assign(bf, dicsize);
if (data == 0) if (data == 0)
return true; return true;
@ -353,7 +499,7 @@ CirCache::CirCache(const string& dir)
: m_dir(dir) : m_dir(dir)
{ {
m_d = new CirCacheInternal; m_d = new CirCacheInternal;
LOGDEB(("CirCache: [%s]\n", m_dir.c_str())); LOGDEB0(("CirCache: [%s]\n", m_dir.c_str()));
} }
CirCache::~CirCache() CirCache::~CirCache()
@ -422,8 +568,8 @@ bool CirCache::open(OpMode mode)
class CCScanHookDump : public CCScanHook { class CCScanHookDump : public CCScanHook {
public: public:
virtual status takeone(off_t offs, const string& udi, unsigned int dicsize, virtual status takeone(off_t offs, const string& udi, UINT dicsize,
unsigned int datasize, unsigned int padsize) UINT datasize, UINT padsize)
{ {
cout << "Scan: offs " << offs << " dicsize " << dicsize cout << "Scan: offs " << offs << " dicsize " << dicsize
<< " datasize " << datasize << " padsize " << padsize << << " datasize " << datasize << " padsize " << padsize <<
@ -435,8 +581,11 @@ public:
bool CirCache::dump() bool CirCache::dump()
{ {
CCScanHookDump dumper; CCScanHookDump dumper;
off_t start = m_d->m_nheadoffs > CIRCACHE_FIRSTBLOCK_SIZE ?
m_d->m_nheadoffs : CIRCACHE_FIRSTBLOCK_SIZE; // Start at oldest header. This is eof while the file is growing, scan will
// fold to bot at once.
off_t start = m_d->m_oheadoffs;
switch (m_d->scan(start, &dumper, true)) { switch (m_d->scan(start, &dumper, true)) {
case CCScanHook::Stop: case CCScanHook::Stop:
cout << "Scan returns Stop??" << endl; cout << "Scan returns Stop??" << endl;
@ -449,7 +598,7 @@ bool CirCache::dump()
cout << "Scan returns Error: " << getReason() << endl; cout << "Scan returns Error: " << getReason() << endl;
return false; return false;
case CCScanHook::Eof: case CCScanHook::Eof:
cout << "Scan returns Eof" << endl; cout << "Scan returns Eof (ok)" << endl;
return true; return true;
default: default:
cout << "Scan returns Unknown ??" << endl; cout << "Scan returns Unknown ??" << endl;
@ -468,11 +617,12 @@ public:
CCScanHookGetter(const string &udi, int ti) CCScanHookGetter(const string &udi, int ti)
: m_udi(udi), m_targinstance(ti), m_instance(0), m_offs(0){} : m_udi(udi), m_targinstance(ti), m_instance(0), m_offs(0){}
virtual status takeone(off_t offs, const string& udi, unsigned int dicsize, virtual status takeone(off_t offs, const string& udi, UINT dicsize,
unsigned int datasize, unsigned int padsize) UINT datasize, UINT padsize)
{ {
// cerr << "offs " << offs << " udi [" << udi << "] dicsize " << dicsize LOGDEB2(("Circache:Scan: off %ld udi [%s] dcsz %u dtsz %u pdsz %u\n",
// << " datasize " << datasize << " padsize " << padsize << endl; long(offs), udi.c_str(), (UINT)dicsize,
(UINT)datasize, (UINT)padsize));
if (!m_udi.compare(udi)) { if (!m_udi.compare(udi)) {
m_instance++; m_instance++;
m_offs = offs; m_offs = offs;
@ -487,19 +637,68 @@ public:
}; };
// instance == -1 means get latest. Otherwise specify from 1+ // instance == -1 means get latest. Otherwise specify from 1+
bool CirCache::get(const string& udi, string& dict, string& data, int instance) bool CirCache::get(const string& udi, string& dic, string& data, int instance)
{ {
Chrono chron;
assert(m_d != 0); assert(m_d != 0);
if (m_d->m_fd < 0) { if (m_d->m_fd < 0) {
m_d->m_reason << "CirCache::get: not open"; m_d->m_reason << "CirCache::get: not open";
return false; return false;
} }
LOGDEB(("CirCache::get: udi [%s], instance\n", udi.c_str(), instance)); LOGDEB0(("CirCache::get: udi [%s], instance %d\n", udi.c_str(), instance));
// If memory map is up to date, use it:
if (m_d->m_ofskhcplt) {
    LOGDEB1(("CirCache::get: using ofskh\n"));
    //m_d->khDump();
    vector<off_t> ofss;
    if (m_d->khFind(udi, ofss)) {
        // size() yields a size_t: cast it so that the argument matches
        // the format.
        LOGDEB1(("Circache::get: h found, colls %lu\n",
                 (ULONG)ofss.size()));
        // Number of the next matching entry (instances count from 1)
        int finst = 1;
        // Set only when the entry numbered 'instance' was actually met.
        // Comparing finst to instance after the loop is not enough: finst
        // is incremented after each non-final match, so it would equal
        // instance when the request is for one more instance than exist.
        bool exact = false;
        EntryHeaderData d_good;
        off_t o_good = 0;
        for (vector<off_t>::iterator it = ofss.begin();
             it != ofss.end(); ++it) {
            LOGDEB1(("Circache::get: trying offs %lu\n", (ULONG)*it));
            // The offsets are only candidates (same hash): read the
            // entry dictionary to compare the actual udi.
            string fdic;
            EntryHeaderData d;
            if (!m_d->readHDicData(*it, d, fdic, 0))
                return false;
            ConfSimple conf(fdic);
            string fudi;
            if (!conf.get("udi", fudi, "")) {
                m_d->m_reason << "get: bad file: no udi in dic";
                return false;
            }
            if (!fudi.compare(udi)) {
                // Found one, memorize offset. Done if instance
                // matches, else go on. If instance is -1 need to
                // go to the end of the list anyway
                d_good = d;
                o_good = *it;
                if (finst == instance) {
                    exact = true;
                    break;
                }
                finst++;
            }
        }
        // Did we read an appropriate entry ?
        if (o_good != 0 && (instance == -1 || exact)) {
            bool ret = m_d->readDicData(o_good, d_good.dicsize, dic,
                                        d_good.datasize, &data);
            LOGDEB0(("Circache::get: hfound, %d mS\n",
                     chron.millis()));
            return ret;
        }
        // Else try to scan anyway.
    }
}
CCScanHookGetter getter(udi, instance); CCScanHookGetter getter(udi, instance);
off_t start = m_d->m_nheadoffs > CIRCACHE_FIRSTBLOCK_SIZE ? off_t start = m_d->m_oheadoffs;
m_d->m_nheadoffs : CIRCACHE_FIRSTBLOCK_SIZE;
CCScanHook::status ret = m_d->scan(start, &getter, true); CCScanHook::status ret = m_d->scan(start, &getter, true);
if (ret == CCScanHook::Eof) { if (ret == CCScanHook::Eof) {
@ -508,24 +707,29 @@ bool CirCache::get(const string& udi, string& dict, string& data, int instance)
} else if (ret != CCScanHook::Stop) { } else if (ret != CCScanHook::Stop) {
return false; return false;
} }
return m_d->readDicData(getter.m_offs, getter.m_hd.dicsize, dict, bool bret =
getter.m_hd.datasize, &data); m_d->readDicData(getter.m_offs, getter.m_hd.dicsize, dic,
getter.m_hd.datasize, &data);
LOGDEB0(("Circache::get: scanfound, %d mS\n", chron.millis()));
return bret;
} }
// Used to scan the file ahead until we accumulated enough space for the new
// entry.
class CCScanHookSpacer : public CCScanHook { class CCScanHookSpacer : public CCScanHook {
public: public:
unsigned int sizewanted; UINT sizewanted;
unsigned int sizeseen; UINT sizeseen;
CCScanHookSpacer(int sz) CCScanHookSpacer(int sz)
: sizewanted(sz), sizeseen(0) {assert(sz > 0);} : sizewanted(sz), sizeseen(0) {assert(sz > 0);}
virtual status takeone(off_t offs, const string& udi, unsigned int dicsize, virtual status takeone(off_t offs, const string& udi, UINT dicsize,
unsigned int datasize, unsigned int padsize) UINT datasize, UINT padsize)
{ {
LOGDEB(("ScanSpacer: offs %u dicsz %u datasz %u padsz %u udi[%s]\n", LOGDEB2(("Circache:ScanSpacer:off %u dcsz %u dtsz %u pdsz %u udi[%s]\n",
(unsigned int)offs, dicsize, datasize, padsize, udi.c_str())); (UINT)offs, dicsize, datasize, padsize, udi.c_str()));
sizeseen += CIRCACHE_HEADER_SIZE + dicsize + datasize + padsize; sizeseen += CIRCACHE_HEADER_SIZE + dicsize + datasize + padsize;
if (sizeseen >= sizewanted) if (sizeseen >= sizewanted)
return Stop; return Stop;
@ -610,7 +814,7 @@ bool CirCache::put(const string& udi, const string& idic, const string& data)
// and determine the pad size up to the 1st preserved entry // and determine the pad size up to the 1st preserved entry
int scansize = nsize - recovpadsize; int scansize = nsize - recovpadsize;
LOGDEB2(("CirCache::put: scanning for size %d from offs %u\n", LOGDEB2(("CirCache::put: scanning for size %d from offs %u\n",
scansize, (unsigned int)m_d->m_oheadoffs)); scansize, (UINT)m_d->m_oheadoffs));
CCScanHookSpacer spacer(scansize); CCScanHookSpacer spacer(scansize);
switch (m_d->scan(m_d->m_oheadoffs, &spacer)) { switch (m_d->scan(m_d->m_oheadoffs, &spacer)) {
case CCScanHook::Stop: case CCScanHook::Stop:
@ -654,9 +858,12 @@ bool CirCache::put(const string& udi, const string& idic, const string& data)
return false; return false;
} }
m_d->khEnter(udi, nwriteoffs);
// Update first block information // Update first block information
m_d->m_nheadoffs = nwriteoffs; m_d->m_nheadoffs = nwriteoffs;
m_d->m_npadsize = npadsize; m_d->m_npadsize = npadsize;
// New oldest header is the one just after the one we just wrote.
m_d->m_oheadoffs = nwriteoffs + nsize + npadsize; m_d->m_oheadoffs = nwriteoffs + nsize + npadsize;
if (nwriteoffs + nsize >= m_d->m_maxsize) { if (nwriteoffs + nsize >= m_d->m_maxsize) {
// If we are at the biggest allowed size or we are currently // If we are at the biggest allowed size or we are currently
@ -670,9 +877,13 @@ bool CirCache::put(const string& udi, const string& idic, const string& data)
bool CirCache::rewind(bool& eof) bool CirCache::rewind(bool& eof)
{ {
assert(m_d != 0); assert(m_d != 0);
eof = false; eof = false;
// Read oldest header
m_d->m_itoffs = m_d->m_oheadoffs; m_d->m_itoffs = m_d->m_oheadoffs;
CCScanHook::status st = m_d->readentryheader(m_d->m_itoffs, m_d->m_ithd); CCScanHook::status st = m_d->readentryheader(m_d->m_itoffs, m_d->m_ithd);
switch(st) { switch(st) {
case CCScanHook::Eof: case CCScanHook::Eof:
eof = true; eof = true;
@ -690,43 +901,49 @@ bool CirCache::next(bool& eof)
eof = false; eof = false;
// Skip to next header, using values stored from previous one
m_d->m_itoffs += CIRCACHE_HEADER_SIZE + m_d->m_ithd.dicsize + m_d->m_itoffs += CIRCACHE_HEADER_SIZE + m_d->m_ithd.dicsize +
m_d->m_ithd.datasize + m_d->m_ithd.padsize; m_d->m_ithd.datasize + m_d->m_ithd.padsize;
// Looped back ?
if (m_d->m_itoffs == m_d->m_oheadoffs) { if (m_d->m_itoffs == m_d->m_oheadoffs) {
eof = true; eof = true;
return false; return false;
} }
// Read. If we hit physical eof, fold.
CCScanHook::status st = m_d->readentryheader(m_d->m_itoffs, m_d->m_ithd); CCScanHook::status st = m_d->readentryheader(m_d->m_itoffs, m_d->m_ithd);
if (st == CCScanHook::Eof) { if (st == CCScanHook::Eof) {
m_d->m_itoffs = CIRCACHE_FIRSTBLOCK_SIZE; m_d->m_itoffs = CIRCACHE_FIRSTBLOCK_SIZE;
if (m_d->m_itoffs == m_d->m_oheadoffs) { if (m_d->m_itoffs == m_d->m_oheadoffs) {
// Then the file is not folded yet (still growing)
eof = true; eof = true;
return false; return false;
} }
st = m_d->readentryheader(m_d->m_itoffs, m_d->m_ithd); st = m_d->readentryheader(m_d->m_itoffs, m_d->m_ithd);
} }
if (st == CCScanHook::Continue) if (st == CCScanHook::Continue)
return true; return true;
return false; return false;
} }
bool CirCache::getcurrentdict(string& dict) bool CirCache::getcurrentdict(string& dic)
{ {
assert(m_d != 0); assert(m_d != 0);
if (!m_d->readDicData(m_d->m_itoffs, m_d->m_ithd.dicsize, dict, 0, 0)) if (!m_d->readDicData(m_d->m_itoffs, m_d->m_ithd.dicsize, dic, 0, 0))
return false; return false;
return true; return true;
} }
bool CirCache::getcurrent(string& udi, string& dict, string& data) bool CirCache::getcurrent(string& udi, string& dic, string& data)
{ {
assert(m_d != 0); assert(m_d != 0);
if (!m_d->readDicData(m_d->m_itoffs, m_d->m_ithd.dicsize, dict, if (!m_d->readDicData(m_d->m_itoffs, m_d->m_ithd.dicsize, dic,
m_d->m_ithd.datasize, &data)) m_d->m_ithd.datasize, &data))
return false; return false;
ConfSimple conf(dict, 1); ConfSimple conf(dic, 1);
conf.get("udi", udi, ""); conf.get("udi", udi, "");
return true; return true;
} }
@ -841,17 +1058,19 @@ int main(int argc, char **argv)
} }
cc.open(CirCache::CC_OPREAD); cc.open(CirCache::CC_OPREAD);
} else if (op_flags & OPT_g) { } else if (op_flags & OPT_g) {
string udi = *argv++;argc--;
if (!cc.open(CirCache::CC_OPREAD)) { if (!cc.open(CirCache::CC_OPREAD)) {
cerr << "Open failed: " << cc.getReason() << endl; cerr << "Open failed: " << cc.getReason() << endl;
exit(1); exit(1);
} }
string dic, data; while (argc) {
if (!cc.get(udi, dic, data, instance)) { string udi = *argv++;argc--;
cerr << "Get failed: " << cc.getReason() << endl; string dic, data;
exit(1); if (!cc.get(udi, dic, data, instance)) {
cerr << "Get failed: " << cc.getReason() << endl;
exit(1);
}
cout << "Dict: [" << dic << "]" << endl;
} }
cout << "Dict: [" << dic << "]" << endl;
} else if (op_flags & OPT_d) { } else if (op_flags & OPT_d) {
if (!cc.open(CirCache::CC_OPREAD)) { if (!cc.open(CirCache::CC_OPREAD)) {
cerr << "Open failed: " << cc.getReason() << endl; cerr << "Open failed: " << cc.getReason() << endl;

View File

@ -17,6 +17,7 @@
#ifndef _circache_h_included_ #ifndef _circache_h_included_
#define _circache_h_included_ #define _circache_h_included_
/* @(#$Id: $ (C) 2009 J.F.Dockes */ /* @(#$Id: $ (C) 2009 J.F.Dockes */
/** /**
* A data cache implemented as a circularly managed file * A data cache implemented as a circularly managed file
* *
@ -27,10 +28,18 @@
* Data objects inside the cache each have two parts: a data segment and an * Data objects inside the cache each have two parts: a data segment and an
* attribute (metadata) dictionary. * attribute (metadata) dictionary.
* They are named using the same identifiers that are used inside the Recoll * They are named using the same identifiers that are used inside the Recoll
* index, but any unique identifier scheme would work. * index (the UDI).
* *
* The names are stored in an auxiliary index for fast access. This index can * Inside the file, the UDIs are stored inside the entry dictionary
* be rebuilt from the main file. * under the key "udi", which will appear when the dictionary is read
* back even if it was not there when written.
*
* It is assumed that the dictionaries are small (they are routinely read/parsed)
*
* A problem with this approach is that repetitively storing the same
* object will evict all others. This could be somewhat optimized by reusing
* the last entry if it has the same udi as the one written, but not done
* currently.
*/ */
#include <sys/types.h> #include <sys/types.h>
@ -59,15 +68,22 @@ public:
virtual bool put(const string& udi, const string& dic, const string& data); virtual bool put(const string& udi, const string& dic, const string& data);
/* Maybe we'll have separate iterators one day, but this is good enough for /** Walk the archive.
* now. No put() operations should be performed while using these. *
* Maybe we'll have separate iterators one day, but this is good
* enough for now. No put() operations should be performed while
* using these.
*/ */
/** Back to oldest */
virtual bool rewind(bool& eof); virtual bool rewind(bool& eof);
virtual bool next(bool& eof); /** Get entry under cursor */
virtual bool getcurrent(string& udi, string& dic, string& data); virtual bool getcurrent(string& udi, string& dic, string& data);
/** Get current entry dict only (ie: udi is in dict) */
virtual bool getcurrentdict(string& dict); virtual bool getcurrentdict(string& dict);
/** Skip to next. (false && !eof) -> error, (false&&eof)->EOF. */
virtual bool next(bool& eof);
/* Debug */ /* Debug. This writes the entry headers to stdout */
virtual bool dump(); virtual bool dump();
protected: protected: