circache ok
This commit is contained in:
parent
120cf441ba
commit
1a767213a0
@ -32,11 +32,13 @@ static char rcsid[] = "@(#$Id: $ (C) 2009 J.F.Dockes";
|
|||||||
|
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include <map>
|
||||||
|
|
||||||
#include "circache.h"
|
#include "circache.h"
|
||||||
#include "conftree.h"
|
#include "conftree.h"
|
||||||
#include "debuglog.h"
|
#include "debuglog.h"
|
||||||
#include "smallut.h"
|
#include "smallut.h"
|
||||||
|
#include "md5.h"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
@ -53,13 +55,19 @@ using namespace std;
|
|||||||
*
|
*
|
||||||
* There is a write position, which can be at eof while
|
* There is a write position, which can be at eof while
|
||||||
* the file is growing, or inside the file if we are recycling. This is stored
|
* the file is growing, or inside the file if we are recycling. This is stored
|
||||||
* in the header, together with the maximum size
|
* in the header (oheadoffs), together with the maximum size
|
||||||
*
|
*
|
||||||
* If we are recycling, we have to take care to compute the size of the
|
* If we are recycling, we have to take care to compute the size of the
|
||||||
* possible remaining area from the last object invalidated by the write,
|
* possible remaining area from the last object invalidated by the write,
|
||||||
* pad it with neutral data and store the size in the new header.
|
* pad it with neutral data and store the size in the new header. To help with
|
||||||
|
* this, the address for the last object written is also kept in the header
|
||||||
|
* (nheadoffs, npadsize)
|
||||||
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
typedef unsigned long ULONG;
|
||||||
|
typedef unsigned int UINT;
|
||||||
|
|
||||||
// First block size
|
// First block size
|
||||||
#define CIRCACHE_FIRSTBLOCK_SIZE 1024
|
#define CIRCACHE_FIRSTBLOCK_SIZE 1024
|
||||||
|
|
||||||
@ -71,9 +79,9 @@ const char *headerformat = "circacheSizes = %x %x %x";
|
|||||||
class EntryHeaderData {
|
class EntryHeaderData {
|
||||||
public:
|
public:
|
||||||
EntryHeaderData() : dicsize(0), datasize(0), padsize(0) {}
|
EntryHeaderData() : dicsize(0), datasize(0), padsize(0) {}
|
||||||
unsigned int dicsize;
|
UINT dicsize;
|
||||||
unsigned int datasize;
|
UINT datasize;
|
||||||
unsigned int padsize;
|
UINT padsize;
|
||||||
};
|
};
|
||||||
|
|
||||||
// A callback class for the header-hopping function.
|
// A callback class for the header-hopping function.
|
||||||
@ -81,10 +89,61 @@ class CCScanHook {
|
|||||||
public:
|
public:
|
||||||
virtual ~CCScanHook() {}
|
virtual ~CCScanHook() {}
|
||||||
enum status {Stop, Continue, Error, Eof};
|
enum status {Stop, Continue, Error, Eof};
|
||||||
virtual status takeone(off_t offs, const string& udi, unsigned int dicsize,
|
virtual status takeone(off_t offs, const string& udi, UINT dicsize,
|
||||||
unsigned int datasize, unsigned int padsize) = 0;
|
UINT datasize, UINT padsize) = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// We have an auxiliary in-memory multimap of hashed-udi -> offset to
|
||||||
|
// speed things up. This is created the first time the file is scanned
|
||||||
|
// (on the first get), and not saved to disk.
|
||||||
|
|
||||||
|
// The map key: hashed udi. As a very short hash seems sufficient,
|
||||||
|
// maybe we could find something faster/simpler than md5?
|
||||||
|
#define UDIHLEN 4
|
||||||
|
class UdiH {
|
||||||
|
public:
|
||||||
|
unsigned char h[UDIHLEN];
|
||||||
|
|
||||||
|
UdiH(const string& udi)
|
||||||
|
{
|
||||||
|
MD5_CTX ctx;
|
||||||
|
MD5Init(&ctx);
|
||||||
|
MD5Update(&ctx, (const unsigned char*)udi.c_str(), udi.length());
|
||||||
|
unsigned char md[16];
|
||||||
|
MD5Final(md, &ctx);
|
||||||
|
memcpy(h, md, UDIHLEN);
|
||||||
|
}
|
||||||
|
|
||||||
|
string asHexString() const {
|
||||||
|
static const char hex[]="0123456789abcdef";
|
||||||
|
string out;
|
||||||
|
for (int i = 0; i < UDIHLEN; i++) {
|
||||||
|
out.append(1, hex[h[i] >> 4]);
|
||||||
|
out.append(1, hex[h[i] & 0x0f]);
|
||||||
|
}
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
bool operator==(const UdiH& r) const
|
||||||
|
{
|
||||||
|
for (int i = 0; i < UDIHLEN; i++)
|
||||||
|
if (h[i] != r.h[i])
|
||||||
|
return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
bool operator<(const UdiH& r) const
|
||||||
|
{
|
||||||
|
for (int i = 0; i < UDIHLEN; i++) {
|
||||||
|
if (h[i] < r.h[i])
|
||||||
|
return true;
|
||||||
|
if (h[i] > r.h[i])
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
typedef multimap<UdiH, off_t> kh_type;
|
||||||
|
typedef multimap<UdiH, off_t>::value_type kh_value_type;
|
||||||
|
|
||||||
class CirCacheInternal {
|
class CirCacheInternal {
|
||||||
public:
|
public:
|
||||||
int m_fd;
|
int m_fd;
|
||||||
@ -102,16 +161,87 @@ public:
|
|||||||
// A place to hold data when reading
|
// A place to hold data when reading
|
||||||
char *m_buffer;
|
char *m_buffer;
|
||||||
size_t m_bufsiz;
|
size_t m_bufsiz;
|
||||||
|
|
||||||
// Error messages
|
// Error messages
|
||||||
ostringstream m_reason;
|
ostringstream m_reason;
|
||||||
|
|
||||||
// State for rewind/next/getcurrent operation
|
// State for rewind/next/getcurrent operation. This could/should
|
||||||
|
// be moved to a separate iterator.
|
||||||
off_t m_itoffs;
|
off_t m_itoffs;
|
||||||
EntryHeaderData m_ithd;
|
EntryHeaderData m_ithd;
|
||||||
|
|
||||||
|
// Offset cache
|
||||||
|
kh_type m_ofskh;
|
||||||
|
bool m_ofskhcplt; // Has cache been fully read since open?
|
||||||
|
|
||||||
|
// Add udi->offset translation to map
|
||||||
|
bool khEnter(const string& udi, off_t ofs)
|
||||||
|
{
|
||||||
|
UdiH h(udi);
|
||||||
|
|
||||||
|
LOGDEB2(("Circache::khEnter: h %s offs %lu udi [%s]\n",
|
||||||
|
h.asHexString().c_str(), (ULONG)ofs, udi.c_str()));
|
||||||
|
|
||||||
|
pair<kh_type::iterator, kh_type::iterator> p = m_ofskh.equal_range(h);
|
||||||
|
|
||||||
|
if (p.first != m_ofskh.end() && p.first->first == h) {
|
||||||
|
for (kh_type::iterator it = p.first; it != p.second; it++) {
|
||||||
|
LOGDEB2(("Circache::khEnter: col h %s, ofs %lu\n",
|
||||||
|
it->first.asHexString().c_str(),
|
||||||
|
(ULONG)it->second));
|
||||||
|
if (it->second == ofs) {
|
||||||
|
// (h,offs) already there. Happens
|
||||||
|
LOGDEB2(("Circache::khEnter: already there\n"));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
m_ofskh.insert(kh_value_type(h, ofs));
|
||||||
|
LOGDEB2(("Circache::khEnter: inserted\n"));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
void khDump()
|
||||||
|
{
|
||||||
|
for (kh_type::const_iterator it = m_ofskh.begin();
|
||||||
|
it != m_ofskh.end(); it++) {
|
||||||
|
LOGDEB(("Circache::KHDUMP: %s %d\n",
|
||||||
|
it->first.asHexString().c_str(), (ULONG)it->second));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
bool khFind(const string& udi, vector<off_t>& ofss)
|
||||||
|
{
|
||||||
|
ofss.clear();
|
||||||
|
|
||||||
|
UdiH h(udi);
|
||||||
|
|
||||||
|
LOGDEB2(("Circache::khFind: h %s udi [%s]\n",
|
||||||
|
h.asHexString().c_str(), udi.c_str()));
|
||||||
|
|
||||||
|
pair<kh_type::iterator, kh_type::iterator> p =
|
||||||
|
m_ofskh.equal_range(h);
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
if (p.first == m_ofskh.end()) LOGDEB(("KHFIND: FIRST END()\n"));
|
||||||
|
if (p.second == m_ofskh.end()) LOGDEB(("KHFIND: SECOND END()\n"));
|
||||||
|
if (!(p.first->first == h))
|
||||||
|
LOGDEB(("KHFIND: NOKEY: %s %s\n",
|
||||||
|
p.first->first.asHexString().c_str(),
|
||||||
|
p.second->first.asHexString().c_str()));
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (p.first == m_ofskh.end() || !(p.first->first == h))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
for (kh_type::iterator it = p.first; it != p.second; it++) {
|
||||||
|
ofss.push_back(it->second);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
CirCacheInternal()
|
CirCacheInternal()
|
||||||
: m_fd(-1), m_maxsize(-1), m_oheadoffs(-1),
|
: m_fd(-1), m_maxsize(-1), m_oheadoffs(-1),
|
||||||
m_nheadoffs(0), m_npadsize(0), m_buffer(0), m_bufsiz(0)
|
m_nheadoffs(0), m_npadsize(0), m_buffer(0), m_bufsiz(0),
|
||||||
|
m_ofskhcplt(false)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
~CirCacheInternal()
|
~CirCacheInternal()
|
||||||
@ -153,6 +283,7 @@ public:
|
|||||||
"npadsize = " << m_npadsize << "\n" <<
|
"npadsize = " << m_npadsize << "\n" <<
|
||||||
" " <<
|
" " <<
|
||||||
" " <<
|
" " <<
|
||||||
|
" " <<
|
||||||
"\0";
|
"\0";
|
||||||
|
|
||||||
int sz = int(s.str().size());
|
int sz = int(s.str().size());
|
||||||
@ -264,8 +395,10 @@ public:
|
|||||||
bool already_folded = false;
|
bool already_folded = false;
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
if (already_folded && startoffset == so0)
|
if (already_folded && startoffset == so0) {
|
||||||
|
m_ofskhcplt = true;
|
||||||
return CCScanHook::Eof;
|
return CCScanHook::Eof;
|
||||||
|
}
|
||||||
|
|
||||||
EntryHeaderData d;
|
EntryHeaderData d;
|
||||||
CCScanHook::status st;
|
CCScanHook::status st;
|
||||||
@ -299,7 +432,8 @@ public:
|
|||||||
m_reason << "scan: no udi in dic";
|
m_reason << "scan: no udi in dic";
|
||||||
return CCScanHook::Error;
|
return CCScanHook::Error;
|
||||||
}
|
}
|
||||||
|
khEnter(udi, startoffset);
|
||||||
|
|
||||||
// Call callback
|
// Call callback
|
||||||
CCScanHook::status a =
|
CCScanHook::status a =
|
||||||
user->takeone(startoffset, udi, d.dicsize, d.datasize,
|
user->takeone(startoffset, udi, d.dicsize, d.datasize,
|
||||||
@ -315,10 +449,21 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool readDicData(off_t hoffs, unsigned int dicsize, string& dict,
|
bool readHDicData(off_t hoffs, EntryHeaderData& d, string& dic,
|
||||||
unsigned int datasize, string* data)
|
string* data)
|
||||||
|
{
|
||||||
|
if (readentryheader(hoffs, d) != CCScanHook::Continue)
|
||||||
|
return false;
|
||||||
|
return readDicData(hoffs, d.dicsize, dic, d.datasize, data);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool readDicData(off_t hoffs, UINT dicsize, string& dic,
|
||||||
|
UINT datasize, string* data)
|
||||||
{
|
{
|
||||||
off_t offs = hoffs + CIRCACHE_HEADER_SIZE;
|
off_t offs = hoffs + CIRCACHE_HEADER_SIZE;
|
||||||
|
// This syscall could be avoided in some cases if we saved the offset
|
||||||
|
// at each seek. In most cases, we just read the header and we are
|
||||||
|
// at the right position
|
||||||
if (lseek(m_fd, offs, 0) != offs) {
|
if (lseek(m_fd, offs, 0) != offs) {
|
||||||
m_reason << "CirCache::get: lseek(" << offs << ") failed: " <<
|
m_reason << "CirCache::get: lseek(" << offs << ") failed: " <<
|
||||||
errno;
|
errno;
|
||||||
@ -331,7 +476,8 @@ public:
|
|||||||
m_reason << "CirCache::get: read() failed: errno " << errno;
|
m_reason << "CirCache::get: read() failed: errno " << errno;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
dict.assign(bf, dicsize);
|
dic.assign(bf, dicsize);
|
||||||
|
|
||||||
if (data == 0)
|
if (data == 0)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
@ -353,7 +499,7 @@ CirCache::CirCache(const string& dir)
|
|||||||
: m_dir(dir)
|
: m_dir(dir)
|
||||||
{
|
{
|
||||||
m_d = new CirCacheInternal;
|
m_d = new CirCacheInternal;
|
||||||
LOGDEB(("CirCache: [%s]\n", m_dir.c_str()));
|
LOGDEB0(("CirCache: [%s]\n", m_dir.c_str()));
|
||||||
}
|
}
|
||||||
|
|
||||||
CirCache::~CirCache()
|
CirCache::~CirCache()
|
||||||
@ -422,8 +568,8 @@ bool CirCache::open(OpMode mode)
|
|||||||
|
|
||||||
class CCScanHookDump : public CCScanHook {
|
class CCScanHookDump : public CCScanHook {
|
||||||
public:
|
public:
|
||||||
virtual status takeone(off_t offs, const string& udi, unsigned int dicsize,
|
virtual status takeone(off_t offs, const string& udi, UINT dicsize,
|
||||||
unsigned int datasize, unsigned int padsize)
|
UINT datasize, UINT padsize)
|
||||||
{
|
{
|
||||||
cout << "Scan: offs " << offs << " dicsize " << dicsize
|
cout << "Scan: offs " << offs << " dicsize " << dicsize
|
||||||
<< " datasize " << datasize << " padsize " << padsize <<
|
<< " datasize " << datasize << " padsize " << padsize <<
|
||||||
@ -435,8 +581,11 @@ public:
|
|||||||
bool CirCache::dump()
|
bool CirCache::dump()
|
||||||
{
|
{
|
||||||
CCScanHookDump dumper;
|
CCScanHookDump dumper;
|
||||||
off_t start = m_d->m_nheadoffs > CIRCACHE_FIRSTBLOCK_SIZE ?
|
|
||||||
m_d->m_nheadoffs : CIRCACHE_FIRSTBLOCK_SIZE;
|
// Start at oldest header. This is eof while the file is growing, scan will
|
||||||
|
// fold to bot at once.
|
||||||
|
off_t start = m_d->m_oheadoffs;
|
||||||
|
|
||||||
switch (m_d->scan(start, &dumper, true)) {
|
switch (m_d->scan(start, &dumper, true)) {
|
||||||
case CCScanHook::Stop:
|
case CCScanHook::Stop:
|
||||||
cout << "Scan returns Stop??" << endl;
|
cout << "Scan returns Stop??" << endl;
|
||||||
@ -449,7 +598,7 @@ bool CirCache::dump()
|
|||||||
cout << "Scan returns Error: " << getReason() << endl;
|
cout << "Scan returns Error: " << getReason() << endl;
|
||||||
return false;
|
return false;
|
||||||
case CCScanHook::Eof:
|
case CCScanHook::Eof:
|
||||||
cout << "Scan returns Eof" << endl;
|
cout << "Scan returns Eof (ok)" << endl;
|
||||||
return true;
|
return true;
|
||||||
default:
|
default:
|
||||||
cout << "Scan returns Unknown ??" << endl;
|
cout << "Scan returns Unknown ??" << endl;
|
||||||
@ -468,11 +617,12 @@ public:
|
|||||||
CCScanHookGetter(const string &udi, int ti)
|
CCScanHookGetter(const string &udi, int ti)
|
||||||
: m_udi(udi), m_targinstance(ti), m_instance(0), m_offs(0){}
|
: m_udi(udi), m_targinstance(ti), m_instance(0), m_offs(0){}
|
||||||
|
|
||||||
virtual status takeone(off_t offs, const string& udi, unsigned int dicsize,
|
virtual status takeone(off_t offs, const string& udi, UINT dicsize,
|
||||||
unsigned int datasize, unsigned int padsize)
|
UINT datasize, UINT padsize)
|
||||||
{
|
{
|
||||||
// cerr << "offs " << offs << " udi [" << udi << "] dicsize " << dicsize
|
LOGDEB2(("Circache:Scan: off %ld udi [%s] dcsz %u dtsz %u pdsz %u\n",
|
||||||
// << " datasize " << datasize << " padsize " << padsize << endl;
|
long(offs), udi.c_str(), (UINT)dicsize,
|
||||||
|
(UINT)datasize, (UINT)padsize));
|
||||||
if (!m_udi.compare(udi)) {
|
if (!m_udi.compare(udi)) {
|
||||||
m_instance++;
|
m_instance++;
|
||||||
m_offs = offs;
|
m_offs = offs;
|
||||||
@ -487,19 +637,68 @@ public:
|
|||||||
};
|
};
|
||||||
|
|
||||||
// instance == -1 means get latest. Otherwise specify from 1+
|
// instance == -1 means get latest. Otherwise specify from 1+
|
||||||
bool CirCache::get(const string& udi, string& dict, string& data, int instance)
|
bool CirCache::get(const string& udi, string& dic, string& data, int instance)
|
||||||
{
|
{
|
||||||
|
Chrono chron;
|
||||||
|
|
||||||
assert(m_d != 0);
|
assert(m_d != 0);
|
||||||
if (m_d->m_fd < 0) {
|
if (m_d->m_fd < 0) {
|
||||||
m_d->m_reason << "CirCache::get: not open";
|
m_d->m_reason << "CirCache::get: not open";
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
LOGDEB(("CirCache::get: udi [%s], instance\n", udi.c_str(), instance));
|
LOGDEB0(("CirCache::get: udi [%s], instance %d\n", udi.c_str(), instance));
|
||||||
|
|
||||||
|
// If memory map is up to date, use it:
|
||||||
|
if (m_d->m_ofskhcplt) {
|
||||||
|
LOGDEB1(("CirCache::get: using ofskh\n"));
|
||||||
|
//m_d->khDump();
|
||||||
|
vector<off_t> ofss;
|
||||||
|
if (m_d->khFind(udi, ofss)) {
|
||||||
|
LOGDEB1(("Circache::get: h found, colls %d\n", ofss.size()));
|
||||||
|
int finst = 1;
|
||||||
|
EntryHeaderData d_good;
|
||||||
|
off_t o_good = 0;
|
||||||
|
for (vector<off_t>::iterator it = ofss.begin();
|
||||||
|
it != ofss.end(); it++) {
|
||||||
|
LOGDEB1(("Circache::get: trying offs %lu\n", (ULONG)*it));
|
||||||
|
string fdic;
|
||||||
|
EntryHeaderData d;
|
||||||
|
if (!m_d->readHDicData(*it, d, fdic, 0))
|
||||||
|
return false;
|
||||||
|
ConfSimple conf(fdic);
|
||||||
|
string fudi;
|
||||||
|
if (!conf.get("udi", fudi, "")) {
|
||||||
|
m_d->m_reason << "get: bad file: no udi in dic";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (!fudi.compare(udi)) {
|
||||||
|
// Found one, memorize offset. Done if instance
|
||||||
|
// matches, else go on. If instance is -1 need to
|
||||||
|
// go to the end of the list anyway
|
||||||
|
d_good = d;
|
||||||
|
o_good = *it;
|
||||||
|
if (finst == instance) {
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
finst++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Did we read an appropriate entry ?
|
||||||
|
if (o_good != 0 && (instance == -1 || instance == finst)) {
|
||||||
|
bool ret = m_d->readDicData(o_good, d_good.dicsize, dic,
|
||||||
|
d_good.datasize, &data);
|
||||||
|
LOGDEB0(("Circache::get: hfound, %d mS\n",
|
||||||
|
chron.millis()));
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
// Else try to scan anyway.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
CCScanHookGetter getter(udi, instance);
|
CCScanHookGetter getter(udi, instance);
|
||||||
off_t start = m_d->m_nheadoffs > CIRCACHE_FIRSTBLOCK_SIZE ?
|
off_t start = m_d->m_oheadoffs;
|
||||||
m_d->m_nheadoffs : CIRCACHE_FIRSTBLOCK_SIZE;
|
|
||||||
|
|
||||||
CCScanHook::status ret = m_d->scan(start, &getter, true);
|
CCScanHook::status ret = m_d->scan(start, &getter, true);
|
||||||
if (ret == CCScanHook::Eof) {
|
if (ret == CCScanHook::Eof) {
|
||||||
@ -508,24 +707,29 @@ bool CirCache::get(const string& udi, string& dict, string& data, int instance)
|
|||||||
} else if (ret != CCScanHook::Stop) {
|
} else if (ret != CCScanHook::Stop) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return m_d->readDicData(getter.m_offs, getter.m_hd.dicsize, dict,
|
bool bret =
|
||||||
getter.m_hd.datasize, &data);
|
m_d->readDicData(getter.m_offs, getter.m_hd.dicsize, dic,
|
||||||
|
getter.m_hd.datasize, &data);
|
||||||
|
LOGDEB0(("Circache::get: scanfound, %d mS\n", chron.millis()));
|
||||||
|
|
||||||
|
return bret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Used to scan the file ahead until we accumulated enough space for the new
|
||||||
|
// entry.
|
||||||
class CCScanHookSpacer : public CCScanHook {
|
class CCScanHookSpacer : public CCScanHook {
|
||||||
public:
|
public:
|
||||||
unsigned int sizewanted;
|
UINT sizewanted;
|
||||||
unsigned int sizeseen;
|
UINT sizeseen;
|
||||||
|
|
||||||
CCScanHookSpacer(int sz)
|
CCScanHookSpacer(int sz)
|
||||||
: sizewanted(sz), sizeseen(0) {assert(sz > 0);}
|
: sizewanted(sz), sizeseen(0) {assert(sz > 0);}
|
||||||
|
|
||||||
virtual status takeone(off_t offs, const string& udi, unsigned int dicsize,
|
virtual status takeone(off_t offs, const string& udi, UINT dicsize,
|
||||||
unsigned int datasize, unsigned int padsize)
|
UINT datasize, UINT padsize)
|
||||||
{
|
{
|
||||||
LOGDEB(("ScanSpacer: offs %u dicsz %u datasz %u padsz %u udi[%s]\n",
|
LOGDEB2(("Circache:ScanSpacer:off %u dcsz %u dtsz %u pdsz %u udi[%s]\n",
|
||||||
(unsigned int)offs, dicsize, datasize, padsize, udi.c_str()));
|
(UINT)offs, dicsize, datasize, padsize, udi.c_str()));
|
||||||
sizeseen += CIRCACHE_HEADER_SIZE + dicsize + datasize + padsize;
|
sizeseen += CIRCACHE_HEADER_SIZE + dicsize + datasize + padsize;
|
||||||
if (sizeseen >= sizewanted)
|
if (sizeseen >= sizewanted)
|
||||||
return Stop;
|
return Stop;
|
||||||
@ -610,7 +814,7 @@ bool CirCache::put(const string& udi, const string& idic, const string& data)
|
|||||||
// and determine the pad size up to the 1st preserved entry
|
// and determine the pad size up to the 1st preserved entry
|
||||||
int scansize = nsize - recovpadsize;
|
int scansize = nsize - recovpadsize;
|
||||||
LOGDEB2(("CirCache::put: scanning for size %d from offs %u\n",
|
LOGDEB2(("CirCache::put: scanning for size %d from offs %u\n",
|
||||||
scansize, (unsigned int)m_d->m_oheadoffs));
|
scansize, (UINT)m_d->m_oheadoffs));
|
||||||
CCScanHookSpacer spacer(scansize);
|
CCScanHookSpacer spacer(scansize);
|
||||||
switch (m_d->scan(m_d->m_oheadoffs, &spacer)) {
|
switch (m_d->scan(m_d->m_oheadoffs, &spacer)) {
|
||||||
case CCScanHook::Stop:
|
case CCScanHook::Stop:
|
||||||
@ -654,9 +858,12 @@ bool CirCache::put(const string& udi, const string& idic, const string& data)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
m_d->khEnter(udi, nwriteoffs);
|
||||||
|
|
||||||
// Update first block information
|
// Update first block information
|
||||||
m_d->m_nheadoffs = nwriteoffs;
|
m_d->m_nheadoffs = nwriteoffs;
|
||||||
m_d->m_npadsize = npadsize;
|
m_d->m_npadsize = npadsize;
|
||||||
|
// New oldest header is the one just after the one we just wrote.
|
||||||
m_d->m_oheadoffs = nwriteoffs + nsize + npadsize;
|
m_d->m_oheadoffs = nwriteoffs + nsize + npadsize;
|
||||||
if (nwriteoffs + nsize >= m_d->m_maxsize) {
|
if (nwriteoffs + nsize >= m_d->m_maxsize) {
|
||||||
// If we are at the biggest allowed size or we are currently
|
// If we are at the biggest allowed size or we are currently
|
||||||
@ -670,9 +877,13 @@ bool CirCache::put(const string& udi, const string& idic, const string& data)
|
|||||||
bool CirCache::rewind(bool& eof)
|
bool CirCache::rewind(bool& eof)
|
||||||
{
|
{
|
||||||
assert(m_d != 0);
|
assert(m_d != 0);
|
||||||
|
|
||||||
eof = false;
|
eof = false;
|
||||||
|
|
||||||
|
// Read oldest header
|
||||||
m_d->m_itoffs = m_d->m_oheadoffs;
|
m_d->m_itoffs = m_d->m_oheadoffs;
|
||||||
CCScanHook::status st = m_d->readentryheader(m_d->m_itoffs, m_d->m_ithd);
|
CCScanHook::status st = m_d->readentryheader(m_d->m_itoffs, m_d->m_ithd);
|
||||||
|
|
||||||
switch(st) {
|
switch(st) {
|
||||||
case CCScanHook::Eof:
|
case CCScanHook::Eof:
|
||||||
eof = true;
|
eof = true;
|
||||||
@ -690,43 +901,49 @@ bool CirCache::next(bool& eof)
|
|||||||
|
|
||||||
eof = false;
|
eof = false;
|
||||||
|
|
||||||
|
// Skip to next header, using values stored from previous one
|
||||||
m_d->m_itoffs += CIRCACHE_HEADER_SIZE + m_d->m_ithd.dicsize +
|
m_d->m_itoffs += CIRCACHE_HEADER_SIZE + m_d->m_ithd.dicsize +
|
||||||
m_d->m_ithd.datasize + m_d->m_ithd.padsize;
|
m_d->m_ithd.datasize + m_d->m_ithd.padsize;
|
||||||
|
|
||||||
|
// Looped back ?
|
||||||
if (m_d->m_itoffs == m_d->m_oheadoffs) {
|
if (m_d->m_itoffs == m_d->m_oheadoffs) {
|
||||||
eof = true;
|
eof = true;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Read. If we hit physical eof, fold.
|
||||||
CCScanHook::status st = m_d->readentryheader(m_d->m_itoffs, m_d->m_ithd);
|
CCScanHook::status st = m_d->readentryheader(m_d->m_itoffs, m_d->m_ithd);
|
||||||
if (st == CCScanHook::Eof) {
|
if (st == CCScanHook::Eof) {
|
||||||
m_d->m_itoffs = CIRCACHE_FIRSTBLOCK_SIZE;
|
m_d->m_itoffs = CIRCACHE_FIRSTBLOCK_SIZE;
|
||||||
if (m_d->m_itoffs == m_d->m_oheadoffs) {
|
if (m_d->m_itoffs == m_d->m_oheadoffs) {
|
||||||
|
// Then the file is not folded yet (still growing)
|
||||||
eof = true;
|
eof = true;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
st = m_d->readentryheader(m_d->m_itoffs, m_d->m_ithd);
|
st = m_d->readentryheader(m_d->m_itoffs, m_d->m_ithd);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (st == CCScanHook::Continue)
|
if (st == CCScanHook::Continue)
|
||||||
return true;
|
return true;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool CirCache::getcurrentdict(string& dict)
|
bool CirCache::getcurrentdict(string& dic)
|
||||||
{
|
{
|
||||||
assert(m_d != 0);
|
assert(m_d != 0);
|
||||||
if (!m_d->readDicData(m_d->m_itoffs, m_d->m_ithd.dicsize, dict, 0, 0))
|
if (!m_d->readDicData(m_d->m_itoffs, m_d->m_ithd.dicsize, dic, 0, 0))
|
||||||
return false;
|
return false;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool CirCache::getcurrent(string& udi, string& dict, string& data)
|
bool CirCache::getcurrent(string& udi, string& dic, string& data)
|
||||||
{
|
{
|
||||||
assert(m_d != 0);
|
assert(m_d != 0);
|
||||||
if (!m_d->readDicData(m_d->m_itoffs, m_d->m_ithd.dicsize, dict,
|
if (!m_d->readDicData(m_d->m_itoffs, m_d->m_ithd.dicsize, dic,
|
||||||
m_d->m_ithd.datasize, &data))
|
m_d->m_ithd.datasize, &data))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
ConfSimple conf(dict, 1);
|
ConfSimple conf(dic, 1);
|
||||||
conf.get("udi", udi, "");
|
conf.get("udi", udi, "");
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -841,17 +1058,19 @@ int main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
cc.open(CirCache::CC_OPREAD);
|
cc.open(CirCache::CC_OPREAD);
|
||||||
} else if (op_flags & OPT_g) {
|
} else if (op_flags & OPT_g) {
|
||||||
string udi = *argv++;argc--;
|
|
||||||
if (!cc.open(CirCache::CC_OPREAD)) {
|
if (!cc.open(CirCache::CC_OPREAD)) {
|
||||||
cerr << "Open failed: " << cc.getReason() << endl;
|
cerr << "Open failed: " << cc.getReason() << endl;
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
string dic, data;
|
while (argc) {
|
||||||
if (!cc.get(udi, dic, data, instance)) {
|
string udi = *argv++;argc--;
|
||||||
cerr << "Get failed: " << cc.getReason() << endl;
|
string dic, data;
|
||||||
exit(1);
|
if (!cc.get(udi, dic, data, instance)) {
|
||||||
|
cerr << "Get failed: " << cc.getReason() << endl;
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
cout << "Dict: [" << dic << "]" << endl;
|
||||||
}
|
}
|
||||||
cout << "Dict: [" << dic << "]" << endl;
|
|
||||||
} else if (op_flags & OPT_d) {
|
} else if (op_flags & OPT_d) {
|
||||||
if (!cc.open(CirCache::CC_OPREAD)) {
|
if (!cc.open(CirCache::CC_OPREAD)) {
|
||||||
cerr << "Open failed: " << cc.getReason() << endl;
|
cerr << "Open failed: " << cc.getReason() << endl;
|
||||||
|
|||||||
@ -17,6 +17,7 @@
|
|||||||
#ifndef _circache_h_included_
|
#ifndef _circache_h_included_
|
||||||
#define _circache_h_included_
|
#define _circache_h_included_
|
||||||
/* @(#$Id: $ (C) 2009 J.F.Dockes */
|
/* @(#$Id: $ (C) 2009 J.F.Dockes */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A data cache implemented as a circularly managed file
|
* A data cache implemented as a circularly managed file
|
||||||
*
|
*
|
||||||
@ -27,10 +28,18 @@
|
|||||||
* Data objects inside the cache each have two parts: a data segment and an
|
* Data objects inside the cache each have two parts: a data segment and an
|
||||||
* attribute (metadata) dictionary.
|
* attribute (metadata) dictionary.
|
||||||
* They are named using the same identifiers that are used inside the Recoll
|
* They are named using the same identifiers that are used inside the Recoll
|
||||||
* index, but any unique identifier scheme would work.
|
* index (the UDI).
|
||||||
*
|
*
|
||||||
* The names are stored in an auxiliary index for fast access. This index can
|
* Inside the file. the UDIs are stored inside the entry dictionary
|
||||||
* be rebuilt from the main file.
|
* under the key "udi", which will appear when the dictionary is read
|
||||||
|
* back even if not there when wrote.
|
||||||
|
*
|
||||||
|
* It is assumed that the dictionary are small (they are routinely read/parsed)
|
||||||
|
*
|
||||||
|
* A problem with this approach is that repetitively storing the same
|
||||||
|
* object will evict all others. This could be somewhat optimized by reusing
|
||||||
|
* the last entry if it has the same udi as the one written, but not done
|
||||||
|
* currently.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
@ -59,15 +68,22 @@ public:
|
|||||||
|
|
||||||
virtual bool put(const string& udi, const string& dic, const string& data);
|
virtual bool put(const string& udi, const string& dic, const string& data);
|
||||||
|
|
||||||
/* Maybe we'll have separate iterators one day, but this is good enough for
|
/** Walk the archive.
|
||||||
* now. No put() operations should be performed while using these.
|
*
|
||||||
|
* Maybe we'll have separate iterators one day, but this is good
|
||||||
|
* enough for now. No put() operations should be performed while
|
||||||
|
* using these.
|
||||||
*/
|
*/
|
||||||
|
/** Back to oldest */
|
||||||
virtual bool rewind(bool& eof);
|
virtual bool rewind(bool& eof);
|
||||||
virtual bool next(bool& eof);
|
/** Get entry under cursor */
|
||||||
virtual bool getcurrent(string& udi, string& dic, string& data);
|
virtual bool getcurrent(string& udi, string& dic, string& data);
|
||||||
|
/** Get current entry dict only (ie: udi is in dict */
|
||||||
virtual bool getcurrentdict(string& dict);
|
virtual bool getcurrentdict(string& dict);
|
||||||
|
/** Skip to next. (false && !eof) -> error, (false&&eof)->EOF. */
|
||||||
|
virtual bool next(bool& eof);
|
||||||
|
|
||||||
/* Debug */
|
/* Debug. This writes the entry headers to stdout */
|
||||||
virtual bool dump();
|
virtual bool dump();
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user