Beaglequeue: simplify index from cache now that udi entries are unique in cache

This commit is contained in:
dockes 2009-11-23 16:10:38 +00:00
parent 7f856f482b
commit 3c40b0bb1e
3 changed files with 20 additions and 31 deletions

View File

@ -252,6 +252,7 @@ bool BeagleQueueIndexer::getFromCache(const string& udi, Rcl::Doc &dotdoc,
it != names.end(); it++) {
cf.get(*it, dotdoc.meta[*it], "");
}
dotdoc.meta[Rcl::Doc::keyudi] = udi;
return true;
}
@ -330,34 +331,20 @@ bool BeagleQueueIndexer::index()
if (!eof)
return false;
}
// The cache is walked in chronological order, but we want to
// index the newest files first (there can be several versions
// of a given file in the cache). Have to reverse the
// list. This would be a problem with a big cache, because the
// udis can be big (i.e. 150 chars), and would be more
// efficiently performed by the cache, which could use the
// smaller offsets.
//
// Another approach would be to just walk chronologically and
// reindex all versions: would waste processing but save
// memory
vector<string> alludis;
alludis.reserve(20000);
while (m_cache->next(eof)) {
string dict;
m_cache->getcurrentdict(dict);
ConfSimple cf(dict, 1);
string udi;
if (!cf.get("udi", udi, ""))
if (!m_cache->getCurrentUdi(udi)) {
LOGERR(("BeagleQueueIndexer:: cache file damaged\n"));
break;
}
if (udi.empty())
continue;
alludis.push_back(udi);
}
for (vector<string>::reverse_iterator it = alludis.rbegin();
it != alludis.rend(); it++) {
if (m_db->needUpdate(*it, "")) {
if (m_db->needUpdate(udi, "")) {
try {
indexFromCache(*it);
// indexFromCache does a CirCache::get(). We could
// arrange to use a getCurrent() instead, which would be
// more efficient
indexFromCache(udi);
} catch (CancelExcept) {
LOGERR(("BeagleQueueIndexer: interrupted\n"));
return false;

View File

@ -508,7 +508,7 @@ public:
if (!readDicData(hoffs, d, dic, 0))
return false;
if (d.dicsize == 0) {
// This is an erase entry
// This is an erased entry
udi.erase();
return true;
}
@ -1082,15 +1082,16 @@ bool CirCache::next(bool& eof)
return false;
}
bool CirCache::getcurrentdict(string& dic)
bool CirCache::getCurrentUdi(string& udi)
{
assert(m_d != 0);
if (!m_d->readDicData(m_d->m_itoffs, m_d->m_ithd, dic, 0))
if (!m_d->readHUdi(m_d->m_itoffs, m_d->m_ithd, udi))
return false;
return true;
}
bool CirCache::getcurrent(string& udi, string& dic, string& data)
bool CirCache::getCurrent(string& udi, string& dic, string& data)
{
assert(m_d != 0);
if (!m_d->readDicData(m_d->m_itoffs, m_d->m_ithd, dic, &data))

View File

@ -84,9 +84,10 @@ public:
/** Back to oldest */
virtual bool rewind(bool& eof);
/** Get entry under cursor */
virtual bool getcurrent(string& udi, string& dic, string& data);
/** Get current entry dict only (i.e. the udi is in the dict) */
virtual bool getcurrentdict(string& dict);
virtual bool getCurrent(string& udi, string& dic, string& data);
/** Get current entry udi only. Udi can be empty (erased entry), in which
* case the caller should skip to the next entry and call again */
virtual bool getCurrentUdi(string& udi);
/** Skip to next. (false && !eof) -> error, (false&&eof)->EOF. */
virtual bool next(bool& eof);