Beaglequeue: simplify index from cache now that udi entries are unique in cache
This commit is contained in:
parent
7f856f482b
commit
3c40b0bb1e
@ -252,6 +252,7 @@ bool BeagleQueueIndexer::getFromCache(const string& udi, Rcl::Doc &dotdoc,
|
||||
it != names.end(); it++) {
|
||||
cf.get(*it, dotdoc.meta[*it], "");
|
||||
}
|
||||
dotdoc.meta[Rcl::Doc::keyudi] = udi;
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -330,34 +331,20 @@ bool BeagleQueueIndexer::index()
|
||||
if (!eof)
|
||||
return false;
|
||||
}
|
||||
|
||||
// The cache is walked in chronological order, but we want to
|
||||
// index the newest files first (there can be several versions
|
||||
// of a given file in the cache). Have to reverse the
|
||||
// list. This would be a problem with a big cache, because the
|
||||
// udis can be big (ie 150 chars), and would be more
|
||||
// efficiently performed by the cache, which could use the
|
||||
// smaller offsets.
|
||||
//
|
||||
// Another approach would be to just walk chronologically and
|
||||
// reindex all versions: would waste processing but save
|
||||
// memory
|
||||
vector<string> alludis;
|
||||
alludis.reserve(20000);
|
||||
while (m_cache->next(eof)) {
|
||||
string dict;
|
||||
m_cache->getcurrentdict(dict);
|
||||
ConfSimple cf(dict, 1);
|
||||
string udi;
|
||||
if (!cf.get("udi", udi, ""))
|
||||
if (!m_cache->getCurrentUdi(udi)) {
|
||||
LOGERR(("BeagleQueueIndexer:: cache file damaged\n"));
|
||||
break;
|
||||
}
|
||||
if (udi.empty())
|
||||
continue;
|
||||
alludis.push_back(udi);
|
||||
}
|
||||
for (vector<string>::reverse_iterator it = alludis.rbegin();
|
||||
it != alludis.rend(); it++) {
|
||||
if (m_db->needUpdate(*it, "")) {
|
||||
if (m_db->needUpdate(udi, "")) {
|
||||
try {
|
||||
indexFromCache(*it);
|
||||
// indexFromCache does a CirCache::get(). We could
|
||||
// arrange to use a getCurrent() instead, would be more
|
||||
// efficient
|
||||
indexFromCache(udi);
|
||||
} catch (CancelExcept) {
|
||||
LOGERR(("BeagleQueueIndexer: interrupted\n"));
|
||||
return false;
|
||||
|
||||
@ -508,7 +508,7 @@ public:
|
||||
if (!readDicData(hoffs, d, dic, 0))
|
||||
return false;
|
||||
if (d.dicsize == 0) {
|
||||
// This is an erase entry
|
||||
// This is an erased entry
|
||||
udi.erase();
|
||||
return true;
|
||||
}
|
||||
@ -1082,15 +1082,16 @@ bool CirCache::next(bool& eof)
|
||||
return false;
|
||||
}
|
||||
|
||||
bool CirCache::getcurrentdict(string& dic)
|
||||
bool CirCache::getCurrentUdi(string& udi)
|
||||
{
|
||||
assert(m_d != 0);
|
||||
if (!m_d->readDicData(m_d->m_itoffs, m_d->m_ithd, dic, 0))
|
||||
|
||||
if (!m_d->readHUdi(m_d->m_itoffs, m_d->m_ithd, udi))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CirCache::getcurrent(string& udi, string& dic, string& data)
|
||||
bool CirCache::getCurrent(string& udi, string& dic, string& data)
|
||||
{
|
||||
assert(m_d != 0);
|
||||
if (!m_d->readDicData(m_d->m_itoffs, m_d->m_ithd, dic, &data))
|
||||
|
||||
@ -84,9 +84,10 @@ public:
|
||||
/** Back to oldest */
|
||||
virtual bool rewind(bool& eof);
|
||||
/** Get entry under cursor */
|
||||
virtual bool getcurrent(string& udi, string& dic, string& data);
|
||||
/** Get current entry dict only (ie: udi is in dict) */
|
||||
virtual bool getcurrentdict(string& dict);
|
||||
virtual bool getCurrent(string& udi, string& dic, string& data);
|
||||
/** Get current entry udi only. Udi can be empty (erased entry), caller
|
||||
* should call again */
|
||||
virtual bool getCurrentUdi(string& udi);
|
||||
/** Skip to next. (false && !eof) -> error, (false&&eof)->EOF. */
|
||||
virtual bool next(bool& eof);
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user