diff --git a/src/index/beaglequeue.cpp b/src/index/beaglequeue.cpp index 43f4fb45..c65b4f16 100644 --- a/src/index/beaglequeue.cpp +++ b/src/index/beaglequeue.cpp @@ -252,6 +252,7 @@ bool BeagleQueueIndexer::getFromCache(const string& udi, Rcl::Doc &dotdoc, it != names.end(); it++) { cf.get(*it, dotdoc.meta[*it], ""); } + dotdoc.meta[Rcl::Doc::keyudi] = udi; return true; } @@ -330,34 +331,20 @@ bool BeagleQueueIndexer::index() if (!eof) return false; } - - // The cache is walked in chronogical order, but we want to - // index the newest files first (there can be several versions - // of a given file in the cache). Have to revert the - // list. This would be a problem with a big cache, because the - // udis can be big (ie 150 chars), and would be more - // efficiently performed by the cache, which could use the - // smaller offsets. - // - // Another approach would be to just walk chronogical and - // reindex all versions: would waste processing but save - // memory - vector alludis; - alludis.reserve(20000); while (m_cache->next(eof)) { - string dict; - m_cache->getcurrentdict(dict); - ConfSimple cf(dict, 1); string udi; - if (!cf.get("udi", udi, "")) + if (!m_cache->getCurrentUdi(udi)) { + LOGERR(("BeagleQueueIndexer:: cache file damaged\n")); + break; + } + if (udi.empty()) continue; - alludis.push_back(udi); - } - for (vector::reverse_iterator it = alludis.rbegin(); - it != alludis.rend(); it++) { - if (m_db->needUpdate(*it, "")) { + if (m_db->needUpdate(udi, "")) { try { - indexFromCache(*it); + // indexFromCache does a CirCache::get(). 
We could + // arrange to use a getCurrent() instead, would be more + // efficient + indexFromCache(udi); } catch (CancelExcept) { LOGERR(("BeagleQueueIndexer: interrupted\n")); return false; diff --git a/src/utils/circache.cpp b/src/utils/circache.cpp index 6c9739b8..cc760c2a 100644 --- a/src/utils/circache.cpp +++ b/src/utils/circache.cpp @@ -508,7 +508,7 @@ public: if (!readDicData(hoffs, d, dic, 0)) return false; if (d.dicsize == 0) { - // This is an erase entry + // This is an erased entry udi.erase(); return true; } @@ -1082,15 +1082,16 @@ bool CirCache::next(bool& eof) return false; } -bool CirCache::getcurrentdict(string& dic) +bool CirCache::getCurrentUdi(string& udi) { assert(m_d != 0); - if (!m_d->readDicData(m_d->m_itoffs, m_d->m_ithd, dic, 0)) + + if (!m_d->readHUdi(m_d->m_itoffs, m_d->m_ithd, udi)) return false; return true; } -bool CirCache::getcurrent(string& udi, string& dic, string& data) +bool CirCache::getCurrent(string& udi, string& dic, string& data) { assert(m_d != 0); if (!m_d->readDicData(m_d->m_itoffs, m_d->m_ithd, dic, &data)) diff --git a/src/utils/circache.h b/src/utils/circache.h index 2bd26952..c10a213c 100644 --- a/src/utils/circache.h +++ b/src/utils/circache.h @@ -84,9 +84,10 @@ public: /** Back to oldest */ virtual bool rewind(bool& eof); /** Get entry under cursor */ - virtual bool getcurrent(string& udi, string& dic, string& data); - /** Get current entry dict only (ie: udi is in dict */ - virtual bool getcurrentdict(string& dict); + virtual bool getCurrent(string& udi, string& dic, string& data); + /** Get current entry udi only. Udi can be empty (erased entry), caller + * should call again */ + virtual bool getCurrentUdi(string& udi); /** Skip to next. (false && !eof) -> error, (false&&eof)->EOF. */ virtual bool next(bool& eof);