From 4afde68f9e7221d898ab7f4c3e3a69c4018b5a76 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Fri, 30 Nov 2012 16:45:02 +0100 Subject: [PATCH] more config isolation. Still crashing --- src/index/beaglequeue.cpp | 8 ++-- src/index/fsindexer.cpp | 87 ++++++++++++++++------------------ src/index/fsindexer.h | 10 ++-- src/index/indexer.h | 9 ++++ src/index/recollindex.cpp | 4 +- src/python/recoll/pyrecoll.cpp | 2 +- src/rcldb/rcldb.cpp | 11 +++-- src/rcldb/rcldb.h | 2 +- 8 files changed, 66 insertions(+), 67 deletions(-) diff --git a/src/index/beaglequeue.cpp b/src/index/beaglequeue.cpp index b4d9efb0..1bc613d9 100644 --- a/src/index/beaglequeue.cpp +++ b/src/index/beaglequeue.cpp @@ -219,7 +219,7 @@ bool BeagleQueueIndexer::indexFromCache(const string& udi) if (!stringlowercmp("bookmark", hittype)) { // Just index the dotdoc dotdoc.meta[Rcl::Doc::keybcknd] = "BGL"; - return m_db->addOrUpdate(m_config, udi, cstr_null, dotdoc); + return m_db->addOrUpdate(udi, cstr_null, dotdoc); } else if (stringlowercmp("webhistory", dotdoc.meta[Rcl::Doc::keybght]) || (dotdoc.mimetype.compare("text/html") && dotdoc.mimetype.compare(cstr_textplain))) { @@ -249,7 +249,7 @@ bool BeagleQueueIndexer::indexFromCache(const string& udi) doc.pcbytes = dotdoc.pcbytes; doc.sig.clear(); doc.meta[Rcl::Doc::keybcknd] = "BGL"; - return m_db->addOrUpdate(m_config, udi, cstr_null, doc); + return m_db->addOrUpdate(udi, cstr_null, doc); } } @@ -420,7 +420,7 @@ BeagleQueueIndexer::processone(const string &path, dotdoc.sig.clear(); dotdoc.meta[Rcl::Doc::keybcknd] = "BGL"; - if (!m_db->addOrUpdate(m_config, udi, cstr_null, dotdoc)) + if (!m_db->addOrUpdate(udi, cstr_null, dotdoc)) return FsTreeWalker::FtwError; } else if (stringlowercmp("webhistory", dotdoc.meta[Rcl::Doc::keybght]) || @@ -467,7 +467,7 @@ BeagleQueueIndexer::processone(const string &path, doc.url = dotdoc.url; doc.meta[Rcl::Doc::keybcknd] = "BGL"; - if (!m_db->addOrUpdate(m_config, udi, cstr_null, doc)) + if (!m_db->addOrUpdate(udi, cstr_null, doc)) return FsTreeWalker::FtwError; } diff --git a/src/index/fsindexer.cpp b/src/index/fsindexer.cpp index 2cb49abb..bad509d1 100644 --- a/src/index/fsindexer.cpp +++ b/src/index/fsindexer.cpp @@ -59,24 +59,24 @@ using namespace std; #ifdef IDX_THREADS class DbUpdTask { public: - DbUpdTask(RclConfig *cnf, const string& u, const string& p, - const Rcl::Doc& d) - : udi(u), parent_udi(p), doc(d), config(cnf) + DbUpdTask(const string& u, const string& p, const Rcl::Doc& d) + : udi(u), parent_udi(p), doc(d) {} string udi; string parent_udi; Rcl::Doc doc; - RclConfig *config; }; extern void *FsIndexerDbUpdWorker(void*); class InternfileTask { public: - InternfileTask(const std::string &f, const struct stat *i_stp) - : fn(f), statbuf(*i_stp) + InternfileTask(const std::string &f, const struct stat *i_stp, + map lfields) + : fn(f), statbuf(*i_stp), localfields(lfields) {} string fn; struct stat statbuf; + map localfields; }; extern void *FsIndexerInternfileWorker(void*); #endif // IDX_THREADS @@ -110,6 +110,7 @@ FsIndexer::FsIndexer(RclConfig *cnf, Rcl::Db *db, DbIxStatusUpdater *updfunc) m_havelocalfields = m_config->hasNameAnywhere("localfields"); #ifdef IDX_THREADS + m_stableconfig = new RclConfig(*m_config); m_loglevel = DebugLog::getdbl()->getlevel(); m_haveInternQ = m_haveSplitQ = false; int internqlen = cnf->getThrConf(RclConfig::ThrIntern).first; @@ -152,6 +153,7 @@ FsIndexer::~FsIndexer() LOGDEB0(("FsIndexer: dbupd worker status: %ld (1->ok)\n", long(status))); } + delete m_stableconfig; #endif // IDX_THREADS delete m_missing; @@ -178,7 +180,7 @@ bool FsIndexer::index() if (m_updater) { #ifdef IDX_THREADS - PTMutexLocker locker(m_mutex); + PTMutexLocker locker(m_updater->m_mutex); #endif m_updater->status.reset(); m_updater->status.dbtotdocs = m_db->docCnt(); @@ -293,6 +295,10 @@ bool FsIndexer::indexFiles(list& files, ConfIndexer::IxFlag flag) if (!init()) return false; + int abslen; + if (m_config->getConfParam("idxabsmlen", &abslen)) + m_db->setAbstractParams(abslen, -1, -1); + // We use an FsTreeWalker just for handling the skipped path/name lists FsTreeWalker walker; walker.setSkippedPaths(m_config->getSkippedPaths()); @@ -302,8 +308,8 @@ bool FsIndexer::indexFiles(list& files, ConfIndexer::IxFlag flag) LOGDEB2(("FsIndexer::indexFiles: [%s]\n", it->c_str())); m_config->setKeyDir(path_getfather(*it)); - if (m_havelocalfields) - localfieldsfromconf(); + if (m_havelocalfields) + localfieldsfromconf(); bool follow = false; m_config->getConfParam("followLinks", &follow); @@ -323,10 +329,6 @@ bool FsIndexer::indexFiles(list& files, ConfIndexer::IxFlag flag) it++; continue; } - int abslen; - if (m_config->getConfParam("idxabsmlen", &abslen)) - m_db->setAbstractParams(abslen, -1, -1); - if (processone(*it, &stb, FsTreeWalker::FtwRegular) != FsTreeWalker::FtwOk) { LOGERR(("FsIndexer::indexFiles: processone failed\n")); @@ -383,10 +385,10 @@ void FsIndexer::localfieldsfromconf() } // -void FsIndexer::setlocalfields(Rcl::Doc& doc) +void FsIndexer::setlocalfields(map fields, Rcl::Doc& doc) { - for (map::const_iterator it = m_localfields.begin(); - it != m_localfields.end(); it++) { + for (map::const_iterator it = fields.begin(); + it != fields.end(); it++) { // Should local fields override those coming from the document // ? I think not, but not too sure if (doc.meta.find(it->second) == doc.meta.end()) { @@ -422,8 +424,7 @@ void *FsIndexerDbUpdWorker(void * fsp) return (void*)1; } LOGDEB0(("FsIndexerDbUpdWorker: task ql %d\n", int(qsz))); - if (!fip->m_db->addOrUpdate(tsk->config, tsk->udi, tsk->parent_udi, - tsk->doc)) { + if (!fip->m_db->addOrUpdate(tsk->udi, tsk->parent_udi, tsk->doc)) { LOGERR(("FsIndexerDbUpdWorker: addOrUpdate failed\n")); tqp->workerExit(); return (void*)0; @@ -439,7 +440,7 @@ void *FsIndexerInternfileWorker(void * fsp) WorkQueue *tqp = &fip->m_iwqueue; DebugLog::getdbl()->setloglevel(fip->m_loglevel); TempDir tmpdir; - RclConfig *myconf = new RclConfig(*(fip->m_config)); + RclConfig myconf(*(fip->m_stableconfig)); InternfileTask *tsk; for (;;) { @@ -448,7 +449,8 @@ void *FsIndexerInternfileWorker(void * fsp) return (void*)1; } LOGDEB0(("FsIndexerInternfileWorker: task fn %s\n", tsk->fn.c_str())); - if (fip->processonefile(myconf, tmpdir, tsk->fn, &tsk->statbuf) != + if (fip->processonefile(&myconf, tmpdir, tsk->fn, &tsk->statbuf, + tsk->localfields) != FsTreeWalker::FtwOk) { LOGERR(("FsIndexerInternfileWorker: processone failed\n")); tqp->workerExit(); @@ -477,7 +479,7 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp, { if (m_updater) { #ifdef IDX_THREADS - PTMutexLocker locker(m_mutex); + PTMutexLocker locker(m_updater->m_mutex); #endif if (!m_updater->update()) { return FsTreeWalker::FtwStop; @@ -493,13 +495,9 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp, // Set up skipped patterns for this subtree. m_walker.setSkippedNames(m_config->getSkippedNames()); - int abslen; - if (m_config->getConfParam("idxabsmlen", &abslen)) - m_db->setAbstractParams(abslen, -1, -1); - // Adjust local fields from config for this subtree - if (m_havelocalfields) - localfieldsfromconf(); + if (m_havelocalfields) + localfieldsfromconf(); if (flg == FsTreeWalker::FtwDirReturn) return FsTreeWalker::FtwOk; @@ -507,7 +505,7 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp, #ifdef IDX_THREADS if (m_haveInternQ) { - InternfileTask *tp = new InternfileTask(fn, stp); + InternfileTask *tp = new InternfileTask(fn, stp, m_localfields); if (m_iwqueue.put(tp)) { return FsTreeWalker::FtwOk; } else { @@ -516,20 +514,15 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp, } #endif - return processonefile(m_config, m_tmpdir, fn, stp); - + return processonefile(m_config, m_tmpdir, fn, stp, m_localfields); } FsTreeWalker::Status FsIndexer::processonefile(RclConfig *config, TempDir& tmpdir, - const std::string &fn, const struct stat *stp) + const std::string &fn, const struct stat *stp, + map localfields) { - -#ifdef IDX_THREADS - config->setKeyDir(path_getfather(fn)); -#endif - //////////////////// // Check db up to date ? Doing this before file type // identification means that, if usesystemfilecommand is switched @@ -551,7 +544,7 @@ FsIndexer::processonefile(RclConfig *config, TempDir& tmpdir, LOGDEB0(("processone: up to date: %s\n", fn.c_str())); if (m_updater) { #ifdef IDX_THREADS - PTMutexLocker locker(m_mutex); + PTMutexLocker locker(m_updater->m_mutex); #endif // Status bar update, abort request etc. m_updater->status.fn = fn; @@ -579,7 +572,7 @@ FsIndexer::processonefile(RclConfig *config, TempDir& tmpdir, // Note that we used to do the full path here, but I ended up believing // that it made more sense to use only the file name // The charset is used is the one from the locale. - string charset = m_config->getDefCharset(true); + string charset = config->getDefCharset(true); string utf8fn; int ercnt; if (!transcode(path_getsimple(fn), utf8fn, charset, "UTF-8", &ercnt)) { LOGERR(("processone: fn transcode failure from [%s] to UTF-8: %s\n", @@ -646,7 +639,7 @@ FsIndexer::processonefile(RclConfig *config, TempDir& tmpdir, // Possibly add fields from local config if (m_havelocalfields) - setlocalfields(doc); + setlocalfields(localfields, doc); // Add document to database. If there is an ipath, add it as a children // of the file document. string udi; @@ -654,16 +647,15 @@ FsIndexer::processonefile(RclConfig *config, TempDir& tmpdir, #ifdef IDX_THREADS if (m_haveSplitQ) { - DbUpdTask *tp = new DbUpdTask(config, udi, doc.ipath.empty() ? - cstr_null : parent_udi, doc); + DbUpdTask *tp = new DbUpdTask(udi, doc.ipath.empty() ? cstr_null : parent_udi, doc); if (!m_dwqueue.put(tp)) { LOGERR(("processonefile: wqueue.put failed\n")); return FsTreeWalker::FtwError; } } else { #endif - if (!m_db->addOrUpdate(config, udi, doc.ipath.empty() ? cstr_null : - parent_udi, doc)) { + if (!m_db->addOrUpdate(udi, doc.ipath.empty() ? + cstr_null : parent_udi, doc)) { return FsTreeWalker::FtwError; } #ifdef IDX_THREADS @@ -673,7 +665,7 @@ FsIndexer::processonefile(RclConfig *config, TempDir& tmpdir, // Tell what we are doing and check for interrupt request if (m_updater) { #ifdef IDX_THREADS - PTMutexLocker locker(m_mutex); + PTMutexLocker locker(m_updater->m_mutex); #endif ++(m_updater->status.docsdone); if (m_updater->status.dbtotdocs < m_updater->status.docsdone) @@ -697,6 +689,8 @@ FsIndexer::processonefile(RclConfig *config, TempDir& tmpdir, fileDoc.meta[Rcl::Doc::keyfn] = utf8fn; fileDoc.mimetype = interner.getMimetype(); fileDoc.url = cstr_fileu + fn; + if (m_havelocalfields) + setlocalfields(localfields, fileDoc); char cbuf[100]; sprintf(cbuf, OFFTPC, stp->st_size); @@ -706,15 +700,14 @@ FsIndexer::processonefile(RclConfig *config, TempDir& tmpdir, #ifdef IDX_THREADS if (m_haveSplitQ) { - DbUpdTask *tp = new DbUpdTask(config, parent_udi, cstr_null, - fileDoc); + DbUpdTask *tp = new DbUpdTask(parent_udi, cstr_null, fileDoc); if (!m_dwqueue.put(tp)) return FsTreeWalker::FtwError; else return FsTreeWalker::FtwOk; } #endif - if (!m_db->addOrUpdate(config, parent_udi, cstr_null, fileDoc)) + if (!m_db->addOrUpdate(parent_udi, cstr_null, fileDoc)) return FsTreeWalker::FtwError; } diff --git a/src/index/fsindexer.h b/src/index/fsindexer.h index 6ef8071c..8db85fb0 100644 --- a/src/index/fsindexer.h +++ b/src/index/fsindexer.h @@ -97,11 +97,6 @@ class FsIndexer : public FsTreeWalkerCB { map m_localfields; #ifdef IDX_THREADS - // Used to protect all ops from processonefile to class members: - // m_missing, m_db. It would be possible to be more fine-grained - // but probably not worth it. m_config and m_updater have separate - // protections - PTMutexInit m_mutex; friend void *FsIndexerDbUpdWorker(void*); friend void *FsIndexerInternfileWorker(void*); int m_loglevel; @@ -109,15 +104,16 @@ class FsIndexer : public FsTreeWalkerCB { WorkQueue m_dwqueue; bool m_haveInternQ; bool m_haveSplitQ; + RclConfig *m_stableconfig; #endif // IDX_THREADS bool init(); void localfieldsfromconf(); - void setlocalfields(Rcl::Doc& doc); + void setlocalfields(const map flds, Rcl::Doc& doc); string getDbDir() {return m_config->getDbDir();} FsTreeWalker::Status processonefile(RclConfig *config, TempDir& tmpdir, const string &fn, - const struct stat *); + const struct stat *, map localfields); }; #endif /* _fsindexer_h_included_ */ diff --git a/src/index/indexer.h b/src/index/indexer.h index 7582afb0..3536c241 100644 --- a/src/index/indexer.h +++ b/src/index/indexer.h @@ -32,6 +32,9 @@ using std::vector; #include "rclconfig.h" #include "rcldb.h" #include "rcldoc.h" +#ifdef IDX_THREADS +#include "ptmutex.h" +#endif class FsIndexer; class BeagleQueueIndexer; @@ -60,12 +63,18 @@ class DbIxStatus { * stop as soon as possible without corrupting state */ class DbIxStatusUpdater { public: +#ifdef IDX_THREADS + PTMutexInit m_mutex; +#endif DbIxStatus status; virtual ~DbIxStatusUpdater(){} // Convenience: change phase/fn and update virtual bool update(DbIxStatus::Phase phase, const string& fn) { +#ifdef IDX_THREADS + PTMutexLocker lock(m_mutex); +#endif status.phase = phase; status.fn = fn; return update(); diff --git a/src/index/recollindex.cpp b/src/index/recollindex.cpp index f7f42ace..2c6cd54c 100644 --- a/src/index/recollindex.cpp +++ b/src/index/recollindex.cpp @@ -88,7 +88,7 @@ int stopindexing; // should subsequently orderly terminate what it is doing. class MyUpdater : public DbIxStatusUpdater { public: - MyUpdater(RclConfig *config) + MyUpdater(const RclConfig *config) : m_prevphase(DbIxStatus::DBIXS_NONE) { m_fd = open(config->getIdxStatusFile().c_str(), @@ -165,7 +165,7 @@ static void makeIndexerOrExit(RclConfig *config, bool inPlaceReset) } } -void rclIxIonice(RclConfig *config) +void rclIxIonice(const RclConfig *config) { string clss, classdata; if (!config->getConfParam("monioniceclass", clss) || clss.empty()) diff --git a/src/python/recoll/pyrecoll.cpp b/src/python/recoll/pyrecoll.cpp index 152551bf..43de1940 100644 --- a/src/python/recoll/pyrecoll.cpp +++ b/src/python/recoll/pyrecoll.cpp @@ -1149,7 +1149,7 @@ Db_addOrUpdate(recoll_DbObject* self, PyObject *args, PyObject *) PyErr_SetString(PyExc_AttributeError, "doc"); return 0; } - if (!self->db->addOrUpdate(rclconfig, udi, parent_udi, *pydoc->doc)) { + if (!self->db->addOrUpdate(udi, parent_udi, *pydoc->doc)) { LOGERR(("Db_addOrUpdate: rcldb error\n")); PyErr_SetString(PyExc_AttributeError, "rcldb error"); return 0; diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index 38f4a646..6ddeb708 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -338,11 +338,12 @@ int Db::Native::getPageNumberForPosition(const vector& pbreaks, bool Db::o_inPlaceReset; Db::Db(const RclConfig *cfp) - : m_ndb(0), m_config(cfp), m_mode(Db::DbRO), m_curtxtsz(0), m_flushtxtsz(0), + : m_ndb(0), m_mode(Db::DbRO), m_curtxtsz(0), m_flushtxtsz(0), m_occtxtsz(0), m_occFirstCheck(1), m_idxAbsTruncLen(250), m_synthAbsLen(250), m_synthAbsWordCtxLen(4), m_flushMb(-1), m_maxFsOccupPc(0) { + m_config = new RclConfig(*cfp); #ifndef RCL_INDEX_STRIPCHARS if (start_of_field_term.empty()) { if (o_index_stripchars) { @@ -370,6 +371,7 @@ Db::~Db() LOGDEB(("Db::~Db: isopen %d m_iswritable %d\n", m_ndb->m_isopen, m_ndb->m_iswritable)); i_close(true); + delete m_config; } vector Db::getStemmerNames() @@ -867,8 +869,7 @@ static const string cstr_nc("\n\r\x0c"); // the title abstract and body and add special terms for file name, // date, mime type etc. , create the document data record (more // metadata), and update database -bool Db::addOrUpdate(RclConfig *config, const string &udi, - const string &parent_udi, Doc &doc) +bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc) { LOGDEB(("Db::add: udi [%s] parent [%s]\n", udi.c_str(), parent_udi.c_str())); @@ -1097,10 +1098,10 @@ bool Db::addOrUpdate(RclConfig *config, const string &udi, if (!doc.meta[Doc::keyabs].empty()) RECORD_APPEND(record, Doc::keyabs, doc.meta[Doc::keyabs]); - const set& stored = config->getStoredFields(); + const set& stored = m_config->getStoredFields(); for (set::const_iterator it = stored.begin(); it != stored.end(); it++) { - string nm = config->fieldCanon(*it); + string nm = m_config->fieldCanon(*it); if (!doc.meta[*it].empty()) { string value = neutchars(truncate_to_word(doc.meta[*it], 150), cstr_nc); diff --git a/src/rcldb/rcldb.h b/src/rcldb/rcldb.h index dd846920..94f87d54 100644 --- a/src/rcldb/rcldb.h +++ b/src/rcldb/rcldb.h @@ -256,7 +256,7 @@ class Db { * @param doc container for document data. Should have been filled as * much as possible depending on the document type. */ - bool addOrUpdate(RclConfig *config, const string &udi, + bool addOrUpdate(const string &udi, const string &parent_udi, Doc &doc); #ifdef IDX_THREADS void waitUpdIdle();