diff --git a/src/index/fsindexer.cpp b/src/index/fsindexer.cpp index 30f9b169..478a918a 100644 --- a/src/index/fsindexer.cpp +++ b/src/index/fsindexer.cpp @@ -63,9 +63,12 @@ using namespace std; #ifdef IDX_THREADS class DbUpdTask { public: + // Take some care to avoid sharing string data (if string impl is cow) DbUpdTask(const string& u, const string& p, const Rcl::Doc& d) - : udi(u), parent_udi(p), doc(d) - {} + : udi(u.begin(), u.end()), parent_udi(p.begin(), p.end()) + { + d.copyto(&doc); + } string udi; string parent_udi; Rcl::Doc doc; @@ -74,10 +77,13 @@ extern void *FsIndexerDbUpdWorker(void*); class InternfileTask { public: + // Take some care to avoid sharing string data (if string impl is cow) InternfileTask(const std::string &f, const struct stat *i_stp, map lfields) - : fn(f), statbuf(*i_stp), localfields(lfields) - {} + : fn(f.begin(), f.end()), statbuf(*i_stp) + { + map_ss_cp_noshr(lfields, &localfields); + } string fn; struct stat statbuf; map localfields; diff --git a/src/internfile/myhtmlparse.cpp b/src/internfile/myhtmlparse.cpp index d449f9c1..5567fa9d 100644 --- a/src/internfile/myhtmlparse.cpp +++ b/src/internfile/myhtmlparse.cpp @@ -373,6 +373,7 @@ MyHtmlParser::opening_tag(const string &tag) // Specific to Recoll filters. decode_entities(content); struct tm tm; + memset(&tm, 0, sizeof(tm)); if (strptime(content.c_str(), " %Y-%m-%d %H:%M:%S ", &tm) || strptime(content.c_str(), diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index e1131808..d33514bc 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -1419,10 +1419,11 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc) // Dates etc. time_t mtime = atoll(doc.dmtime.empty() ? doc.fmtime.c_str() : doc.dmtime.c_str()); - struct tm *tm = localtime(&mtime); + struct tm tmb; + localtime_r(&mtime, &tmb); char buf[9]; snprintf(buf, 9, "%04d%02d%02d", - tm->tm_year+1900, tm->tm_mon + 1, tm->tm_mday); + tmb.tm_year+1900, tmb.tm_mon + 1, tmb.tm_mday); // Date (YYYYMMDD) newdocument.add_boolean_term(wrap_prefix(xapday_prefix) + string(buf)); // Month (YYYYMM) diff --git a/src/rcldb/rcldb_p.h b/src/rcldb/rcldb_p.h index b247c21a..bc7c1d02 100644 --- a/src/rcldb/rcldb_p.h +++ b/src/rcldb/rcldb_p.h @@ -50,9 +50,11 @@ public: // Note that udi and uniterm are strictly equivalent and are // passed both just to avoid recomputing uniterm which is // available on the caller site. + // Take some care to avoid sharing string data (if string impl is cow) DbUpdTask(Op _op, const string& ud, const string& un, Xapian::Document *d, size_t tl) - : op(_op), udi(ud), uniterm(un), doc(d), txtlen(tl) + : op(_op), udi(ud.begin(), ud.end()), uniterm(un.begin(), un.end()), + doc(d), txtlen(tl) {} // Udi and uniterm equivalently designate the doc Op op; diff --git a/src/rcldb/rcldoc.h b/src/rcldb/rcldoc.h index 2ba5b4ca..34a6e658 100644 --- a/src/rcldb/rcldoc.h +++ b/src/rcldb/rcldoc.h @@ -19,12 +19,12 @@ #include #include - -#ifndef NO_NAMESPACES using std::string; using std::map; + +#include "smallut.h" + namespace Rcl { -#endif /** * Dumb holder for document attributes and data. @@ -139,6 +139,8 @@ class Doc { void erase() { url.erase(); + idxurl.erase(); + idxi = 0; ipath.erase(); mimetype.erase(); fmtime.erase(); @@ -160,6 +162,30 @@ class Doc { haschildren = false; onlyxattr = false; } + // Copy ensuring no shared string data, for threading issues. + void copyto(Doc *d) const { + d->url.assign(url.begin(), url.end()); + d->idxurl.assign(idxurl.begin(), idxurl.end()); + d->idxi = idxi; + d->ipath.assign(ipath.begin(), ipath.end()); + d->mimetype.assign(mimetype.begin(), mimetype.end()); + d->fmtime.assign(fmtime.begin(), fmtime.end()); + d->dmtime.assign(dmtime.begin(), dmtime.end()); + d->origcharset.assign(origcharset.begin(), origcharset.end()); + map_ss_cp_noshr(meta, &d->meta); + d->syntabs = syntabs; + d->pcbytes.assign(pcbytes.begin(), pcbytes.end()); + d->fbytes.assign(fbytes.begin(), fbytes.end()); + d->dbytes.assign(dbytes.begin(), dbytes.end()); + d->sig.assign(sig.begin(), sig.end()); + d->text.assign(text.begin(), text.end()); + d->pc = pc; + d->xdocid = xdocid; + d->idxi = idxi; + d->haspages = haspages; + d->haschildren = haschildren; + d->onlyxattr = onlyxattr; + } Doc() : idxi(0), syntabs(false), pc(0), xdocid(0), haspages(false), haschildren(false), onlyxattr(false) @@ -247,8 +273,6 @@ class Doc { }; -#ifndef NO_NAMESPACES } -#endif #endif /* _RCLDOC_H_INCLUDED_ */ diff --git a/src/utils/idfile.cpp b/src/utils/idfile.cpp index 6da344fc..8be93f00 100644 --- a/src/utils/idfile.cpp +++ b/src/utils/idfile.cpp @@ -30,6 +30,18 @@ using namespace std; #endif /* NO_NAMESPACES */ +// Bogus code to avoid bogus valgrind mt warnings about the +// initialization of treat_mbox_... which I can't even remember the +// use of (it's not documented or ever set) +static int treat_mbox_as_rfc822; +class InitTMAR { +public: + InitTMAR() { + treat_mbox_as_rfc822 = getenv("RECOLL_TREAT_MBOX_AS_RFC822") ? 1 : -1; + } +}; +static InitTMAR initTM; + /** * This code is currently ONLY used to identify mbox and mail message files * which are badly handled by standard mime type identifiers @@ -49,11 +61,6 @@ const int wantnhead = 3; // fn is for message printing static string idFileInternal(istream& input, const char *fn) { - static int treat_mbox_as_rfc822; - if (treat_mbox_as_rfc822 == 0) { - treat_mbox_as_rfc822 = getenv("RECOLL_TREAT_MBOX_AS_RFC822") ? 1 : -1; - } - bool line1HasFrom = false; bool gotnonempty = false; int lookslikemail = 0; diff --git a/src/utils/smallut.cpp b/src/utils/smallut.cpp index 40b16354..6d84e999 100644 --- a/src/utils/smallut.cpp +++ b/src/utils/smallut.cpp @@ -42,6 +42,16 @@ using namespace std; #include "hldata.h" #include "cstr.h" +void map_ss_cp_noshr(const map s, map *d) +{ + for (map::const_iterator it= s.begin(); + it != s.end(); it++) { + d->insert( + pair(string(it->first.begin(), it->first.end()), + string(it->second.begin(), it->second.end()))); + } +} + int stringicmp(const string & s1, const string& s2) { string::const_iterator it1 = s1.begin(); diff --git a/src/utils/smallut.h b/src/utils/smallut.h index bc8c132c..6a5b1193 100644 --- a/src/utils/smallut.h +++ b/src/utils/smallut.h @@ -197,6 +197,11 @@ inline void leftzeropad(string& s, unsigned len) s = s.insert(0, len - s.length(), '0'); } +// Duplicate map while ensuring no shared string data (to pass +// to other thread): +void map_ss_cp_noshr(const std::map s, + std::map *d); + // Code for static initialization of an stl map. Somewhat like Boost.assign. // Ref: http://stackoverflow.com/questions/138600/initializing-a-static-stdmapint-int-in-c // Example use: map m = create_map (1,2) (3,4) (5,6) (7,8);