Threading cleanup: add mutex protection around an ill-advised change to transcode. Add a mutex for the equivalent issue in unac. Rename const strings everywhere to cstr_xx to ease future detection of potentially problematic static variables. Most probably closes issue #65

This commit is contained in:
Jean-Francois Dockes 2011-09-28 15:01:14 +02:00
parent 80c86c4c5f
commit 424e4173ba
22 changed files with 411 additions and 151 deletions

View File

@ -34,6 +34,8 @@
#include "rclaspell.h" #include "rclaspell.h"
#include "debuglog.h" #include "debuglog.h"
#include "ptmutex.h"
// Just a place where we keep the Aspell library entry points together // Just a place where we keep the Aspell library entry points together
class AspellApi { class AspellApi {
public: public:
@ -61,6 +63,7 @@ public:
}; };
static AspellApi aapi; static AspellApi aapi;
static PTMutexInit o_aapi_mutex;
#define NMTOPTR(NM, TP) \ #define NMTOPTR(NM, TP) \
if ((aapi.NM = TP dlsym(m_data->m_handle, #NM)) == 0) { \ if ((aapi.NM = TP dlsym(m_data->m_handle, #NM)) == 0) { \
@ -111,6 +114,7 @@ Aspell::~Aspell()
bool Aspell::init(string &reason) bool Aspell::init(string &reason)
{ {
PTMutexLocker locker(o_aapi_mutex);
deleteZ(m_data); deleteZ(m_data);
// Language: we get this from the configuration, else from the NLS // Language: we get this from the configuration, else from the NLS
@ -227,7 +231,7 @@ bool Aspell::init(string &reason)
return true; return true;
} }
bool Aspell::ok() bool Aspell::ok() const
{ {
return m_data != 0 && m_data->m_handle != 0; return m_data != 0 && m_data->m_handle != 0;
} }

View File

@ -50,7 +50,7 @@ class Aspell {
~Aspell(); ~Aspell();
/** Check health */ /** Check health */
bool ok(); bool ok() const;
/** Find the aspell command and shared library, init function pointers */ /** Find the aspell command and shared library, init function pointers */
bool init(string &reason); bool init(string &reason);

View File

@ -29,6 +29,7 @@
#include "rclconfig.h" #include "rclconfig.h"
#include "rclinit.h" #include "rclinit.h"
#include "pathut.h" #include "pathut.h"
#include "unac.h"
static const int catchedSigs[] = {SIGHUP, SIGINT, SIGQUIT, SIGTERM, static const int catchedSigs[] = {SIGHUP, SIGINT, SIGQUIT, SIGTERM,
SIGUSR1, SIGUSR2}; SIGUSR1, SIGUSR2};
@ -99,6 +100,8 @@ RclConfig *recollinit(RclInitFlags flags,
// Make sure the locale charset is initialized (so that multiple // Make sure the locale charset is initialized (so that multiple
// threads don't try to do it at once). // threads don't try to do it at once).
config->getDefCharset(); config->getDefCharset();
// Init unac locking
unac_init_mt();
int flushmb; int flushmb;
if (config->getConfParam("idxflushmb", &flushmb) && flushmb > 0) { if (config->getConfParam("idxflushmb", &flushmb) && flushmb > 0) {

View File

@ -172,8 +172,6 @@ public:
ifstream m_input; ifstream m_input;
}; };
const string badtmpdirname = "/no/such/dir/really/can/exist";
// Initialize. Compute paths and create a temporary directory that will be // Initialize. Compute paths and create a temporary directory that will be
// used by internfile() // used by internfile()
BeagleQueueIndexer::BeagleQueueIndexer(RclConfig *cnf, Rcl::Db *db, BeagleQueueIndexer::BeagleQueueIndexer(RclConfig *cnf, Rcl::Db *db,

View File

@ -51,20 +51,21 @@ using namespace std;
#include "pxattr.h" #include "pxattr.h"
#endif // RCL_USE_XATTR #endif // RCL_USE_XATTR
static const string stxtplain("text/plain"); static const string cstr_stxtplain("text/plain");
// The internal path element separator. This can't be the same as the rcldb // The internal path element separator. This can't be the same as the rcldb
// file to ipath separator : "|" // file to ipath separator : "|"
// We replace it with a control char if it comes out of a filter (ie: // We replace it with a control char if it comes out of a filter (ie:
// rclzip or rclchm can do this). If you want the SOH control char // rclzip or rclchm can do this). If you want the SOH control char
// inside an ipath, you're out of luck (and a bit weird). // inside an ipath, you're out of luck (and a bit weird).
static const string isep(":"); static const string cstr_isep(":");
static const char colon_repl = '\x01';
static const char cchar_colon_repl = '\x01';
static string colon_hide(const string& in) static string colon_hide(const string& in)
{ {
string out; string out;
for (string::const_iterator it = in.begin(); it != in.end(); it++) { for (string::const_iterator it = in.begin(); it != in.end(); it++) {
out += *it == ':' ? colon_repl : *it; out += *it == ':' ? cchar_colon_repl : *it;
} }
return out; return out;
} }
@ -72,7 +73,7 @@ static string colon_restore(const string& in)
{ {
string out; string out;
for (string::const_iterator it = in.begin(); it != in.end(); it++) { for (string::const_iterator it = in.begin(); it != in.end(); it++) {
out += *it == colon_repl ? ':' : *it; out += *it == cchar_colon_repl ? ':' : *it;
} }
return out; return out;
} }
@ -115,7 +116,7 @@ bool FileInterner::getEnclosing(const string &url, const string &ipath,
url.c_str(), eipath.c_str())); url.c_str(), eipath.c_str()));
if (eipath.empty()) if (eipath.empty())
return false; return false;
if ((colon = eipath.find_last_of(isep)) != string::npos) { if ((colon = eipath.find_last_of(cstr_isep)) != string::npos) {
eipath.erase(colon); eipath.erase(colon);
} else { } else {
eipath.erase(); eipath.erase();
@ -365,12 +366,12 @@ void FileInterner::initcommon(RclConfig *cnf, int flags)
m_handlers.reserve(MAXHANDLERS); m_handlers.reserve(MAXHANDLERS);
for (unsigned int i = 0; i < MAXHANDLERS; i++) for (unsigned int i = 0; i < MAXHANDLERS; i++)
m_tmpflgs[i] = false; m_tmpflgs[i] = false;
m_targetMType = stxtplain; m_targetMType = cstr_stxtplain;
} }
// We use a single beagle cache object to access beagle data. We protect it // We use a single beagle cache object to access beagle data. We protect it
// against multiple thread access. // against multiple thread access.
static PTMutexInit o_lock; static PTMutexInit o_beagler_mutex;
FileInterner::FileInterner(const Rcl::Doc& idoc, RclConfig *cnf, FileInterner::FileInterner(const Rcl::Doc& idoc, RclConfig *cnf,
TempDir& td, int flags) TempDir& td, int flags)
@ -422,12 +423,12 @@ FileInterner::FileInterner(const Rcl::Doc& idoc, RclConfig *cnf,
string udi = it->second; string udi = it->second;
{ {
PTMutexLocker locker(o_lock); PTMutexLocker locker(o_beagler_mutex);
// Retrieve from our webcache (beagle data). The beagler // Retrieve from our webcache (beagle data). The beagler
// object is created at the first call of this routine and // object is created at the first call of this routine and
// deleted when the program exits. // deleted when the program exits.
static BeagleQueueCache beagler(cnf); static BeagleQueueCache o_beagler(cnf);
if (!beagler.getFromCache(udi, dotdoc, data)) { if (!o_beagler.getFromCache(udi, dotdoc, data)) {
LOGINFO(("FileInterner:: failed fetch from Beagle cache for [%s]\n", LOGINFO(("FileInterner:: failed fetch from Beagle cache for [%s]\n",
udi.c_str())); udi.c_str()));
return; return;
@ -564,14 +565,14 @@ static inline bool getKeyValue(const map<string, string>& docdata,
// These defs are for the Dijon meta array. Rcl::Doc predefined field // These defs are for the Dijon meta array. Rcl::Doc predefined field
// names are used where appropriate. In some cases, Rcl::Doc names are // names are used where appropriate. In some cases, Rcl::Doc names are
// used inside the Dijon metadata (ex: origcharset) // used inside the Dijon metadata (ex: origcharset)
static const string keyau("author"); static const string cstr_keyau("author");
static const string keycs("charset"); static const string cstr_keycs("charset");
static const string keyct("content"); static const string cstr_keyct("content");
static const string keyds("description"); static const string cstr_keyds("description");
static const string keyfn("filename"); static const string cstr_keyfn("filename");
static const string keymd("modificationdate"); static const string cstr_keymd("modificationdate");
static const string keymt("mimetype"); static const string cstr_keymt("mimetype");
static const string keytt("title"); static const string cstr_keytt("title");
bool FileInterner::dijontorcl(Rcl::Doc& doc) bool FileInterner::dijontorcl(Rcl::Doc& doc)
{ {
@ -585,21 +586,21 @@ bool FileInterner::dijontorcl(Rcl::Doc& doc)
for (map<string,string>::const_iterator it = docdata.begin(); for (map<string,string>::const_iterator it = docdata.begin();
it != docdata.end(); it++) { it != docdata.end(); it++) {
if (it->first == keyct) { if (it->first == cstr_keyct) {
doc.text = it->second; doc.text = it->second;
} else if (it->first == keymd) { } else if (it->first == cstr_keymd) {
doc.dmtime = it->second; doc.dmtime = it->second;
} else if (it->first == Rcl::Doc::keyoc) { } else if (it->first == Rcl::Doc::keyoc) {
doc.origcharset = it->second; doc.origcharset = it->second;
} else if (it->first == keymt || it->first == keycs) { } else if (it->first == cstr_keymt || it->first == cstr_keycs) {
// don't need/want these. // don't need/want these.
} else { } else {
doc.meta[it->first] = it->second; doc.meta[it->first] = it->second;
} }
} }
if (doc.meta[Rcl::Doc::keyabs].empty() && !doc.meta[keyds].empty()) { if (doc.meta[Rcl::Doc::keyabs].empty() && !doc.meta[cstr_keyds].empty()) {
doc.meta[Rcl::Doc::keyabs] = doc.meta[keyds]; doc.meta[Rcl::Doc::keyabs] = doc.meta[cstr_keyds];
doc.meta.erase(keyds); doc.meta.erase(cstr_keyds);
} }
return true; return true;
} }
@ -635,21 +636,21 @@ void FileInterner::collectIpathAndMT(Rcl::Doc& doc) const
if (!ipathel.empty()) { if (!ipathel.empty()) {
// We have a non-empty ipath // We have a non-empty ipath
hasipath = true; hasipath = true;
getKeyValue(docdata, keymt, doc.mimetype); getKeyValue(docdata, cstr_keymt, doc.mimetype);
getKeyValue(docdata, keyfn, doc.utf8fn); getKeyValue(docdata, cstr_keyfn, doc.utf8fn);
} }
doc.ipath += colon_hide(ipathel) + isep; doc.ipath += colon_hide(ipathel) + cstr_isep;
} else { } else {
doc.ipath += isep; doc.ipath += cstr_isep;
} }
getKeyValue(docdata, keyau, doc.meta[Rcl::Doc::keyau]); getKeyValue(docdata, cstr_keyau, doc.meta[Rcl::Doc::keyau]);
getKeyValue(docdata, keymd, doc.dmtime); getKeyValue(docdata, cstr_keymd, doc.dmtime);
} }
// Trim empty tail elements in ipath. // Trim empty tail elements in ipath.
if (hasipath) { if (hasipath) {
LOGDEB2(("IPATH [%s]\n", doc.ipath.c_str())); LOGDEB2(("IPATH [%s]\n", doc.ipath.c_str()));
string::size_type sit = doc.ipath.find_last_not_of(isep); string::size_type sit = doc.ipath.find_last_not_of(cstr_isep);
if (sit == string::npos) if (sit == string::npos)
doc.ipath.erase(); doc.ipath.erase();
else if (sit < doc.ipath.length() -1) else if (sit < doc.ipath.length() -1)
@ -681,8 +682,8 @@ int FileInterner::addHandler()
{ {
const map<string, string>& docdata = m_handlers.back()->get_meta_data(); const map<string, string>& docdata = m_handlers.back()->get_meta_data();
string charset, mimetype; string charset, mimetype;
getKeyValue(docdata, keycs, charset); getKeyValue(docdata, cstr_keycs, charset);
getKeyValue(docdata, keymt, mimetype); getKeyValue(docdata, cstr_keymt, mimetype);
LOGDEB(("FileInterner::addHandler: next_doc is %s\n", mimetype.c_str())); LOGDEB(("FileInterner::addHandler: next_doc is %s\n", mimetype.c_str()));
@ -690,7 +691,7 @@ int FileInterner::addHandler()
// general), we're done decoding. If we hit text/plain, we're done // general), we're done decoding. If we hit text/plain, we're done
// in any case // in any case
if (!stringicmp(mimetype, m_targetMType) || if (!stringicmp(mimetype, m_targetMType) ||
!stringicmp(mimetype, stxtplain)) { !stringicmp(mimetype, cstr_stxtplain)) {
m_reachedMType = mimetype; m_reachedMType = mimetype;
LOGDEB1(("FileInterner::addHandler: target reached\n")); LOGDEB1(("FileInterner::addHandler: target reached\n"));
return ADD_BREAK; return ADD_BREAK;
@ -723,7 +724,7 @@ int FileInterner::addHandler()
const string *txt = &ns; const string *txt = &ns;
{ {
map<string,string>::const_iterator it; map<string,string>::const_iterator it;
it = docdata.find(keyct); it = docdata.find(cstr_keyct);
if (it != docdata.end()) if (it != docdata.end())
txt = &it->second; txt = &it->second;
} }
@ -795,7 +796,7 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, const string& ipath
int vipathidx = 0; int vipathidx = 0;
if (!ipath.empty()) { if (!ipath.empty()) {
vector<string> lipath; vector<string> lipath;
stringToTokens(ipath, lipath, isep, true); stringToTokens(ipath, lipath, cstr_isep, true);
for (vector<string>::iterator it = lipath.begin(); for (vector<string>::iterator it = lipath.begin();
it != lipath.end(); it++) { it != lipath.end(); it++) {
*it = colon_restore(*it); *it = colon_restore(*it);

View File

@ -53,7 +53,7 @@ public:
private: FILE **m_fpp; private: FILE **m_fpp;
}; };
static PTMutexInit o_mutex; static PTMutexInit o_mcache_mutex;
/** /**
* Handles a cache for message numbers to offset translations. Permits direct * Handles a cache for message numbers to offset translations. Permits direct
@ -86,7 +86,7 @@ public:
LOGDEB0(("MboxCache::get_offsets: init failed\n")); LOGDEB0(("MboxCache::get_offsets: init failed\n"));
return -1; return -1;
} }
PTMutexLocker locker(o_mutex); PTMutexLocker locker(o_mcache_mutex);
string fn = makefilename(udi); string fn = makefilename(udi);
FILE *fp = 0; FILE *fp = 0;
if ((fp = fopen(fn.c_str(), "r")) == 0) { if ((fp = fopen(fn.c_str(), "r")) == 0) {
@ -133,7 +133,7 @@ public:
return; return;
if (fsize < m_minfsize) if (fsize < m_minfsize)
return; return;
PTMutexLocker locker(o_mutex); PTMutexLocker locker(o_mcache_mutex);
string fn = makefilename(udi); string fn = makefilename(udi);
FILE *fp; FILE *fp;
if ((fp = fopen(fn.c_str(), "w")) == 0) { if ((fp = fopen(fn.c_str(), "w")) == 0) {
@ -163,7 +163,7 @@ public:
// Check state, possibly initialize // Check state, possibly initialize
bool ok(RclConfig *config) { bool ok(RclConfig *config) {
PTMutexLocker locker(o_mutex); PTMutexLocker locker(o_mcache_mutex);
if (m_minfsize == -1) if (m_minfsize == -1)
return false; return false;
if (!m_ok) { if (!m_ok) {
@ -224,9 +224,9 @@ private:
const size_t MboxCache::o_b1size = 1024; const size_t MboxCache::o_b1size = 1024;
static class MboxCache mcache; static class MboxCache o_mcache;
static const string keyquirks("mhmboxquirks"); static const string cstr_keyquirks("mhmboxquirks");
MimeHandlerMbox::~MimeHandlerMbox() MimeHandlerMbox::~MimeHandlerMbox()
{ {
@ -271,7 +271,7 @@ bool MimeHandlerMbox::set_document_file(const string &fn)
// Check for location-based quirks: // Check for location-based quirks:
string quirks; string quirks;
if (m_config && m_config->getConfParam(keyquirks, quirks)) { if (m_config && m_config->getConfParam(cstr_keyquirks, quirks)) {
if (quirks == "tbird") { if (quirks == "tbird") {
LOGDEB(("MimeHandlerMbox: setting quirks TBIRD\n")); LOGDEB(("MimeHandlerMbox: setting quirks TBIRD\n"));
m_quirks |= MBOXQUIRK_TBIRD; m_quirks |= MBOXQUIRK_TBIRD;
@ -358,6 +358,20 @@ static const char *miniTbirdFrom = "^From $";
static regex_t fromregex; static regex_t fromregex;
static regex_t minifromregex; static regex_t minifromregex;
static bool regcompiled; static bool regcompiled;
static PTMutexInit o_regex_mutex;
// Compile the two "From " line regular expressions (fromregex and
// minifromregex) and record that this has been done in regcompiled.
// The whole operation runs with o_regex_mutex held.
static void compileregexes()
{
    PTMutexLocker locker(o_regex_mutex);

    // The caller tests regcompiled without holding the lock, so another
    // thread may have completed the compilation while we were waiting
    // for the mutex. Only do the work if the flag is still unset now
    // that we have exclusive access.
    if (!regcompiled) {
        const int cflags = REG_NOSUB|REG_EXTENDED;
        regcomp(&fromregex, frompat, cflags);
        regcomp(&minifromregex, miniTbirdFrom, cflags);
        regcompiled = true;
    }
}
bool MimeHandlerMbox::next_document() bool MimeHandlerMbox::next_document()
{ {
@ -383,9 +397,7 @@ bool MimeHandlerMbox::next_document()
mtarg = -1; mtarg = -1;
if (!regcompiled) { if (!regcompiled) {
regcomp(&fromregex, frompat, REG_NOSUB|REG_EXTENDED); compileregexes();
regcomp(&minifromregex, miniTbirdFrom, REG_NOSUB|REG_EXTENDED);
regcompiled = true;
} }
// If we are called to retrieve a specific message, seek to bof // If we are called to retrieve a specific message, seek to bof
@ -403,7 +415,7 @@ bool MimeHandlerMbox::next_document()
LOGDEB0(("MimeHandlerMbox::next_doc: mtarg %d m_udi[%s]\n", LOGDEB0(("MimeHandlerMbox::next_doc: mtarg %d m_udi[%s]\n",
mtarg, m_udi.c_str())); mtarg, m_udi.c_str()));
if (!m_udi.empty() && if (!m_udi.empty() &&
(off = mcache.get_offset(m_config, m_udi, mtarg)) >= 0 && (off = o_mcache.get_offset(m_config, m_udi, mtarg)) >= 0 &&
fseeko(fp, (off_t)off, SEEK_SET) >= 0 && fseeko(fp, (off_t)off, SEEK_SET) >= 0 &&
fgets(line, LL, fp) && fgets(line, LL, fp) &&
(!regexec(&fromregex, line, 0, 0, 0) || (!regexec(&fromregex, line, 0, 0, 0) ||
@ -492,7 +504,7 @@ bool MimeHandlerMbox::next_document()
LOGDEB2(("MimeHandlerMbox::next: eof hit\n")); LOGDEB2(("MimeHandlerMbox::next: eof hit\n"));
m_havedoc = false; m_havedoc = false;
if (!m_udi.empty() && storeoffsets) { if (!m_udi.empty() && storeoffsets) {
mcache.put_offsets(m_config, m_udi, m_fsize, m_offsets); o_mcache.put_offsets(m_config, m_udi, m_fsize, m_offsets);
} }
} }
return msgtxt.empty() ? false : true; return msgtxt.empty() ? false : true;

View File

@ -34,12 +34,14 @@ using std::endl;
#include "plaintorich.h" #include "plaintorich.h"
#include "mimehandler.h" #include "mimehandler.h"
// Default highlighter // Default highlighter. No need for locking, this is query-only.
static const string cstr_hlfontcolor("<font color=\"blue\">");
static const string cstr_hlendfont("</font>");
class PlainToRichHtReslist : public PlainToRich { class PlainToRichHtReslist : public PlainToRich {
public: public:
virtual ~PlainToRichHtReslist() {} virtual ~PlainToRichHtReslist() {}
virtual string startMatch() {return string("<font color=\"blue\">");} virtual string startMatch() {return cstr_hlfontcolor;}
virtual string endMatch() {return string("</font>");} virtual string endMatch() {return cstr_hlendfont;}
}; };
static PlainToRichHtReslist g_hiliter; static PlainToRichHtReslist g_hiliter;
@ -406,10 +408,10 @@ string ResListPager::detailsLink()
const string &ResListPager::parFormat() const string &ResListPager::parFormat()
{ {
static const string format("<img src=\"%I\" align=\"left\">" static const string cstr_format("<img src=\"%I\" align=\"left\">"
"%R %S %L &nbsp;&nbsp;<b>%T</b><br>" "%R %S %L &nbsp;&nbsp;<b>%T</b><br>"
"%M&nbsp;%D&nbsp;&nbsp;&nbsp;<i>%U</i><br>" "%M&nbsp;%D&nbsp;&nbsp;&nbsp;<i>%U</i><br>"
"%A %K"); "%A %K");
return format; return cstr_format;
} }

View File

@ -51,6 +51,7 @@ using namespace std;
#include "md5.h" #include "md5.h"
#include "rclversion.h" #include "rclversion.h"
#include "cancelcheck.h" #include "cancelcheck.h"
#include "ptmutex.h"
#ifndef MAX #ifndef MAX
#define MAX(A,B) (A>B?A:B) #define MAX(A,B) (A>B?A:B)
@ -61,8 +62,8 @@ using namespace std;
// Recoll index format version is stored in user metadata. When this changes, // Recoll index format version is stored in user metadata. When this changes,
// we can't open the db and will have to reindex. // we can't open the db and will have to reindex.
static const string RCL_IDX_VERSION_KEY("RCL_IDX_VERSION_KEY"); static const string cstr_RCL_IDX_VERSION_KEY("RCL_IDX_VERSION_KEY");
static const string RCL_IDX_VERSION("1"); static const string cstr_RCL_IDX_VERSION("1");
// This is the word position offset at which we index the body text // This is the word position offset at which we index the body text
// (abstract, keywords, etc.. are stored before this) // (abstract, keywords, etc.. are stored before this)
@ -79,7 +80,7 @@ const string end_of_field_term = "XXND";
// This is used as a marker inside the abstract frag lists, but // This is used as a marker inside the abstract frag lists, but
// normally doesn't remain in final output (which is built with a // normally doesn't remain in final output (which is built with a
// custom sep. by our caller). // custom sep. by our caller).
static const string ellipsis("..."); static const string cstr_ellipsis("...");
string version_string(){ string version_string(){
return string("Recoll ") + string(rclversionstr) + string(" + Xapian ") + return string("Recoll ") + string(rclversionstr) + string(" + Xapian ") +
@ -88,12 +89,12 @@ string version_string(){
// Synthetic abstract marker (to discriminate from abstract actually // Synthetic abstract marker (to discriminate from abstract actually
// found in document) // found in document)
static const string rclSyntAbs("?!#@"); static const string cstr_syntAbs("?!#@");
// Only ONE field name inside the index data record differs from the // Only ONE field name inside the index data record differs from the
// Rcl::Doc ones: caption<->title, for a remnant of compatibility with // Rcl::Doc ones: caption<->title, for a remnant of compatibility with
// omega // omega
static const string keycap("caption"); static const string cstr_keycap("caption");
// Static/Default table for field->prefix/weight translation. // Static/Default table for field->prefix/weight translation.
// This is logically const after initialization. Can't use a // This is logically const after initialization. Can't use a
@ -106,8 +107,16 @@ static const string keycap("caption");
// suppressed. // suppressed.
static map<string, FieldTraits> fldToTraits; static map<string, FieldTraits> fldToTraits;
static PTMutexInit o_fldToTraits_mutex;
static void initFldToTraits() static void initFldToTraits()
{ {
PTMutexLocker locker(o_fldToTraits_mutex);
// As we perform non-locked testing of initialization, check again with
// the lock held
if (fldToTraits.size())
return;
// Can't remember why "abstract" is indexed without a prefix // Can't remember why "abstract" is indexed without a prefix
// (result: it's indexed twice actually). Maybe I'll dare change // (result: it's indexed twice actually). Maybe I'll dare change
// this one day // this one day
@ -116,7 +125,7 @@ static void initFldToTraits()
fldToTraits["ext"] = FieldTraits("XE"); fldToTraits["ext"] = FieldTraits("XE");
fldToTraits[Doc::keyfn] = FieldTraits("XSFN"); fldToTraits[Doc::keyfn] = FieldTraits("XSFN");
fldToTraits[keycap] = FieldTraits("S"); fldToTraits[cstr_keycap] = FieldTraits("S");
fldToTraits[Doc::keytt] = FieldTraits("S"); fldToTraits[Doc::keytt] = FieldTraits("S");
fldToTraits["subject"] = FieldTraits("S"); fldToTraits["subject"] = FieldTraits("S");
@ -189,14 +198,14 @@ bool Db::Native::dbDataToRclDoc(Xapian::docid docid, std::string &data,
parms.get(Doc::keyfmt, doc.fmtime); parms.get(Doc::keyfmt, doc.fmtime);
parms.get(Doc::keydmt, doc.dmtime); parms.get(Doc::keydmt, doc.dmtime);
parms.get(Doc::keyoc, doc.origcharset); parms.get(Doc::keyoc, doc.origcharset);
parms.get(keycap, doc.meta[Doc::keytt]); parms.get(cstr_keycap, doc.meta[Doc::keytt]);
parms.get(Doc::keykw, doc.meta[Doc::keykw]); parms.get(Doc::keykw, doc.meta[Doc::keykw]);
parms.get(Doc::keyabs, doc.meta[Doc::keyabs]); parms.get(Doc::keyabs, doc.meta[Doc::keyabs]);
// Possibly remove synthetic abstract indicator (if it's there, we // Possibly remove synthetic abstract indicator (if it's there, we
// used to index the beginning of the text as abstract). // used to index the beginning of the text as abstract).
doc.syntabs = false; doc.syntabs = false;
if (doc.meta[Doc::keyabs].find(rclSyntAbs) == 0) { if (doc.meta[Doc::keyabs].find(cstr_syntAbs) == 0) {
doc.meta[Doc::keyabs] = doc.meta[Doc::keyabs].substr(rclSyntAbs.length()); doc.meta[Doc::keyabs] = doc.meta[Doc::keyabs].substr(cstr_syntAbs.length());
doc.syntabs = true; doc.syntabs = true;
} }
parms.get(Doc::keyipt, doc.ipath); parms.get(Doc::keyipt, doc.ipath);
@ -417,7 +426,7 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
} else if (ii > (unsigned int)ipos && } else if (ii > (unsigned int)ipos &&
ii < (unsigned int)ipos + qtrmwrdcnt) { ii < (unsigned int)ipos + qtrmwrdcnt) {
sparseDoc[ii] = occupiedmarker; sparseDoc[ii] = occupiedmarker;
} else if (!sparseDoc[ii].compare(ellipsis)) { } else if (!sparseDoc[ii].compare(cstr_ellipsis)) {
// For an empty slot, the test has a side // For an empty slot, the test has a side
// effect of inserting an empty string which // effect of inserting an empty string which
// is what we want // is what we want
@ -429,7 +438,7 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
// empty string here, we really want an empty slot, // empty string here, we really want an empty slot,
// use find() // use find()
if (sparseDoc.find(sto+1) == sparseDoc.end()) { if (sparseDoc.find(sto+1) == sparseDoc.end()) {
sparseDoc[sto+1] = ellipsis; sparseDoc[sto+1] = cstr_ellipsis;
} }
// Limit to allocated occurrences and total size // Limit to allocated occurrences and total size
@ -531,7 +540,7 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
if (!incjk || (incjk && !newcjk)) if (!incjk || (incjk && !newcjk))
chunk += " "; chunk += " ";
incjk = newcjk; incjk = newcjk;
if (it->second == ellipsis) { if (it->second == cstr_ellipsis) {
vabs.push_back(chunk); vabs.push_back(chunk);
chunk.clear(); chunk.clear();
} else { } else {
@ -612,8 +621,8 @@ bool Db::open(OpenMode mode, OpenError *error)
// If db is empty, write the data format version at once // If db is empty, write the data format version at once
// to avoid stupid error messages: // to avoid stupid error messages:
if (m_ndb->xwdb.get_doccount() == 0) if (m_ndb->xwdb.get_doccount() == 0)
m_ndb->xwdb.set_metadata(RCL_IDX_VERSION_KEY, m_ndb->xwdb.set_metadata(cstr_RCL_IDX_VERSION_KEY,
RCL_IDX_VERSION); cstr_RCL_IDX_VERSION);
m_ndb->m_iswritable = true; m_ndb->m_iswritable = true;
// We open a readonly object in all cases (possibly in // We open a readonly object in all cases (possibly in
// addition to the r/w one) because some operations // addition to the r/w one) because some operations
@ -650,11 +659,11 @@ bool Db::open(OpenMode mode, OpenError *error)
// Check index format version. Must not try to check a just created or // Check index format version. Must not try to check a just created or
// truncated db // truncated db
if (mode != DbTrunc && m_ndb->xdb().get_doccount() > 0) { if (mode != DbTrunc && m_ndb->xdb().get_doccount() > 0) {
string version = m_ndb->xdb().get_metadata(RCL_IDX_VERSION_KEY); string version = m_ndb->xdb().get_metadata(cstr_RCL_IDX_VERSION_KEY);
if (version.compare(RCL_IDX_VERSION)) { if (version.compare(cstr_RCL_IDX_VERSION)) {
m_ndb->m_noversionwrite = true; m_ndb->m_noversionwrite = true;
LOGERR(("Rcl::Db::open: file index [%s], software [%s]\n", LOGERR(("Rcl::Db::open: file index [%s], software [%s]\n",
version.c_str(), RCL_IDX_VERSION.c_str())); version.c_str(), cstr_RCL_IDX_VERSION.c_str()));
throw Xapian::DatabaseError("Recoll index version mismatch", throw Xapian::DatabaseError("Recoll index version mismatch",
"", ""); "", "");
} }
@ -693,7 +702,7 @@ bool Db::i_close(bool final)
bool w = m_ndb->m_iswritable; bool w = m_ndb->m_iswritable;
if (w) { if (w) {
if (!m_ndb->m_noversionwrite) if (!m_ndb->m_noversionwrite)
m_ndb->xwdb.set_metadata(RCL_IDX_VERSION_KEY, RCL_IDX_VERSION); m_ndb->xwdb.set_metadata(cstr_RCL_IDX_VERSION_KEY, cstr_RCL_IDX_VERSION);
LOGDEB(("Rcl::Db:close: xapian will close. May take some time\n")); LOGDEB(("Rcl::Db:close: xapian will close. May take some time\n"));
} }
// Used to do a flush here. Can't see why it should be necessary. // Used to do a flush here. Can't see why it should be necessary.
@ -952,7 +961,7 @@ void Db::setAbstractParams(int idxtrunc, int syntlen, int syntctxlen)
} }
static const int MB = 1024 * 1024; static const int MB = 1024 * 1024;
static const string nc("\n\r\x0c"); static const string cstr_nc("\n\r\x0c");
#define RECORD_APPEND(R, NM, VAL) {R += NM + "=" + VAL + "\n";} #define RECORD_APPEND(R, NM, VAL) {R += NM + "=" + VAL + "\n";}
@ -1168,13 +1177,13 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi,
if (doc.meta[Doc::keytt].empty()) if (doc.meta[Doc::keytt].empty())
doc.meta[Doc::keytt] = doc.utf8fn; doc.meta[Doc::keytt] = doc.utf8fn;
doc.meta[Doc::keytt] = doc.meta[Doc::keytt] =
neutchars(truncate_to_word(doc.meta[Doc::keytt], 150), nc); neutchars(truncate_to_word(doc.meta[Doc::keytt], 150), cstr_nc);
if (!doc.meta[Doc::keytt].empty()) if (!doc.meta[Doc::keytt].empty())
RECORD_APPEND(record, keycap, doc.meta[Doc::keytt]); RECORD_APPEND(record, cstr_keycap, doc.meta[Doc::keytt]);
trimstring(doc.meta[Doc::keykw], " \t\r\n"); trimstring(doc.meta[Doc::keykw], " \t\r\n");
doc.meta[Doc::keykw] = doc.meta[Doc::keykw] =
neutchars(truncate_to_word(doc.meta[Doc::keykw], 300), nc); neutchars(truncate_to_word(doc.meta[Doc::keykw], 300), cstr_nc);
if (!doc.meta[Doc::keykw].empty()) if (!doc.meta[Doc::keykw].empty())
RECORD_APPEND(record, Doc::keykw, doc.meta[Doc::keykw]); RECORD_APPEND(record, Doc::keykw, doc.meta[Doc::keykw]);
@ -1189,12 +1198,12 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi,
if (doc.meta[Doc::keyabs].empty()) { if (doc.meta[Doc::keyabs].empty()) {
syntabs = true; syntabs = true;
if (!doc.text.empty()) if (!doc.text.empty())
doc.meta[Doc::keyabs] = rclSyntAbs + doc.meta[Doc::keyabs] = cstr_syntAbs +
neutchars(truncate_to_word(doc.text, m_idxAbsTruncLen), nc); neutchars(truncate_to_word(doc.text, m_idxAbsTruncLen), cstr_nc);
} else { } else {
doc.meta[Doc::keyabs] = doc.meta[Doc::keyabs] =
neutchars(truncate_to_word(doc.meta[Doc::keyabs], m_idxAbsTruncLen), neutchars(truncate_to_word(doc.meta[Doc::keyabs], m_idxAbsTruncLen),
nc); cstr_nc);
} }
if (!doc.meta[Doc::keyabs].empty()) if (!doc.meta[Doc::keyabs].empty())
RECORD_APPEND(record, Doc::keyabs, doc.meta[Doc::keyabs]); RECORD_APPEND(record, Doc::keyabs, doc.meta[Doc::keyabs]);
@ -1205,7 +1214,7 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi,
string nm = m_config->fieldCanon(*it); string nm = m_config->fieldCanon(*it);
if (!doc.meta[*it].empty()) { if (!doc.meta[*it].empty()) {
string value = string value =
neutchars(truncate_to_word(doc.meta[*it], 150), nc); neutchars(truncate_to_word(doc.meta[*it], 150), cstr_nc);
RECORD_APPEND(record, nm, value); RECORD_APPEND(record, nm, value);
} }
} }
@ -1611,8 +1620,8 @@ static void addPrefix(list<TermMatchEntry>& terms, const string& prefix)
// Characters that can begin a wildcard or regexp expression. We use skipto // Characters that can begin a wildcard or regexp expression. We use skipto
// to begin the allterms search with terms that begin with the portion of // to begin the allterms search with terms that begin with the portion of
// the input string prior to these chars. // the input string prior to these chars.
const string wildSpecChars = "*?["; const string cstr_wildSpecChars = "*?[";
const string regSpecChars = "(.[{"; const string cstr_regSpecChars = "(.[{";
// Find all index terms that match a wildcard or regular expression // Find all index terms that match a wildcard or regular expression
bool Db::termMatch(MatchType typ, const string &lang, bool Db::termMatch(MatchType typ, const string &lang,
@ -1639,7 +1648,7 @@ bool Db::termMatch(MatchType typ, const string &lang,
LOGERR(("Db::termMatch: unac failed for [%s]\n", root.c_str())); LOGERR(("Db::termMatch: unac failed for [%s]\n", root.c_str()));
return false; return false;
} }
string nochars = typ == ET_WILD ? wildSpecChars : regSpecChars; string nochars = typ == ET_WILD ? cstr_wildSpecChars : cstr_regSpecChars;
string prefix; string prefix;
if (!field.empty()) { if (!field.empty()) {
@ -1852,7 +1861,7 @@ bool Db::makeDocAbstract(Doc &doc, Query *query, string& abstract)
for (vector<string>::const_iterator it = vab.begin(); for (vector<string>::const_iterator it = vab.begin();
it != vab.end(); it++) { it != vab.end(); it++) {
abstract.append(*it); abstract.append(*it);
abstract.append(ellipsis); abstract.append(cstr_ellipsis);
} }
return m_reason.empty() ? true : false; return m_reason.empty() ? true : false;
} }

View File

@ -39,16 +39,17 @@
namespace Rcl { namespace Rcl {
#endif #endif
static const string cstr_keycap("caption");
static const string cstr_keydmtime("dmtime");
// Field names inside the index data record may differ from the rcldoc ones // Field names inside the index data record may differ from the rcldoc ones
// (esp.: caption / title) // (esp.: caption / title)
static const string& docfToDatf(const string& df) static const string& docfToDatf(const string& df)
{ {
static const string keycap("caption");
static const string keydmtime("dmtime");
if (!df.compare(Doc::keytt)) { if (!df.compare(Doc::keytt)) {
return keycap; return cstr_keycap;
} else if (!df.compare(Doc::keymt)) { } else if (!df.compare(Doc::keymt)) {
return keydmtime; return cstr_keydmtime;
} else { } else {
return df; return df;
} }

View File

@ -860,13 +860,13 @@ bool StringToXapianQ::processUserString(const string &iq,
return true; return true;
} }
static const string nullstemlang; static const string cstr_null;
// Translate a simple OR, AND, or EXCL search clause. // Translate a simple OR, AND, or EXCL search clause.
bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p, bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p,
const string& stemlang) const string& stemlang)
{ {
const string& l_stemlang = (m_modifiers&SDCM_NOSTEMMING)? nullstemlang: const string& l_stemlang = (m_modifiers&SDCM_NOSTEMMING)? cstr_null:
stemlang; stemlang;
m_terms.clear(); m_terms.clear();
@ -945,7 +945,7 @@ bool SearchDataClauseFilename::toNativeQuery(Rcl::Db &db, void *p,
bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p, bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
const string& stemlang) const string& stemlang)
{ {
const string& l_stemlang = (m_modifiers&SDCM_NOSTEMMING)? nullstemlang: const string& l_stemlang = (m_modifiers&SDCM_NOSTEMMING)? cstr_null:
stemlang; stemlang;
LOGDEB(("SearchDataClauseDist::toNativeQuery\n")); LOGDEB(("SearchDataClauseDist::toNativeQuery\n"));
m_terms.clear(); m_terms.clear();

View File

@ -38,21 +38,21 @@ namespace Rcl {
namespace StemDb { namespace StemDb {
static const string stemdirstem = "stem_"; static const string cstr_stemdirstem = "stem_";
/// Compute name of stem db for given base database and language /// Compute name of stem db for given base database and language
static string stemdbname(const string& dbdir, const string& lang) static string stemdbname(const string& dbdir, const string& lang)
{ {
return path_cat(dbdir, stemdirstem + lang); return path_cat(dbdir, cstr_stemdirstem + lang);
} }
list<string> getLangs(const string& dbdir) list<string> getLangs(const string& dbdir)
{ {
string pattern = stemdirstem + "*"; string pattern = cstr_stemdirstem + "*";
list<string> dirs = path_dirglob(dbdir, pattern); list<string> dirs = path_dirglob(dbdir, pattern);
for (list<string>::iterator it = dirs.begin(); it != dirs.end(); it++) { for (list<string>::iterator it = dirs.begin(); it != dirs.end(); it++) {
*it = path_basename(*it); *it = path_basename(*it);
*it = it->substr(stemdirstem.length(), string::npos); *it = it->substr(cstr_stemdirstem.length(), string::npos);
} }
return dirs; return dirs;
} }

View File

@ -36,6 +36,7 @@
#include <stdio.h> #include <stdio.h>
#include <stdarg.h> #include <stdarg.h>
#endif /* HAVE_VSNPRINTF */ #endif /* HAVE_VSNPRINTF */
#include <pthread.h>
#include "unac.h" #include "unac.h"
#include "unac_version.h" #include "unac_version.h"
@ -10555,8 +10556,6 @@ int unacfold_string_utf16(const char* in, size_t in_length,
outp, out_lengthp, 1); outp, out_lengthp, 1);
} }
#define MAXOUT 1024
static int convert(const char* from, const char* to, static int convert(const char* from, const char* to,
const char* in, size_t in_length, const char* in, size_t in_length,
char** outp, size_t* out_lengthp); char** outp, size_t* out_lengthp);
@ -10564,6 +10563,14 @@ static int convert(const char* from, const char* to,
static const char *utf16be = "UTF-16BE"; static const char *utf16be = "UTF-16BE";
static iconv_t u8tou16_cd = (iconv_t)-1; static iconv_t u8tou16_cd = (iconv_t)-1;
static iconv_t u16tou8_cd = (iconv_t)-1; static iconv_t u16tou8_cd = (iconv_t)-1;
/*
 * Mutex taken by convert() for the whole duration of a conversion.
 *
 * It is initialized statically so that it is valid before any thread can
 * possibly run: no runtime initialization step is needed, hence there is
 * no window where two threads could race to initialize it, and no risk of
 * re-initializing a mutex which is already in use (undefined behaviour
 * according to POSIX).
 */
static pthread_mutex_t o_unac_mutex = PTHREAD_MUTEX_INITIALIZER;

/*
 * Non-zero once o_unac_mutex is usable. Because of the static initializer
 * above this is true from program start, so the lazy "auto init" test in
 * convert() never fires.
 */
static int unac_mutex_is_init = 1;

/*
 * Historical entry point, to be called before starting threads in
 * multithreaded programs. Kept so that existing callers keep working.
 * The mutex is now statically initialized, so this is a no-op, and it is
 * safe to call it any number of times, even while the mutex is held.
 */
void unac_init_mt()
{
    /* Nothing to do: o_unac_mutex is statically initialized. */
}
/* /*
* Convert buffer <in> containing string encoded in charset <from> into * Convert buffer <in> containing string encoded in charset <from> into
@ -10576,6 +10583,7 @@ static int convert(const char* from, const char* to,
const char* in, size_t in_length, const char* in, size_t in_length,
char** outp, size_t* out_lengthp) char** outp, size_t* out_lengthp)
{ {
int ret = -1;
iconv_t cd; iconv_t cd;
char* out; char* out;
size_t out_remain; size_t out_remain;
@ -10584,6 +10592,15 @@ static int convert(const char* from, const char* to,
int from_utf16, from_utf8, to_utf16, to_utf8, u8tou16, u16tou8; int from_utf16, from_utf8, to_utf16, to_utf8, u8tou16, u16tou8;
const char space[] = { 0x00, 0x20 }; const char space[] = { 0x00, 0x20 };
/* Note: better call explicit unac_init_mt() before starting threads than
rely on this.
*/
if (unac_mutex_is_init == 0) {
pthread_mutex_init(&o_unac_mutex, 0);
unac_mutex_is_init = 1;
}
pthread_mutex_lock(&o_unac_mutex);
if (!strcmp(utf16be, from)) { if (!strcmp(utf16be, from)) {
from_utf8 = 0; from_utf8 = 0;
from_utf16 = 1; from_utf16 = 1;
@ -10614,7 +10631,7 @@ static int convert(const char* from, const char* to,
/* *outp still valid, no freeing */ /* *outp still valid, no freeing */
if(debug_level >= UNAC_DEBUG_LOW) if(debug_level >= UNAC_DEBUG_LOW)
DEBUG("realloc %d bytes failed\n", out_size+1); DEBUG("realloc %d bytes failed\n", out_size+1);
return -1; goto out;
} }
} else { } else {
/* +1 for null */ /* +1 for null */
@ -10622,7 +10639,7 @@ static int convert(const char* from, const char* to,
if(out == 0) { if(out == 0) {
if(debug_level >= UNAC_DEBUG_LOW) if(debug_level >= UNAC_DEBUG_LOW)
DEBUG("malloc %d bytes failed\n", out_size+1); DEBUG("malloc %d bytes failed\n", out_size+1);
return -1; goto out;
} }
} }
out_remain = out_size; out_remain = out_size;
@ -10631,7 +10648,7 @@ static int convert(const char* from, const char* to,
if (u8tou16) { if (u8tou16) {
if (u8tou16_cd == (iconv_t)-1) { if (u8tou16_cd == (iconv_t)-1) {
if((u8tou16_cd = iconv_open(to, from)) == (iconv_t)-1) { if((u8tou16_cd = iconv_open(to, from)) == (iconv_t)-1) {
return -1; goto out;
} }
} else { } else {
iconv(u8tou16_cd, 0, 0, 0, 0); iconv(u8tou16_cd, 0, 0, 0, 0);
@ -10640,7 +10657,7 @@ static int convert(const char* from, const char* to,
} else if (u16tou8) { } else if (u16tou8) {
if (u16tou8_cd == (iconv_t)-1) { if (u16tou8_cd == (iconv_t)-1) {
if((u16tou8_cd = iconv_open(to, from)) == (iconv_t)-1) { if((u16tou8_cd = iconv_open(to, from)) == (iconv_t)-1) {
return -1; goto out;
} }
} else { } else {
iconv(u16tou8_cd, 0, 0, 0, 0); iconv(u16tou8_cd, 0, 0, 0, 0);
@ -10648,7 +10665,7 @@ static int convert(const char* from, const char* to,
cd = u16tou8_cd; cd = u16tou8_cd;
} else { } else {
if((cd = iconv_open(to, from)) == (iconv_t)-1) { if((cd = iconv_open(to, from)) == (iconv_t)-1) {
return -1; goto out;
} }
} }
@ -10682,7 +10699,7 @@ static int convert(const char* from, const char* to,
if(errno == E2BIG) if(errno == E2BIG)
/* fall thru to the E2BIG case below */; /* fall thru to the E2BIG case below */;
else else
return -1; goto out;
} else { } else {
/* The offending character was replaced by a SPACE, skip it. */ /* The offending character was replaced by a SPACE, skip it. */
in += 2; in += 2;
@ -10691,7 +10708,7 @@ static int convert(const char* from, const char* to,
break; break;
} }
} else { } else {
return -1; goto out;
} }
case E2BIG: case E2BIG:
{ {
@ -10711,7 +10728,7 @@ static int convert(const char* from, const char* to,
DEBUG("realloc %d bytes failed\n", out_size+1); DEBUG("realloc %d bytes failed\n", out_size+1);
free(saved); free(saved);
*outp = 0; *outp = 0;
return -1; goto out;
} }
} }
out = out_base + length; out = out_base + length;
@ -10719,7 +10736,7 @@ static int convert(const char* from, const char* to,
} }
break; break;
default: default:
return -1; goto out;
break; break;
} }
} }
@ -10732,7 +10749,10 @@ static int convert(const char* from, const char* to,
*out_lengthp = out - out_base; *out_lengthp = out - out_base;
(*outp)[*out_lengthp] = '\0'; (*outp)[*out_lengthp] = '\0';
return 0; ret = 0;
out:
pthread_mutex_unlock(&o_unac_mutex);
return ret;
} }
int unacmaybefold_string(const char* charset, int unacmaybefold_string(const char* charset,

View File

@ -113,6 +113,9 @@ int unacfold_string(const char* charset,
const char* in, size_t in_length, const char* in, size_t in_length,
char** out, size_t* out_length); char** out, size_t* out_length);
/* To be called before starting threads in mt programs */
void unac_init_mt();
/* /*
* Return unac version number. * Return unac version number.
*/ */

106
src/utils/ptmutex.cpp Normal file
View File

@ -0,0 +1,106 @@
/* Copyright (C) 2004 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
//
// Small test program to evaluate the cost of using mutex locks: calls
// to methods doing a small (150 bytes) base64 encoding job + string
// manipulations, with and without locking. The performance cost is
// negligible on all machines I tested (around 0.3% to 2% depending on
// the system and machine), but it is not zero: you would not want
// this in a tight loop.
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string>
using namespace std;
#include "ptmutex.h"
#include "base64.h"
static char *thisprog;
static char usage [] =
"ptmutex [-l] count\n"
"\n"
;
// Print "<program name>: usage:" followed by the usage text to stderr,
// then terminate the process with exit status 1. Never returns.
static void
Usage(void)
{
fprintf(stderr, "%s: usage:\n%s", thisprog, usage);
exit(1);
}
static int op_flags;
#define OPT_MOINS 0x1
#define OPT_l 0x2
static const string convertbuffer =
"* The recoll GUI program sometimes crashes when running a query while\
the indexing thread is active. Possible workarounds:";
static PTMutexInit o_lock;
// Locking variant of the benchmark job: construct a PTMutexLocker on
// o_lock, then base64-encode the constant convertbuffer into 'out'.
// NOTE(review): presumably the locker holds o_lock until it goes out of
// scope at function exit -- confirm against ptmutex.h.
void workerlock(string& out)
{
PTMutexLocker locker(o_lock);
base64_encode(convertbuffer, out);
}
// Reference variant of the benchmark job: base64-encode the constant
// convertbuffer into 'out' without touching any lock.
void workernolock(string& out)
{
base64_encode(convertbuffer, out);
}
// Entry point of the mutex cost benchmark.
//
// Command line: ptmutex [-l] count
//   -l     call workerlock() in the loop instead of workernolock()
//   count  number of loop iterations (decimal integer)
//
// An unknown option, a lone "-", a wrong number of positional arguments
// or a count which is not a number all end up in Usage(), which prints
// the synopsis and exits with status 1. Returns 0 after the loop ran.
int main(int argc, char **argv)
{
    thisprog = argv[0];
    argc--; argv++;

    // Each leading argument beginning with '-' is a cluster of
    // single-letter options.
    while (argc > 0 && **argv == '-') {
        (*argv)++;
        if (!(**argv))
            /* Case of a lone "-", as in "adb - core" */
            Usage();
        while (**argv)
            switch (*(*argv)++) {
            case 'l': op_flags |= OPT_l; break;
            default: Usage(); break;
            }
        argc--; argv++;
    }

    // Exactly one positional argument must remain: the iteration count.
    if (argc != 1)
        Usage();

    // strtol() rather than atoi(): it lets us detect a count which is not
    // a number (atoi() silently returned 0) and has defined behaviour on
    // out of range input. A negative count still means "no iterations".
    char *end = 0;
    const long count = strtol(*argv, &end, 10);
    if (end == *argv || *end != '\0')
        Usage();

    if (op_flags & OPT_l) {
        fprintf(stderr, "Looping %ld, locking\n", count);
        for (long i = 0; i < count; i++) {
            string s;
            workerlock(s);
        }
    } else {
        fprintf(stderr, "Looping %ld, no locking\n", count);
        for (long i = 0; i < count; i++) {
            string s;
            workernolock(s);
        }
    }
    return 0;
}

View File

@ -405,7 +405,7 @@ list(int fd, const string& path, vector<string>* names, flags flags, nspace dom)
return true; return true;
} }
static const string nullstring(""); static const string cstr_nullstring("");
bool get(const string& path, const string& _name, string *value, bool get(const string& path, const string& _name, string *value,
flags flags, nspace dom) flags flags, nspace dom)
@ -414,7 +414,7 @@ bool get(const string& path, const string& _name, string *value,
} }
bool get(int fd, const string& _name, string *value, flags flags, nspace dom) bool get(int fd, const string& _name, string *value, flags flags, nspace dom)
{ {
return get(fd, nullstring, _name, value, flags, dom); return get(fd, cstr_nullstring, _name, value, flags, dom);
} }
bool set(const string& path, const string& _name, const string& value, bool set(const string& path, const string& _name, const string& value,
flags flags, nspace dom) flags flags, nspace dom)
@ -424,7 +424,7 @@ bool set(const string& path, const string& _name, const string& value,
bool set(int fd, const string& _name, const string& value, bool set(int fd, const string& _name, const string& value,
flags flags, nspace dom) flags flags, nspace dom)
{ {
return set(fd, nullstring, _name, value, flags, dom); return set(fd, cstr_nullstring, _name, value, flags, dom);
} }
bool del(const string& path, const string& _name, flags flags, nspace dom) bool del(const string& path, const string& _name, flags flags, nspace dom)
{ {
@ -432,7 +432,7 @@ bool del(const string& path, const string& _name, flags flags, nspace dom)
} }
bool del(int fd, const string& _name, flags flags, nspace dom) bool del(int fd, const string& _name, flags flags, nspace dom)
{ {
return del(fd, nullstring, _name, flags, dom); return del(fd, cstr_nullstring, _name, flags, dom);
} }
bool list(const string& path, vector<string>* names, flags flags, nspace dom) bool list(const string& path, vector<string>* names, flags flags, nspace dom)
{ {
@ -440,17 +440,17 @@ bool list(const string& path, vector<string>* names, flags flags, nspace dom)
} }
bool list(int fd, vector<string>* names, flags flags, nspace dom) bool list(int fd, vector<string>* names, flags flags, nspace dom)
{ {
return list(fd, nullstring, names, flags, dom); return list(fd, cstr_nullstring, names, flags, dom);
} }
static const string userstring("user."); static const string cstr_userstring("user.");
bool sysname(nspace dom, const string& pname, string* sname) bool sysname(nspace dom, const string& pname, string* sname)
{ {
if (dom != PXATTR_USER) { if (dom != PXATTR_USER) {
errno = EINVAL; errno = EINVAL;
return false; return false;
} }
*sname = userstring + pname; *sname = cstr_userstring + pname;
return true; return true;
} }
@ -460,7 +460,7 @@ bool pxname(nspace dom, const string& sname, string* pname)
errno = EINVAL; errno = EINVAL;
return false; return false;
} }
*pname = sname.substr(userstring.length()); *pname = sname.substr(cstr_userstring.length());
return true; return true;
} }

View File

@ -428,7 +428,7 @@ void neutchars(const string &str, string &out, const string& chars)
* if reasonably possible. Note: we could also use textsplit, stopping when * if reasonably possible. Note: we could also use textsplit, stopping when
* we have enough, this would be cleanly utf8-aware but would remove * we have enough, this would be cleanly utf8-aware but would remove
* punctuation */ * punctuation */
static const string SEPAR = " \t\n\r-:.;,/[]{}"; static const string cstr_SEPAR = " \t\n\r-:.;,/[]{}";
string truncate_to_word(const string &input, string::size_type maxlen) string truncate_to_word(const string &input, string::size_type maxlen)
{ {
string output; string output;
@ -436,7 +436,7 @@ string truncate_to_word(const string &input, string::size_type maxlen)
output = input; output = input;
} else { } else {
output = input.substr(0, maxlen); output = input.substr(0, maxlen);
string::size_type space = output.find_last_of(SEPAR); string::size_type space = output.find_last_of(cstr_SEPAR);
// Original version only truncated at space if space was found after // Original version only truncated at space if space was found after
// maxlen/2. But we HAVE to truncate at space, else we'd need to do // maxlen/2. But we HAVE to truncate at space, else we'd need to do
// utf8 stuff to avoid truncating at multibyte char. In any case, // utf8 stuff to avoid truncating at multibyte char. In any case,
@ -676,6 +676,9 @@ static void gettime(int, struct m_timespec *ts)
} }
///// End system interface ///// End system interface
// Note: this is not protected against multithreaded access and is not
// reentrant, but this is mostly debug code, and it won't crash, just show
// bad results. Also, the frozen reference time is not used that much.
static m_timespec frozen_tv; static m_timespec frozen_tv;
void Chrono::refnow() void Chrono::refnow()
{ {

View File

@ -29,15 +29,21 @@ using std::string;
#include "transcode.h" #include "transcode.h"
#include "debuglog.h" #include "debuglog.h"
#include "ptmutex.h"
#ifdef RCL_ICONV_INBUF_CONST #ifdef RCL_ICONV_INBUF_CONST
#define ICV_P2_TYPE const char** #define ICV_P2_TYPE const char**
#else #else
#define ICV_P2_TYPE char** #define ICV_P2_TYPE char**
#endif #endif
// We gain approximately 28% exec time for word at a time conversions by // We gain approximately 25% exec time for word at a time conversions by
// caching the iconv_open thing. // caching the iconv_open thing.
//
// We may also lose some concurrency on multiproc because of the
// necessary locking, but we only have one processing-intensive
// possible thread for now (the indexing one), so this is probably not
// an issue (and could be worked around with a slightly more
// sophisticated approach).
#define ICONV_CACHE_OPEN #define ICONV_CACHE_OPEN
bool transcode(const string &in, string &out, const string &icode, bool transcode(const string &in, string &out, const string &icode,
@ -48,6 +54,8 @@ bool transcode(const string &in, string &out, const string &icode,
static iconv_t ic = (iconv_t)-1; static iconv_t ic = (iconv_t)-1;
static string cachedicode; static string cachedicode;
static string cachedocode; static string cachedocode;
static PTMutexInit o_cachediconv_mutex;
PTMutexLocker locker(o_cachediconv_mutex);
#else #else
iconv_t ic; iconv_t ic;
#endif #endif
@ -163,13 +171,14 @@ using namespace std;
// Repeatedly transcode a small string for timing measurements // Repeatedly transcode a small string for timing measurements
static const string testword("\xc3\xa9\x6c\x69\x6d\x69\x6e\xc3\xa9\xc3\xa0"); static const string testword("\xc3\xa9\x6c\x69\x6d\x69\x6e\xc3\xa9\xc3\xa0");
// Without cache 10e6 reps on macpro -> 1.88 S // Without cache 10e6 reps on y -> 6.68
// With cache -> 1.56 // With cache -> 4.73
// With cache and lock -> 4.9
void looptest() void looptest()
{ {
cout << testword << endl; cout << testword << endl;
string out; string out;
for (int i = 0; i < 1000*1000; i++) { for (int i = 0; i < 10*1000*1000; i++) {
if (!transcode(testword, out, "UTF-8", "UTF-16BE")) { if (!transcode(testword, out, "UTF-8", "UTF-16BE")) {
cerr << "Transcode failed" << endl; cerr << "Transcode failed" << endl;
break; break;
@ -184,7 +193,7 @@ int main(int argc, char **argv)
exit(0); exit(0);
#endif #endif
if (argc != 5) { if (argc != 5) {
cerr << "Usage: trcsguess ifilename icode ofilename ocode" << endl; cerr << "Usage: transcode ifilename icode ofilename ocode" << endl;
exit(1); exit(1);
} }
const string ifilename = argv[1]; const string ifilename = argv[1];

View File

@ -36,6 +36,7 @@
#include <stdio.h> #include <stdio.h>
#include <stdarg.h> #include <stdarg.h>
#endif /* HAVE_VSNPRINTF */ #endif /* HAVE_VSNPRINTF */
#include <pthread.h>
#include "unac.h" #include "unac.h"
#include "unac_version.h" #include "unac_version.h"
@ -10555,8 +10556,6 @@ int unacfold_string_utf16(const char* in, size_t in_length,
outp, out_lengthp, 1); outp, out_lengthp, 1);
} }
#define MAXOUT 1024
static int convert(const char* from, const char* to, static int convert(const char* from, const char* to,
const char* in, size_t in_length, const char* in, size_t in_length,
char** outp, size_t* out_lengthp); char** outp, size_t* out_lengthp);
@ -10564,6 +10563,14 @@ static int convert(const char* from, const char* to,
static const char *utf16be = "UTF-16BE"; static const char *utf16be = "UTF-16BE";
static iconv_t u8tou16_cd = (iconv_t)-1; static iconv_t u8tou16_cd = (iconv_t)-1;
static iconv_t u16tou8_cd = (iconv_t)-1; static iconv_t u16tou8_cd = (iconv_t)-1;
/*
 * Mutex taken by convert() for the whole duration of a conversion.
 *
 * It is initialized statically so that it is valid before any thread can
 * possibly run: no runtime initialization step is needed, hence there is
 * no window where two threads could race to initialize it, and no risk of
 * re-initializing a mutex which is already in use (undefined behaviour
 * according to POSIX).
 */
static pthread_mutex_t o_unac_mutex = PTHREAD_MUTEX_INITIALIZER;

/*
 * Non-zero once o_unac_mutex is usable. Because of the static initializer
 * above this is true from program start, so the lazy "auto init" test in
 * convert() never fires.
 */
static int unac_mutex_is_init = 1;

/*
 * Historical entry point, to be called before starting threads in
 * multithreaded programs. Kept so that existing callers keep working.
 * The mutex is now statically initialized, so this is a no-op, and it is
 * safe to call it any number of times, even while the mutex is held.
 */
void unac_init_mt()
{
    /* Nothing to do: o_unac_mutex is statically initialized. */
}
/* /*
* Convert buffer <in> containing string encoded in charset <from> into * Convert buffer <in> containing string encoded in charset <from> into
@ -10576,6 +10583,7 @@ static int convert(const char* from, const char* to,
const char* in, size_t in_length, const char* in, size_t in_length,
char** outp, size_t* out_lengthp) char** outp, size_t* out_lengthp)
{ {
int ret = -1;
iconv_t cd; iconv_t cd;
char* out; char* out;
size_t out_remain; size_t out_remain;
@ -10584,6 +10592,15 @@ static int convert(const char* from, const char* to,
int from_utf16, from_utf8, to_utf16, to_utf8, u8tou16, u16tou8; int from_utf16, from_utf8, to_utf16, to_utf8, u8tou16, u16tou8;
const char space[] = { 0x00, 0x20 }; const char space[] = { 0x00, 0x20 };
/* Note: better call explicit unac_init_mt() before starting threads than
rely on this.
*/
if (unac_mutex_is_init == 0) {
pthread_mutex_init(&o_unac_mutex, 0);
unac_mutex_is_init = 1;
}
pthread_mutex_lock(&o_unac_mutex);
if (!strcmp(utf16be, from)) { if (!strcmp(utf16be, from)) {
from_utf8 = 0; from_utf8 = 0;
from_utf16 = 1; from_utf16 = 1;
@ -10614,7 +10631,7 @@ static int convert(const char* from, const char* to,
/* *outp still valid, no freeing */ /* *outp still valid, no freeing */
if(debug_level >= UNAC_DEBUG_LOW) if(debug_level >= UNAC_DEBUG_LOW)
DEBUG("realloc %d bytes failed\n", out_size+1); DEBUG("realloc %d bytes failed\n", out_size+1);
return -1; goto out;
} }
} else { } else {
/* +1 for null */ /* +1 for null */
@ -10622,7 +10639,7 @@ static int convert(const char* from, const char* to,
if(out == 0) { if(out == 0) {
if(debug_level >= UNAC_DEBUG_LOW) if(debug_level >= UNAC_DEBUG_LOW)
DEBUG("malloc %d bytes failed\n", out_size+1); DEBUG("malloc %d bytes failed\n", out_size+1);
return -1; goto out;
} }
} }
out_remain = out_size; out_remain = out_size;
@ -10631,7 +10648,7 @@ static int convert(const char* from, const char* to,
if (u8tou16) { if (u8tou16) {
if (u8tou16_cd == (iconv_t)-1) { if (u8tou16_cd == (iconv_t)-1) {
if((u8tou16_cd = iconv_open(to, from)) == (iconv_t)-1) { if((u8tou16_cd = iconv_open(to, from)) == (iconv_t)-1) {
return -1; goto out;
} }
} else { } else {
iconv(u8tou16_cd, 0, 0, 0, 0); iconv(u8tou16_cd, 0, 0, 0, 0);
@ -10640,7 +10657,7 @@ static int convert(const char* from, const char* to,
} else if (u16tou8) { } else if (u16tou8) {
if (u16tou8_cd == (iconv_t)-1) { if (u16tou8_cd == (iconv_t)-1) {
if((u16tou8_cd = iconv_open(to, from)) == (iconv_t)-1) { if((u16tou8_cd = iconv_open(to, from)) == (iconv_t)-1) {
return -1; goto out;
} }
} else { } else {
iconv(u16tou8_cd, 0, 0, 0, 0); iconv(u16tou8_cd, 0, 0, 0, 0);
@ -10648,7 +10665,7 @@ static int convert(const char* from, const char* to,
cd = u16tou8_cd; cd = u16tou8_cd;
} else { } else {
if((cd = iconv_open(to, from)) == (iconv_t)-1) { if((cd = iconv_open(to, from)) == (iconv_t)-1) {
return -1; goto out;
} }
} }
@ -10682,7 +10699,7 @@ static int convert(const char* from, const char* to,
if(errno == E2BIG) if(errno == E2BIG)
/* fall thru to the E2BIG case below */; /* fall thru to the E2BIG case below */;
else else
return -1; goto out;
} else { } else {
/* The offending character was replaced by a SPACE, skip it. */ /* The offending character was replaced by a SPACE, skip it. */
in += 2; in += 2;
@ -10691,7 +10708,7 @@ static int convert(const char* from, const char* to,
break; break;
} }
} else { } else {
return -1; goto out;
} }
case E2BIG: case E2BIG:
{ {
@ -10711,7 +10728,7 @@ static int convert(const char* from, const char* to,
DEBUG("realloc %d bytes failed\n", out_size+1); DEBUG("realloc %d bytes failed\n", out_size+1);
free(saved); free(saved);
*outp = 0; *outp = 0;
return -1; goto out;
} }
} }
out = out_base + length; out = out_base + length;
@ -10719,7 +10736,7 @@ static int convert(const char* from, const char* to,
} }
break; break;
default: default:
return -1; goto out;
break; break;
} }
} }
@ -10732,7 +10749,10 @@ static int convert(const char* from, const char* to,
*out_lengthp = out - out_base; *out_lengthp = out - out_base;
(*outp)[*out_lengthp] = '\0'; (*outp)[*out_lengthp] = '\0';
return 0; ret = 0;
out:
pthread_mutex_unlock(&o_unac_mutex);
return ret;
} }
int unacmaybefold_string(const char* charset, int unacmaybefold_string(const char* charset,

View File

@ -113,6 +113,9 @@ int unacfold_string(const char* charset,
const char* in, size_t in_length, const char* in, size_t in_length,
char** out, size_t* out_length); char** out, size_t* out_length);
/* To be called before starting threads in mt programs */
void unac_init_mt();
/* /*
* Return unac version number. * Return unac version number.
*/ */

View File

@ -38,7 +38,19 @@
<h2><a name="b_latest">recoll 1.16</a></h2> <h2><a name="b_latest">recoll 1.16</a></h2>
<ul> <ul>
<li>The <tt>recoll</tt> GUI program sometimes crashes when
running a query while the indexing thread is active.
Possible workarounds:<br>
<ul>
<li>(Recommended) Use the command
line <tt>recollindex</tt> program to perform indexing
(usually just type "recollindex" in a console, or see "man
recollindex").</li>
<li>Do not run queries in <tt>recoll</tt> while the indexing thread
is running (as indicated in the bottom status line).</li>
</ul>
<li>Cancelling a preview in the GUI will also cancel the indexing <li>Cancelling a preview in the GUI will also cancel the indexing
thread if it is running.</li> thread if it is running.</li>

View File

@ -53,6 +53,27 @@
<p>The current version is 1.16.0. <a href="release-1.16.html"> <p>The current version is 1.16.0. <a href="release-1.16.html">
Release notes</a>.</p> Release notes</a>.</p>
<div class="important">
<p>Notice for 1.16.0: the
<tt>recoll</tt> GUI program sometimes crashes when running a query
while the indexing thread is active. I can reproduce the problem
and I am working on a correction. Meanwhile, there are two possible
workarounds:<br>
<ul>
<li>(Recommended) Use the command
line <tt>recollindex</tt> program to perform indexing
(usually just type "recollindex" in a console, or see "man
recollindex").</li>
<li>Do not run queries in <tt>recoll</tt> while the indexing thread
is running (as indicated in the bottom status line).</li>
</ul>
If the workaround fails or you experience other kinds of
crashes with either <tt>recoll</tt> or <tt>recollindex</tt>, and
want to help, please follow the instructions on
<a href="https://bitbucket.org/medoc/recoll/wiki/GettingAStackTrace">
this wiki page</a>.</p>
</div>
<p>The download page for Recoll 1.15 is <p>The download page for Recoll 1.15 is
<a href="download-1.15.html">still available</a>.</p> <a href="download-1.15.html">still available</a>.</p>
@ -204,21 +225,26 @@
Xapian</a>, Xapian</a>,
<a href="https://launchpad.net/~recoll-backports/+archive/recoll-1.15-on"> <a href="https://launchpad.net/~recoll-backports/+archive/recoll-1.15-on">
Recoll and kio-recoll</a>. These were built from the latest versions, Recoll and kio-recoll</a>. These were built from the latest versions,
for a set of Ubuntu series. You just need to add the for a set of Ubuntu series.</p>
PPAs to your system software sources (the instructions are on
<p>Ubuntu 10.04 (lucid) and later versions just need the Recoll
PPA. Older versions also needed a backport for Xapian
(xapian-backports/xapian-1.2).</p>
<p>Just add the
PPA to your system software sources (the instructions are on
the PPA page or the PPA page or
<a href="https://help.launchpad.net/Packaging/PPA/InstallingSoftware"> <a href="https://help.launchpad.net/Packaging/PPA/InstallingSoftware">
here</a>), and you can then use the normal package here</a>), and you can then use the normal package
manager to install or update Recoll and Xapian. For Ubuntu versions manager to install or update Recoll. For Ubuntu versions
from 9.10 (Karmic), only two commands are needed: after 9.10 (Karmic), only one command is needed:
<pre><tt> <pre><tt>
sudo add-apt-repository ppa:xapian-backports/xapian-1.2
sudo add-apt-repository ppa:recoll-backports/recoll-1.15-on sudo add-apt-repository ppa:recoll-backports/recoll-1.15-on
</tt></pre> </tt></pre>
</p> </p>
<p>For Ubuntu 9.04 (Jaunty) and older, to avoid <p>For Ubuntu 9.04 (Jaunty) and older,
messages about signature errors, you may have to explicitly import the you may have to explicitly import the
Recoll and Xapian public keys: <pre><tt> Recoll and Xapian public keys: <pre><tt>
gpg --keyserver keyserver.ubuntu.com --recv 9DA85604 gpg --keyserver keyserver.ubuntu.com --recv 9DA85604
gpg --export --armor 9DA85604 | sudo apt-key add - gpg --export --armor 9DA85604 | sudo apt-key add -
@ -390,7 +416,11 @@ I now use the OpenSUSE build service to create Recoll OpenSUSE packages.
<h3>Updated 1.16 translations that became available after the <h3>Updated 1.16 translations that became available after the
release:</h3> release:</h3>
<p>None for now :(</p> <p>Czech, thanks to Pavel !
<a href="translations/recoll_cs.ts">recoll_cs.ts</a>
<a href="translations/recoll_cs.qm">recoll_cs.qm</a>
</p>
<!-- <!--
<p>Lithuanian. <p>Lithuanian.
<a href="translations/recoll_lt.ts">recoll_lt.ts</a> <a href="translations/recoll_lt.ts">recoll_lt.ts</a>

View File

@ -90,6 +90,30 @@
<h2>News: </h2> <h2>News: </h2>
<ul> <ul>
<li>
<div class="important">
<p>Notice for 1.16.0: the
<tt>recoll</tt> GUI program sometimes crashes when running a query
while the indexing thread is active. I can reproduce the problem
and I am working on a correction. Meanwhile, there are two possible
workarounds:<br>
<ul>
<li>(Recommended) Use the command
line <tt>recollindex</tt> program to perform indexing
(usually just type "recollindex" in a console, or see "man
recollindex").</li>
<li>Do not run queries in <tt>recoll</tt> while the indexing thread
is running (as indicated in the bottom status line).</li>
</ul>
If the workaround fails or you experience other kinds of
crashes with either <tt>recoll</tt> or <tt>recollindex</tt>, and
want to help, please follow the instructions on
<a href="https://bitbucket.org/medoc/recoll/wiki/GettingAStackTrace">
this wiki page</a>.</p>
</div>
</li>
<li>2011-09-03: release <a href="download.html#source">1.16.0</a> <li>2011-09-03: release <a href="download.html#source">1.16.0</a>
is out with many <a href="release-1.16.html">small improvements</a> is out with many <a href="release-1.16.html">small improvements</a>
over 1.15. </li> over 1.15. </li>