threading cleanup: add mutex protection around moronic change to transcode. Add mutex to equiv issue in unac. Rename const strings everywhere to cstr_xx to ease future detection of potentially problematic static variables. Most probably close issue #65
This commit is contained in:
parent
80c86c4c5f
commit
424e4173ba
@ -34,6 +34,8 @@
|
||||
#include "rclaspell.h"
|
||||
#include "debuglog.h"
|
||||
|
||||
#include "ptmutex.h"
|
||||
|
||||
// Just a place where we keep the Aspell library entry points together
|
||||
class AspellApi {
|
||||
public:
|
||||
@ -61,6 +63,7 @@ public:
|
||||
|
||||
};
|
||||
static AspellApi aapi;
|
||||
static PTMutexInit o_aapi_mutex;
|
||||
|
||||
#define NMTOPTR(NM, TP) \
|
||||
if ((aapi.NM = TP dlsym(m_data->m_handle, #NM)) == 0) { \
|
||||
@ -111,6 +114,7 @@ Aspell::~Aspell()
|
||||
|
||||
bool Aspell::init(string &reason)
|
||||
{
|
||||
PTMutexLocker locker(o_aapi_mutex);
|
||||
deleteZ(m_data);
|
||||
|
||||
// Language: we get this from the configuration, else from the NLS
|
||||
@ -227,7 +231,7 @@ bool Aspell::init(string &reason)
|
||||
return true;
|
||||
}
|
||||
|
||||
// Health check: returns true only when the private data block (m_data) has
// been allocated and its m_handle member is non-null. Does not modify the
// object.
bool Aspell::ok()
|
||||
bool Aspell::ok() const
|
||||
{
|
||||
return m_data != 0 && m_data->m_handle != 0;
|
||||
}
|
||||
|
||||
@ -50,7 +50,7 @@ class Aspell {
|
||||
~Aspell();
|
||||
|
||||
/** Check health */
|
||||
bool ok();
|
||||
bool ok() const;
|
||||
|
||||
/** Find the aspell command and shared library, init function pointers */
|
||||
bool init(string &reason);
|
||||
|
||||
@ -29,6 +29,7 @@
|
||||
#include "rclconfig.h"
|
||||
#include "rclinit.h"
|
||||
#include "pathut.h"
|
||||
#include "unac.h"
|
||||
|
||||
static const int catchedSigs[] = {SIGHUP, SIGINT, SIGQUIT, SIGTERM,
|
||||
SIGUSR1, SIGUSR2};
|
||||
@ -99,6 +100,8 @@ RclConfig *recollinit(RclInitFlags flags,
|
||||
// Make sure the locale charset is initialized (so that multiple
|
||||
// threads don't try to do it at once).
|
||||
config->getDefCharset();
|
||||
// Init unac locking
|
||||
unac_init_mt();
|
||||
|
||||
int flushmb;
|
||||
if (config->getConfParam("idxflushmb", &flushmb) && flushmb > 0) {
|
||||
|
||||
@ -172,8 +172,6 @@ public:
|
||||
ifstream m_input;
|
||||
};
|
||||
|
||||
const string badtmpdirname = "/no/such/dir/really/can/exist";
|
||||
|
||||
// Initialize. Compute paths and create a temporary directory that will be
|
||||
// used by internfile()
|
||||
BeagleQueueIndexer::BeagleQueueIndexer(RclConfig *cnf, Rcl::Db *db,
|
||||
|
||||
@ -51,20 +51,21 @@ using namespace std;
|
||||
#include "pxattr.h"
|
||||
#endif // RCL_USE_XATTR
|
||||
|
||||
static const string stxtplain("text/plain");
|
||||
static const string cstr_stxtplain("text/plain");
|
||||
|
||||
// The internal path element separator. This can't be the same as the rcldb
|
||||
// file to ipath separator : "|"
|
||||
// We replace it with a control char if it comes out of a filter (ie:
|
||||
// rclzip or rclchm can do this). If you want the SOH control char
|
||||
// inside an ipath, you're out of luck (and a bit weird).
|
||||
static const string isep(":");
|
||||
static const char colon_repl = '\x01';
|
||||
static const string cstr_isep(":");
|
||||
|
||||
static const char cchar_colon_repl = '\x01';
|
||||
// Return a copy of `in` in which every ':' character has been replaced by
// the colon replacement control character ('\x01'), so that the result can
// be stored as one ipath element without clashing with the ":" element
// separator. All other characters are copied unchanged.
static string colon_hide(const string& in)
|
||||
{
|
||||
string out;
|
||||
for (string::const_iterator it = in.begin(); it != in.end(); it++) {
|
||||
out += *it == ':' ? colon_repl : *it;
|
||||
out += *it == ':' ? cchar_colon_repl : *it;
|
||||
}
|
||||
return out;
|
||||
}
|
||||
@ -72,7 +73,7 @@ static string colon_restore(const string& in)
|
||||
{
|
||||
string out;
|
||||
for (string::const_iterator it = in.begin(); it != in.end(); it++) {
|
||||
out += *it == colon_repl ? ':' : *it;
|
||||
out += *it == cchar_colon_repl ? ':' : *it;
|
||||
}
|
||||
return out;
|
||||
}
|
||||
@ -115,7 +116,7 @@ bool FileInterner::getEnclosing(const string &url, const string &ipath,
|
||||
url.c_str(), eipath.c_str()));
|
||||
if (eipath.empty())
|
||||
return false;
|
||||
if ((colon = eipath.find_last_of(isep)) != string::npos) {
|
||||
if ((colon = eipath.find_last_of(cstr_isep)) != string::npos) {
|
||||
eipath.erase(colon);
|
||||
} else {
|
||||
eipath.erase();
|
||||
@ -365,12 +366,12 @@ void FileInterner::initcommon(RclConfig *cnf, int flags)
|
||||
m_handlers.reserve(MAXHANDLERS);
|
||||
for (unsigned int i = 0; i < MAXHANDLERS; i++)
|
||||
m_tmpflgs[i] = false;
|
||||
m_targetMType = stxtplain;
|
||||
m_targetMType = cstr_stxtplain;
|
||||
}
|
||||
|
||||
// We use a single beagle cache object to access beagle data. We protect it
|
||||
// against multiple thread access.
|
||||
static PTMutexInit o_lock;
|
||||
static PTMutexInit o_beagler_mutex;
|
||||
|
||||
FileInterner::FileInterner(const Rcl::Doc& idoc, RclConfig *cnf,
|
||||
TempDir& td, int flags)
|
||||
@ -422,12 +423,12 @@ FileInterner::FileInterner(const Rcl::Doc& idoc, RclConfig *cnf,
|
||||
string udi = it->second;
|
||||
|
||||
{
|
||||
PTMutexLocker locker(o_lock);
|
||||
PTMutexLocker locker(o_beagler_mutex);
|
||||
// Retrieve from our webcache (beagle data). The beagler
|
||||
// object is created at the first call of this routine and
|
||||
// deleted when the program exits.
|
||||
static BeagleQueueCache beagler(cnf);
|
||||
if (!beagler.getFromCache(udi, dotdoc, data)) {
|
||||
static BeagleQueueCache o_beagler(cnf);
|
||||
if (!o_beagler.getFromCache(udi, dotdoc, data)) {
|
||||
LOGINFO(("FileInterner:: failed fetch from Beagle cache for [%s]\n",
|
||||
udi.c_str()));
|
||||
return;
|
||||
@ -564,14 +565,14 @@ static inline bool getKeyValue(const map<string, string>& docdata,
|
||||
// These defs are for the Dijon meta array. Rcl::Doc predefined field
|
||||
// names are used where appropriate. In some cases, Rcl::Doc names are
|
||||
// used inside the Dijon metadata (ex: origcharset)
|
||||
static const string keyau("author");
|
||||
static const string keycs("charset");
|
||||
static const string keyct("content");
|
||||
static const string keyds("description");
|
||||
static const string keyfn("filename");
|
||||
static const string keymd("modificationdate");
|
||||
static const string keymt("mimetype");
|
||||
static const string keytt("title");
|
||||
static const string cstr_keyau("author");
|
||||
static const string cstr_keycs("charset");
|
||||
static const string cstr_keyct("content");
|
||||
static const string cstr_keyds("description");
|
||||
static const string cstr_keyfn("filename");
|
||||
static const string cstr_keymd("modificationdate");
|
||||
static const string cstr_keymt("mimetype");
|
||||
static const string cstr_keytt("title");
|
||||
|
||||
bool FileInterner::dijontorcl(Rcl::Doc& doc)
|
||||
{
|
||||
@ -585,21 +586,21 @@ bool FileInterner::dijontorcl(Rcl::Doc& doc)
|
||||
|
||||
for (map<string,string>::const_iterator it = docdata.begin();
|
||||
it != docdata.end(); it++) {
|
||||
if (it->first == keyct) {
|
||||
if (it->first == cstr_keyct) {
|
||||
doc.text = it->second;
|
||||
} else if (it->first == keymd) {
|
||||
} else if (it->first == cstr_keymd) {
|
||||
doc.dmtime = it->second;
|
||||
} else if (it->first == Rcl::Doc::keyoc) {
|
||||
doc.origcharset = it->second;
|
||||
} else if (it->first == keymt || it->first == keycs) {
|
||||
} else if (it->first == cstr_keymt || it->first == cstr_keycs) {
|
||||
// don't need/want these.
|
||||
} else {
|
||||
doc.meta[it->first] = it->second;
|
||||
}
|
||||
}
|
||||
if (doc.meta[Rcl::Doc::keyabs].empty() && !doc.meta[keyds].empty()) {
|
||||
doc.meta[Rcl::Doc::keyabs] = doc.meta[keyds];
|
||||
doc.meta.erase(keyds);
|
||||
if (doc.meta[Rcl::Doc::keyabs].empty() && !doc.meta[cstr_keyds].empty()) {
|
||||
doc.meta[Rcl::Doc::keyabs] = doc.meta[cstr_keyds];
|
||||
doc.meta.erase(cstr_keyds);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -635,21 +636,21 @@ void FileInterner::collectIpathAndMT(Rcl::Doc& doc) const
|
||||
if (!ipathel.empty()) {
|
||||
// We have a non-empty ipath
|
||||
hasipath = true;
|
||||
getKeyValue(docdata, keymt, doc.mimetype);
|
||||
getKeyValue(docdata, keyfn, doc.utf8fn);
|
||||
getKeyValue(docdata, cstr_keymt, doc.mimetype);
|
||||
getKeyValue(docdata, cstr_keyfn, doc.utf8fn);
|
||||
}
|
||||
doc.ipath += colon_hide(ipathel) + isep;
|
||||
doc.ipath += colon_hide(ipathel) + cstr_isep;
|
||||
} else {
|
||||
doc.ipath += isep;
|
||||
doc.ipath += cstr_isep;
|
||||
}
|
||||
getKeyValue(docdata, keyau, doc.meta[Rcl::Doc::keyau]);
|
||||
getKeyValue(docdata, keymd, doc.dmtime);
|
||||
getKeyValue(docdata, cstr_keyau, doc.meta[Rcl::Doc::keyau]);
|
||||
getKeyValue(docdata, cstr_keymd, doc.dmtime);
|
||||
}
|
||||
|
||||
// Trim empty tail elements in ipath.
|
||||
if (hasipath) {
|
||||
LOGDEB2(("IPATH [%s]\n", doc.ipath.c_str()));
|
||||
string::size_type sit = doc.ipath.find_last_not_of(isep);
|
||||
string::size_type sit = doc.ipath.find_last_not_of(cstr_isep);
|
||||
if (sit == string::npos)
|
||||
doc.ipath.erase();
|
||||
else if (sit < doc.ipath.length() -1)
|
||||
@ -681,8 +682,8 @@ int FileInterner::addHandler()
|
||||
{
|
||||
const map<string, string>& docdata = m_handlers.back()->get_meta_data();
|
||||
string charset, mimetype;
|
||||
getKeyValue(docdata, keycs, charset);
|
||||
getKeyValue(docdata, keymt, mimetype);
|
||||
getKeyValue(docdata, cstr_keycs, charset);
|
||||
getKeyValue(docdata, cstr_keymt, mimetype);
|
||||
|
||||
LOGDEB(("FileInterner::addHandler: next_doc is %s\n", mimetype.c_str()));
|
||||
|
||||
@ -690,7 +691,7 @@ int FileInterner::addHandler()
|
||||
// general), we're done decoding. If we hit text/plain, we're done
|
||||
// in any case
|
||||
if (!stringicmp(mimetype, m_targetMType) ||
|
||||
!stringicmp(mimetype, stxtplain)) {
|
||||
!stringicmp(mimetype, cstr_stxtplain)) {
|
||||
m_reachedMType = mimetype;
|
||||
LOGDEB1(("FileInterner::addHandler: target reached\n"));
|
||||
return ADD_BREAK;
|
||||
@ -723,7 +724,7 @@ int FileInterner::addHandler()
|
||||
const string *txt = &ns;
|
||||
{
|
||||
map<string,string>::const_iterator it;
|
||||
it = docdata.find(keyct);
|
||||
it = docdata.find(cstr_keyct);
|
||||
if (it != docdata.end())
|
||||
txt = &it->second;
|
||||
}
|
||||
@ -795,7 +796,7 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, const string& ipath
|
||||
int vipathidx = 0;
|
||||
if (!ipath.empty()) {
|
||||
vector<string> lipath;
|
||||
stringToTokens(ipath, lipath, isep, true);
|
||||
stringToTokens(ipath, lipath, cstr_isep, true);
|
||||
for (vector<string>::iterator it = lipath.begin();
|
||||
it != lipath.end(); it++) {
|
||||
*it = colon_restore(*it);
|
||||
|
||||
@ -53,7 +53,7 @@ public:
|
||||
private: FILE **m_fpp;
|
||||
};
|
||||
|
||||
static PTMutexInit o_mutex;
|
||||
static PTMutexInit o_mcache_mutex;
|
||||
|
||||
/**
|
||||
* Handles a cache for message numbers to offset translations. Permits direct
|
||||
@ -86,7 +86,7 @@ public:
|
||||
LOGDEB0(("MboxCache::get_offsets: init failed\n"));
|
||||
return -1;
|
||||
}
|
||||
PTMutexLocker locker(o_mutex);
|
||||
PTMutexLocker locker(o_mcache_mutex);
|
||||
string fn = makefilename(udi);
|
||||
FILE *fp = 0;
|
||||
if ((fp = fopen(fn.c_str(), "r")) == 0) {
|
||||
@ -133,7 +133,7 @@ public:
|
||||
return;
|
||||
if (fsize < m_minfsize)
|
||||
return;
|
||||
PTMutexLocker locker(o_mutex);
|
||||
PTMutexLocker locker(o_mcache_mutex);
|
||||
string fn = makefilename(udi);
|
||||
FILE *fp;
|
||||
if ((fp = fopen(fn.c_str(), "w")) == 0) {
|
||||
@ -163,7 +163,7 @@ public:
|
||||
|
||||
// Check state, possibly initialize
|
||||
bool ok(RclConfig *config) {
|
||||
PTMutexLocker locker(o_mutex);
|
||||
PTMutexLocker locker(o_mcache_mutex);
|
||||
if (m_minfsize == -1)
|
||||
return false;
|
||||
if (!m_ok) {
|
||||
@ -224,9 +224,9 @@ private:
|
||||
|
||||
const size_t MboxCache::o_b1size = 1024;
|
||||
|
||||
static class MboxCache mcache;
|
||||
static class MboxCache o_mcache;
|
||||
|
||||
static const string keyquirks("mhmboxquirks");
|
||||
static const string cstr_keyquirks("mhmboxquirks");
|
||||
|
||||
MimeHandlerMbox::~MimeHandlerMbox()
|
||||
{
|
||||
@ -271,7 +271,7 @@ bool MimeHandlerMbox::set_document_file(const string &fn)
|
||||
|
||||
// Check for location-based quirks:
|
||||
string quirks;
|
||||
if (m_config && m_config->getConfParam(keyquirks, quirks)) {
|
||||
if (m_config && m_config->getConfParam(cstr_keyquirks, quirks)) {
|
||||
if (quirks == "tbird") {
|
||||
LOGDEB(("MimeHandlerMbox: setting quirks TBIRD\n"));
|
||||
m_quirks |= MBOXQUIRK_TBIRD;
|
||||
@ -358,6 +358,20 @@ static const char *miniTbirdFrom = "^From $";
|
||||
static regex_t fromregex;
|
||||
static regex_t minifromregex;
|
||||
static bool regcompiled;
|
||||
static PTMutexInit o_regex_mutex;
|
||||
|
||||
// Compile frompat into fromregex and miniTbirdFrom into minifromregex (both
// with REG_NOSUB|REG_EXTENDED), then set regcompiled. The whole body runs
// with o_regex_mutex held, and does nothing if regcompiled is already set
// once the lock has been obtained.
// NOTE(review): the return values of regcomp() are not checked, so
// regcompiled is set even if a compilation fails.
static void compileregexes()
|
||||
{
|
||||
PTMutexLocker locker(o_regex_mutex);
|
||||
// As the initial test of regcompiled is unprotected the value may
|
||||
// have changed while we were waiting for the lock. Test again now
|
||||
// that we are alone.
|
||||
if (regcompiled)
|
||||
return;
|
||||
regcomp(&fromregex, frompat, REG_NOSUB|REG_EXTENDED);
|
||||
regcomp(&minifromregex, miniTbirdFrom, REG_NOSUB|REG_EXTENDED);
|
||||
regcompiled = true;
|
||||
}
|
||||
|
||||
bool MimeHandlerMbox::next_document()
|
||||
{
|
||||
@ -383,9 +397,7 @@ bool MimeHandlerMbox::next_document()
|
||||
mtarg = -1;
|
||||
|
||||
if (!regcompiled) {
|
||||
regcomp(&fromregex, frompat, REG_NOSUB|REG_EXTENDED);
|
||||
regcomp(&minifromregex, miniTbirdFrom, REG_NOSUB|REG_EXTENDED);
|
||||
regcompiled = true;
|
||||
compileregexes();
|
||||
}
|
||||
|
||||
// If we are called to retrieve a specific message, seek to bof
|
||||
@ -403,7 +415,7 @@ bool MimeHandlerMbox::next_document()
|
||||
LOGDEB0(("MimeHandlerMbox::next_doc: mtarg %d m_udi[%s]\n",
|
||||
mtarg, m_udi.c_str()));
|
||||
if (!m_udi.empty() &&
|
||||
(off = mcache.get_offset(m_config, m_udi, mtarg)) >= 0 &&
|
||||
(off = o_mcache.get_offset(m_config, m_udi, mtarg)) >= 0 &&
|
||||
fseeko(fp, (off_t)off, SEEK_SET) >= 0 &&
|
||||
fgets(line, LL, fp) &&
|
||||
(!regexec(&fromregex, line, 0, 0, 0) ||
|
||||
@ -492,7 +504,7 @@ bool MimeHandlerMbox::next_document()
|
||||
LOGDEB2(("MimeHandlerMbox::next: eof hit\n"));
|
||||
m_havedoc = false;
|
||||
if (!m_udi.empty() && storeoffsets) {
|
||||
mcache.put_offsets(m_config, m_udi, m_fsize, m_offsets);
|
||||
o_mcache.put_offsets(m_config, m_udi, m_fsize, m_offsets);
|
||||
}
|
||||
}
|
||||
return msgtxt.empty() ? false : true;
|
||||
|
||||
@ -34,12 +34,14 @@ using std::endl;
|
||||
#include "plaintorich.h"
|
||||
#include "mimehandler.h"
|
||||
|
||||
// Default highlighter
|
||||
// Default highlighter. No need for locking, this is query-only.
|
||||
static const string cstr_hlfontcolor("<font color=\"blue\">");
|
||||
static const string cstr_hlendfont("</font>");
|
||||
// PlainToRich specialization used for the result list: startMatch() returns
// the opening <font color="blue"> tag and endMatch() returns the closing
// </font> tag.
class PlainToRichHtReslist : public PlainToRich {
|
||||
public:
|
||||
virtual ~PlainToRichHtReslist() {}
|
||||
virtual string startMatch() {return string("<font color=\"blue\">");}
|
||||
virtual string endMatch() {return string("</font>");}
|
||||
virtual string startMatch() {return cstr_hlfontcolor;}
|
||||
virtual string endMatch() {return cstr_hlendfont;}
|
||||
};
|
||||
static PlainToRichHtReslist g_hiliter;
|
||||
|
||||
@ -406,10 +408,10 @@ string ResListPager::detailsLink()
|
||||
|
||||
// Return a reference to the built-in result paragraph format: an HTML
// fragment containing %-prefixed placeholders. The string is a
// function-local static constant, so the returned reference stays valid
// after the call.
// NOTE(review): the meaning of each %X placeholder is defined by the code
// that expands the format, which is not visible here.
const string &ResListPager::parFormat()
|
||||
{
|
||||
static const string format("<img src=\"%I\" align=\"left\">"
|
||||
"%R %S %L <b>%T</b><br>"
|
||||
"%M %D <i>%U</i><br>"
|
||||
"%A %K");
|
||||
return format;
|
||||
static const string cstr_format("<img src=\"%I\" align=\"left\">"
|
||||
"%R %S %L <b>%T</b><br>"
|
||||
"%M %D <i>%U</i><br>"
|
||||
"%A %K");
|
||||
return cstr_format;
|
||||
}
|
||||
|
||||
|
||||
@ -51,6 +51,7 @@ using namespace std;
|
||||
#include "md5.h"
|
||||
#include "rclversion.h"
|
||||
#include "cancelcheck.h"
|
||||
#include "ptmutex.h"
|
||||
|
||||
#ifndef MAX
|
||||
#define MAX(A,B) (A>B?A:B)
|
||||
@ -61,8 +62,8 @@ using namespace std;
|
||||
|
||||
// Recoll index format version is stored in user metadata. When this changes,
|
||||
// we can't open the db and will have to reindex.
|
||||
static const string RCL_IDX_VERSION_KEY("RCL_IDX_VERSION_KEY");
|
||||
static const string RCL_IDX_VERSION("1");
|
||||
static const string cstr_RCL_IDX_VERSION_KEY("RCL_IDX_VERSION_KEY");
|
||||
static const string cstr_RCL_IDX_VERSION("1");
|
||||
|
||||
// This is the word position offset at which we index the body text
|
||||
// (abstract, keywords, etc.. are stored before this)
|
||||
@ -79,7 +80,7 @@ const string end_of_field_term = "XXND";
|
||||
// This is used as a marker inside the abstract frag lists, but
|
||||
// normally doesn't remain in final output (which is built with a
|
||||
// custom sep. by our caller).
|
||||
static const string ellipsis("...");
|
||||
static const string cstr_ellipsis("...");
|
||||
|
||||
string version_string(){
|
||||
return string("Recoll ") + string(rclversionstr) + string(" + Xapian ") +
|
||||
@ -88,12 +89,12 @@ string version_string(){
|
||||
|
||||
// Synthetic abstract marker (to discriminate from abstract actually
|
||||
// found in document)
|
||||
static const string rclSyntAbs("?!#@");
|
||||
static const string cstr_syntAbs("?!#@");
|
||||
|
||||
// Only ONE field name inside the index data record differs from the
|
||||
// Rcl::Doc ones: caption<->title, for a remnant of compatibility with
|
||||
// omega
|
||||
static const string keycap("caption");
|
||||
static const string cstr_keycap("caption");
|
||||
|
||||
// Static/Default table for field->prefix/weight translation.
|
||||
// This is logically const after initialization. Can't use a
|
||||
@ -106,8 +107,16 @@ static const string keycap("caption");
|
||||
// suppressed.
|
||||
|
||||
static map<string, FieldTraits> fldToTraits;
|
||||
static PTMutexInit o_fldToTraits_mutex;
|
||||
|
||||
static void initFldToTraits()
|
||||
{
|
||||
PTMutexLocker locker(o_fldToTraits_mutex);
|
||||
// As we perform non-locked testing of initialization, check again with
|
||||
// the lock held
|
||||
if (fldToTraits.size())
|
||||
return;
|
||||
|
||||
// Can't remember why "abstract" is indexed without a prefix
|
||||
// (result: it's indexed twice actually). Maybe I'll dare change
|
||||
// this one day
|
||||
@ -116,7 +125,7 @@ static void initFldToTraits()
|
||||
fldToTraits["ext"] = FieldTraits("XE");
|
||||
fldToTraits[Doc::keyfn] = FieldTraits("XSFN");
|
||||
|
||||
fldToTraits[keycap] = FieldTraits("S");
|
||||
fldToTraits[cstr_keycap] = FieldTraits("S");
|
||||
fldToTraits[Doc::keytt] = FieldTraits("S");
|
||||
fldToTraits["subject"] = FieldTraits("S");
|
||||
|
||||
@ -189,14 +198,14 @@ bool Db::Native::dbDataToRclDoc(Xapian::docid docid, std::string &data,
|
||||
parms.get(Doc::keyfmt, doc.fmtime);
|
||||
parms.get(Doc::keydmt, doc.dmtime);
|
||||
parms.get(Doc::keyoc, doc.origcharset);
|
||||
parms.get(keycap, doc.meta[Doc::keytt]);
|
||||
parms.get(cstr_keycap, doc.meta[Doc::keytt]);
|
||||
parms.get(Doc::keykw, doc.meta[Doc::keykw]);
|
||||
parms.get(Doc::keyabs, doc.meta[Doc::keyabs]);
|
||||
// Possibly remove synthetic abstract indicator (if it's there, we
|
||||
// used to index the beginning of the text as abstract).
|
||||
doc.syntabs = false;
|
||||
if (doc.meta[Doc::keyabs].find(rclSyntAbs) == 0) {
|
||||
doc.meta[Doc::keyabs] = doc.meta[Doc::keyabs].substr(rclSyntAbs.length());
|
||||
if (doc.meta[Doc::keyabs].find(cstr_syntAbs) == 0) {
|
||||
doc.meta[Doc::keyabs] = doc.meta[Doc::keyabs].substr(cstr_syntAbs.length());
|
||||
doc.syntabs = true;
|
||||
}
|
||||
parms.get(Doc::keyipt, doc.ipath);
|
||||
@ -417,7 +426,7 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
|
||||
} else if (ii > (unsigned int)ipos &&
|
||||
ii < (unsigned int)ipos + qtrmwrdcnt) {
|
||||
sparseDoc[ii] = occupiedmarker;
|
||||
} else if (!sparseDoc[ii].compare(ellipsis)) {
|
||||
} else if (!sparseDoc[ii].compare(cstr_ellipsis)) {
|
||||
// For an empty slot, the test has a side
|
||||
// effect of inserting an empty string which
|
||||
// is what we want
|
||||
@ -429,7 +438,7 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
|
||||
// empty string here, we really want an empty slot,
|
||||
// use find()
|
||||
if (sparseDoc.find(sto+1) == sparseDoc.end()) {
|
||||
sparseDoc[sto+1] = ellipsis;
|
||||
sparseDoc[sto+1] = cstr_ellipsis;
|
||||
}
|
||||
|
||||
// Limit to allocated occurrences and total size
|
||||
@ -531,7 +540,7 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
|
||||
if (!incjk || (incjk && !newcjk))
|
||||
chunk += " ";
|
||||
incjk = newcjk;
|
||||
if (it->second == ellipsis) {
|
||||
if (it->second == cstr_ellipsis) {
|
||||
vabs.push_back(chunk);
|
||||
chunk.clear();
|
||||
} else {
|
||||
@ -612,8 +621,8 @@ bool Db::open(OpenMode mode, OpenError *error)
|
||||
// If db is empty, write the data format version at once
|
||||
// to avoid stupid error messages:
|
||||
if (m_ndb->xwdb.get_doccount() == 0)
|
||||
m_ndb->xwdb.set_metadata(RCL_IDX_VERSION_KEY,
|
||||
RCL_IDX_VERSION);
|
||||
m_ndb->xwdb.set_metadata(cstr_RCL_IDX_VERSION_KEY,
|
||||
cstr_RCL_IDX_VERSION);
|
||||
m_ndb->m_iswritable = true;
|
||||
// We open a readonly object in all cases (possibly in
|
||||
// addition to the r/w one) because some operations
|
||||
@ -650,11 +659,11 @@ bool Db::open(OpenMode mode, OpenError *error)
|
||||
// Check index format version. Must not try to check a just created or
|
||||
// truncated db
|
||||
if (mode != DbTrunc && m_ndb->xdb().get_doccount() > 0) {
|
||||
string version = m_ndb->xdb().get_metadata(RCL_IDX_VERSION_KEY);
|
||||
if (version.compare(RCL_IDX_VERSION)) {
|
||||
string version = m_ndb->xdb().get_metadata(cstr_RCL_IDX_VERSION_KEY);
|
||||
if (version.compare(cstr_RCL_IDX_VERSION)) {
|
||||
m_ndb->m_noversionwrite = true;
|
||||
LOGERR(("Rcl::Db::open: file index [%s], software [%s]\n",
|
||||
version.c_str(), RCL_IDX_VERSION.c_str()));
|
||||
version.c_str(), cstr_RCL_IDX_VERSION.c_str()));
|
||||
throw Xapian::DatabaseError("Recoll index version mismatch",
|
||||
"", "");
|
||||
}
|
||||
@ -693,7 +702,7 @@ bool Db::i_close(bool final)
|
||||
bool w = m_ndb->m_iswritable;
|
||||
if (w) {
|
||||
if (!m_ndb->m_noversionwrite)
|
||||
m_ndb->xwdb.set_metadata(RCL_IDX_VERSION_KEY, RCL_IDX_VERSION);
|
||||
m_ndb->xwdb.set_metadata(cstr_RCL_IDX_VERSION_KEY, cstr_RCL_IDX_VERSION);
|
||||
LOGDEB(("Rcl::Db:close: xapian will close. May take some time\n"));
|
||||
}
|
||||
// Used to do a flush here. Can't see why it should be necessary.
|
||||
@ -952,7 +961,7 @@ void Db::setAbstractParams(int idxtrunc, int syntlen, int syntctxlen)
|
||||
}
|
||||
|
||||
static const int MB = 1024 * 1024;
|
||||
static const string nc("\n\r\x0c");
|
||||
static const string cstr_nc("\n\r\x0c");
|
||||
|
||||
#define RECORD_APPEND(R, NM, VAL) {R += NM + "=" + VAL + "\n";}
|
||||
|
||||
@ -1168,13 +1177,13 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi,
|
||||
if (doc.meta[Doc::keytt].empty())
|
||||
doc.meta[Doc::keytt] = doc.utf8fn;
|
||||
doc.meta[Doc::keytt] =
|
||||
neutchars(truncate_to_word(doc.meta[Doc::keytt], 150), nc);
|
||||
neutchars(truncate_to_word(doc.meta[Doc::keytt], 150), cstr_nc);
|
||||
if (!doc.meta[Doc::keytt].empty())
|
||||
RECORD_APPEND(record, keycap, doc.meta[Doc::keytt]);
|
||||
RECORD_APPEND(record, cstr_keycap, doc.meta[Doc::keytt]);
|
||||
|
||||
trimstring(doc.meta[Doc::keykw], " \t\r\n");
|
||||
doc.meta[Doc::keykw] =
|
||||
neutchars(truncate_to_word(doc.meta[Doc::keykw], 300), nc);
|
||||
neutchars(truncate_to_word(doc.meta[Doc::keykw], 300), cstr_nc);
|
||||
if (!doc.meta[Doc::keykw].empty())
|
||||
RECORD_APPEND(record, Doc::keykw, doc.meta[Doc::keykw]);
|
||||
|
||||
@ -1189,12 +1198,12 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi,
|
||||
if (doc.meta[Doc::keyabs].empty()) {
|
||||
syntabs = true;
|
||||
if (!doc.text.empty())
|
||||
doc.meta[Doc::keyabs] = rclSyntAbs +
|
||||
neutchars(truncate_to_word(doc.text, m_idxAbsTruncLen), nc);
|
||||
doc.meta[Doc::keyabs] = cstr_syntAbs +
|
||||
neutchars(truncate_to_word(doc.text, m_idxAbsTruncLen), cstr_nc);
|
||||
} else {
|
||||
doc.meta[Doc::keyabs] =
|
||||
neutchars(truncate_to_word(doc.meta[Doc::keyabs], m_idxAbsTruncLen),
|
||||
nc);
|
||||
cstr_nc);
|
||||
}
|
||||
if (!doc.meta[Doc::keyabs].empty())
|
||||
RECORD_APPEND(record, Doc::keyabs, doc.meta[Doc::keyabs]);
|
||||
@ -1205,7 +1214,7 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi,
|
||||
string nm = m_config->fieldCanon(*it);
|
||||
if (!doc.meta[*it].empty()) {
|
||||
string value =
|
||||
neutchars(truncate_to_word(doc.meta[*it], 150), nc);
|
||||
neutchars(truncate_to_word(doc.meta[*it], 150), cstr_nc);
|
||||
RECORD_APPEND(record, nm, value);
|
||||
}
|
||||
}
|
||||
@ -1611,8 +1620,8 @@ static void addPrefix(list<TermMatchEntry>& terms, const string& prefix)
|
||||
// Characters that can begin a wildcard or regexp expression. We use skipto
|
||||
// to begin the allterms search with terms that begin with the portion of
|
||||
// the input string prior to these chars.
|
||||
const string wildSpecChars = "*?[";
|
||||
const string regSpecChars = "(.[{";
|
||||
const string cstr_wildSpecChars = "*?[";
|
||||
const string cstr_regSpecChars = "(.[{";
|
||||
|
||||
// Find all index terms that match a wildcard or regular expression
|
||||
bool Db::termMatch(MatchType typ, const string &lang,
|
||||
@ -1639,7 +1648,7 @@ bool Db::termMatch(MatchType typ, const string &lang,
|
||||
LOGERR(("Db::termMatch: unac failed for [%s]\n", root.c_str()));
|
||||
return false;
|
||||
}
|
||||
string nochars = typ == ET_WILD ? wildSpecChars : regSpecChars;
|
||||
string nochars = typ == ET_WILD ? cstr_wildSpecChars : cstr_regSpecChars;
|
||||
|
||||
string prefix;
|
||||
if (!field.empty()) {
|
||||
@ -1852,7 +1861,7 @@ bool Db::makeDocAbstract(Doc &doc, Query *query, string& abstract)
|
||||
for (vector<string>::const_iterator it = vab.begin();
|
||||
it != vab.end(); it++) {
|
||||
abstract.append(*it);
|
||||
abstract.append(ellipsis);
|
||||
abstract.append(cstr_ellipsis);
|
||||
}
|
||||
return m_reason.empty() ? true : false;
|
||||
}
|
||||
|
||||
@ -39,16 +39,17 @@
|
||||
namespace Rcl {
|
||||
#endif
|
||||
|
||||
static const string cstr_keycap("caption");
|
||||
static const string cstr_keydmtime("dmtime");
|
||||
|
||||
// Field names inside the index data record may differ from the rcldoc ones
|
||||
// (esp.: caption / title)
|
||||
static const string& docfToDatf(const string& df)
|
||||
{
|
||||
static const string keycap("caption");
|
||||
static const string keydmtime("dmtime");
|
||||
if (!df.compare(Doc::keytt)) {
|
||||
return keycap;
|
||||
return cstr_keycap;
|
||||
} else if (!df.compare(Doc::keymt)) {
|
||||
return keydmtime;
|
||||
return cstr_keydmtime;
|
||||
} else {
|
||||
return df;
|
||||
}
|
||||
|
||||
@ -860,13 +860,13 @@ bool StringToXapianQ::processUserString(const string &iq,
|
||||
return true;
|
||||
}
|
||||
|
||||
static const string nullstemlang;
|
||||
static const string cstr_null;
|
||||
|
||||
// Translate a simple OR, AND, or EXCL search clause.
|
||||
bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p,
|
||||
const string& stemlang)
|
||||
{
|
||||
const string& l_stemlang = (m_modifiers&SDCM_NOSTEMMING)? nullstemlang:
|
||||
const string& l_stemlang = (m_modifiers&SDCM_NOSTEMMING)? cstr_null:
|
||||
stemlang;
|
||||
|
||||
m_terms.clear();
|
||||
@ -945,7 +945,7 @@ bool SearchDataClauseFilename::toNativeQuery(Rcl::Db &db, void *p,
|
||||
bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
|
||||
const string& stemlang)
|
||||
{
|
||||
const string& l_stemlang = (m_modifiers&SDCM_NOSTEMMING)? nullstemlang:
|
||||
const string& l_stemlang = (m_modifiers&SDCM_NOSTEMMING)? cstr_null:
|
||||
stemlang;
|
||||
LOGDEB(("SearchDataClauseDist::toNativeQuery\n"));
|
||||
m_terms.clear();
|
||||
|
||||
@ -38,21 +38,21 @@ namespace Rcl {
|
||||
namespace StemDb {
|
||||
|
||||
|
||||
static const string stemdirstem = "stem_";
|
||||
static const string cstr_stemdirstem = "stem_";
|
||||
|
||||
/// Compute name of stem db for given base database and language.
/// The result is path_cat(dbdir, "stem_" + lang), i.e. an entry under
/// dbdir whose name is the "stem_" prefix followed by the language name.
|
||||
static string stemdbname(const string& dbdir, const string& lang)
|
||||
{
|
||||
return path_cat(dbdir, stemdirstem + lang);
|
||||
return path_cat(dbdir, cstr_stemdirstem + lang);
|
||||
}
|
||||
|
||||
// List the languages for which an entry named "stem_<lang>" exists under
// dbdir. Entries matching the "stem_*" pattern are collected with
// path_dirglob(), each one is reduced to its base name, and the "stem_"
// prefix is then stripped so that only the language part is returned.
// NOTE(review): presumably path_dirglob() only returns directories; confirm
// against its definition.
list<string> getLangs(const string& dbdir)
|
||||
{
|
||||
string pattern = stemdirstem + "*";
|
||||
string pattern = cstr_stemdirstem + "*";
|
||||
list<string> dirs = path_dirglob(dbdir, pattern);
|
||||
for (list<string>::iterator it = dirs.begin(); it != dirs.end(); it++) {
|
||||
*it = path_basename(*it);
|
||||
*it = it->substr(stemdirstem.length(), string::npos);
|
||||
*it = it->substr(cstr_stemdirstem.length(), string::npos);
|
||||
}
|
||||
return dirs;
|
||||
}
|
||||
|
||||
@ -36,6 +36,7 @@
|
||||
#include <stdio.h>
|
||||
#include <stdarg.h>
|
||||
#endif /* HAVE_VSNPRINTF */
|
||||
#include <pthread.h>
|
||||
|
||||
#include "unac.h"
|
||||
#include "unac_version.h"
|
||||
@ -10555,8 +10556,6 @@ int unacfold_string_utf16(const char* in, size_t in_length,
|
||||
outp, out_lengthp, 1);
|
||||
}
|
||||
|
||||
#define MAXOUT 1024
|
||||
|
||||
static int convert(const char* from, const char* to,
|
||||
const char* in, size_t in_length,
|
||||
char** outp, size_t* out_lengthp);
|
||||
@ -10564,6 +10563,14 @@ static int convert(const char* from, const char* to,
|
||||
static const char *utf16be = "UTF-16BE";
|
||||
static iconv_t u8tou16_cd = (iconv_t)-1;
|
||||
static iconv_t u16tou8_cd = (iconv_t)-1;
|
||||
/*
 * Mutex held by convert() for the whole duration of a conversion.
 *
 * It is initialized statically, so it is usable before any code runs. The
 * previous scheme initialized it at run time behind an unlocked flag test,
 * which let two threads entering convert() at the same time both call
 * pthread_mutex_init(), and let unac_init_mt() re-initialize a mutex that
 * was already initialized (or even held).
 */
static pthread_mutex_t o_unac_mutex = PTHREAD_MUTEX_INITIALIZER;

/*
 * Non-zero when o_unac_mutex is ready for use. Always 1 now that the mutex
 * is statically initialized; kept because convert() still tests it before
 * locking.
 */
static int unac_mutex_is_init = 1;

/*
 * Prepare unac for multithreaded use.
 *
 * Kept for API compatibility: callers may still invoke it (any number of
 * times, from any thread) before starting worker threads, but there is
 * nothing left to do because the mutex is initialized statically.
 */
void unac_init_mt()
{
    /* Nothing to do: o_unac_mutex is statically initialized. */
}
|
||||
|
||||
/*
|
||||
* Convert buffer <in> containing string encoded in charset <from> into
|
||||
@ -10576,6 +10583,7 @@ static int convert(const char* from, const char* to,
|
||||
const char* in, size_t in_length,
|
||||
char** outp, size_t* out_lengthp)
|
||||
{
|
||||
int ret = -1;
|
||||
iconv_t cd;
|
||||
char* out;
|
||||
size_t out_remain;
|
||||
@ -10584,6 +10592,15 @@ static int convert(const char* from, const char* to,
|
||||
int from_utf16, from_utf8, to_utf16, to_utf8, u8tou16, u16tou8;
|
||||
const char space[] = { 0x00, 0x20 };
|
||||
|
||||
/* Note: better call explicit unac_init_mt() before starting threads than
|
||||
rely on this.
|
||||
*/
|
||||
if (unac_mutex_is_init == 0) {
|
||||
pthread_mutex_init(&o_unac_mutex, 0);
|
||||
unac_mutex_is_init = 1;
|
||||
}
|
||||
pthread_mutex_lock(&o_unac_mutex);
|
||||
|
||||
if (!strcmp(utf16be, from)) {
|
||||
from_utf8 = 0;
|
||||
from_utf16 = 1;
|
||||
@ -10614,7 +10631,7 @@ static int convert(const char* from, const char* to,
|
||||
/* *outp still valid, no freeing */
|
||||
if(debug_level >= UNAC_DEBUG_LOW)
|
||||
DEBUG("realloc %d bytes failed\n", out_size+1);
|
||||
return -1;
|
||||
goto out;
|
||||
}
|
||||
} else {
|
||||
/* +1 for null */
|
||||
@ -10622,7 +10639,7 @@ static int convert(const char* from, const char* to,
|
||||
if(out == 0) {
|
||||
if(debug_level >= UNAC_DEBUG_LOW)
|
||||
DEBUG("malloc %d bytes failed\n", out_size+1);
|
||||
return -1;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
out_remain = out_size;
|
||||
@ -10631,7 +10648,7 @@ static int convert(const char* from, const char* to,
|
||||
if (u8tou16) {
|
||||
if (u8tou16_cd == (iconv_t)-1) {
|
||||
if((u8tou16_cd = iconv_open(to, from)) == (iconv_t)-1) {
|
||||
return -1;
|
||||
goto out;
|
||||
}
|
||||
} else {
|
||||
iconv(u8tou16_cd, 0, 0, 0, 0);
|
||||
@ -10640,7 +10657,7 @@ static int convert(const char* from, const char* to,
|
||||
} else if (u16tou8) {
|
||||
if (u16tou8_cd == (iconv_t)-1) {
|
||||
if((u16tou8_cd = iconv_open(to, from)) == (iconv_t)-1) {
|
||||
return -1;
|
||||
goto out;
|
||||
}
|
||||
} else {
|
||||
iconv(u16tou8_cd, 0, 0, 0, 0);
|
||||
@ -10648,7 +10665,7 @@ static int convert(const char* from, const char* to,
|
||||
cd = u16tou8_cd;
|
||||
} else {
|
||||
if((cd = iconv_open(to, from)) == (iconv_t)-1) {
|
||||
return -1;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
@ -10682,7 +10699,7 @@ static int convert(const char* from, const char* to,
|
||||
if(errno == E2BIG)
|
||||
/* fall thru to the E2BIG case below */;
|
||||
else
|
||||
return -1;
|
||||
goto out;
|
||||
} else {
|
||||
/* The offending character was replaced by a SPACE, skip it. */
|
||||
in += 2;
|
||||
@ -10691,7 +10708,7 @@ static int convert(const char* from, const char* to,
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
return -1;
|
||||
goto out;
|
||||
}
|
||||
case E2BIG:
|
||||
{
|
||||
@ -10711,7 +10728,7 @@ static int convert(const char* from, const char* to,
|
||||
DEBUG("realloc %d bytes failed\n", out_size+1);
|
||||
free(saved);
|
||||
*outp = 0;
|
||||
return -1;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
out = out_base + length;
|
||||
@ -10719,7 +10736,7 @@ static int convert(const char* from, const char* to,
|
||||
}
|
||||
break;
|
||||
default:
|
||||
return -1;
|
||||
goto out;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -10732,7 +10749,10 @@ static int convert(const char* from, const char* to,
|
||||
*out_lengthp = out - out_base;
|
||||
(*outp)[*out_lengthp] = '\0';
|
||||
|
||||
return 0;
|
||||
ret = 0;
|
||||
out:
|
||||
pthread_mutex_unlock(&o_unac_mutex);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int unacmaybefold_string(const char* charset,
|
||||
|
||||
@ -113,6 +113,9 @@ int unacfold_string(const char* charset,
|
||||
const char* in, size_t in_length,
|
||||
char** out, size_t* out_length);
|
||||
|
||||
/* To be called before starting threads in mt programs */
|
||||
void unac_init_mt();
|
||||
|
||||
/*
|
||||
* Return unac version number.
|
||||
*/
|
||||
|
||||
106
src/utils/ptmutex.cpp
Normal file
106
src/utils/ptmutex.cpp
Normal file
@ -0,0 +1,106 @@
|
||||
/* Copyright (C) 2004 J.F.Dockes
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc.,
|
||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
//
|
||||
// Small test program to evaluate the cost of using mutex locks: calls
|
||||
// to methods doing a small (150 bytes) base64 encoding job + string
|
||||
// manips, with and without locking. The performance cost is
|
||||
// negligible on all machines I tested (around 0.3% to 2% depending on
|
||||
// the system and machine), but not inexistent, you would not want
|
||||
// this in a tight loop.
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <string>
|
||||
using namespace std;
|
||||
|
||||
#include "ptmutex.h"
|
||||
#include "base64.h"
|
||||
|
||||
static char *thisprog;
|
||||
static char usage [] =
|
||||
"ptmutex [-l] count\n"
|
||||
"\n"
|
||||
;
|
||||
// Print the usage text to stderr, prefixed with the program name saved in
// thisprog, then terminate the process with exit status 1.
static void
|
||||
Usage(void)
|
||||
{
|
||||
fprintf(stderr, "%s: usage:\n%s", thisprog, usage);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
static int op_flags;
|
||||
#define OPT_MOINS 0x1
|
||||
#define OPT_l 0x2
|
||||
|
||||
static const string convertbuffer =
|
||||
"* The recoll GUI program sometimes crashes when running a query while\
|
||||
the indexing thread is active. Possible workarounds:";
|
||||
|
||||
static PTMutexInit o_lock;
|
||||
// Locked variant of the benchmark job: construct a PTMutexLocker on o_lock,
// then base64-encode convertbuffer into out.
// NOTE(review): presumably the locker holds o_lock until it goes out of
// scope at the end of the function -- confirm against ptmutex.h.
void workerlock(string& out)
|
||||
{
|
||||
PTMutexLocker locker(o_lock);
|
||||
base64_encode(convertbuffer, out);
|
||||
}
|
||||
|
||||
// Unlocked variant of the benchmark job: base64-encode convertbuffer into
// out without touching o_lock. Same work as workerlock() minus the locker.
void workernolock(string& out)
|
||||
{
|
||||
base64_encode(convertbuffer, out);
|
||||
}
|
||||
|
||||
// Benchmark driver: parses an optional -l flag and a mandatory iteration
// count, then calls workerlock() (with -l) or workernolock() (without)
// that many times and exits with status 0. An unknown option letter, or
// anything other than exactly one remaining argument after the options,
// goes to Usage(), which exits with status 1.
int main(int argc, char **argv)
|
||||
{
|
||||
int count = 0;
|
||||
// Remember the program name for Usage(), then step past it.
thisprog = argv[0];
|
||||
argc--; argv++;
|
||||
|
||||
// Consume every leading argument that starts with '-'.
while (argc > 0 && **argv == '-') {
|
||||
// Skip the '-' itself.
(*argv)++;
|
||||
if (!(**argv))
|
||||
/* Case of a lone "-" argument, as in "adb - core" */
|
||||
Usage();
|
||||
// Each remaining character of the argument is one option letter.
while (**argv)
|
||||
switch (*(*argv)++) {
|
||||
case 'l': op_flags |= OPT_l; break;
|
||||
default: Usage(); break;
|
||||
}
|
||||
// NOTE(review): no goto targeting the b1 label is visible in this function.
b1: argc--; argv++;
|
||||
}
|
||||
|
||||
if (argc != 1)
|
||||
Usage();
|
||||
// atoi() reports no errors: a non-numeric argument gives a count of 0,
// so the loops below simply do not run.
count = atoi(*argv++);argc--;
|
||||
|
||||
if (op_flags & OPT_l) {
|
||||
fprintf(stderr, "Looping %d, locking\n", count);
|
||||
for (int i = 0; i < count; i++) {
|
||||
// A fresh output string is built on every iteration.
string s;
|
||||
workerlock(s);
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr, "Looping %d, no locking\n", count);
|
||||
for (int i = 0; i < count; i++) {
|
||||
// A fresh output string is built on every iteration.
string s;
|
||||
workernolock(s);
|
||||
}
|
||||
}
|
||||
exit(0);
|
||||
}
|
||||
|
||||
@ -405,7 +405,7 @@ list(int fd, const string& path, vector<string>* names, flags flags, nspace dom)
|
||||
return true;
|
||||
}
|
||||
|
||||
static const string nullstring("");
|
||||
static const string cstr_nullstring("");
|
||||
|
||||
bool get(const string& path, const string& _name, string *value,
|
||||
flags flags, nspace dom)
|
||||
@ -414,7 +414,7 @@ bool get(const string& path, const string& _name, string *value,
|
||||
}
|
||||
bool get(int fd, const string& _name, string *value, flags flags, nspace dom)
|
||||
{
|
||||
return get(fd, nullstring, _name, value, flags, dom);
|
||||
return get(fd, cstr_nullstring, _name, value, flags, dom);
|
||||
}
|
||||
bool set(const string& path, const string& _name, const string& value,
|
||||
flags flags, nspace dom)
|
||||
@ -424,7 +424,7 @@ bool set(const string& path, const string& _name, const string& value,
|
||||
bool set(int fd, const string& _name, const string& value,
|
||||
flags flags, nspace dom)
|
||||
{
|
||||
return set(fd, nullstring, _name, value, flags, dom);
|
||||
return set(fd, cstr_nullstring, _name, value, flags, dom);
|
||||
}
|
||||
bool del(const string& path, const string& _name, flags flags, nspace dom)
|
||||
{
|
||||
@ -432,7 +432,7 @@ bool del(const string& path, const string& _name, flags flags, nspace dom)
|
||||
}
|
||||
bool del(int fd, const string& _name, flags flags, nspace dom)
|
||||
{
|
||||
return del(fd, nullstring, _name, flags, dom);
|
||||
return del(fd, cstr_nullstring, _name, flags, dom);
|
||||
}
|
||||
bool list(const string& path, vector<string>* names, flags flags, nspace dom)
|
||||
{
|
||||
@ -440,17 +440,17 @@ bool list(const string& path, vector<string>* names, flags flags, nspace dom)
|
||||
}
|
||||
bool list(int fd, vector<string>* names, flags flags, nspace dom)
|
||||
{
|
||||
return list(fd, nullstring, names, flags, dom);
|
||||
return list(fd, cstr_nullstring, names, flags, dom);
|
||||
}
|
||||
|
||||
static const string userstring("user.");
|
||||
static const string cstr_userstring("user.");
|
||||
bool sysname(nspace dom, const string& pname, string* sname)
|
||||
{
|
||||
if (dom != PXATTR_USER) {
|
||||
errno = EINVAL;
|
||||
return false;
|
||||
}
|
||||
*sname = userstring + pname;
|
||||
*sname = cstr_userstring + pname;
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -460,7 +460,7 @@ bool pxname(nspace dom, const string& sname, string* pname)
|
||||
errno = EINVAL;
|
||||
return false;
|
||||
}
|
||||
*pname = sname.substr(userstring.length());
|
||||
*pname = sname.substr(cstr_userstring.length());
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@ -428,7 +428,7 @@ void neutchars(const string &str, string &out, const string& chars)
|
||||
* if reasonably possible. Note: we could also use textsplit, stopping when
|
||||
* we have enough, this would be cleanly utf8-aware but would remove
|
||||
* punctuation */
|
||||
static const string SEPAR = " \t\n\r-:.;,/[]{}";
|
||||
static const string cstr_SEPAR = " \t\n\r-:.;,/[]{}";
|
||||
string truncate_to_word(const string &input, string::size_type maxlen)
|
||||
{
|
||||
string output;
|
||||
@ -436,7 +436,7 @@ string truncate_to_word(const string &input, string::size_type maxlen)
|
||||
output = input;
|
||||
} else {
|
||||
output = input.substr(0, maxlen);
|
||||
string::size_type space = output.find_last_of(SEPAR);
|
||||
string::size_type space = output.find_last_of(cstr_SEPAR);
|
||||
// Original version only truncated at space if space was found after
|
||||
// maxlen/2. But we HAVE to truncate at space, else we'd need to do
|
||||
// utf8 stuff to avoid truncating at multibyte char. In any case,
|
||||
@ -676,6 +676,9 @@ static void gettime(int, struct m_timespec *ts)
|
||||
}
|
||||
///// End system interface
|
||||
|
||||
// Note: this is not protected against multithread access and not reentrant, but
|
||||
// this is mostly debug code, and it won't crash, just show bad results. Also
|
||||
// the frozen thing is not used that much
|
||||
static m_timespec frozen_tv;
|
||||
void Chrono::refnow()
|
||||
{
|
||||
|
||||
@ -29,15 +29,21 @@ using std::string;
|
||||
|
||||
#include "transcode.h"
|
||||
#include "debuglog.h"
|
||||
|
||||
#include "ptmutex.h"
|
||||
#ifdef RCL_ICONV_INBUF_CONST
|
||||
#define ICV_P2_TYPE const char**
|
||||
#else
|
||||
#define ICV_P2_TYPE char**
|
||||
#endif
|
||||
|
||||
// We gain approximately 28% exec time for word at a time conversions by
|
||||
// We gain approximately 25% exec time for word at a time conversions by
|
||||
// caching the iconv_open thing.
|
||||
//
|
||||
// We may also lose some concurrency on multiproc because of the
|
||||
// necessary locking, but we only have one processing-intensive
|
||||
// possible thread for now (the indexing one), so this is probably not
|
||||
// an issue (and could be worked around with a slightly more
|
||||
// sophisticated approach).
|
||||
#define ICONV_CACHE_OPEN
|
||||
|
||||
bool transcode(const string &in, string &out, const string &icode,
|
||||
@ -48,6 +54,8 @@ bool transcode(const string &in, string &out, const string &icode,
|
||||
static iconv_t ic = (iconv_t)-1;
|
||||
static string cachedicode;
|
||||
static string cachedocode;
|
||||
static PTMutexInit o_cachediconv_mutex;
|
||||
PTMutexLocker locker(o_cachediconv_mutex);
|
||||
#else
|
||||
iconv_t ic;
|
||||
#endif
|
||||
@ -163,13 +171,14 @@ using namespace std;
|
||||
|
||||
// Repeatedly transcode a small string for timing measurements
|
||||
static const string testword("\xc3\xa9\x6c\x69\x6d\x69\x6e\xc3\xa9\xc3\xa0");
|
||||
// Without cache 10e6 reps on macpro -> 1.88 S
|
||||
// With cache -> 1.56
|
||||
// Without cache 10e6 reps on y -> 6.68
|
||||
// With cache -> 4.73
|
||||
// With cache and lock -> 4.9
|
||||
void looptest()
|
||||
{
|
||||
cout << testword << endl;
|
||||
string out;
|
||||
for (int i = 0; i < 1000*1000; i++) {
|
||||
for (int i = 0; i < 10*1000*1000; i++) {
|
||||
if (!transcode(testword, out, "UTF-8", "UTF-16BE")) {
|
||||
cerr << "Transcode failed" << endl;
|
||||
break;
|
||||
@ -184,7 +193,7 @@ int main(int argc, char **argv)
|
||||
exit(0);
|
||||
#endif
|
||||
if (argc != 5) {
|
||||
cerr << "Usage: trcsguess ifilename icode ofilename ocode" << endl;
|
||||
cerr << "Usage: transcode ifilename icode ofilename ocode" << endl;
|
||||
exit(1);
|
||||
}
|
||||
const string ifilename = argv[1];
|
||||
|
||||
44
unac/unac.c
44
unac/unac.c
@ -36,6 +36,7 @@
|
||||
#include <stdio.h>
|
||||
#include <stdarg.h>
|
||||
#endif /* HAVE_VSNPRINTF */
|
||||
#include <pthread.h>
|
||||
|
||||
#include "unac.h"
|
||||
#include "unac_version.h"
|
||||
@ -10555,8 +10556,6 @@ int unacfold_string_utf16(const char* in, size_t in_length,
|
||||
outp, out_lengthp, 1);
|
||||
}
|
||||
|
||||
#define MAXOUT 1024
|
||||
|
||||
static int convert(const char* from, const char* to,
|
||||
const char* in, size_t in_length,
|
||||
char** outp, size_t* out_lengthp);
|
||||
@ -10564,6 +10563,14 @@ static int convert(const char* from, const char* to,
|
||||
static const char *utf16be = "UTF-16BE";
|
||||
static iconv_t u8tou16_cd = (iconv_t)-1;
|
||||
static iconv_t u16tou8_cd = (iconv_t)-1;
|
||||
static pthread_mutex_t o_unac_mutex;
|
||||
static int unac_mutex_is_init;
|
||||
// Explicitly initialize o_unac_mutex, the mutex that convert() locks on
// entry and unlocks before returning, and record that it is ready.
// Call this once before starting threads, or take your chances with the
// auto init: convert() otherwise initializes the mutex on first use, and
// that check of unac_mutex_is_init is made before the lock is taken.
|
||||
void unac_init_mt()
|
||||
{
|
||||
pthread_mutex_init(&o_unac_mutex, 0);
|
||||
unac_mutex_is_init = 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert buffer <in> containing string encoded in charset <from> into
|
||||
@ -10576,6 +10583,7 @@ static int convert(const char* from, const char* to,
|
||||
const char* in, size_t in_length,
|
||||
char** outp, size_t* out_lengthp)
|
||||
{
|
||||
int ret = -1;
|
||||
iconv_t cd;
|
||||
char* out;
|
||||
size_t out_remain;
|
||||
@ -10584,6 +10592,15 @@ static int convert(const char* from, const char* to,
|
||||
int from_utf16, from_utf8, to_utf16, to_utf8, u8tou16, u16tou8;
|
||||
const char space[] = { 0x00, 0x20 };
|
||||
|
||||
/* Note: better call explicit unac_init_mt() before starting threads than
|
||||
rely on this.
|
||||
*/
|
||||
if (unac_mutex_is_init == 0) {
|
||||
pthread_mutex_init(&o_unac_mutex, 0);
|
||||
unac_mutex_is_init = 1;
|
||||
}
|
||||
pthread_mutex_lock(&o_unac_mutex);
|
||||
|
||||
if (!strcmp(utf16be, from)) {
|
||||
from_utf8 = 0;
|
||||
from_utf16 = 1;
|
||||
@ -10614,7 +10631,7 @@ static int convert(const char* from, const char* to,
|
||||
/* *outp still valid, no freeing */
|
||||
if(debug_level >= UNAC_DEBUG_LOW)
|
||||
DEBUG("realloc %d bytes failed\n", out_size+1);
|
||||
return -1;
|
||||
goto out;
|
||||
}
|
||||
} else {
|
||||
/* +1 for null */
|
||||
@ -10622,7 +10639,7 @@ static int convert(const char* from, const char* to,
|
||||
if(out == 0) {
|
||||
if(debug_level >= UNAC_DEBUG_LOW)
|
||||
DEBUG("malloc %d bytes failed\n", out_size+1);
|
||||
return -1;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
out_remain = out_size;
|
||||
@ -10631,7 +10648,7 @@ static int convert(const char* from, const char* to,
|
||||
if (u8tou16) {
|
||||
if (u8tou16_cd == (iconv_t)-1) {
|
||||
if((u8tou16_cd = iconv_open(to, from)) == (iconv_t)-1) {
|
||||
return -1;
|
||||
goto out;
|
||||
}
|
||||
} else {
|
||||
iconv(u8tou16_cd, 0, 0, 0, 0);
|
||||
@ -10640,7 +10657,7 @@ static int convert(const char* from, const char* to,
|
||||
} else if (u16tou8) {
|
||||
if (u16tou8_cd == (iconv_t)-1) {
|
||||
if((u16tou8_cd = iconv_open(to, from)) == (iconv_t)-1) {
|
||||
return -1;
|
||||
goto out;
|
||||
}
|
||||
} else {
|
||||
iconv(u16tou8_cd, 0, 0, 0, 0);
|
||||
@ -10648,7 +10665,7 @@ static int convert(const char* from, const char* to,
|
||||
cd = u16tou8_cd;
|
||||
} else {
|
||||
if((cd = iconv_open(to, from)) == (iconv_t)-1) {
|
||||
return -1;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
@ -10682,7 +10699,7 @@ static int convert(const char* from, const char* to,
|
||||
if(errno == E2BIG)
|
||||
/* fall thru to the E2BIG case below */;
|
||||
else
|
||||
return -1;
|
||||
goto out;
|
||||
} else {
|
||||
/* The offending character was replaced by a SPACE, skip it. */
|
||||
in += 2;
|
||||
@ -10691,7 +10708,7 @@ static int convert(const char* from, const char* to,
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
return -1;
|
||||
goto out;
|
||||
}
|
||||
case E2BIG:
|
||||
{
|
||||
@ -10711,7 +10728,7 @@ static int convert(const char* from, const char* to,
|
||||
DEBUG("realloc %d bytes failed\n", out_size+1);
|
||||
free(saved);
|
||||
*outp = 0;
|
||||
return -1;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
out = out_base + length;
|
||||
@ -10719,7 +10736,7 @@ static int convert(const char* from, const char* to,
|
||||
}
|
||||
break;
|
||||
default:
|
||||
return -1;
|
||||
goto out;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -10732,7 +10749,10 @@ static int convert(const char* from, const char* to,
|
||||
*out_lengthp = out - out_base;
|
||||
(*outp)[*out_lengthp] = '\0';
|
||||
|
||||
return 0;
|
||||
ret = 0;
|
||||
out:
|
||||
pthread_mutex_unlock(&o_unac_mutex);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int unacmaybefold_string(const char* charset,
|
||||
|
||||
@ -113,6 +113,9 @@ int unacfold_string(const char* charset,
|
||||
const char* in, size_t in_length,
|
||||
char** out, size_t* out_length);
|
||||
|
||||
/* To be called before starting threads in mt programs */
|
||||
void unac_init_mt();
|
||||
|
||||
/*
|
||||
* Return unac version number.
|
||||
*/
|
||||
|
||||
@ -38,7 +38,19 @@
|
||||
<h2><a name="b_latest">recoll 1.16</a></h2>
|
||||
|
||||
<ul>
|
||||
|
||||
|
||||
<li>The <tt>recoll</tt> GUI program sometimes crashes when
|
||||
running a query while the indexing thread is active.
|
||||
Possible workarounds:<br>
|
||||
<ul>
|
||||
<li>(Recommended) Use the command
|
||||
line <tt>recollindex</tt> program to perform indexing
|
||||
(usually just type "recollindex" in a console, or see "man
|
||||
recollindex").</li>
|
||||
<li>Do not run queries in <tt>recoll</tt> while the indexing thread
|
||||
is running (as indicated in the bottom status line).</li>
|
||||
</ul>
|
||||
|
||||
<li>Cancelling a preview in the GUI will also cancel the indexing
|
||||
thread if it is running.</li>
|
||||
|
||||
|
||||
@ -53,6 +53,27 @@
|
||||
<p>The current version is 1.16.0. <a href="release-1.16.html">
|
||||
Release notes</a>.</p>
|
||||
|
||||
<div class="important">
|
||||
<p>Notice for 1.16.0: the
|
||||
<tt>recoll</tt> GUI program sometimes crashes when running a query
|
||||
while the indexing thread is active. I can reproduce the problem
|
||||
and I am working on a correction. Meanwhile, there are two possible
|
||||
workarounds:<br>
|
||||
<ul>
|
||||
<li>(Recommended) Use the command
|
||||
line <tt>recollindex</tt> program to perform indexing
|
||||
(usually just type "recollindex" in a console, or see "man
|
||||
recollindex").</li>
|
||||
<li>Do not run queries in <tt>recoll</tt> while the indexing thread
|
||||
is running (as indicated in the bottom status line).</li>
|
||||
</ul>
|
||||
If the workaround fails or you experience other kinds of
|
||||
crashes with either <tt>recoll</tt> or <tt>recollindex</tt>, and
|
||||
want to help, please follow the instructions on
|
||||
<a href="https://bitbucket.org/medoc/recoll/wiki/GettingAStackTrace">
|
||||
this wiki page</a>.</p>
|
||||
</div>
|
||||
|
||||
<p>The download page for Recoll 1.15 is
|
||||
<a href="download-1.15.html">still available</a>.</p>
|
||||
|
||||
@ -204,21 +225,26 @@
|
||||
Xapian</a>,
|
||||
<a href="https://launchpad.net/~recoll-backports/+archive/recoll-1.15-on">
|
||||
Recoll and kio-recoll</a>. These were built from the latest versions,
|
||||
for a set of Ubuntu series. You just need to add the
|
||||
PPAs to your system software sources (the instructions are on
|
||||
for a set of Ubuntu series.</p>
|
||||
|
||||
<p>Ubuntu 10.04 (lucid) and later versions just need the Recoll
|
||||
PPA. Older versions also needed a backport for Xapian
|
||||
(xapian-backports/xapian-1.2).</p>
|
||||
|
||||
<p>Just add the
|
||||
PPA to your system software sources (the instructions are on
|
||||
the PPA page or
|
||||
<a href="https://help.launchpad.net/Packaging/PPA/InstallingSoftware">
|
||||
here</a>), and you can then use the normal package
|
||||
manager to install or update Recoll and Xapian. For Ubuntu versions
|
||||
from 9.10 (Karmic), only two commands are needed:
|
||||
manager to install or update Recoll. For Ubuntu versions
|
||||
after 9.10 (Karmic), only one command is needed:
|
||||
<pre><tt>
|
||||
sudo add-apt-repository ppa:xapian-backports/xapian-1.2
|
||||
sudo add-apt-repository ppa:recoll-backports/recoll-1.15-on
|
||||
</tt></pre>
|
||||
</p>
|
||||
|
||||
<p>For Ubuntu 9.04 (Jaunty) and older, to avoid
|
||||
messages about signature errors, you may have to explicitly import the
|
||||
<p>For Ubuntu 9.04 (Jaunty) and older,
|
||||
you may have to explicitly import the
|
||||
Recoll and Xapian public keys: <pre><tt>
|
||||
gpg --keyserver keyserver.ubuntu.com --recv 9DA85604
|
||||
gpg --export --armor 9DA85604 | sudo apt-key add -
|
||||
@ -390,7 +416,11 @@ I now use the OpenSUSE build service to create Recoll OpenSUSE packages.
|
||||
<h3>Updated 1.16 translations that became available after the
|
||||
release:</h3>
|
||||
|
||||
<p>None for now :(</p>
|
||||
<p>Czech, thanks to Pavel !
|
||||
<a href="translations/recoll_cs.ts">recoll_cs.ts</a>
|
||||
<a href="translations/recoll_cs.qm">recoll_cs.qm</a>
|
||||
</p>
|
||||
|
||||
<!--
|
||||
<p>Lithuanian.
|
||||
<a href="translations/recoll_lt.ts">recoll_lt.ts</a>
|
||||
|
||||
@ -90,6 +90,30 @@
|
||||
|
||||
<h2>News: </h2>
|
||||
<ul>
|
||||
|
||||
<li>
|
||||
<div class="important">
|
||||
<p>Notice for 1.16.0: the
|
||||
<tt>recoll</tt> GUI program sometimes crashes when running a query
|
||||
while the indexing thread is active. I can reproduce the problem
|
||||
and I am working on a correction. Meanwhile, there are two possible
|
||||
workarounds:<br>
|
||||
<ul>
|
||||
<li>(Recommended) Use the command
|
||||
line <tt>recollindex</tt> program to perform indexing
|
||||
(usually just type "recollindex" in a console, or see "man
|
||||
recollindex").</li>
|
||||
<li>Do not run queries in <tt>recoll</tt> while the indexing thread
|
||||
is running (as indicated in the bottom status line).</li>
|
||||
</ul>
|
||||
If the workaround fails or you experience other kinds of
|
||||
crashes with either <tt>recoll</tt> or <tt>recollindex</tt>, and
|
||||
want to help, please follow the instructions on
|
||||
<a href="https://bitbucket.org/medoc/recoll/wiki/GettingAStackTrace">
|
||||
this wiki page</a>.</p>
|
||||
</div>
|
||||
</li>
|
||||
|
||||
<li>2011-09-03: release <a href="download.html#source">1.16.0</a>
|
||||
is out with many <a href="release-1.16.html">small improvements</a>
|
||||
over 1.15. </li>
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user