Threading cleanup: add mutex protection around moronic change to transcode. Add a mutex for the equivalent issue in unac. Rename const strings everywhere to cstr_xx to ease future detection of potentially problematic static variables. Most probably closes issue #65.
This commit is contained in:
parent
80c86c4c5f
commit
424e4173ba
@ -34,6 +34,8 @@
|
|||||||
#include "rclaspell.h"
|
#include "rclaspell.h"
|
||||||
#include "debuglog.h"
|
#include "debuglog.h"
|
||||||
|
|
||||||
|
#include "ptmutex.h"
|
||||||
|
|
||||||
// Just a place where we keep the Aspell library entry points together
|
// Just a place where we keep the Aspell library entry points together
|
||||||
class AspellApi {
|
class AspellApi {
|
||||||
public:
|
public:
|
||||||
@ -61,6 +63,7 @@ public:
|
|||||||
|
|
||||||
};
|
};
|
||||||
static AspellApi aapi;
|
static AspellApi aapi;
|
||||||
|
static PTMutexInit o_aapi_mutex;
|
||||||
|
|
||||||
#define NMTOPTR(NM, TP) \
|
#define NMTOPTR(NM, TP) \
|
||||||
if ((aapi.NM = TP dlsym(m_data->m_handle, #NM)) == 0) { \
|
if ((aapi.NM = TP dlsym(m_data->m_handle, #NM)) == 0) { \
|
||||||
@ -111,6 +114,7 @@ Aspell::~Aspell()
|
|||||||
|
|
||||||
bool Aspell::init(string &reason)
|
bool Aspell::init(string &reason)
|
||||||
{
|
{
|
||||||
|
PTMutexLocker locker(o_aapi_mutex);
|
||||||
deleteZ(m_data);
|
deleteZ(m_data);
|
||||||
|
|
||||||
// Language: we get this from the configuration, else from the NLS
|
// Language: we get this from the configuration, else from the NLS
|
||||||
@ -227,7 +231,7 @@ bool Aspell::init(string &reason)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Aspell::ok()
|
bool Aspell::ok() const
|
||||||
{
|
{
|
||||||
return m_data != 0 && m_data->m_handle != 0;
|
return m_data != 0 && m_data->m_handle != 0;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -50,7 +50,7 @@ class Aspell {
|
|||||||
~Aspell();
|
~Aspell();
|
||||||
|
|
||||||
/** Check health */
|
/** Check health */
|
||||||
bool ok();
|
bool ok() const;
|
||||||
|
|
||||||
/** Find the aspell command and shared library, init function pointers */
|
/** Find the aspell command and shared library, init function pointers */
|
||||||
bool init(string &reason);
|
bool init(string &reason);
|
||||||
|
|||||||
@ -29,6 +29,7 @@
|
|||||||
#include "rclconfig.h"
|
#include "rclconfig.h"
|
||||||
#include "rclinit.h"
|
#include "rclinit.h"
|
||||||
#include "pathut.h"
|
#include "pathut.h"
|
||||||
|
#include "unac.h"
|
||||||
|
|
||||||
static const int catchedSigs[] = {SIGHUP, SIGINT, SIGQUIT, SIGTERM,
|
static const int catchedSigs[] = {SIGHUP, SIGINT, SIGQUIT, SIGTERM,
|
||||||
SIGUSR1, SIGUSR2};
|
SIGUSR1, SIGUSR2};
|
||||||
@ -99,6 +100,8 @@ RclConfig *recollinit(RclInitFlags flags,
|
|||||||
// Make sure the locale charset is initialized (so that multiple
|
// Make sure the locale charset is initialized (so that multiple
|
||||||
// threads don't try to do it at once).
|
// threads don't try to do it at once).
|
||||||
config->getDefCharset();
|
config->getDefCharset();
|
||||||
|
// Init unac locking
|
||||||
|
unac_init_mt();
|
||||||
|
|
||||||
int flushmb;
|
int flushmb;
|
||||||
if (config->getConfParam("idxflushmb", &flushmb) && flushmb > 0) {
|
if (config->getConfParam("idxflushmb", &flushmb) && flushmb > 0) {
|
||||||
|
|||||||
@ -172,8 +172,6 @@ public:
|
|||||||
ifstream m_input;
|
ifstream m_input;
|
||||||
};
|
};
|
||||||
|
|
||||||
const string badtmpdirname = "/no/such/dir/really/can/exist";
|
|
||||||
|
|
||||||
// Initialize. Compute paths and create a temporary directory that will be
|
// Initialize. Compute paths and create a temporary directory that will be
|
||||||
// used by internfile()
|
// used by internfile()
|
||||||
BeagleQueueIndexer::BeagleQueueIndexer(RclConfig *cnf, Rcl::Db *db,
|
BeagleQueueIndexer::BeagleQueueIndexer(RclConfig *cnf, Rcl::Db *db,
|
||||||
|
|||||||
@ -51,20 +51,21 @@ using namespace std;
|
|||||||
#include "pxattr.h"
|
#include "pxattr.h"
|
||||||
#endif // RCL_USE_XATTR
|
#endif // RCL_USE_XATTR
|
||||||
|
|
||||||
static const string stxtplain("text/plain");
|
static const string cstr_stxtplain("text/plain");
|
||||||
|
|
||||||
// The internal path element separator. This can't be the same as the rcldb
|
// The internal path element separator. This can't be the same as the rcldb
|
||||||
// file to ipath separator : "|"
|
// file to ipath separator : "|"
|
||||||
// We replace it with a control char if it comes out of a filter (ie:
|
// We replace it with a control char if it comes out of a filter (ie:
|
||||||
// rclzip or rclchm can do this). If you want the SOH control char
|
// rclzip or rclchm can do this). If you want the SOH control char
|
||||||
// inside an ipath, you're out of luck (and a bit weird).
|
// inside an ipath, you're out of luck (and a bit weird).
|
||||||
static const string isep(":");
|
static const string cstr_isep(":");
|
||||||
static const char colon_repl = '\x01';
|
|
||||||
|
static const char cchar_colon_repl = '\x01';
|
||||||
static string colon_hide(const string& in)
|
static string colon_hide(const string& in)
|
||||||
{
|
{
|
||||||
string out;
|
string out;
|
||||||
for (string::const_iterator it = in.begin(); it != in.end(); it++) {
|
for (string::const_iterator it = in.begin(); it != in.end(); it++) {
|
||||||
out += *it == ':' ? colon_repl : *it;
|
out += *it == ':' ? cchar_colon_repl : *it;
|
||||||
}
|
}
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
@ -72,7 +73,7 @@ static string colon_restore(const string& in)
|
|||||||
{
|
{
|
||||||
string out;
|
string out;
|
||||||
for (string::const_iterator it = in.begin(); it != in.end(); it++) {
|
for (string::const_iterator it = in.begin(); it != in.end(); it++) {
|
||||||
out += *it == colon_repl ? ':' : *it;
|
out += *it == cchar_colon_repl ? ':' : *it;
|
||||||
}
|
}
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
@ -115,7 +116,7 @@ bool FileInterner::getEnclosing(const string &url, const string &ipath,
|
|||||||
url.c_str(), eipath.c_str()));
|
url.c_str(), eipath.c_str()));
|
||||||
if (eipath.empty())
|
if (eipath.empty())
|
||||||
return false;
|
return false;
|
||||||
if ((colon = eipath.find_last_of(isep)) != string::npos) {
|
if ((colon = eipath.find_last_of(cstr_isep)) != string::npos) {
|
||||||
eipath.erase(colon);
|
eipath.erase(colon);
|
||||||
} else {
|
} else {
|
||||||
eipath.erase();
|
eipath.erase();
|
||||||
@ -365,12 +366,12 @@ void FileInterner::initcommon(RclConfig *cnf, int flags)
|
|||||||
m_handlers.reserve(MAXHANDLERS);
|
m_handlers.reserve(MAXHANDLERS);
|
||||||
for (unsigned int i = 0; i < MAXHANDLERS; i++)
|
for (unsigned int i = 0; i < MAXHANDLERS; i++)
|
||||||
m_tmpflgs[i] = false;
|
m_tmpflgs[i] = false;
|
||||||
m_targetMType = stxtplain;
|
m_targetMType = cstr_stxtplain;
|
||||||
}
|
}
|
||||||
|
|
||||||
// We used a single beagle cache object to access beagle data. We protect it
|
// We used a single beagle cache object to access beagle data. We protect it
|
||||||
// against multiple thread access.
|
// against multiple thread access.
|
||||||
static PTMutexInit o_lock;
|
static PTMutexInit o_beagler_mutex;
|
||||||
|
|
||||||
FileInterner::FileInterner(const Rcl::Doc& idoc, RclConfig *cnf,
|
FileInterner::FileInterner(const Rcl::Doc& idoc, RclConfig *cnf,
|
||||||
TempDir& td, int flags)
|
TempDir& td, int flags)
|
||||||
@ -422,12 +423,12 @@ FileInterner::FileInterner(const Rcl::Doc& idoc, RclConfig *cnf,
|
|||||||
string udi = it->second;
|
string udi = it->second;
|
||||||
|
|
||||||
{
|
{
|
||||||
PTMutexLocker locker(o_lock);
|
PTMutexLocker locker(o_beagler_mutex);
|
||||||
// Retrieve from our webcache (beagle data). The beagler
|
// Retrieve from our webcache (beagle data). The beagler
|
||||||
// object is created at the first call of this routine and
|
// object is created at the first call of this routine and
|
||||||
// deleted when the program exits.
|
// deleted when the program exits.
|
||||||
static BeagleQueueCache beagler(cnf);
|
static BeagleQueueCache o_beagler(cnf);
|
||||||
if (!beagler.getFromCache(udi, dotdoc, data)) {
|
if (!o_beagler.getFromCache(udi, dotdoc, data)) {
|
||||||
LOGINFO(("FileInterner:: failed fetch from Beagle cache for [%s]\n",
|
LOGINFO(("FileInterner:: failed fetch from Beagle cache for [%s]\n",
|
||||||
udi.c_str()));
|
udi.c_str()));
|
||||||
return;
|
return;
|
||||||
@ -564,14 +565,14 @@ static inline bool getKeyValue(const map<string, string>& docdata,
|
|||||||
// These defs are for the Dijon meta array. Rcl::Doc predefined field
|
// These defs are for the Dijon meta array. Rcl::Doc predefined field
|
||||||
// names are used where appropriate. In some cases, Rcl::Doc names are
|
// names are used where appropriate. In some cases, Rcl::Doc names are
|
||||||
// used inside the Dijon metadata (ex: origcharset)
|
// used inside the Dijon metadata (ex: origcharset)
|
||||||
static const string keyau("author");
|
static const string cstr_keyau("author");
|
||||||
static const string keycs("charset");
|
static const string cstr_keycs("charset");
|
||||||
static const string keyct("content");
|
static const string cstr_keyct("content");
|
||||||
static const string keyds("description");
|
static const string cstr_keyds("description");
|
||||||
static const string keyfn("filename");
|
static const string cstr_keyfn("filename");
|
||||||
static const string keymd("modificationdate");
|
static const string cstr_keymd("modificationdate");
|
||||||
static const string keymt("mimetype");
|
static const string cstr_keymt("mimetype");
|
||||||
static const string keytt("title");
|
static const string cstr_keytt("title");
|
||||||
|
|
||||||
bool FileInterner::dijontorcl(Rcl::Doc& doc)
|
bool FileInterner::dijontorcl(Rcl::Doc& doc)
|
||||||
{
|
{
|
||||||
@ -585,21 +586,21 @@ bool FileInterner::dijontorcl(Rcl::Doc& doc)
|
|||||||
|
|
||||||
for (map<string,string>::const_iterator it = docdata.begin();
|
for (map<string,string>::const_iterator it = docdata.begin();
|
||||||
it != docdata.end(); it++) {
|
it != docdata.end(); it++) {
|
||||||
if (it->first == keyct) {
|
if (it->first == cstr_keyct) {
|
||||||
doc.text = it->second;
|
doc.text = it->second;
|
||||||
} else if (it->first == keymd) {
|
} else if (it->first == cstr_keymd) {
|
||||||
doc.dmtime = it->second;
|
doc.dmtime = it->second;
|
||||||
} else if (it->first == Rcl::Doc::keyoc) {
|
} else if (it->first == Rcl::Doc::keyoc) {
|
||||||
doc.origcharset = it->second;
|
doc.origcharset = it->second;
|
||||||
} else if (it->first == keymt || it->first == keycs) {
|
} else if (it->first == cstr_keymt || it->first == cstr_keycs) {
|
||||||
// don't need/want these.
|
// don't need/want these.
|
||||||
} else {
|
} else {
|
||||||
doc.meta[it->first] = it->second;
|
doc.meta[it->first] = it->second;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (doc.meta[Rcl::Doc::keyabs].empty() && !doc.meta[keyds].empty()) {
|
if (doc.meta[Rcl::Doc::keyabs].empty() && !doc.meta[cstr_keyds].empty()) {
|
||||||
doc.meta[Rcl::Doc::keyabs] = doc.meta[keyds];
|
doc.meta[Rcl::Doc::keyabs] = doc.meta[cstr_keyds];
|
||||||
doc.meta.erase(keyds);
|
doc.meta.erase(cstr_keyds);
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -635,21 +636,21 @@ void FileInterner::collectIpathAndMT(Rcl::Doc& doc) const
|
|||||||
if (!ipathel.empty()) {
|
if (!ipathel.empty()) {
|
||||||
// We have a non-empty ipath
|
// We have a non-empty ipath
|
||||||
hasipath = true;
|
hasipath = true;
|
||||||
getKeyValue(docdata, keymt, doc.mimetype);
|
getKeyValue(docdata, cstr_keymt, doc.mimetype);
|
||||||
getKeyValue(docdata, keyfn, doc.utf8fn);
|
getKeyValue(docdata, cstr_keyfn, doc.utf8fn);
|
||||||
}
|
}
|
||||||
doc.ipath += colon_hide(ipathel) + isep;
|
doc.ipath += colon_hide(ipathel) + cstr_isep;
|
||||||
} else {
|
} else {
|
||||||
doc.ipath += isep;
|
doc.ipath += cstr_isep;
|
||||||
}
|
}
|
||||||
getKeyValue(docdata, keyau, doc.meta[Rcl::Doc::keyau]);
|
getKeyValue(docdata, cstr_keyau, doc.meta[Rcl::Doc::keyau]);
|
||||||
getKeyValue(docdata, keymd, doc.dmtime);
|
getKeyValue(docdata, cstr_keymd, doc.dmtime);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Trim empty tail elements in ipath.
|
// Trim empty tail elements in ipath.
|
||||||
if (hasipath) {
|
if (hasipath) {
|
||||||
LOGDEB2(("IPATH [%s]\n", doc.ipath.c_str()));
|
LOGDEB2(("IPATH [%s]\n", doc.ipath.c_str()));
|
||||||
string::size_type sit = doc.ipath.find_last_not_of(isep);
|
string::size_type sit = doc.ipath.find_last_not_of(cstr_isep);
|
||||||
if (sit == string::npos)
|
if (sit == string::npos)
|
||||||
doc.ipath.erase();
|
doc.ipath.erase();
|
||||||
else if (sit < doc.ipath.length() -1)
|
else if (sit < doc.ipath.length() -1)
|
||||||
@ -681,8 +682,8 @@ int FileInterner::addHandler()
|
|||||||
{
|
{
|
||||||
const map<string, string>& docdata = m_handlers.back()->get_meta_data();
|
const map<string, string>& docdata = m_handlers.back()->get_meta_data();
|
||||||
string charset, mimetype;
|
string charset, mimetype;
|
||||||
getKeyValue(docdata, keycs, charset);
|
getKeyValue(docdata, cstr_keycs, charset);
|
||||||
getKeyValue(docdata, keymt, mimetype);
|
getKeyValue(docdata, cstr_keymt, mimetype);
|
||||||
|
|
||||||
LOGDEB(("FileInterner::addHandler: next_doc is %s\n", mimetype.c_str()));
|
LOGDEB(("FileInterner::addHandler: next_doc is %s\n", mimetype.c_str()));
|
||||||
|
|
||||||
@ -690,7 +691,7 @@ int FileInterner::addHandler()
|
|||||||
// general), we're done decoding. If we hit text/plain, we're done
|
// general), we're done decoding. If we hit text/plain, we're done
|
||||||
// in any case
|
// in any case
|
||||||
if (!stringicmp(mimetype, m_targetMType) ||
|
if (!stringicmp(mimetype, m_targetMType) ||
|
||||||
!stringicmp(mimetype, stxtplain)) {
|
!stringicmp(mimetype, cstr_stxtplain)) {
|
||||||
m_reachedMType = mimetype;
|
m_reachedMType = mimetype;
|
||||||
LOGDEB1(("FileInterner::addHandler: target reached\n"));
|
LOGDEB1(("FileInterner::addHandler: target reached\n"));
|
||||||
return ADD_BREAK;
|
return ADD_BREAK;
|
||||||
@ -723,7 +724,7 @@ int FileInterner::addHandler()
|
|||||||
const string *txt = &ns;
|
const string *txt = &ns;
|
||||||
{
|
{
|
||||||
map<string,string>::const_iterator it;
|
map<string,string>::const_iterator it;
|
||||||
it = docdata.find(keyct);
|
it = docdata.find(cstr_keyct);
|
||||||
if (it != docdata.end())
|
if (it != docdata.end())
|
||||||
txt = &it->second;
|
txt = &it->second;
|
||||||
}
|
}
|
||||||
@ -795,7 +796,7 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, const string& ipath
|
|||||||
int vipathidx = 0;
|
int vipathidx = 0;
|
||||||
if (!ipath.empty()) {
|
if (!ipath.empty()) {
|
||||||
vector<string> lipath;
|
vector<string> lipath;
|
||||||
stringToTokens(ipath, lipath, isep, true);
|
stringToTokens(ipath, lipath, cstr_isep, true);
|
||||||
for (vector<string>::iterator it = lipath.begin();
|
for (vector<string>::iterator it = lipath.begin();
|
||||||
it != lipath.end(); it++) {
|
it != lipath.end(); it++) {
|
||||||
*it = colon_restore(*it);
|
*it = colon_restore(*it);
|
||||||
|
|||||||
@ -53,7 +53,7 @@ public:
|
|||||||
private: FILE **m_fpp;
|
private: FILE **m_fpp;
|
||||||
};
|
};
|
||||||
|
|
||||||
static PTMutexInit o_mutex;
|
static PTMutexInit o_mcache_mutex;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Handles a cache for message numbers to offset translations. Permits direct
|
* Handles a cache for message numbers to offset translations. Permits direct
|
||||||
@ -86,7 +86,7 @@ public:
|
|||||||
LOGDEB0(("MboxCache::get_offsets: init failed\n"));
|
LOGDEB0(("MboxCache::get_offsets: init failed\n"));
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
PTMutexLocker locker(o_mutex);
|
PTMutexLocker locker(o_mcache_mutex);
|
||||||
string fn = makefilename(udi);
|
string fn = makefilename(udi);
|
||||||
FILE *fp = 0;
|
FILE *fp = 0;
|
||||||
if ((fp = fopen(fn.c_str(), "r")) == 0) {
|
if ((fp = fopen(fn.c_str(), "r")) == 0) {
|
||||||
@ -133,7 +133,7 @@ public:
|
|||||||
return;
|
return;
|
||||||
if (fsize < m_minfsize)
|
if (fsize < m_minfsize)
|
||||||
return;
|
return;
|
||||||
PTMutexLocker locker(o_mutex);
|
PTMutexLocker locker(o_mcache_mutex);
|
||||||
string fn = makefilename(udi);
|
string fn = makefilename(udi);
|
||||||
FILE *fp;
|
FILE *fp;
|
||||||
if ((fp = fopen(fn.c_str(), "w")) == 0) {
|
if ((fp = fopen(fn.c_str(), "w")) == 0) {
|
||||||
@ -163,7 +163,7 @@ public:
|
|||||||
|
|
||||||
// Check state, possibly initialize
|
// Check state, possibly initialize
|
||||||
bool ok(RclConfig *config) {
|
bool ok(RclConfig *config) {
|
||||||
PTMutexLocker locker(o_mutex);
|
PTMutexLocker locker(o_mcache_mutex);
|
||||||
if (m_minfsize == -1)
|
if (m_minfsize == -1)
|
||||||
return false;
|
return false;
|
||||||
if (!m_ok) {
|
if (!m_ok) {
|
||||||
@ -224,9 +224,9 @@ private:
|
|||||||
|
|
||||||
const size_t MboxCache::o_b1size = 1024;
|
const size_t MboxCache::o_b1size = 1024;
|
||||||
|
|
||||||
static class MboxCache mcache;
|
static class MboxCache o_mcache;
|
||||||
|
|
||||||
static const string keyquirks("mhmboxquirks");
|
static const string cstr_keyquirks("mhmboxquirks");
|
||||||
|
|
||||||
MimeHandlerMbox::~MimeHandlerMbox()
|
MimeHandlerMbox::~MimeHandlerMbox()
|
||||||
{
|
{
|
||||||
@ -271,7 +271,7 @@ bool MimeHandlerMbox::set_document_file(const string &fn)
|
|||||||
|
|
||||||
// Check for location-based quirks:
|
// Check for location-based quirks:
|
||||||
string quirks;
|
string quirks;
|
||||||
if (m_config && m_config->getConfParam(keyquirks, quirks)) {
|
if (m_config && m_config->getConfParam(cstr_keyquirks, quirks)) {
|
||||||
if (quirks == "tbird") {
|
if (quirks == "tbird") {
|
||||||
LOGDEB(("MimeHandlerMbox: setting quirks TBIRD\n"));
|
LOGDEB(("MimeHandlerMbox: setting quirks TBIRD\n"));
|
||||||
m_quirks |= MBOXQUIRK_TBIRD;
|
m_quirks |= MBOXQUIRK_TBIRD;
|
||||||
@ -358,6 +358,20 @@ static const char *miniTbirdFrom = "^From $";
|
|||||||
static regex_t fromregex;
|
static regex_t fromregex;
|
||||||
static regex_t minifromregex;
|
static regex_t minifromregex;
|
||||||
static bool regcompiled;
|
static bool regcompiled;
|
||||||
|
static PTMutexInit o_regex_mutex;
|
||||||
|
|
||||||
|
static void compileregexes()
|
||||||
|
{
|
||||||
|
PTMutexLocker locker(o_regex_mutex);
|
||||||
|
// As the initial test of regcompiled is unprotected the value may
|
||||||
|
// have changed while we were waiting for the lock. Test again now
|
||||||
|
// that we are alone.
|
||||||
|
if (regcompiled)
|
||||||
|
return;
|
||||||
|
regcomp(&fromregex, frompat, REG_NOSUB|REG_EXTENDED);
|
||||||
|
regcomp(&minifromregex, miniTbirdFrom, REG_NOSUB|REG_EXTENDED);
|
||||||
|
regcompiled = true;
|
||||||
|
}
|
||||||
|
|
||||||
bool MimeHandlerMbox::next_document()
|
bool MimeHandlerMbox::next_document()
|
||||||
{
|
{
|
||||||
@ -383,9 +397,7 @@ bool MimeHandlerMbox::next_document()
|
|||||||
mtarg = -1;
|
mtarg = -1;
|
||||||
|
|
||||||
if (!regcompiled) {
|
if (!regcompiled) {
|
||||||
regcomp(&fromregex, frompat, REG_NOSUB|REG_EXTENDED);
|
compileregexes();
|
||||||
regcomp(&minifromregex, miniTbirdFrom, REG_NOSUB|REG_EXTENDED);
|
|
||||||
regcompiled = true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we are called to retrieve a specific message, seek to bof
|
// If we are called to retrieve a specific message, seek to bof
|
||||||
@ -403,7 +415,7 @@ bool MimeHandlerMbox::next_document()
|
|||||||
LOGDEB0(("MimeHandlerMbox::next_doc: mtarg %d m_udi[%s]\n",
|
LOGDEB0(("MimeHandlerMbox::next_doc: mtarg %d m_udi[%s]\n",
|
||||||
mtarg, m_udi.c_str()));
|
mtarg, m_udi.c_str()));
|
||||||
if (!m_udi.empty() &&
|
if (!m_udi.empty() &&
|
||||||
(off = mcache.get_offset(m_config, m_udi, mtarg)) >= 0 &&
|
(off = o_mcache.get_offset(m_config, m_udi, mtarg)) >= 0 &&
|
||||||
fseeko(fp, (off_t)off, SEEK_SET) >= 0 &&
|
fseeko(fp, (off_t)off, SEEK_SET) >= 0 &&
|
||||||
fgets(line, LL, fp) &&
|
fgets(line, LL, fp) &&
|
||||||
(!regexec(&fromregex, line, 0, 0, 0) ||
|
(!regexec(&fromregex, line, 0, 0, 0) ||
|
||||||
@ -492,7 +504,7 @@ bool MimeHandlerMbox::next_document()
|
|||||||
LOGDEB2(("MimeHandlerMbox::next: eof hit\n"));
|
LOGDEB2(("MimeHandlerMbox::next: eof hit\n"));
|
||||||
m_havedoc = false;
|
m_havedoc = false;
|
||||||
if (!m_udi.empty() && storeoffsets) {
|
if (!m_udi.empty() && storeoffsets) {
|
||||||
mcache.put_offsets(m_config, m_udi, m_fsize, m_offsets);
|
o_mcache.put_offsets(m_config, m_udi, m_fsize, m_offsets);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return msgtxt.empty() ? false : true;
|
return msgtxt.empty() ? false : true;
|
||||||
|
|||||||
@ -34,12 +34,14 @@ using std::endl;
|
|||||||
#include "plaintorich.h"
|
#include "plaintorich.h"
|
||||||
#include "mimehandler.h"
|
#include "mimehandler.h"
|
||||||
|
|
||||||
// Default highlighter
|
// Default highlighter. No need for locking, this is query-only.
|
||||||
|
static const string cstr_hlfontcolor("<font color=\"blue\">");
|
||||||
|
static const string cstr_hlendfont("</font>");
|
||||||
class PlainToRichHtReslist : public PlainToRich {
|
class PlainToRichHtReslist : public PlainToRich {
|
||||||
public:
|
public:
|
||||||
virtual ~PlainToRichHtReslist() {}
|
virtual ~PlainToRichHtReslist() {}
|
||||||
virtual string startMatch() {return string("<font color=\"blue\">");}
|
virtual string startMatch() {return cstr_hlfontcolor;}
|
||||||
virtual string endMatch() {return string("</font>");}
|
virtual string endMatch() {return cstr_hlendfont;}
|
||||||
};
|
};
|
||||||
static PlainToRichHtReslist g_hiliter;
|
static PlainToRichHtReslist g_hiliter;
|
||||||
|
|
||||||
@ -406,10 +408,10 @@ string ResListPager::detailsLink()
|
|||||||
|
|
||||||
const string &ResListPager::parFormat()
|
const string &ResListPager::parFormat()
|
||||||
{
|
{
|
||||||
static const string format("<img src=\"%I\" align=\"left\">"
|
static const string cstr_format("<img src=\"%I\" align=\"left\">"
|
||||||
"%R %S %L <b>%T</b><br>"
|
"%R %S %L <b>%T</b><br>"
|
||||||
"%M %D <i>%U</i><br>"
|
"%M %D <i>%U</i><br>"
|
||||||
"%A %K");
|
"%A %K");
|
||||||
return format;
|
return cstr_format;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -51,6 +51,7 @@ using namespace std;
|
|||||||
#include "md5.h"
|
#include "md5.h"
|
||||||
#include "rclversion.h"
|
#include "rclversion.h"
|
||||||
#include "cancelcheck.h"
|
#include "cancelcheck.h"
|
||||||
|
#include "ptmutex.h"
|
||||||
|
|
||||||
#ifndef MAX
|
#ifndef MAX
|
||||||
#define MAX(A,B) (A>B?A:B)
|
#define MAX(A,B) (A>B?A:B)
|
||||||
@ -61,8 +62,8 @@ using namespace std;
|
|||||||
|
|
||||||
// Recoll index format version is stored in user metadata. When this change,
|
// Recoll index format version is stored in user metadata. When this change,
|
||||||
// we can't open the db and will have to reindex.
|
// we can't open the db and will have to reindex.
|
||||||
static const string RCL_IDX_VERSION_KEY("RCL_IDX_VERSION_KEY");
|
static const string cstr_RCL_IDX_VERSION_KEY("RCL_IDX_VERSION_KEY");
|
||||||
static const string RCL_IDX_VERSION("1");
|
static const string cstr_RCL_IDX_VERSION("1");
|
||||||
|
|
||||||
// This is the word position offset at which we index the body text
|
// This is the word position offset at which we index the body text
|
||||||
// (abstract, keywords, etc.. are stored before this)
|
// (abstract, keywords, etc.. are stored before this)
|
||||||
@ -79,7 +80,7 @@ const string end_of_field_term = "XXND";
|
|||||||
// This is used as a marker inside the abstract frag lists, but
|
// This is used as a marker inside the abstract frag lists, but
|
||||||
// normally doesn't remain in final output (which is built with a
|
// normally doesn't remain in final output (which is built with a
|
||||||
// custom sep. by our caller).
|
// custom sep. by our caller).
|
||||||
static const string ellipsis("...");
|
static const string cstr_ellipsis("...");
|
||||||
|
|
||||||
string version_string(){
|
string version_string(){
|
||||||
return string("Recoll ") + string(rclversionstr) + string(" + Xapian ") +
|
return string("Recoll ") + string(rclversionstr) + string(" + Xapian ") +
|
||||||
@ -88,12 +89,12 @@ string version_string(){
|
|||||||
|
|
||||||
// Synthetic abstract marker (to discriminate from abstract actually
|
// Synthetic abstract marker (to discriminate from abstract actually
|
||||||
// found in document)
|
// found in document)
|
||||||
static const string rclSyntAbs("?!#@");
|
static const string cstr_syntAbs("?!#@");
|
||||||
|
|
||||||
// Only ONE field name inside the index data record differs from the
|
// Only ONE field name inside the index data record differs from the
|
||||||
// Rcl::Doc ones: caption<->title, for a remnant of compatibility with
|
// Rcl::Doc ones: caption<->title, for a remnant of compatibility with
|
||||||
// omega
|
// omega
|
||||||
static const string keycap("caption");
|
static const string cstr_keycap("caption");
|
||||||
|
|
||||||
// Static/Default table for field->prefix/weight translation.
|
// Static/Default table for field->prefix/weight translation.
|
||||||
// This is logically const after initialization. Can't use a
|
// This is logically const after initialization. Can't use a
|
||||||
@ -106,8 +107,16 @@ static const string keycap("caption");
|
|||||||
// suppressed.
|
// suppressed.
|
||||||
|
|
||||||
static map<string, FieldTraits> fldToTraits;
|
static map<string, FieldTraits> fldToTraits;
|
||||||
|
static PTMutexInit o_fldToTraits_mutex;
|
||||||
|
|
||||||
static void initFldToTraits()
|
static void initFldToTraits()
|
||||||
{
|
{
|
||||||
|
PTMutexLocker locker(o_fldToTraits_mutex);
|
||||||
|
// As we perform non-locked testing of initialization, check again with
|
||||||
|
// the lock held
|
||||||
|
if (fldToTraits.size())
|
||||||
|
return;
|
||||||
|
|
||||||
// Can't remember why "abstract" is indexed without a prefix
|
// Can't remember why "abstract" is indexed without a prefix
|
||||||
// (result: it's indexed twice actually). Maybe I'll dare change
|
// (result: it's indexed twice actually). Maybe I'll dare change
|
||||||
// this one day
|
// this one day
|
||||||
@ -116,7 +125,7 @@ static void initFldToTraits()
|
|||||||
fldToTraits["ext"] = FieldTraits("XE");
|
fldToTraits["ext"] = FieldTraits("XE");
|
||||||
fldToTraits[Doc::keyfn] = FieldTraits("XSFN");
|
fldToTraits[Doc::keyfn] = FieldTraits("XSFN");
|
||||||
|
|
||||||
fldToTraits[keycap] = FieldTraits("S");
|
fldToTraits[cstr_keycap] = FieldTraits("S");
|
||||||
fldToTraits[Doc::keytt] = FieldTraits("S");
|
fldToTraits[Doc::keytt] = FieldTraits("S");
|
||||||
fldToTraits["subject"] = FieldTraits("S");
|
fldToTraits["subject"] = FieldTraits("S");
|
||||||
|
|
||||||
@ -189,14 +198,14 @@ bool Db::Native::dbDataToRclDoc(Xapian::docid docid, std::string &data,
|
|||||||
parms.get(Doc::keyfmt, doc.fmtime);
|
parms.get(Doc::keyfmt, doc.fmtime);
|
||||||
parms.get(Doc::keydmt, doc.dmtime);
|
parms.get(Doc::keydmt, doc.dmtime);
|
||||||
parms.get(Doc::keyoc, doc.origcharset);
|
parms.get(Doc::keyoc, doc.origcharset);
|
||||||
parms.get(keycap, doc.meta[Doc::keytt]);
|
parms.get(cstr_keycap, doc.meta[Doc::keytt]);
|
||||||
parms.get(Doc::keykw, doc.meta[Doc::keykw]);
|
parms.get(Doc::keykw, doc.meta[Doc::keykw]);
|
||||||
parms.get(Doc::keyabs, doc.meta[Doc::keyabs]);
|
parms.get(Doc::keyabs, doc.meta[Doc::keyabs]);
|
||||||
// Possibly remove synthetic abstract indicator (if it's there, we
|
// Possibly remove synthetic abstract indicator (if it's there, we
|
||||||
// used to index the beginning of the text as abstract).
|
// used to index the beginning of the text as abstract).
|
||||||
doc.syntabs = false;
|
doc.syntabs = false;
|
||||||
if (doc.meta[Doc::keyabs].find(rclSyntAbs) == 0) {
|
if (doc.meta[Doc::keyabs].find(cstr_syntAbs) == 0) {
|
||||||
doc.meta[Doc::keyabs] = doc.meta[Doc::keyabs].substr(rclSyntAbs.length());
|
doc.meta[Doc::keyabs] = doc.meta[Doc::keyabs].substr(cstr_syntAbs.length());
|
||||||
doc.syntabs = true;
|
doc.syntabs = true;
|
||||||
}
|
}
|
||||||
parms.get(Doc::keyipt, doc.ipath);
|
parms.get(Doc::keyipt, doc.ipath);
|
||||||
@ -417,7 +426,7 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
|
|||||||
} else if (ii > (unsigned int)ipos &&
|
} else if (ii > (unsigned int)ipos &&
|
||||||
ii < (unsigned int)ipos + qtrmwrdcnt) {
|
ii < (unsigned int)ipos + qtrmwrdcnt) {
|
||||||
sparseDoc[ii] = occupiedmarker;
|
sparseDoc[ii] = occupiedmarker;
|
||||||
} else if (!sparseDoc[ii].compare(ellipsis)) {
|
} else if (!sparseDoc[ii].compare(cstr_ellipsis)) {
|
||||||
// For an empty slot, the test has a side
|
// For an empty slot, the test has a side
|
||||||
// effect of inserting an empty string which
|
// effect of inserting an empty string which
|
||||||
// is what we want
|
// is what we want
|
||||||
@ -429,7 +438,7 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
|
|||||||
// empty string here, we really want an empty slot,
|
// empty string here, we really want an empty slot,
|
||||||
// use find()
|
// use find()
|
||||||
if (sparseDoc.find(sto+1) == sparseDoc.end()) {
|
if (sparseDoc.find(sto+1) == sparseDoc.end()) {
|
||||||
sparseDoc[sto+1] = ellipsis;
|
sparseDoc[sto+1] = cstr_ellipsis;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Limit to allocated occurences and total size
|
// Limit to allocated occurences and total size
|
||||||
@ -531,7 +540,7 @@ vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
|
|||||||
if (!incjk || (incjk && !newcjk))
|
if (!incjk || (incjk && !newcjk))
|
||||||
chunk += " ";
|
chunk += " ";
|
||||||
incjk = newcjk;
|
incjk = newcjk;
|
||||||
if (it->second == ellipsis) {
|
if (it->second == cstr_ellipsis) {
|
||||||
vabs.push_back(chunk);
|
vabs.push_back(chunk);
|
||||||
chunk.clear();
|
chunk.clear();
|
||||||
} else {
|
} else {
|
||||||
@ -612,8 +621,8 @@ bool Db::open(OpenMode mode, OpenError *error)
|
|||||||
// If db is empty, write the data format version at once
|
// If db is empty, write the data format version at once
|
||||||
// to avoid stupid error messages:
|
// to avoid stupid error messages:
|
||||||
if (m_ndb->xwdb.get_doccount() == 0)
|
if (m_ndb->xwdb.get_doccount() == 0)
|
||||||
m_ndb->xwdb.set_metadata(RCL_IDX_VERSION_KEY,
|
m_ndb->xwdb.set_metadata(cstr_RCL_IDX_VERSION_KEY,
|
||||||
RCL_IDX_VERSION);
|
cstr_RCL_IDX_VERSION);
|
||||||
m_ndb->m_iswritable = true;
|
m_ndb->m_iswritable = true;
|
||||||
// We open a readonly object in all cases (possibly in
|
// We open a readonly object in all cases (possibly in
|
||||||
// addition to the r/w one) because some operations
|
// addition to the r/w one) because some operations
|
||||||
@ -650,11 +659,11 @@ bool Db::open(OpenMode mode, OpenError *error)
|
|||||||
// Check index format version. Must not try to check a just created or
|
// Check index format version. Must not try to check a just created or
|
||||||
// truncated db
|
// truncated db
|
||||||
if (mode != DbTrunc && m_ndb->xdb().get_doccount() > 0) {
|
if (mode != DbTrunc && m_ndb->xdb().get_doccount() > 0) {
|
||||||
string version = m_ndb->xdb().get_metadata(RCL_IDX_VERSION_KEY);
|
string version = m_ndb->xdb().get_metadata(cstr_RCL_IDX_VERSION_KEY);
|
||||||
if (version.compare(RCL_IDX_VERSION)) {
|
if (version.compare(cstr_RCL_IDX_VERSION)) {
|
||||||
m_ndb->m_noversionwrite = true;
|
m_ndb->m_noversionwrite = true;
|
||||||
LOGERR(("Rcl::Db::open: file index [%s], software [%s]\n",
|
LOGERR(("Rcl::Db::open: file index [%s], software [%s]\n",
|
||||||
version.c_str(), RCL_IDX_VERSION.c_str()));
|
version.c_str(), cstr_RCL_IDX_VERSION.c_str()));
|
||||||
throw Xapian::DatabaseError("Recoll index version mismatch",
|
throw Xapian::DatabaseError("Recoll index version mismatch",
|
||||||
"", "");
|
"", "");
|
||||||
}
|
}
|
||||||
@ -693,7 +702,7 @@ bool Db::i_close(bool final)
|
|||||||
bool w = m_ndb->m_iswritable;
|
bool w = m_ndb->m_iswritable;
|
||||||
if (w) {
|
if (w) {
|
||||||
if (!m_ndb->m_noversionwrite)
|
if (!m_ndb->m_noversionwrite)
|
||||||
m_ndb->xwdb.set_metadata(RCL_IDX_VERSION_KEY, RCL_IDX_VERSION);
|
m_ndb->xwdb.set_metadata(cstr_RCL_IDX_VERSION_KEY, cstr_RCL_IDX_VERSION);
|
||||||
LOGDEB(("Rcl::Db:close: xapian will close. May take some time\n"));
|
LOGDEB(("Rcl::Db:close: xapian will close. May take some time\n"));
|
||||||
}
|
}
|
||||||
// Used to do a flush here. Cant see why it should be necessary.
|
// Used to do a flush here. Cant see why it should be necessary.
|
||||||
@ -952,7 +961,7 @@ void Db::setAbstractParams(int idxtrunc, int syntlen, int syntctxlen)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static const int MB = 1024 * 1024;
|
static const int MB = 1024 * 1024;
|
||||||
static const string nc("\n\r\x0c");
|
static const string cstr_nc("\n\r\x0c");
|
||||||
|
|
||||||
#define RECORD_APPEND(R, NM, VAL) {R += NM + "=" + VAL + "\n";}
|
#define RECORD_APPEND(R, NM, VAL) {R += NM + "=" + VAL + "\n";}
|
||||||
|
|
||||||
@ -1168,13 +1177,13 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi,
|
|||||||
if (doc.meta[Doc::keytt].empty())
|
if (doc.meta[Doc::keytt].empty())
|
||||||
doc.meta[Doc::keytt] = doc.utf8fn;
|
doc.meta[Doc::keytt] = doc.utf8fn;
|
||||||
doc.meta[Doc::keytt] =
|
doc.meta[Doc::keytt] =
|
||||||
neutchars(truncate_to_word(doc.meta[Doc::keytt], 150), nc);
|
neutchars(truncate_to_word(doc.meta[Doc::keytt], 150), cstr_nc);
|
||||||
if (!doc.meta[Doc::keytt].empty())
|
if (!doc.meta[Doc::keytt].empty())
|
||||||
RECORD_APPEND(record, keycap, doc.meta[Doc::keytt]);
|
RECORD_APPEND(record, cstr_keycap, doc.meta[Doc::keytt]);
|
||||||
|
|
||||||
trimstring(doc.meta[Doc::keykw], " \t\r\n");
|
trimstring(doc.meta[Doc::keykw], " \t\r\n");
|
||||||
doc.meta[Doc::keykw] =
|
doc.meta[Doc::keykw] =
|
||||||
neutchars(truncate_to_word(doc.meta[Doc::keykw], 300), nc);
|
neutchars(truncate_to_word(doc.meta[Doc::keykw], 300), cstr_nc);
|
||||||
if (!doc.meta[Doc::keykw].empty())
|
if (!doc.meta[Doc::keykw].empty())
|
||||||
RECORD_APPEND(record, Doc::keykw, doc.meta[Doc::keykw]);
|
RECORD_APPEND(record, Doc::keykw, doc.meta[Doc::keykw]);
|
||||||
|
|
||||||
@ -1189,12 +1198,12 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi,
|
|||||||
if (doc.meta[Doc::keyabs].empty()) {
|
if (doc.meta[Doc::keyabs].empty()) {
|
||||||
syntabs = true;
|
syntabs = true;
|
||||||
if (!doc.text.empty())
|
if (!doc.text.empty())
|
||||||
doc.meta[Doc::keyabs] = rclSyntAbs +
|
doc.meta[Doc::keyabs] = cstr_syntAbs +
|
||||||
neutchars(truncate_to_word(doc.text, m_idxAbsTruncLen), nc);
|
neutchars(truncate_to_word(doc.text, m_idxAbsTruncLen), cstr_nc);
|
||||||
} else {
|
} else {
|
||||||
doc.meta[Doc::keyabs] =
|
doc.meta[Doc::keyabs] =
|
||||||
neutchars(truncate_to_word(doc.meta[Doc::keyabs], m_idxAbsTruncLen),
|
neutchars(truncate_to_word(doc.meta[Doc::keyabs], m_idxAbsTruncLen),
|
||||||
nc);
|
cstr_nc);
|
||||||
}
|
}
|
||||||
if (!doc.meta[Doc::keyabs].empty())
|
if (!doc.meta[Doc::keyabs].empty())
|
||||||
RECORD_APPEND(record, Doc::keyabs, doc.meta[Doc::keyabs]);
|
RECORD_APPEND(record, Doc::keyabs, doc.meta[Doc::keyabs]);
|
||||||
@ -1205,7 +1214,7 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi,
|
|||||||
string nm = m_config->fieldCanon(*it);
|
string nm = m_config->fieldCanon(*it);
|
||||||
if (!doc.meta[*it].empty()) {
|
if (!doc.meta[*it].empty()) {
|
||||||
string value =
|
string value =
|
||||||
neutchars(truncate_to_word(doc.meta[*it], 150), nc);
|
neutchars(truncate_to_word(doc.meta[*it], 150), cstr_nc);
|
||||||
RECORD_APPEND(record, nm, value);
|
RECORD_APPEND(record, nm, value);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1611,8 +1620,8 @@ static void addPrefix(list<TermMatchEntry>& terms, const string& prefix)
|
|||||||
// Characters that can begin a wildcard or regexp expression. We use skipto
|
// Characters that can begin a wildcard or regexp expression. We use skipto
|
||||||
// to begin the allterms search with terms that begin with the portion of
|
// to begin the allterms search with terms that begin with the portion of
|
||||||
// the input string prior to these chars.
|
// the input string prior to these chars.
|
||||||
const string wildSpecChars = "*?[";
|
const string cstr_wildSpecChars = "*?[";
|
||||||
const string regSpecChars = "(.[{";
|
const string cstr_regSpecChars = "(.[{";
|
||||||
|
|
||||||
// Find all index terms that match a wildcard or regular expression
|
// Find all index terms that match a wildcard or regular expression
|
||||||
bool Db::termMatch(MatchType typ, const string &lang,
|
bool Db::termMatch(MatchType typ, const string &lang,
|
||||||
@ -1639,7 +1648,7 @@ bool Db::termMatch(MatchType typ, const string &lang,
|
|||||||
LOGERR(("Db::termMatch: unac failed for [%s]\n", root.c_str()));
|
LOGERR(("Db::termMatch: unac failed for [%s]\n", root.c_str()));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
string nochars = typ == ET_WILD ? wildSpecChars : regSpecChars;
|
string nochars = typ == ET_WILD ? cstr_wildSpecChars : cstr_regSpecChars;
|
||||||
|
|
||||||
string prefix;
|
string prefix;
|
||||||
if (!field.empty()) {
|
if (!field.empty()) {
|
||||||
@ -1852,7 +1861,7 @@ bool Db::makeDocAbstract(Doc &doc, Query *query, string& abstract)
|
|||||||
for (vector<string>::const_iterator it = vab.begin();
|
for (vector<string>::const_iterator it = vab.begin();
|
||||||
it != vab.end(); it++) {
|
it != vab.end(); it++) {
|
||||||
abstract.append(*it);
|
abstract.append(*it);
|
||||||
abstract.append(ellipsis);
|
abstract.append(cstr_ellipsis);
|
||||||
}
|
}
|
||||||
return m_reason.empty() ? true : false;
|
return m_reason.empty() ? true : false;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -39,16 +39,17 @@
|
|||||||
namespace Rcl {
|
namespace Rcl {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
static const string cstr_keycap("caption");
|
||||||
|
static const string cstr_keydmtime("dmtime");
|
||||||
|
|
||||||
// Field names inside the index data record may differ from the rcldoc ones
|
// Field names inside the index data record may differ from the rcldoc ones
|
||||||
// (esp.: caption / title)
|
// (esp.: caption / title)
|
||||||
static const string& docfToDatf(const string& df)
|
static const string& docfToDatf(const string& df)
|
||||||
{
|
{
|
||||||
static const string keycap("caption");
|
|
||||||
static const string keydmtime("dmtime");
|
|
||||||
if (!df.compare(Doc::keytt)) {
|
if (!df.compare(Doc::keytt)) {
|
||||||
return keycap;
|
return cstr_keycap;
|
||||||
} else if (!df.compare(Doc::keymt)) {
|
} else if (!df.compare(Doc::keymt)) {
|
||||||
return keydmtime;
|
return cstr_keydmtime;
|
||||||
} else {
|
} else {
|
||||||
return df;
|
return df;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -860,13 +860,13 @@ bool StringToXapianQ::processUserString(const string &iq,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static const string nullstemlang;
|
static const string cstr_null;
|
||||||
|
|
||||||
// Translate a simple OR, AND, or EXCL search clause.
|
// Translate a simple OR, AND, or EXCL search clause.
|
||||||
bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p,
|
bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p,
|
||||||
const string& stemlang)
|
const string& stemlang)
|
||||||
{
|
{
|
||||||
const string& l_stemlang = (m_modifiers&SDCM_NOSTEMMING)? nullstemlang:
|
const string& l_stemlang = (m_modifiers&SDCM_NOSTEMMING)? cstr_null:
|
||||||
stemlang;
|
stemlang;
|
||||||
|
|
||||||
m_terms.clear();
|
m_terms.clear();
|
||||||
@ -945,7 +945,7 @@ bool SearchDataClauseFilename::toNativeQuery(Rcl::Db &db, void *p,
|
|||||||
bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
|
bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
|
||||||
const string& stemlang)
|
const string& stemlang)
|
||||||
{
|
{
|
||||||
const string& l_stemlang = (m_modifiers&SDCM_NOSTEMMING)? nullstemlang:
|
const string& l_stemlang = (m_modifiers&SDCM_NOSTEMMING)? cstr_null:
|
||||||
stemlang;
|
stemlang;
|
||||||
LOGDEB(("SearchDataClauseDist::toNativeQuery\n"));
|
LOGDEB(("SearchDataClauseDist::toNativeQuery\n"));
|
||||||
m_terms.clear();
|
m_terms.clear();
|
||||||
|
|||||||
@ -38,21 +38,21 @@ namespace Rcl {
|
|||||||
namespace StemDb {
|
namespace StemDb {
|
||||||
|
|
||||||
|
|
||||||
static const string stemdirstem = "stem_";
|
static const string cstr_stemdirstem = "stem_";
|
||||||
|
|
||||||
/// Compute name of stem db for given base database and language
|
/// Compute name of stem db for given base database and language
|
||||||
static string stemdbname(const string& dbdir, const string& lang)
|
static string stemdbname(const string& dbdir, const string& lang)
|
||||||
{
|
{
|
||||||
return path_cat(dbdir, stemdirstem + lang);
|
return path_cat(dbdir, cstr_stemdirstem + lang);
|
||||||
}
|
}
|
||||||
|
|
||||||
list<string> getLangs(const string& dbdir)
|
list<string> getLangs(const string& dbdir)
|
||||||
{
|
{
|
||||||
string pattern = stemdirstem + "*";
|
string pattern = cstr_stemdirstem + "*";
|
||||||
list<string> dirs = path_dirglob(dbdir, pattern);
|
list<string> dirs = path_dirglob(dbdir, pattern);
|
||||||
for (list<string>::iterator it = dirs.begin(); it != dirs.end(); it++) {
|
for (list<string>::iterator it = dirs.begin(); it != dirs.end(); it++) {
|
||||||
*it = path_basename(*it);
|
*it = path_basename(*it);
|
||||||
*it = it->substr(stemdirstem.length(), string::npos);
|
*it = it->substr(cstr_stemdirstem.length(), string::npos);
|
||||||
}
|
}
|
||||||
return dirs;
|
return dirs;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -36,6 +36,7 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdarg.h>
|
#include <stdarg.h>
|
||||||
#endif /* HAVE_VSNPRINTF */
|
#endif /* HAVE_VSNPRINTF */
|
||||||
|
#include <pthread.h>
|
||||||
|
|
||||||
#include "unac.h"
|
#include "unac.h"
|
||||||
#include "unac_version.h"
|
#include "unac_version.h"
|
||||||
@ -10555,8 +10556,6 @@ int unacfold_string_utf16(const char* in, size_t in_length,
|
|||||||
outp, out_lengthp, 1);
|
outp, out_lengthp, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define MAXOUT 1024
|
|
||||||
|
|
||||||
static int convert(const char* from, const char* to,
|
static int convert(const char* from, const char* to,
|
||||||
const char* in, size_t in_length,
|
const char* in, size_t in_length,
|
||||||
char** outp, size_t* out_lengthp);
|
char** outp, size_t* out_lengthp);
|
||||||
@ -10564,6 +10563,14 @@ static int convert(const char* from, const char* to,
|
|||||||
static const char *utf16be = "UTF-16BE";
|
static const char *utf16be = "UTF-16BE";
|
||||||
static iconv_t u8tou16_cd = (iconv_t)-1;
|
static iconv_t u8tou16_cd = (iconv_t)-1;
|
||||||
static iconv_t u16tou8_cd = (iconv_t)-1;
|
static iconv_t u16tou8_cd = (iconv_t)-1;
|
||||||
|
/*
 * Mutex taken by convert() around its use of the cached iconv descriptors.
 * It is statically initialized so that it is valid before any thread can
 * reach it: a run-time "first caller initializes" scheme guarded by a plain
 * int flag is itself a data race when two threads enter at the same time.
 */
static pthread_mutex_t o_unac_mutex = PTHREAD_MUTEX_INITIALIZER;

/*
 * Kept for the code that tests it before locking. It starts at 1 so the
 * lazy-initialization branch in convert() is never taken and the statically
 * initialized mutex is never re-initialized.
 */
static int unac_mutex_is_init = 1;

/*
 * To be called before starting threads in multithreaded programs.
 *
 * Retained for API compatibility only: the mutex is statically initialized,
 * so there is nothing left to set up. Calling it any number of times, from
 * any thread, is harmless (re-running pthread_mutex_init() on an already
 * initialized mutex would not be).
 */
void unac_init_mt()
{
    unac_mutex_is_init = 1;
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Convert buffer <in> containing string encoded in charset <from> into
|
* Convert buffer <in> containing string encoded in charset <from> into
|
||||||
@ -10576,6 +10583,7 @@ static int convert(const char* from, const char* to,
|
|||||||
const char* in, size_t in_length,
|
const char* in, size_t in_length,
|
||||||
char** outp, size_t* out_lengthp)
|
char** outp, size_t* out_lengthp)
|
||||||
{
|
{
|
||||||
|
int ret = -1;
|
||||||
iconv_t cd;
|
iconv_t cd;
|
||||||
char* out;
|
char* out;
|
||||||
size_t out_remain;
|
size_t out_remain;
|
||||||
@ -10584,6 +10592,15 @@ static int convert(const char* from, const char* to,
|
|||||||
int from_utf16, from_utf8, to_utf16, to_utf8, u8tou16, u16tou8;
|
int from_utf16, from_utf8, to_utf16, to_utf8, u8tou16, u16tou8;
|
||||||
const char space[] = { 0x00, 0x20 };
|
const char space[] = { 0x00, 0x20 };
|
||||||
|
|
||||||
|
/* Note: better call explicit unac_init_mt() before starting threads than
|
||||||
|
rely on this.
|
||||||
|
*/
|
||||||
|
if (unac_mutex_is_init == 0) {
|
||||||
|
pthread_mutex_init(&o_unac_mutex, 0);
|
||||||
|
unac_mutex_is_init = 1;
|
||||||
|
}
|
||||||
|
pthread_mutex_lock(&o_unac_mutex);
|
||||||
|
|
||||||
if (!strcmp(utf16be, from)) {
|
if (!strcmp(utf16be, from)) {
|
||||||
from_utf8 = 0;
|
from_utf8 = 0;
|
||||||
from_utf16 = 1;
|
from_utf16 = 1;
|
||||||
@ -10614,7 +10631,7 @@ static int convert(const char* from, const char* to,
|
|||||||
/* *outp still valid, no freeing */
|
/* *outp still valid, no freeing */
|
||||||
if(debug_level >= UNAC_DEBUG_LOW)
|
if(debug_level >= UNAC_DEBUG_LOW)
|
||||||
DEBUG("realloc %d bytes failed\n", out_size+1);
|
DEBUG("realloc %d bytes failed\n", out_size+1);
|
||||||
return -1;
|
goto out;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
/* +1 for null */
|
/* +1 for null */
|
||||||
@ -10622,7 +10639,7 @@ static int convert(const char* from, const char* to,
|
|||||||
if(out == 0) {
|
if(out == 0) {
|
||||||
if(debug_level >= UNAC_DEBUG_LOW)
|
if(debug_level >= UNAC_DEBUG_LOW)
|
||||||
DEBUG("malloc %d bytes failed\n", out_size+1);
|
DEBUG("malloc %d bytes failed\n", out_size+1);
|
||||||
return -1;
|
goto out;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
out_remain = out_size;
|
out_remain = out_size;
|
||||||
@ -10631,7 +10648,7 @@ static int convert(const char* from, const char* to,
|
|||||||
if (u8tou16) {
|
if (u8tou16) {
|
||||||
if (u8tou16_cd == (iconv_t)-1) {
|
if (u8tou16_cd == (iconv_t)-1) {
|
||||||
if((u8tou16_cd = iconv_open(to, from)) == (iconv_t)-1) {
|
if((u8tou16_cd = iconv_open(to, from)) == (iconv_t)-1) {
|
||||||
return -1;
|
goto out;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
iconv(u8tou16_cd, 0, 0, 0, 0);
|
iconv(u8tou16_cd, 0, 0, 0, 0);
|
||||||
@ -10640,7 +10657,7 @@ static int convert(const char* from, const char* to,
|
|||||||
} else if (u16tou8) {
|
} else if (u16tou8) {
|
||||||
if (u16tou8_cd == (iconv_t)-1) {
|
if (u16tou8_cd == (iconv_t)-1) {
|
||||||
if((u16tou8_cd = iconv_open(to, from)) == (iconv_t)-1) {
|
if((u16tou8_cd = iconv_open(to, from)) == (iconv_t)-1) {
|
||||||
return -1;
|
goto out;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
iconv(u16tou8_cd, 0, 0, 0, 0);
|
iconv(u16tou8_cd, 0, 0, 0, 0);
|
||||||
@ -10648,7 +10665,7 @@ static int convert(const char* from, const char* to,
|
|||||||
cd = u16tou8_cd;
|
cd = u16tou8_cd;
|
||||||
} else {
|
} else {
|
||||||
if((cd = iconv_open(to, from)) == (iconv_t)-1) {
|
if((cd = iconv_open(to, from)) == (iconv_t)-1) {
|
||||||
return -1;
|
goto out;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -10682,7 +10699,7 @@ static int convert(const char* from, const char* to,
|
|||||||
if(errno == E2BIG)
|
if(errno == E2BIG)
|
||||||
/* fall thru to the E2BIG case below */;
|
/* fall thru to the E2BIG case below */;
|
||||||
else
|
else
|
||||||
return -1;
|
goto out;
|
||||||
} else {
|
} else {
|
||||||
/* The offending character was replaced by a SPACE, skip it. */
|
/* The offending character was replaced by a SPACE, skip it. */
|
||||||
in += 2;
|
in += 2;
|
||||||
@ -10691,7 +10708,7 @@ static int convert(const char* from, const char* to,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
return -1;
|
goto out;
|
||||||
}
|
}
|
||||||
case E2BIG:
|
case E2BIG:
|
||||||
{
|
{
|
||||||
@ -10711,7 +10728,7 @@ static int convert(const char* from, const char* to,
|
|||||||
DEBUG("realloc %d bytes failed\n", out_size+1);
|
DEBUG("realloc %d bytes failed\n", out_size+1);
|
||||||
free(saved);
|
free(saved);
|
||||||
*outp = 0;
|
*outp = 0;
|
||||||
return -1;
|
goto out;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
out = out_base + length;
|
out = out_base + length;
|
||||||
@ -10719,7 +10736,7 @@ static int convert(const char* from, const char* to,
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
return -1;
|
goto out;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -10732,7 +10749,10 @@ static int convert(const char* from, const char* to,
|
|||||||
*out_lengthp = out - out_base;
|
*out_lengthp = out - out_base;
|
||||||
(*outp)[*out_lengthp] = '\0';
|
(*outp)[*out_lengthp] = '\0';
|
||||||
|
|
||||||
return 0;
|
ret = 0;
|
||||||
|
out:
|
||||||
|
pthread_mutex_unlock(&o_unac_mutex);
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
int unacmaybefold_string(const char* charset,
|
int unacmaybefold_string(const char* charset,
|
||||||
|
|||||||
@ -113,6 +113,9 @@ int unacfold_string(const char* charset,
|
|||||||
const char* in, size_t in_length,
|
const char* in, size_t in_length,
|
||||||
char** out, size_t* out_length);
|
char** out, size_t* out_length);
|
||||||
|
|
||||||
|
/* To be called before starting threads in mt programs */
|
||||||
|
void unac_init_mt();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Return unac version number.
|
* Return unac version number.
|
||||||
*/
|
*/
|
||||||
|
|||||||
106
src/utils/ptmutex.cpp
Normal file
106
src/utils/ptmutex.cpp
Normal file
@ -0,0 +1,106 @@
|
|||||||
|
/* Copyright (C) 2004 J.F.Dockes
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc.,
|
||||||
|
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
//
|
||||||
|
// Small test program to evaluate the cost of using mutex locks: calls
|
||||||
|
// to methods doing a small (150 bytes) base64 encoding job + string
|
||||||
|
// manips, with and without locking. The performance cost is
|
||||||
|
// negligible on all machines I tested (around 0.3% to 2% depending on
|
||||||
|
// the system and machine), but not inexistent, you would not want
|
||||||
|
// this in a tight loop.
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
#include "ptmutex.h"
|
||||||
|
#include "base64.h"
|
||||||
|
|
||||||
|
static char *thisprog;
|
||||||
|
static char usage [] =
|
||||||
|
"ptmutex [-l] count\n"
|
||||||
|
"\n"
|
||||||
|
;
|
||||||
|
static void
|
||||||
|
Usage(void)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "%s: usage:\n%s", thisprog, usage);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int op_flags;
|
||||||
|
#define OPT_MOINS 0x1
|
||||||
|
#define OPT_l 0x2
|
||||||
|
|
||||||
|
static const string convertbuffer =
|
||||||
|
"* The recoll GUI program sometimes crashes when running a query while\
|
||||||
|
the indexing thread is active. Possible workarounds:";
|
||||||
|
|
||||||
|
static PTMutexInit o_lock;
|
||||||
|
void workerlock(string& out)
|
||||||
|
{
|
||||||
|
PTMutexLocker locker(o_lock);
|
||||||
|
base64_encode(convertbuffer, out);
|
||||||
|
}
|
||||||
|
|
||||||
|
void workernolock(string& out)
|
||||||
|
{
|
||||||
|
base64_encode(convertbuffer, out);
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
int count = 0;
|
||||||
|
thisprog = argv[0];
|
||||||
|
argc--; argv++;
|
||||||
|
|
||||||
|
while (argc > 0 && **argv == '-') {
|
||||||
|
(*argv)++;
|
||||||
|
if (!(**argv))
|
||||||
|
/* Cas du "adb - core" */
|
||||||
|
Usage();
|
||||||
|
while (**argv)
|
||||||
|
switch (*(*argv)++) {
|
||||||
|
case 'l': op_flags |= OPT_l; break;
|
||||||
|
default: Usage(); break;
|
||||||
|
}
|
||||||
|
b1: argc--; argv++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (argc != 1)
|
||||||
|
Usage();
|
||||||
|
count = atoi(*argv++);argc--;
|
||||||
|
|
||||||
|
if (op_flags & OPT_l) {
|
||||||
|
fprintf(stderr, "Looping %d, locking\n", count);
|
||||||
|
for (int i = 0; i < count; i++) {
|
||||||
|
string s;
|
||||||
|
workerlock(s);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
fprintf(stderr, "Looping %d, no locking\n", count);
|
||||||
|
for (int i = 0; i < count; i++) {
|
||||||
|
string s;
|
||||||
|
workernolock(s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
@ -405,7 +405,7 @@ list(int fd, const string& path, vector<string>* names, flags flags, nspace dom)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static const string nullstring("");
|
static const string cstr_nullstring("");
|
||||||
|
|
||||||
bool get(const string& path, const string& _name, string *value,
|
bool get(const string& path, const string& _name, string *value,
|
||||||
flags flags, nspace dom)
|
flags flags, nspace dom)
|
||||||
@ -414,7 +414,7 @@ bool get(const string& path, const string& _name, string *value,
|
|||||||
}
|
}
|
||||||
bool get(int fd, const string& _name, string *value, flags flags, nspace dom)
|
bool get(int fd, const string& _name, string *value, flags flags, nspace dom)
|
||||||
{
|
{
|
||||||
return get(fd, nullstring, _name, value, flags, dom);
|
return get(fd, cstr_nullstring, _name, value, flags, dom);
|
||||||
}
|
}
|
||||||
bool set(const string& path, const string& _name, const string& value,
|
bool set(const string& path, const string& _name, const string& value,
|
||||||
flags flags, nspace dom)
|
flags flags, nspace dom)
|
||||||
@ -424,7 +424,7 @@ bool set(const string& path, const string& _name, const string& value,
|
|||||||
bool set(int fd, const string& _name, const string& value,
|
bool set(int fd, const string& _name, const string& value,
|
||||||
flags flags, nspace dom)
|
flags flags, nspace dom)
|
||||||
{
|
{
|
||||||
return set(fd, nullstring, _name, value, flags, dom);
|
return set(fd, cstr_nullstring, _name, value, flags, dom);
|
||||||
}
|
}
|
||||||
bool del(const string& path, const string& _name, flags flags, nspace dom)
|
bool del(const string& path, const string& _name, flags flags, nspace dom)
|
||||||
{
|
{
|
||||||
@ -432,7 +432,7 @@ bool del(const string& path, const string& _name, flags flags, nspace dom)
|
|||||||
}
|
}
|
||||||
bool del(int fd, const string& _name, flags flags, nspace dom)
|
bool del(int fd, const string& _name, flags flags, nspace dom)
|
||||||
{
|
{
|
||||||
return del(fd, nullstring, _name, flags, dom);
|
return del(fd, cstr_nullstring, _name, flags, dom);
|
||||||
}
|
}
|
||||||
bool list(const string& path, vector<string>* names, flags flags, nspace dom)
|
bool list(const string& path, vector<string>* names, flags flags, nspace dom)
|
||||||
{
|
{
|
||||||
@ -440,17 +440,17 @@ bool list(const string& path, vector<string>* names, flags flags, nspace dom)
|
|||||||
}
|
}
|
||||||
bool list(int fd, vector<string>* names, flags flags, nspace dom)
|
bool list(int fd, vector<string>* names, flags flags, nspace dom)
|
||||||
{
|
{
|
||||||
return list(fd, nullstring, names, flags, dom);
|
return list(fd, cstr_nullstring, names, flags, dom);
|
||||||
}
|
}
|
||||||
|
|
||||||
static const string userstring("user.");
|
static const string cstr_userstring("user.");
|
||||||
bool sysname(nspace dom, const string& pname, string* sname)
|
bool sysname(nspace dom, const string& pname, string* sname)
|
||||||
{
|
{
|
||||||
if (dom != PXATTR_USER) {
|
if (dom != PXATTR_USER) {
|
||||||
errno = EINVAL;
|
errno = EINVAL;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
*sname = userstring + pname;
|
*sname = cstr_userstring + pname;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -460,7 +460,7 @@ bool pxname(nspace dom, const string& sname, string* pname)
|
|||||||
errno = EINVAL;
|
errno = EINVAL;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
*pname = sname.substr(userstring.length());
|
*pname = sname.substr(cstr_userstring.length());
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -428,7 +428,7 @@ void neutchars(const string &str, string &out, const string& chars)
|
|||||||
* if reasonably possible. Note: we could also use textsplit, stopping when
|
* if reasonably possible. Note: we could also use textsplit, stopping when
|
||||||
* we have enough, this would be cleanly utf8-aware but would remove
|
* we have enough, this would be cleanly utf8-aware but would remove
|
||||||
* punctuation */
|
* punctuation */
|
||||||
static const string SEPAR = " \t\n\r-:.;,/[]{}";
|
static const string cstr_SEPAR = " \t\n\r-:.;,/[]{}";
|
||||||
string truncate_to_word(const string &input, string::size_type maxlen)
|
string truncate_to_word(const string &input, string::size_type maxlen)
|
||||||
{
|
{
|
||||||
string output;
|
string output;
|
||||||
@ -436,7 +436,7 @@ string truncate_to_word(const string &input, string::size_type maxlen)
|
|||||||
output = input;
|
output = input;
|
||||||
} else {
|
} else {
|
||||||
output = input.substr(0, maxlen);
|
output = input.substr(0, maxlen);
|
||||||
string::size_type space = output.find_last_of(SEPAR);
|
string::size_type space = output.find_last_of(cstr_SEPAR);
|
||||||
// Original version only truncated at space if space was found after
|
// Original version only truncated at space if space was found after
|
||||||
// maxlen/2. But we HAVE to truncate at space, else we'd need to do
|
// maxlen/2. But we HAVE to truncate at space, else we'd need to do
|
||||||
// utf8 stuff to avoid truncating at multibyte char. In any case,
|
// utf8 stuff to avoid truncating at multibyte char. In any case,
|
||||||
@ -676,6 +676,9 @@ static void gettime(int, struct m_timespec *ts)
|
|||||||
}
|
}
|
||||||
///// End system interface
|
///// End system interface
|
||||||
|
|
||||||
|
// Note: this is not protected against multithread access and not reentrant, but
|
||||||
|
// this is mostly debug code, and it won't crash, just show bad results. Also
|
||||||
|
// the frozen thing is not used that much
|
||||||
static m_timespec frozen_tv;
|
static m_timespec frozen_tv;
|
||||||
void Chrono::refnow()
|
void Chrono::refnow()
|
||||||
{
|
{
|
||||||
|
|||||||
@ -29,15 +29,21 @@ using std::string;
|
|||||||
|
|
||||||
#include "transcode.h"
|
#include "transcode.h"
|
||||||
#include "debuglog.h"
|
#include "debuglog.h"
|
||||||
|
#include "ptmutex.h"
|
||||||
#ifdef RCL_ICONV_INBUF_CONST
|
#ifdef RCL_ICONV_INBUF_CONST
|
||||||
#define ICV_P2_TYPE const char**
|
#define ICV_P2_TYPE const char**
|
||||||
#else
|
#else
|
||||||
#define ICV_P2_TYPE char**
|
#define ICV_P2_TYPE char**
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// We gain approximately 28% exec time for word at a time conversions by
|
// We gain approximately 25% exec time for word at a time conversions by
|
||||||
// caching the iconv_open thing.
|
// caching the iconv_open thing.
|
||||||
|
//
|
||||||
|
// We may also lose some concurrency on multiproc because of the
|
||||||
|
// necessary locking, but we only have one processing-intensive
|
||||||
|
// possible thread for now (the indexing one), so this is probably not
|
||||||
|
// an issue (and could be worked around with a slightly more
|
||||||
|
// sophisticated approach).
|
||||||
#define ICONV_CACHE_OPEN
|
#define ICONV_CACHE_OPEN
|
||||||
|
|
||||||
bool transcode(const string &in, string &out, const string &icode,
|
bool transcode(const string &in, string &out, const string &icode,
|
||||||
@ -48,6 +54,8 @@ bool transcode(const string &in, string &out, const string &icode,
|
|||||||
static iconv_t ic = (iconv_t)-1;
|
static iconv_t ic = (iconv_t)-1;
|
||||||
static string cachedicode;
|
static string cachedicode;
|
||||||
static string cachedocode;
|
static string cachedocode;
|
||||||
|
static PTMutexInit o_cachediconv_mutex;
|
||||||
|
PTMutexLocker locker(o_cachediconv_mutex);
|
||||||
#else
|
#else
|
||||||
iconv_t ic;
|
iconv_t ic;
|
||||||
#endif
|
#endif
|
||||||
@ -163,13 +171,14 @@ using namespace std;
|
|||||||
|
|
||||||
// Repeatedly transcode a small string for timing measurements
|
// Repeatedly transcode a small string for timing measurements
|
||||||
static const string testword("\xc3\xa9\x6c\x69\x6d\x69\x6e\xc3\xa9\xc3\xa0");
|
static const string testword("\xc3\xa9\x6c\x69\x6d\x69\x6e\xc3\xa9\xc3\xa0");
|
||||||
// Without cache 10e6 reps on macpro -> 1.88 S
|
// Without cache 10e6 reps on y -> 6.68
|
||||||
// With cache -> 1.56
|
// With cache -> 4.73
|
||||||
|
// With cache and lock -> 4.9
|
||||||
void looptest()
|
void looptest()
|
||||||
{
|
{
|
||||||
cout << testword << endl;
|
cout << testword << endl;
|
||||||
string out;
|
string out;
|
||||||
for (int i = 0; i < 1000*1000; i++) {
|
for (int i = 0; i < 10*1000*1000; i++) {
|
||||||
if (!transcode(testword, out, "UTF-8", "UTF-16BE")) {
|
if (!transcode(testword, out, "UTF-8", "UTF-16BE")) {
|
||||||
cerr << "Transcode failed" << endl;
|
cerr << "Transcode failed" << endl;
|
||||||
break;
|
break;
|
||||||
@ -184,7 +193,7 @@ int main(int argc, char **argv)
|
|||||||
exit(0);
|
exit(0);
|
||||||
#endif
|
#endif
|
||||||
if (argc != 5) {
|
if (argc != 5) {
|
||||||
cerr << "Usage: trcsguess ifilename icode ofilename ocode" << endl;
|
cerr << "Usage: transcode ifilename icode ofilename ocode" << endl;
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
const string ifilename = argv[1];
|
const string ifilename = argv[1];
|
||||||
|
|||||||
44
unac/unac.c
44
unac/unac.c
@ -36,6 +36,7 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdarg.h>
|
#include <stdarg.h>
|
||||||
#endif /* HAVE_VSNPRINTF */
|
#endif /* HAVE_VSNPRINTF */
|
||||||
|
#include <pthread.h>
|
||||||
|
|
||||||
#include "unac.h"
|
#include "unac.h"
|
||||||
#include "unac_version.h"
|
#include "unac_version.h"
|
||||||
@ -10555,8 +10556,6 @@ int unacfold_string_utf16(const char* in, size_t in_length,
|
|||||||
outp, out_lengthp, 1);
|
outp, out_lengthp, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define MAXOUT 1024
|
|
||||||
|
|
||||||
static int convert(const char* from, const char* to,
|
static int convert(const char* from, const char* to,
|
||||||
const char* in, size_t in_length,
|
const char* in, size_t in_length,
|
||||||
char** outp, size_t* out_lengthp);
|
char** outp, size_t* out_lengthp);
|
||||||
@ -10564,6 +10563,14 @@ static int convert(const char* from, const char* to,
|
|||||||
static const char *utf16be = "UTF-16BE";
|
static const char *utf16be = "UTF-16BE";
|
||||||
static iconv_t u8tou16_cd = (iconv_t)-1;
|
static iconv_t u8tou16_cd = (iconv_t)-1;
|
||||||
static iconv_t u16tou8_cd = (iconv_t)-1;
|
static iconv_t u16tou8_cd = (iconv_t)-1;
|
||||||
|
/*
 * Mutex taken by convert() around its use of the cached iconv descriptors.
 * It is statically initialized so that it is valid before any thread can
 * reach it: a run-time "first caller initializes" scheme guarded by a plain
 * int flag is itself a data race when two threads enter at the same time.
 */
static pthread_mutex_t o_unac_mutex = PTHREAD_MUTEX_INITIALIZER;

/*
 * Kept for the code that tests it before locking. It starts at 1 so the
 * lazy-initialization branch in convert() is never taken and the statically
 * initialized mutex is never re-initialized.
 */
static int unac_mutex_is_init = 1;

/*
 * To be called before starting threads in multithreaded programs.
 *
 * Retained for API compatibility only: the mutex is statically initialized,
 * so there is nothing left to set up. Calling it any number of times, from
 * any thread, is harmless (re-running pthread_mutex_init() on an already
 * initialized mutex would not be).
 */
void unac_init_mt()
{
    unac_mutex_is_init = 1;
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Convert buffer <in> containing string encoded in charset <from> into
|
* Convert buffer <in> containing string encoded in charset <from> into
|
||||||
@ -10576,6 +10583,7 @@ static int convert(const char* from, const char* to,
|
|||||||
const char* in, size_t in_length,
|
const char* in, size_t in_length,
|
||||||
char** outp, size_t* out_lengthp)
|
char** outp, size_t* out_lengthp)
|
||||||
{
|
{
|
||||||
|
int ret = -1;
|
||||||
iconv_t cd;
|
iconv_t cd;
|
||||||
char* out;
|
char* out;
|
||||||
size_t out_remain;
|
size_t out_remain;
|
||||||
@ -10584,6 +10592,15 @@ static int convert(const char* from, const char* to,
|
|||||||
int from_utf16, from_utf8, to_utf16, to_utf8, u8tou16, u16tou8;
|
int from_utf16, from_utf8, to_utf16, to_utf8, u8tou16, u16tou8;
|
||||||
const char space[] = { 0x00, 0x20 };
|
const char space[] = { 0x00, 0x20 };
|
||||||
|
|
||||||
|
/* Note: it is better to call unac_init_mt() explicitly before starting threads than
|
||||||
|
to rely on this lazy initialization.
|
||||||
|
*/
|
||||||
|
if (unac_mutex_is_init == 0) {
|
||||||
|
pthread_mutex_init(&o_unac_mutex, 0);
|
||||||
|
unac_mutex_is_init = 1;
|
||||||
|
}
|
||||||
|
pthread_mutex_lock(&o_unac_mutex);
|
||||||
|
|
||||||
if (!strcmp(utf16be, from)) {
|
if (!strcmp(utf16be, from)) {
|
||||||
from_utf8 = 0;
|
from_utf8 = 0;
|
||||||
from_utf16 = 1;
|
from_utf16 = 1;
|
||||||
@ -10614,7 +10631,7 @@ static int convert(const char* from, const char* to,
|
|||||||
/* *outp still valid, no freeing */
|
/* *outp still valid, no freeing */
|
||||||
if(debug_level >= UNAC_DEBUG_LOW)
|
if(debug_level >= UNAC_DEBUG_LOW)
|
||||||
DEBUG("realloc %d bytes failed\n", out_size+1);
|
DEBUG("realloc %d bytes failed\n", out_size+1);
|
||||||
return -1;
|
goto out;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
/* +1 for null */
|
/* +1 for null */
|
||||||
@ -10622,7 +10639,7 @@ static int convert(const char* from, const char* to,
|
|||||||
if(out == 0) {
|
if(out == 0) {
|
||||||
if(debug_level >= UNAC_DEBUG_LOW)
|
if(debug_level >= UNAC_DEBUG_LOW)
|
||||||
DEBUG("malloc %d bytes failed\n", out_size+1);
|
DEBUG("malloc %d bytes failed\n", out_size+1);
|
||||||
return -1;
|
goto out;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
out_remain = out_size;
|
out_remain = out_size;
|
||||||
@ -10631,7 +10648,7 @@ static int convert(const char* from, const char* to,
|
|||||||
if (u8tou16) {
|
if (u8tou16) {
|
||||||
if (u8tou16_cd == (iconv_t)-1) {
|
if (u8tou16_cd == (iconv_t)-1) {
|
||||||
if((u8tou16_cd = iconv_open(to, from)) == (iconv_t)-1) {
|
if((u8tou16_cd = iconv_open(to, from)) == (iconv_t)-1) {
|
||||||
return -1;
|
goto out;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
iconv(u8tou16_cd, 0, 0, 0, 0);
|
iconv(u8tou16_cd, 0, 0, 0, 0);
|
||||||
@ -10640,7 +10657,7 @@ static int convert(const char* from, const char* to,
|
|||||||
} else if (u16tou8) {
|
} else if (u16tou8) {
|
||||||
if (u16tou8_cd == (iconv_t)-1) {
|
if (u16tou8_cd == (iconv_t)-1) {
|
||||||
if((u16tou8_cd = iconv_open(to, from)) == (iconv_t)-1) {
|
if((u16tou8_cd = iconv_open(to, from)) == (iconv_t)-1) {
|
||||||
return -1;
|
goto out;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
iconv(u16tou8_cd, 0, 0, 0, 0);
|
iconv(u16tou8_cd, 0, 0, 0, 0);
|
||||||
@ -10648,7 +10665,7 @@ static int convert(const char* from, const char* to,
|
|||||||
cd = u16tou8_cd;
|
cd = u16tou8_cd;
|
||||||
} else {
|
} else {
|
||||||
if((cd = iconv_open(to, from)) == (iconv_t)-1) {
|
if((cd = iconv_open(to, from)) == (iconv_t)-1) {
|
||||||
return -1;
|
goto out;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -10682,7 +10699,7 @@ static int convert(const char* from, const char* to,
|
|||||||
if(errno == E2BIG)
|
if(errno == E2BIG)
|
||||||
/* fall thru to the E2BIG case below */;
|
/* fall thru to the E2BIG case below */;
|
||||||
else
|
else
|
||||||
return -1;
|
goto out;
|
||||||
} else {
|
} else {
|
||||||
/* The offending character was replaced by a SPACE, skip it. */
|
/* The offending character was replaced by a SPACE, skip it. */
|
||||||
in += 2;
|
in += 2;
|
||||||
@ -10691,7 +10708,7 @@ static int convert(const char* from, const char* to,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
return -1;
|
goto out;
|
||||||
}
|
}
|
||||||
case E2BIG:
|
case E2BIG:
|
||||||
{
|
{
|
||||||
@ -10711,7 +10728,7 @@ static int convert(const char* from, const char* to,
|
|||||||
DEBUG("realloc %d bytes failed\n", out_size+1);
|
DEBUG("realloc %d bytes failed\n", out_size+1);
|
||||||
free(saved);
|
free(saved);
|
||||||
*outp = 0;
|
*outp = 0;
|
||||||
return -1;
|
goto out;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
out = out_base + length;
|
out = out_base + length;
|
||||||
@ -10719,7 +10736,7 @@ static int convert(const char* from, const char* to,
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
return -1;
|
goto out;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -10732,7 +10749,10 @@ static int convert(const char* from, const char* to,
|
|||||||
*out_lengthp = out - out_base;
|
*out_lengthp = out - out_base;
|
||||||
(*outp)[*out_lengthp] = '\0';
|
(*outp)[*out_lengthp] = '\0';
|
||||||
|
|
||||||
return 0;
|
ret = 0;
|
||||||
|
out:
|
||||||
|
pthread_mutex_unlock(&o_unac_mutex);
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
int unacmaybefold_string(const char* charset,
|
int unacmaybefold_string(const char* charset,
|
||||||
|
|||||||
@ -113,6 +113,9 @@ int unacfold_string(const char* charset,
|
|||||||
const char* in, size_t in_length,
|
const char* in, size_t in_length,
|
||||||
char** out, size_t* out_length);
|
char** out, size_t* out_length);
|
||||||
|
|
||||||
|
/* To be called before starting threads in mt programs */
|
||||||
|
void unac_init_mt();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Return unac version number.
|
* Return unac version number.
|
||||||
*/
|
*/
|
||||||
|
|||||||
@ -38,7 +38,19 @@
|
|||||||
<h2><a name="b_latest">recoll 1.16</a></h2>
|
<h2><a name="b_latest">recoll 1.16</a></h2>
|
||||||
|
|
||||||
<ul>
|
<ul>
|
||||||
|
|
||||||
|
<li>The <tt>recoll</tt> GUI program sometimes crashes when
|
||||||
|
running a query while the indexing thread is active.
|
||||||
|
Possible workarounds:<br>
|
||||||
|
<ul>
|
||||||
|
<li>(Recommended) Use the command
|
||||||
|
line <tt>recollindex</tt> program to perform indexing
|
||||||
|
(usually just type "recollindex" in a console, or see "man
|
||||||
|
recollindex").</li>
|
||||||
|
<li>Do not run queries in <tt>recoll</tt> while the indexing thread
|
||||||
|
is running (as indicated in the bottom status line).</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
<li>Cancelling a preview in the GUI will also cancel the indexing
|
<li>Cancelling a preview in the GUI will also cancel the indexing
|
||||||
thread if it is running.</li>
|
thread if it is running.</li>
|
||||||
|
|
||||||
|
|||||||
@ -53,6 +53,27 @@
|
|||||||
<p>The current version is 1.16.0. <a href="release-1.16.html">
|
<p>The current version is 1.16.0. <a href="release-1.16.html">
|
||||||
Release notes</a>.</p>
|
Release notes</a>.</p>
|
||||||
|
|
||||||
|
<div class="important">
|
||||||
|
<p>Notice for 1.16.0: the
|
||||||
|
<tt>recoll</tt> GUI program sometimes crashes when running a query
|
||||||
|
while the indexing thread is active. I can reproduce the problem
|
||||||
|
and I am working on a correction. Meanwhile, there are two possible
|
||||||
|
workarounds:<br>
|
||||||
|
<ul>
|
||||||
|
<li>(Recommended) Use the command
|
||||||
|
line <tt>recollindex</tt> program to perform indexing
|
||||||
|
(usually just type "recollindex" in a console, or see "man
|
||||||
|
recollindex").</li>
|
||||||
|
<li>Do not run queries in <tt>recoll</tt> while the indexing thread
|
||||||
|
is running (as indicated in the bottom status line).</li>
|
||||||
|
</ul>
|
||||||
|
If the workaround fails or you experience other kinds of
|
||||||
|
crashes with either <tt>recoll</tt> or <tt>recollindex</tt>, and
|
||||||
|
want to help, please follow the instructions on
|
||||||
|
<a href="https://bitbucket.org/medoc/recoll/wiki/GettingAStackTrace">
|
||||||
|
this wiki page</a>.</p>
|
||||||
|
</div>
|
||||||
|
|
||||||
<p>The download page for Recoll 1.15 is
|
<p>The download page for Recoll 1.15 is
|
||||||
<a href="download-1.15.html">still available</a>.</p>
|
<a href="download-1.15.html">still available</a>.</p>
|
||||||
|
|
||||||
@ -204,21 +225,26 @@
|
|||||||
Xapian</a>,
|
Xapian</a>,
|
||||||
<a href="https://launchpad.net/~recoll-backports/+archive/recoll-1.15-on">
|
<a href="https://launchpad.net/~recoll-backports/+archive/recoll-1.15-on">
|
||||||
Recoll and kio-recoll</a>. These were built from the latest versions,
|
Recoll and kio-recoll</a>. These were built from the latest versions,
|
||||||
for a set of Ubuntu series. You just need to add the
|
for a set of Ubuntu series.</p>
|
||||||
PPAs to your system software sources (the instructions are on
|
|
||||||
|
<p>Ubuntu 10.04 (lucid) and later versions just need the Recoll
|
||||||
|
PPA. Older versions also needed a backport for Xapian
|
||||||
|
(xapian-backports/xapian-1.2).</p>
|
||||||
|
|
||||||
|
<p>Just add the
|
||||||
|
PPA to your system software sources (the instructions are on
|
||||||
the PPA page or
|
the PPA page or
|
||||||
<a href="https://help.launchpad.net/Packaging/PPA/InstallingSoftware">
|
<a href="https://help.launchpad.net/Packaging/PPA/InstallingSoftware">
|
||||||
here</a>), and you can then use the normal package
|
here</a>), and you can then use the normal package
|
||||||
manager to install or update Recoll and Xapian. For Ubuntu versions
|
manager to install or update Recoll. For Ubuntu versions
|
||||||
from 9.10 (Karmic), only two commands are needed:
|
after 9.10 (Karmic), only one command is needed:
|
||||||
<pre><tt>
|
<pre><tt>
|
||||||
sudo add-apt-repository ppa:xapian-backports/xapian-1.2
|
|
||||||
sudo add-apt-repository ppa:recoll-backports/recoll-1.15-on
|
sudo add-apt-repository ppa:recoll-backports/recoll-1.15-on
|
||||||
</tt></pre>
|
</tt></pre>
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
<p>For Ubuntu 9.04 (Jaunty) and older, to avoid
|
<p>For Ubuntu 9.04 (Jaunty) and older,
|
||||||
messages about signature errors, you may have to explicitly import the
|
you may have to explicitly import the
|
||||||
Recoll and Xapian public keys: <pre><tt>
|
Recoll and Xapian public keys: <pre><tt>
|
||||||
gpg --keyserver keyserver.ubuntu.com --recv 9DA85604
|
gpg --keyserver keyserver.ubuntu.com --recv 9DA85604
|
||||||
gpg --export --armor 9DA85604 | sudo apt-key add -
|
gpg --export --armor 9DA85604 | sudo apt-key add -
|
||||||
@ -390,7 +416,11 @@ I now use the OpenSUSE build service to create Recoll OpenSUSE packages.
|
|||||||
<h3>Updated 1.16 translations that became available after the
|
<h3>Updated 1.16 translations that became available after the
|
||||||
release:</h3>
|
release:</h3>
|
||||||
|
|
||||||
<p>None for now :(</p>
|
<p>Czech, thanks to Pavel !
|
||||||
|
<a href="translations/recoll_cs.ts">recoll_cs.ts</a>
|
||||||
|
<a href="translations/recoll_cs.qm">recoll_cs.qm</a>
|
||||||
|
</p>
|
||||||
|
|
||||||
<!--
|
<!--
|
||||||
<p>Lithuanian.
|
<p>Lithuanian.
|
||||||
<a href="translations/recoll_lt.ts">recoll_lt.ts</a>
|
<a href="translations/recoll_lt.ts">recoll_lt.ts</a>
|
||||||
|
|||||||
@ -90,6 +90,30 @@
|
|||||||
|
|
||||||
<h2>News: </h2>
|
<h2>News: </h2>
|
||||||
<ul>
|
<ul>
|
||||||
|
|
||||||
|
<li>
|
||||||
|
<div class="important">
|
||||||
|
<p>Notice for 1.16.0: the
|
||||||
|
<tt>recoll</tt> GUI program sometimes crashes when running a query
|
||||||
|
while the indexing thread is active. I can reproduce the problem
|
||||||
|
and I am working on a correction. Meanwhile, there are two possible
|
||||||
|
workarounds:<br>
|
||||||
|
<ul>
|
||||||
|
<li>(Recommended) Use the command
|
||||||
|
line <tt>recollindex</tt> program to perform indexing
|
||||||
|
(usually just type "recollindex" in a console, or see "man
|
||||||
|
recollindex").</li>
|
||||||
|
<li>Do not run queries in <tt>recoll</tt> while the indexing thread
|
||||||
|
is running (as indicated in the bottom status line).</li>
|
||||||
|
</ul>
|
||||||
|
If the workaround fails or you experience other kinds of
|
||||||
|
crashes with either <tt>recoll</tt> or <tt>recollindex</tt>, and
|
||||||
|
want to help, please follow the instructions on
|
||||||
|
<a href="https://bitbucket.org/medoc/recoll/wiki/GettingAStackTrace">
|
||||||
|
this wiki page</a>.</p>
|
||||||
|
</div>
|
||||||
|
</li>
|
||||||
|
|
||||||
<li>2011-09-03: release <a href="download.html#source">1.16.0</a>
|
<li>2011-09-03: release <a href="download.html#source">1.16.0</a>
|
||||||
is out with many <a href="release-1.16.html">small improvements</a>
|
is out with many <a href="release-1.16.html">small improvements</a>
|
||||||
over 1.15. </li>
|
over 1.15. </li>
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user