diff --git a/src/Makefile.am b/src/Makefile.am index 9a502098..9bf33658 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -576,9 +576,13 @@ recollpython-install: done; \ ) recollpython-clean: - rm -rf python/recoll/build - rm -rf python/recoll/__pycache__ rm -f python/recoll/*.pyc + rm -rf python/pychm/build + rm -rf python/pychm/recollchm.egg-info + rm -rf python/pychm/setup.py + rm -rf python/recoll/Recoll.egg-info + rm -rf python/recoll/__pycache__ + rm -rf python/recoll/build endif if MAKEPYTHONCHM diff --git a/src/internfile/mh_mbox.cpp b/src/internfile/mh_mbox.cpp index 9f4700d4..68c4d172 100644 --- a/src/internfile/mh_mbox.cpp +++ b/src/internfile/mh_mbox.cpp @@ -23,16 +23,6 @@ #include "safesysstat.h" #include -#if defined(_WIN32) -#define USING_STD_REGEX -#endif - -#ifdef USING_STD_REGEX -#include -#else -#include -#endif - #include #include #include @@ -363,7 +353,7 @@ static inline void stripendnl(line_type& line, int& ll) // This was added as an alternative format. By the way it also fools "mail" and // emacs-vm, Recoll is not alone // Update: 2009-11-27: word after From may be quoted string: From "john bull" -static const char *frompat = +static const string frompat{ "^From[ ]+([^ ]+|\"[^\"]+\")[ ]+" // 'From (toto@tutu|"john bull") ' "[[:alpha:]]{3}[ ]+[[:alpha:]]{3}[ ]+[0-3 ][0-9][ ]+" // Fri Oct 26 "[0-2][0-9]:[0-5][0-9](:[0-5][0-9])?[ ]+" // Time, seconds optional @@ -374,45 +364,15 @@ static const char *frompat = "[[:alpha:]]{3},[ ]+[0-3]?[0-9][ ]+[[:alpha:]]{3}[ ]+" // Mon, 8 May "[12][0-9][0-9][0-9][ ]+" // Year "[0-2][0-9]:[0-5][0-9](:[0-5][0-9])?" // Time, secs optional - ; + }; // Extreme thunderbird brokiness. Will sometimes use From lines // exactly like: From ^M (From followed by space and eol). We only // test for this if QUIRKS_TBIRD is set -static const char *miniTbirdFrom = "^From $"; -#ifndef USING_STD_REGEX -static regex_t fromregex; -static regex_t minifromregex; -#define M_regexec(A,B,C,D,E) regexec(&(A),B,C,D,E) -#else -basic_regex fromregex; -basic_regex minifromregex; -#define REG_NOSUB std::regex_constants::nosubs -#define REG_EXTENDED std::regex_constants::extended -#define M_regexec(A, B, C, D, E) (!regex_match(B,A)) +static const string miniTbirdFrom{"^From $"}; -#endif - -static bool regcompiled; -static std::mutex o_regex_mutex; - -static void compileregexes() -{ - std::unique_lock locker(o_regex_mutex); - // As the initial test of regcompiled is unprotected the value may - // have changed while we were waiting for the lock. Test again now - // that we are alone. - if (regcompiled) - return; -#ifndef USING_STD_REGEX - regcomp(&fromregex, frompat, REG_NOSUB|REG_EXTENDED); - regcomp(&minifromregex, miniTbirdFrom, REG_NOSUB|REG_EXTENDED); -#else - fromregex = basic_regex(frompat, REG_NOSUB | REG_EXTENDED); - minifromregex = basic_regex(miniTbirdFrom, REG_NOSUB | REG_EXTENDED); -#endif - regcompiled = true; -} +static SimpleRegexp fromregex(frompat, SimpleRegexp::SRE_NOSUB); +static SimpleRegexp minifromregex(miniTbirdFrom, SimpleRegexp::SRE_NOSUB); bool MimeHandlerMbox::next_document() { @@ -432,13 +392,11 @@ bool MimeHandlerMbox::next_document() LOGDEB("MimeHandlerMbox::next_document: can't preview folders!\n"); return false; } - LOGDEB0("MimeHandlerMbox::next_document: fn " << (m_fn) << ", msgnum " << (m_msgnum) << " mtarg " << (mtarg) << " \n"); + LOGDEB0("MimeHandlerMbox::next_document: fn " << m_fn << ", msgnum " << + m_msgnum << " mtarg " << mtarg << " \n"); if (mtarg == 0) mtarg = -1; - if (!regcompiled) { - compileregexes(); - } // If we are called to retrieve a specific message, seek to bof // (then scan up to the message). This is for the case where the @@ -452,14 +410,14 @@ bool MimeHandlerMbox::next_document() if (mtarg > 0) { mbhoff_type off; line_type line; - LOGDEB0("MimeHandlerMbox::next_doc: mtarg " << (mtarg) << " m_udi[" << (m_udi) << "]\n"); + LOGDEB0("MimeHandlerMbox::next_doc: mtarg " << mtarg << " m_udi[" << + m_udi << "]\n"); if (!m_udi.empty() && (off = o_mcache.get_offset(m_config, m_udi, mtarg)) >= 0 && fseeko(fp, (off_t)off, SEEK_SET) >= 0 && fgets(line, LL, fp) && - (!M_regexec(fromregex, line, 0, 0, 0) || - ((m_quirks & MBOXQUIRK_TBIRD) && - !M_regexec(minifromregex, line, 0, 0, 0))) ) { + (fromregex(line) || ((m_quirks & MBOXQUIRK_TBIRD) && + minifromregex(line))) ) { LOGDEB0("MimeHandlerMbox: Cache: From_ Ok\n"); fseeko(fp, (off_t)off, SEEK_SET); m_msgnum = mtarg -1; @@ -487,7 +445,8 @@ bool MimeHandlerMbox::next_document() m_lineno++; int ll; stripendnl(line, ll); - LOGDEB2("mhmbox:next: hadempty " << (hademptyline) << " lineno " << (m_lineno) << " ll " << (ll) << " Line: [" << (line) << "]\n"); + LOGDEB2("mhmbox:next: hadempty " << hademptyline << " lineno " << + m_lineno << " ll " << ll << " Line: [" << line << "]\n"); if (hademptyline) { if (ll > 0) { // Non-empty line with empty line flag set, reset flag @@ -501,11 +460,12 @@ bool MimeHandlerMbox::next_document() /* The 'F' compare is redundant but it improves performance A LOT */ if (line[0] == 'F' && ( - !M_regexec(fromregex, line, 0, 0, 0) || - ((m_quirks & MBOXQUIRK_TBIRD) && - !M_regexec(minifromregex, line, 0, 0, 0))) + fromregex(line) || + ((m_quirks & MBOXQUIRK_TBIRD) && minifromregex(line))) ) { - LOGDEB0("MimeHandlerMbox: msgnum " << (m_msgnum) << ", From_ at line " << (m_lineno) << ": [" << (line) << "]\n"); + LOGDEB0("MimeHandlerMbox: msgnum " << m_msgnum << + ", From_ at line " << m_lineno << ": [" << line + << "]\n"); if (storeoffsets) m_offsets.push_back(message_end); m_msgnum++; @@ -528,13 +488,15 @@ bool MimeHandlerMbox::next_document() line[ll+1] = 0; msgtxt += line; if (msgtxt.size() > max_mbox_member_size) { - LOGERR("mh_mbox: huge message (more than " << (max_mbox_member_size/(1024*1024)) << " MB) inside " << (m_fn) << ", giving up\n"); + LOGERR("mh_mbox: huge message (more than " << + max_mbox_member_size/(1024*1024) << " MB) inside " << + m_fn << ", giving up\n"); return false; } } } - LOGDEB2("Message text length " << (msgtxt.size()) << "\n"); - LOGDEB2("Message text: [" << (msgtxt) << "]\n"); + LOGDEB2("Message text length " << msgtxt.size() << "\n"); + LOGDEB2("Message text: [" << msgtxt << "]\n"); char buf[20]; // m_msgnum was incremented when hitting the next From_ or eof, so the data // is for m_msgnum - 1 diff --git a/src/qtgui/webcache.cpp b/src/qtgui/webcache.cpp index 5cf09e13..e63cca2b 100644 --- a/src/qtgui/webcache.cpp +++ b/src/qtgui/webcache.cpp @@ -21,17 +21,6 @@ #include #include -#ifdef _WIN32 -#define USING_STD_REGEX -#endif - -#ifndef USING_STD_REGEX -#include -#include -#else -#include -#endif - #include #include #include @@ -47,6 +36,7 @@ #include "circache.h" #include "conftree.h" #include "rclmain_w.h" +#include "smallut.h" using namespace std; @@ -158,21 +148,8 @@ QVariant WebcacheModel::data(const QModelIndex& index, int role) const return QVariant(); } - /* We now read the data on init */ -#if 0 - string sdic; - if (!m->cache->cc()->get(m->disp[row].udi, sdic)) { - return QVariant(); - } - ConfSimple dic(sdic); - //ostringstream os; dic.write(os); cerr << "DIC: " << os.str() << endl; - string mime, url; - dic.get("mimetype", mime); - dic.get("url", url); -#else const string& mime = m->disp[row].mimetype; const string& url = m->disp[row].url; -#endif switch (index.column()) { case 0: return QVariant(QString::fromUtf8(mime.c_str())); @@ -181,35 +158,13 @@ QVariant WebcacheModel::data(const QModelIndex& index, int role) const } } -#ifndef USING_STD_REGEX -#define M_regexec(A,B,C,D,E) regexec(&(A),B,C,D,E) -#else -#define M_regexec(A,B,C,D,E) (!regex_match(B,A)) -#endif - void WebcacheModel::setSearchFilter(const QString& _txt) { - string txt = qs2utf8s(_txt); - -#ifndef USING_STD_REGEX - regex_t exp; - if (regcomp(&exp, txt.c_str(), REG_NOSUB|REG_EXTENDED)) { - //qDebug() << "regcomp failed for " << _txt; - return; - } -#else - basic_regex exp; - try { - exp = basic_regex(txt, std::regex_constants::nosubs | - std::regex_constants::extended); - } catch(...) { - return; - } -#endif + SimpleRegexp re(qs2utf8s(_txt), SimpleRegexp::SRE_NOSUB); m->disp.clear(); for (unsigned int i = 0; i < m->all.size(); i++) { - if (!M_regexec(exp, m->all[i].url.c_str(), 0, 0, 0)) { + if (re(m->all[i].url)) { m->disp.push_back(m->all[i]); } else { //qDebug() << "match failed. exp" << _txt << "data" << diff --git a/src/utils/strmatcher.cpp b/src/utils/strmatcher.cpp index 7f33704a..c6260e4b 100644 --- a/src/utils/strmatcher.cpp +++ b/src/utils/strmatcher.cpp @@ -16,14 +16,10 @@ */ #include "autoconfig.h" +#include "strmatcher.h" #include #include -#ifdef _WIN32 -#include -#else -#include -#endif #include #include @@ -31,19 +27,19 @@ #include "cstr.h" #include "log.h" #include "pathut.h" -#include "strmatcher.h" using namespace std; bool StrWildMatcher::match(const string& val) const { - LOGDEB2("StrWildMatcher::match: [" << (m_sexp) << "] against [" << (val) << "]\n" ); + LOGDEB2("StrWildMatcher::match ["<< m_sexp<< "] against [" << val << "]\n"); int ret = fnmatch(m_sexp.c_str(), val.c_str(), FNM_NOESCAPE); switch (ret) { case 0: return true; case FNM_NOMATCH: return false; default: - LOGINFO("StrWildMatcher::match:err: e [" << (m_sexp) << "] s [" << (val) << "] (" << (url_encode(val)) << ") ret " << (ret) << "\n" ); + LOGINFO("StrWildMatcher::match:err: e [" << m_sexp << "] s [" << val + << "] (" << url_encode(val) << ") ret " << ret << "\n"); return false; } } @@ -54,68 +50,22 @@ string::size_type StrWildMatcher::baseprefixlen() const } StrRegexpMatcher::StrRegexpMatcher(const string& exp) - : StrMatcher(exp), m_compiled(0), m_errcode(0) + : StrMatcher(exp), + m_re(exp, SimpleRegexp::SRE_NOSUB) { - setExp(exp); } bool StrRegexpMatcher::setExp(const string& exp) { - if (m_compiled) { -#ifdef _WIN32 - delete (regex*)m_compiled; -#else - regfree((regex_t*)m_compiled); - delete (regex_t*)m_compiled; -#endif - } - m_compiled = 0; - -#ifdef _WIN32 - try { - m_compiled = new regex(exp, std::regex_constants::nosubs | - std::regex_constants::extended); - } catch (...) { - m_reason = string("StrRegexpMatcher:regcomp failed for ") - + exp + string("syntax error ?"); - return false; - } -#else - m_compiled = new regex_t; - if ((m_errcode = - regcomp((regex_t*)m_compiled, exp.c_str(), REG_EXTENDED|REG_NOSUB))) { - char errbuf[200]; - regerror(m_errcode, (regex_t*)m_compiled, errbuf, 199); - m_reason = string("StrRegexpMatcher:regcomp failed for ") - + exp + string(errbuf); - return false; - } -#endif - m_sexp = exp; - return true; -} - -StrRegexpMatcher::~StrRegexpMatcher() -{ - if (m_compiled) { -#ifdef _WIN32 - delete (regex *)m_compiled; -#else - regfree((regex_t*)m_compiled); - delete (regex_t*)m_compiled; -#endif - } + m_re = SimpleRegexp(exp, SimpleRegexp::SRE_NOSUB); + return m_re.ok(); } bool StrRegexpMatcher::match(const string& val) const { - if (m_errcode) + if (!m_re.ok()) return false; -#ifdef _WIN32 - return regex_match(val, *((regex *)m_compiled)); -#else - return regexec((regex_t*)m_compiled, val.c_str(), 0, 0, 0) != REG_NOMATCH; -#endif + return m_re(val); } string::size_type StrRegexpMatcher::baseprefixlen() const @@ -125,6 +75,6 @@ string::size_type StrRegexpMatcher::baseprefixlen() const bool StrRegexpMatcher::ok() const { - return !m_errcode; + return m_re.ok(); } diff --git a/src/utils/strmatcher.h b/src/utils/strmatcher.h index 3df43f8a..7959db3f 100644 --- a/src/utils/strmatcher.h +++ b/src/utils/strmatcher.h @@ -18,6 +18,7 @@ #define _STRMATCHER_H_INCLUDED_ #include +#include "smallut.h" // Encapsulating simple wildcard/regexp string matching. @@ -25,28 +26,22 @@ class StrMatcher { public: StrMatcher(const std::string& exp) - : m_sexp(exp) - { - } + : m_sexp(exp) {} virtual ~StrMatcher() {}; virtual bool match(const std::string &val) const = 0; virtual std::string::size_type baseprefixlen() const = 0; - virtual bool setExp(const std::string& newexp) - { + virtual bool setExp(const std::string& newexp) { m_sexp = newexp; return true; } - virtual bool ok() const - { + virtual bool ok() const { return true; } - virtual const std::string& exp() - { + virtual const std::string& exp() const { return m_sexp; } - virtual StrMatcher *clone() = 0; - const string& getreason() - { + virtual StrMatcher *clone() const = 0; + const std::string& getreason() const { return m_reason; } protected: @@ -57,16 +52,11 @@ protected: class StrWildMatcher : public StrMatcher { public: StrWildMatcher(const std::string& exp) - : StrMatcher(exp) - { - } - virtual ~StrWildMatcher() - { - } + : StrMatcher(exp) {} + virtual ~StrWildMatcher() {} virtual bool match(const std::string& val) const; virtual std::string::size_type baseprefixlen() const; - virtual StrWildMatcher *clone() - { + virtual StrWildMatcher *clone() const { return new StrWildMatcher(m_sexp); } }; @@ -74,22 +64,16 @@ public: class StrRegexpMatcher : public StrMatcher { public: StrRegexpMatcher(const std::string& exp); - virtual bool setExp(const std::string& newexp); - virtual ~StrRegexpMatcher(); + virtual bool setExp(const std::string& newexp) override; + virtual ~StrRegexpMatcher() {}; virtual bool match(const std::string& val) const; virtual std::string::size_type baseprefixlen() const; - virtual bool ok() const; - virtual StrRegexpMatcher *clone() - { + virtual bool ok() const override; + virtual StrRegexpMatcher *clone() const { return new StrRegexpMatcher(m_sexp); } - const string& getreason() - { - return m_reason; - } private: - void *m_compiled; - bool m_errcode; + SimpleRegexp m_re; }; #endif /* _STRMATCHER_H_INCLUDED_ */