Simplified code by replacing miscellaneous direct regex/regex.h invocations with the SimpleRegexp wrapper
This commit is contained in:
parent
8358742132
commit
2267e5f2f5
@ -576,9 +576,13 @@ recollpython-install:
|
||||
done; \
|
||||
)
|
||||
recollpython-clean:
|
||||
rm -rf python/recoll/build
|
||||
rm -rf python/recoll/__pycache__
|
||||
rm -f python/recoll/*.pyc
|
||||
rm -rf python/pychm/build
|
||||
rm -rf python/pychm/recollchm.egg-info
|
||||
rm -rf python/pychm/setup.py
|
||||
rm -rf python/recoll/Recoll.egg-info
|
||||
rm -rf python/recoll/__pycache__
|
||||
rm -rf python/recoll/build
|
||||
endif
|
||||
|
||||
if MAKEPYTHONCHM
|
||||
|
||||
@ -23,16 +23,6 @@
|
||||
#include "safesysstat.h"
|
||||
#include <time.h>
|
||||
|
||||
#if defined(_WIN32)
|
||||
#define USING_STD_REGEX
|
||||
#endif
|
||||
|
||||
#ifdef USING_STD_REGEX
|
||||
#include <regex>
|
||||
#else
|
||||
#include <regex.h>
|
||||
#endif
|
||||
|
||||
#include <cstring>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
@ -363,7 +353,7 @@ static inline void stripendnl(line_type& line, int& ll)
|
||||
// This was added as an alternative format. By the way it also fools "mail" and
|
||||
// emacs-vm, Recoll is not alone
|
||||
// Update: 2009-11-27: word after From may be quoted string: From "john bull"
|
||||
static const char *frompat =
|
||||
static const string frompat{
|
||||
"^From[ ]+([^ ]+|\"[^\"]+\")[ ]+" // 'From (toto@tutu|"john bull") '
|
||||
"[[:alpha:]]{3}[ ]+[[:alpha:]]{3}[ ]+[0-3 ][0-9][ ]+" // Fri Oct 26
|
||||
"[0-2][0-9]:[0-5][0-9](:[0-5][0-9])?[ ]+" // Time, seconds optional
|
||||
@ -374,45 +364,15 @@ static const char *frompat =
|
||||
"[[:alpha:]]{3},[ ]+[0-3]?[0-9][ ]+[[:alpha:]]{3}[ ]+" // Mon, 8 May
|
||||
"[12][0-9][0-9][0-9][ ]+" // Year
|
||||
"[0-2][0-9]:[0-5][0-9](:[0-5][0-9])?" // Time, secs optional
|
||||
;
|
||||
};
|
||||
|
||||
// Extreme Thunderbird brokenness. It will sometimes use From lines
|
||||
// exactly like: From ^M (From followed by space and eol). We only
|
||||
// test for this if QUIRKS_TBIRD is set
|
||||
static const char *miniTbirdFrom = "^From $";
|
||||
#ifndef USING_STD_REGEX
|
||||
static regex_t fromregex;
|
||||
static regex_t minifromregex;
|
||||
#define M_regexec(A,B,C,D,E) regexec(&(A),B,C,D,E)
|
||||
#else
|
||||
basic_regex<char> fromregex;
|
||||
basic_regex<char> minifromregex;
|
||||
#define REG_NOSUB std::regex_constants::nosubs
|
||||
#define REG_EXTENDED std::regex_constants::extended
|
||||
#define M_regexec(A, B, C, D, E) (!regex_match(B,A))
|
||||
static const string miniTbirdFrom{"^From $"};
|
||||
|
||||
#endif
|
||||
|
||||
static bool regcompiled;
|
||||
static std::mutex o_regex_mutex;
|
||||
|
||||
static void compileregexes()
|
||||
{
|
||||
std::unique_lock<std::mutex> locker(o_regex_mutex);
|
||||
// As the initial test of regcompiled is unprotected the value may
|
||||
// have changed while we were waiting for the lock. Test again now
|
||||
// that we are alone.
|
||||
if (regcompiled)
|
||||
return;
|
||||
#ifndef USING_STD_REGEX
|
||||
regcomp(&fromregex, frompat, REG_NOSUB|REG_EXTENDED);
|
||||
regcomp(&minifromregex, miniTbirdFrom, REG_NOSUB|REG_EXTENDED);
|
||||
#else
|
||||
fromregex = basic_regex<char>(frompat, REG_NOSUB | REG_EXTENDED);
|
||||
minifromregex = basic_regex<char>(miniTbirdFrom, REG_NOSUB | REG_EXTENDED);
|
||||
#endif
|
||||
regcompiled = true;
|
||||
}
|
||||
static SimpleRegexp fromregex(frompat, SimpleRegexp::SRE_NOSUB);
|
||||
static SimpleRegexp minifromregex(miniTbirdFrom, SimpleRegexp::SRE_NOSUB);
|
||||
|
||||
bool MimeHandlerMbox::next_document()
|
||||
{
|
||||
@ -432,13 +392,11 @@ bool MimeHandlerMbox::next_document()
|
||||
LOGDEB("MimeHandlerMbox::next_document: can't preview folders!\n");
|
||||
return false;
|
||||
}
|
||||
LOGDEB0("MimeHandlerMbox::next_document: fn " << (m_fn) << ", msgnum " << (m_msgnum) << " mtarg " << (mtarg) << " \n");
|
||||
LOGDEB0("MimeHandlerMbox::next_document: fn " << m_fn << ", msgnum " <<
|
||||
m_msgnum << " mtarg " << mtarg << " \n");
|
||||
if (mtarg == 0)
|
||||
mtarg = -1;
|
||||
|
||||
if (!regcompiled) {
|
||||
compileregexes();
|
||||
}
|
||||
|
||||
// If we are called to retrieve a specific message, seek to bof
|
||||
// (then scan up to the message). This is for the case where the
|
||||
@ -452,14 +410,14 @@ bool MimeHandlerMbox::next_document()
|
||||
if (mtarg > 0) {
|
||||
mbhoff_type off;
|
||||
line_type line;
|
||||
LOGDEB0("MimeHandlerMbox::next_doc: mtarg " << (mtarg) << " m_udi[" << (m_udi) << "]\n");
|
||||
LOGDEB0("MimeHandlerMbox::next_doc: mtarg " << mtarg << " m_udi[" <<
|
||||
m_udi << "]\n");
|
||||
if (!m_udi.empty() &&
|
||||
(off = o_mcache.get_offset(m_config, m_udi, mtarg)) >= 0 &&
|
||||
fseeko(fp, (off_t)off, SEEK_SET) >= 0 &&
|
||||
fgets(line, LL, fp) &&
|
||||
(!M_regexec(fromregex, line, 0, 0, 0) ||
|
||||
((m_quirks & MBOXQUIRK_TBIRD) &&
|
||||
!M_regexec(minifromregex, line, 0, 0, 0))) ) {
|
||||
(fromregex(line) || ((m_quirks & MBOXQUIRK_TBIRD) &&
|
||||
minifromregex(line))) ) {
|
||||
LOGDEB0("MimeHandlerMbox: Cache: From_ Ok\n");
|
||||
fseeko(fp, (off_t)off, SEEK_SET);
|
||||
m_msgnum = mtarg -1;
|
||||
@ -487,7 +445,8 @@ bool MimeHandlerMbox::next_document()
|
||||
m_lineno++;
|
||||
int ll;
|
||||
stripendnl(line, ll);
|
||||
LOGDEB2("mhmbox:next: hadempty " << (hademptyline) << " lineno " << (m_lineno) << " ll " << (ll) << " Line: [" << (line) << "]\n");
|
||||
LOGDEB2("mhmbox:next: hadempty " << hademptyline << " lineno " <<
|
||||
m_lineno << " ll " << ll << " Line: [" << line << "]\n");
|
||||
if (hademptyline) {
|
||||
if (ll > 0) {
|
||||
// Non-empty line with empty line flag set, reset flag
|
||||
@ -501,11 +460,12 @@ bool MimeHandlerMbox::next_document()
|
||||
/* The 'F' compare is redundant but it improves performance
|
||||
A LOT */
|
||||
if (line[0] == 'F' && (
|
||||
!M_regexec(fromregex, line, 0, 0, 0) ||
|
||||
((m_quirks & MBOXQUIRK_TBIRD) &&
|
||||
!M_regexec(minifromregex, line, 0, 0, 0)))
|
||||
fromregex(line) ||
|
||||
((m_quirks & MBOXQUIRK_TBIRD) && minifromregex(line)))
|
||||
) {
|
||||
LOGDEB0("MimeHandlerMbox: msgnum " << (m_msgnum) << ", From_ at line " << (m_lineno) << ": [" << (line) << "]\n");
|
||||
LOGDEB0("MimeHandlerMbox: msgnum " << m_msgnum <<
|
||||
", From_ at line " << m_lineno << ": [" << line
|
||||
<< "]\n");
|
||||
if (storeoffsets)
|
||||
m_offsets.push_back(message_end);
|
||||
m_msgnum++;
|
||||
@ -528,13 +488,15 @@ bool MimeHandlerMbox::next_document()
|
||||
line[ll+1] = 0;
|
||||
msgtxt += line;
|
||||
if (msgtxt.size() > max_mbox_member_size) {
|
||||
LOGERR("mh_mbox: huge message (more than " << (max_mbox_member_size/(1024*1024)) << " MB) inside " << (m_fn) << ", giving up\n");
|
||||
LOGERR("mh_mbox: huge message (more than " <<
|
||||
max_mbox_member_size/(1024*1024) << " MB) inside " <<
|
||||
m_fn << ", giving up\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
LOGDEB2("Message text length " << (msgtxt.size()) << "\n");
|
||||
LOGDEB2("Message text: [" << (msgtxt) << "]\n");
|
||||
LOGDEB2("Message text length " << msgtxt.size() << "\n");
|
||||
LOGDEB2("Message text: [" << msgtxt << "]\n");
|
||||
char buf[20];
|
||||
// m_msgnum was incremented when hitting the next From_ or eof, so the data
|
||||
// is for m_msgnum - 1
|
||||
|
||||
@ -21,17 +21,6 @@
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
|
||||
#ifdef _WIN32
|
||||
#define USING_STD_REGEX
|
||||
#endif
|
||||
|
||||
#ifndef USING_STD_REGEX
|
||||
#include <sys/types.h>
|
||||
#include <regex.h>
|
||||
#else
|
||||
#include <regex>
|
||||
#endif
|
||||
|
||||
#include <QDebug>
|
||||
#include <QSettings>
|
||||
#include <QCloseEvent>
|
||||
@ -47,6 +36,7 @@
|
||||
#include "circache.h"
|
||||
#include "conftree.h"
|
||||
#include "rclmain_w.h"
|
||||
#include "smallut.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
@ -158,21 +148,8 @@ QVariant WebcacheModel::data(const QModelIndex& index, int role) const
|
||||
return QVariant();
|
||||
}
|
||||
|
||||
/* We now read the data on init */
|
||||
#if 0
|
||||
string sdic;
|
||||
if (!m->cache->cc()->get(m->disp[row].udi, sdic)) {
|
||||
return QVariant();
|
||||
}
|
||||
ConfSimple dic(sdic);
|
||||
//ostringstream os; dic.write(os); cerr << "DIC: " << os.str() << endl;
|
||||
string mime, url;
|
||||
dic.get("mimetype", mime);
|
||||
dic.get("url", url);
|
||||
#else
|
||||
const string& mime = m->disp[row].mimetype;
|
||||
const string& url = m->disp[row].url;
|
||||
#endif
|
||||
|
||||
switch (index.column()) {
|
||||
case 0: return QVariant(QString::fromUtf8(mime.c_str()));
|
||||
@ -181,35 +158,13 @@ QVariant WebcacheModel::data(const QModelIndex& index, int role) const
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef USING_STD_REGEX
|
||||
#define M_regexec(A,B,C,D,E) regexec(&(A),B,C,D,E)
|
||||
#else
|
||||
#define M_regexec(A,B,C,D,E) (!regex_match(B,A))
|
||||
#endif
|
||||
|
||||
void WebcacheModel::setSearchFilter(const QString& _txt)
|
||||
{
|
||||
string txt = qs2utf8s(_txt);
|
||||
|
||||
#ifndef USING_STD_REGEX
|
||||
regex_t exp;
|
||||
if (regcomp(&exp, txt.c_str(), REG_NOSUB|REG_EXTENDED)) {
|
||||
//qDebug() << "regcomp failed for " << _txt;
|
||||
return;
|
||||
}
|
||||
#else
|
||||
basic_regex<char> exp;
|
||||
try {
|
||||
exp = basic_regex<char>(txt, std::regex_constants::nosubs |
|
||||
std::regex_constants::extended);
|
||||
} catch(...) {
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
SimpleRegexp re(qs2utf8s(_txt), SimpleRegexp::SRE_NOSUB);
|
||||
|
||||
m->disp.clear();
|
||||
for (unsigned int i = 0; i < m->all.size(); i++) {
|
||||
if (!M_regexec(exp, m->all[i].url.c_str(), 0, 0, 0)) {
|
||||
if (re(m->all[i].url)) {
|
||||
m->disp.push_back(m->all[i]);
|
||||
} else {
|
||||
//qDebug() << "match failed. exp" << _txt << "data" <<
|
||||
|
||||
@ -16,14 +16,10 @@
|
||||
*/
|
||||
|
||||
#include "autoconfig.h"
|
||||
#include "strmatcher.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <sys/types.h>
|
||||
#ifdef _WIN32
|
||||
#include <regex>
|
||||
#else
|
||||
#include <regex.h>
|
||||
#endif
|
||||
#include <fnmatch.h>
|
||||
|
||||
#include <string>
|
||||
@ -31,19 +27,19 @@
|
||||
#include "cstr.h"
|
||||
#include "log.h"
|
||||
#include "pathut.h"
|
||||
#include "strmatcher.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
bool StrWildMatcher::match(const string& val) const
|
||||
{
|
||||
LOGDEB2("StrWildMatcher::match: [" << (m_sexp) << "] against [" << (val) << "]\n" );
|
||||
LOGDEB2("StrWildMatcher::match ["<< m_sexp<< "] against [" << val << "]\n");
|
||||
int ret = fnmatch(m_sexp.c_str(), val.c_str(), FNM_NOESCAPE);
|
||||
switch (ret) {
|
||||
case 0: return true;
|
||||
case FNM_NOMATCH: return false;
|
||||
default:
|
||||
LOGINFO("StrWildMatcher::match:err: e [" << (m_sexp) << "] s [" << (val) << "] (" << (url_encode(val)) << ") ret " << (ret) << "\n" );
|
||||
LOGINFO("StrWildMatcher::match:err: e [" << m_sexp << "] s [" << val
|
||||
<< "] (" << url_encode(val) << ") ret " << ret << "\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@ -54,68 +50,22 @@ string::size_type StrWildMatcher::baseprefixlen() const
|
||||
}
|
||||
|
||||
StrRegexpMatcher::StrRegexpMatcher(const string& exp)
|
||||
: StrMatcher(exp), m_compiled(0), m_errcode(0)
|
||||
: StrMatcher(exp),
|
||||
m_re(exp, SimpleRegexp::SRE_NOSUB)
|
||||
{
|
||||
setExp(exp);
|
||||
}
|
||||
|
||||
bool StrRegexpMatcher::setExp(const string& exp)
|
||||
{
|
||||
if (m_compiled) {
|
||||
#ifdef _WIN32
|
||||
delete (regex*)m_compiled;
|
||||
#else
|
||||
regfree((regex_t*)m_compiled);
|
||||
delete (regex_t*)m_compiled;
|
||||
#endif
|
||||
}
|
||||
m_compiled = 0;
|
||||
|
||||
#ifdef _WIN32
|
||||
try {
|
||||
m_compiled = new regex(exp, std::regex_constants::nosubs |
|
||||
std::regex_constants::extended);
|
||||
} catch (...) {
|
||||
m_reason = string("StrRegexpMatcher:regcomp failed for ")
|
||||
+ exp + string("syntax error ?");
|
||||
return false;
|
||||
}
|
||||
#else
|
||||
m_compiled = new regex_t;
|
||||
if ((m_errcode =
|
||||
regcomp((regex_t*)m_compiled, exp.c_str(), REG_EXTENDED|REG_NOSUB))) {
|
||||
char errbuf[200];
|
||||
regerror(m_errcode, (regex_t*)m_compiled, errbuf, 199);
|
||||
m_reason = string("StrRegexpMatcher:regcomp failed for ")
|
||||
+ exp + string(errbuf);
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
m_sexp = exp;
|
||||
return true;
|
||||
}
|
||||
|
||||
StrRegexpMatcher::~StrRegexpMatcher()
|
||||
{
|
||||
if (m_compiled) {
|
||||
#ifdef _WIN32
|
||||
delete (regex *)m_compiled;
|
||||
#else
|
||||
regfree((regex_t*)m_compiled);
|
||||
delete (regex_t*)m_compiled;
|
||||
#endif
|
||||
}
|
||||
m_re = SimpleRegexp(exp, SimpleRegexp::SRE_NOSUB);
|
||||
return m_re.ok();
|
||||
}
|
||||
|
||||
bool StrRegexpMatcher::match(const string& val) const
|
||||
{
|
||||
if (m_errcode)
|
||||
if (!m_re.ok())
|
||||
return false;
|
||||
#ifdef _WIN32
|
||||
return regex_match(val, *((regex *)m_compiled));
|
||||
#else
|
||||
return regexec((regex_t*)m_compiled, val.c_str(), 0, 0, 0) != REG_NOMATCH;
|
||||
#endif
|
||||
return m_re(val);
|
||||
}
|
||||
|
||||
string::size_type StrRegexpMatcher::baseprefixlen() const
|
||||
@ -125,6 +75,6 @@ string::size_type StrRegexpMatcher::baseprefixlen() const
|
||||
|
||||
bool StrRegexpMatcher::ok() const
|
||||
{
|
||||
return !m_errcode;
|
||||
return m_re.ok();
|
||||
}
|
||||
|
||||
|
||||
@ -18,6 +18,7 @@
|
||||
#define _STRMATCHER_H_INCLUDED_
|
||||
|
||||
#include <string>
|
||||
#include "smallut.h"
|
||||
|
||||
// Encapsulating simple wildcard/regexp string matching.
|
||||
|
||||
@ -25,28 +26,22 @@
|
||||
class StrMatcher {
|
||||
public:
|
||||
StrMatcher(const std::string& exp)
|
||||
: m_sexp(exp)
|
||||
{
|
||||
}
|
||||
: m_sexp(exp) {}
|
||||
virtual ~StrMatcher() {};
|
||||
virtual bool match(const std::string &val) const = 0;
|
||||
virtual std::string::size_type baseprefixlen() const = 0;
|
||||
virtual bool setExp(const std::string& newexp)
|
||||
{
|
||||
virtual bool setExp(const std::string& newexp) {
|
||||
m_sexp = newexp;
|
||||
return true;
|
||||
}
|
||||
virtual bool ok() const
|
||||
{
|
||||
virtual bool ok() const {
|
||||
return true;
|
||||
}
|
||||
virtual const std::string& exp()
|
||||
{
|
||||
virtual const std::string& exp() const {
|
||||
return m_sexp;
|
||||
}
|
||||
virtual StrMatcher *clone() = 0;
|
||||
const string& getreason()
|
||||
{
|
||||
virtual StrMatcher *clone() const = 0;
|
||||
const std::string& getreason() const {
|
||||
return m_reason;
|
||||
}
|
||||
protected:
|
||||
@ -57,16 +52,11 @@ protected:
|
||||
class StrWildMatcher : public StrMatcher {
|
||||
public:
|
||||
StrWildMatcher(const std::string& exp)
|
||||
: StrMatcher(exp)
|
||||
{
|
||||
}
|
||||
virtual ~StrWildMatcher()
|
||||
{
|
||||
}
|
||||
: StrMatcher(exp) {}
|
||||
virtual ~StrWildMatcher() {}
|
||||
virtual bool match(const std::string& val) const;
|
||||
virtual std::string::size_type baseprefixlen() const;
|
||||
virtual StrWildMatcher *clone()
|
||||
{
|
||||
virtual StrWildMatcher *clone() const {
|
||||
return new StrWildMatcher(m_sexp);
|
||||
}
|
||||
};
|
||||
@ -74,22 +64,16 @@ public:
|
||||
class StrRegexpMatcher : public StrMatcher {
|
||||
public:
|
||||
StrRegexpMatcher(const std::string& exp);
|
||||
virtual bool setExp(const std::string& newexp);
|
||||
virtual ~StrRegexpMatcher();
|
||||
virtual bool setExp(const std::string& newexp) override;
|
||||
virtual ~StrRegexpMatcher() {};
|
||||
virtual bool match(const std::string& val) const;
|
||||
virtual std::string::size_type baseprefixlen() const;
|
||||
virtual bool ok() const;
|
||||
virtual StrRegexpMatcher *clone()
|
||||
{
|
||||
virtual bool ok() const override;
|
||||
virtual StrRegexpMatcher *clone() const {
|
||||
return new StrRegexpMatcher(m_sexp);
|
||||
}
|
||||
const string& getreason()
|
||||
{
|
||||
return m_reason;
|
||||
}
|
||||
private:
|
||||
void *m_compiled;
|
||||
bool m_errcode;
|
||||
SimpleRegexp m_re;
|
||||
};
|
||||
|
||||
#endif /* _STRMATCHER_H_INCLUDED_ */
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user