shared
This commit is contained in:
parent
4508b6b064
commit
476a3ba743
@ -6,7 +6,7 @@
|
||||
|
||||
PPA_KEYID=7808CE96D38B9201
|
||||
|
||||
RCLVERS=1.27.2
|
||||
RCLVERS=1.27.3
|
||||
SCOPEVERS=1.20.2.4
|
||||
GSSPVERS=1.0.0
|
||||
PPAVERS=2
|
||||
@ -51,7 +51,7 @@ debdir=debian
|
||||
# 19.10 eoan 2020-07
|
||||
# 20.04LTS focal 2025-04
|
||||
series="xenial bionic eoan focal"
|
||||
series=
|
||||
#series=
|
||||
|
||||
if test "X$series" != X ; then
|
||||
check_recoll_orig
|
||||
@ -137,7 +137,7 @@ done
|
||||
# 19.10 eoan 2020-07
|
||||
# 20.04LTS focal 2025-04
|
||||
series="xenial bionic eoan focal"
|
||||
series=focal
|
||||
series=
|
||||
|
||||
debdir=debiangssp
|
||||
if test ! -d ${debdir}/ ; then
|
||||
|
||||
@ -132,7 +132,7 @@ int ConfTabsW::addForeignPanel(ConfPanelWIF* w, const QString& title)
|
||||
m_widgets.push_back(w);
|
||||
QWidget *qw = dynamic_cast<QWidget *>(w);
|
||||
if (qw == 0) {
|
||||
qDebug() << "Can't cast panel to QWidget";
|
||||
qDebug() << "addForeignPanel: can't cast panel to QWidget";
|
||||
abort();
|
||||
}
|
||||
return tabWidget->addTab(qw, title);
|
||||
@ -227,9 +227,11 @@ ConfParamW *ConfTabsW::findParamW(const QString& varname)
|
||||
}
|
||||
void ConfTabsW::endOfList(int tabindex)
|
||||
{
|
||||
ConfPanelW *panel = (ConfPanelW*)tabWidget->widget(tabindex);
|
||||
if (nullptr == panel)
|
||||
ConfPanelW *panel = dynamic_cast<ConfPanelW*>(tabWidget->widget(tabindex));
|
||||
// panel may be null if this is a foreign panel (not a conftabsw)
|
||||
if (nullptr == panel) {
|
||||
return;
|
||||
}
|
||||
panel->endOfList();
|
||||
}
|
||||
|
||||
|
||||
@ -19,6 +19,11 @@
|
||||
|
||||
#include <errno.h>
|
||||
#include <fstream>
|
||||
#include <time.h>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define localtime_r(A,B) localtime_s(B,A)
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
|
||||
@ -54,6 +59,17 @@ bool Logger::reopen(const std::string& fn)
|
||||
return true;
|
||||
}
|
||||
|
||||
const char *Logger::datestring()
|
||||
{
|
||||
time_t clk = time(0);
|
||||
struct tm tmb;
|
||||
localtime_r(&clk, &tmb);
|
||||
if (strftime(m_datebuf, LOGGER_DATESIZE, m_datefmt.c_str(), &tmb)) {
|
||||
return m_datebuf;
|
||||
} else {
|
||||
return "";
|
||||
}
|
||||
}
|
||||
static Logger *theLog;
|
||||
|
||||
Logger *Logger::getTheLog(const string& fn)
|
||||
|
||||
@ -18,11 +18,9 @@
|
||||
#define _LOG_H_X_INCLUDED_
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <mutex>
|
||||
|
||||
#ifndef LOGGER_THREADSAFE
|
||||
#define LOGGER_THREADSAFE 1
|
||||
@ -33,30 +31,74 @@
|
||||
#endif
|
||||
|
||||
// Can't use the symbolic Logger::LLXX names in preproc. 6 is LLDEB1
|
||||
// STATICVERBOSITY is the level above which logging statements are
|
||||
// preproc'ed out (can't be dynamically turned on).
|
||||
#ifndef LOGGER_STATICVERBOSITY
|
||||
#define LOGGER_STATICVERBOSITY 5
|
||||
#endif
|
||||
|
||||
#define LOGGER_DATESIZE 100
|
||||
|
||||
/** @brief This is a singleton class. The logger pointer is obtained
|
||||
* when needed by calls to @ref getTheLog(), only the first of which
|
||||
* actually creates the object and initializes the output. */
|
||||
class Logger {
|
||||
public:
|
||||
/** Initialize logging to file name. Use "stderr" for stderr
|
||||
output. Creates the singleton logger object */
|
||||
* output. Creates the singleton logger object. Only the first
|
||||
* call changes the state, further ones just return the Logger
|
||||
* pointer. */
|
||||
static Logger *getTheLog(const std::string& fn);
|
||||
|
||||
/** Close and reopen the output file. For rotating the log: rename
|
||||
* then reopen. */
|
||||
bool reopen(const std::string& fn);
|
||||
|
||||
|
||||
/** Retrieve the output stream in case you need to write directly
|
||||
* to it. In a multithreaded program, you probably also need to obtain
|
||||
* the mutex with @ref getmutex, and lock it. */
|
||||
std::ostream& getstream() {
|
||||
return m_tocerr ? std::cerr : m_stream;
|
||||
}
|
||||
|
||||
/** @brief Log level values. Messages at level above the current will
|
||||
* not be printed. Messages at a level above
|
||||
* LOGGER_STATICVERBOSITY will not even be compiled in. */
|
||||
enum LogLevel {LLNON=0, LLFAT=1, LLERR=2, LLINF=3, LLDEB=4,
|
||||
LLDEB0=5, LLDEB1=6, LLDEB2=7};
|
||||
|
||||
/** @brief Set the log dynamic verbosity level */
|
||||
void setLogLevel(LogLevel level) {
|
||||
m_loglevel = level;
|
||||
}
|
||||
int getloglevel() {
|
||||
/** @brief Set the log dynamic verbosity level */
|
||||
void setloglevel(LogLevel level) {
|
||||
m_loglevel = level;
|
||||
}
|
||||
|
||||
/** @brief Retrieve the current log level */
|
||||
int getloglevel() const {
|
||||
return m_loglevel;
|
||||
}
|
||||
|
||||
/** @brief turn date logging on or off (default is off) */
|
||||
void logthedate(bool onoff) {
|
||||
m_logdate = onoff;
|
||||
}
|
||||
|
||||
bool loggingdate() const {
|
||||
return m_logdate;
|
||||
}
|
||||
|
||||
/** @brief Set the date format, as an strftime() format string.
|
||||
* Default: "%Y%m%d-%H%M%S" . */
|
||||
void setdateformat(const std::string fmt) {
|
||||
m_datefmt = fmt;
|
||||
}
|
||||
|
||||
/** Call with log locked */
|
||||
const char *datestring();
|
||||
|
||||
#if LOGGER_THREADSAFE
|
||||
std::recursive_mutex& getmutex() {
|
||||
return m_mutex;
|
||||
@ -65,13 +107,15 @@ public:
|
||||
|
||||
private:
|
||||
bool m_tocerr{false};
|
||||
bool m_logdate{false};
|
||||
int m_loglevel{LLERR};
|
||||
std::string m_datefmt{"%Y%m%d-%H%M%S"};
|
||||
std::string m_fn;
|
||||
std::ofstream m_stream;
|
||||
#if LOGGER_THREADSAFE
|
||||
std::recursive_mutex m_mutex;
|
||||
#endif
|
||||
|
||||
char m_datebuf[LOGGER_DATESIZE];
|
||||
Logger(const std::string& fn);
|
||||
Logger(const Logger &);
|
||||
Logger& operator=(const Logger &);
|
||||
@ -93,10 +137,14 @@ private:
|
||||
#define LOGGER_LEVEL (Logger::getTheLog("")->getloglevel() + \
|
||||
LOGGER_LOCAL_LOGINC)
|
||||
|
||||
#define LOGGER_DOLOG(L,X) LOGGER_PRT << ":" << L << ":" << \
|
||||
__FILE__ << ":" << __LINE__ << "::" << X \
|
||||
#define LOGGER_DATE (Logger::getTheLog("")->loggingdate() ? \
|
||||
Logger::getTheLog("")->datestring() : "")
|
||||
|
||||
#define LOGGER_DOLOG(L,X) LOGGER_PRT << LOGGER_DATE << ":" << L << ":" << \
|
||||
__FILE__ << ":" << __LINE__ << "::" << X \
|
||||
<< std::flush
|
||||
|
||||
|
||||
#if LOGGER_STATICVERBOSITY >= 7
|
||||
#define LOGDEB2(X) { \
|
||||
if (LOGGER_LEVEL >= Logger::LLDEB2) { \
|
||||
@ -142,6 +190,10 @@ private:
|
||||
#endif
|
||||
|
||||
#if LOGGER_STATICVERBOSITY >= 3
|
||||
/** Log a message at level INFO. Other macros exist for other levels (LOGFAT,
|
||||
* LOGERR, LOGINF, LOGDEB, LOGDEB0... Use as:
|
||||
* LOGINF("some text" << other stuff << ... << "\n");
|
||||
*/
|
||||
#define LOGINF(X) { \
|
||||
if (LOGGER_LEVEL >= Logger::LLINF) { \
|
||||
LOGGER_LOCK; \
|
||||
|
||||
@ -56,7 +56,6 @@
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <dirent.h>
|
||||
#include <errno.h>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
@ -88,12 +87,12 @@
|
||||
#endif
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#define NOGDI
|
||||
#define MAXPATHLEN PATH_MAX
|
||||
|
||||
#include <windows.h>
|
||||
#include <io.h>
|
||||
|
||||
#include <sys/stat.h>
|
||||
#include <direct.h>
|
||||
#include <Shlobj.h>
|
||||
#include <Stringapiset.h>
|
||||
|
||||
#if !defined(S_IFLNK)
|
||||
#define S_IFLNK 0
|
||||
@ -104,11 +103,13 @@
|
||||
#ifndef S_ISREG
|
||||
# define S_ISREG(ST_MODE) (((ST_MODE) & _S_IFMT) == _S_IFREG)
|
||||
#endif
|
||||
|
||||
#include <direct.h>
|
||||
|
||||
#include <Shlobj.h>
|
||||
#include <Stringapiset.h>
|
||||
#define MAXPATHLEN PATH_MAX
|
||||
#ifndef PATH_MAX
|
||||
#define PATH_MAX MAX_PATH
|
||||
#endif
|
||||
#ifndef R_OK
|
||||
#define R_OK 4
|
||||
#endif
|
||||
|
||||
#define STAT _wstati64
|
||||
#define LSTAT _wstati64
|
||||
@ -130,7 +131,9 @@
|
||||
// For getpid
|
||||
#include <process.h>
|
||||
#define getpid _getpid
|
||||
#endif
|
||||
|
||||
#define PATHUT_SSIZE_T int
|
||||
#endif // _MSC_VER
|
||||
|
||||
#else /* !_WIN32 -> */
|
||||
|
||||
@ -159,8 +162,19 @@
|
||||
|
||||
#endif /* !_WIN32 */
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <msvc_dirent.h>
|
||||
#else // !_MSC_VER
|
||||
#include <dirent.h>
|
||||
#endif // _MSC_VER
|
||||
|
||||
using namespace std;
|
||||
|
||||
#ifndef PATHUT_SSIZE_T
|
||||
#define PATHUT_SSIZE_T ssize_t
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
std::string wchartoutf8(const wchar_t *in, size_t len)
|
||||
@ -1401,7 +1415,7 @@ int Pidfile::write_pid()
|
||||
char pidstr[20];
|
||||
sprintf(pidstr, "%u", int(getpid()));
|
||||
lseek(m_fd, 0, 0);
|
||||
if (::write(m_fd, pidstr, strlen(pidstr)) != (ssize_t)strlen(pidstr)) {
|
||||
if (::write(m_fd, pidstr, strlen(pidstr)) != (PATHUT_SSIZE_T)strlen(pidstr)) {
|
||||
m_reason = "write failed";
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -143,11 +143,7 @@ extern bool path_unlink(const std::string& path);
|
||||
* extension. On other OSes, just builds the fstream. We'd need to
|
||||
* find a way to make this work with g++. It would be easier in this
|
||||
* case to use a FILE (_openw(), then fdopen()), but conftree really
|
||||
* depends on std::iostream. One possible workaround for g++ would be
|
||||
* to use shortpaths (which we already use to pass file names to
|
||||
* xapian and aspell). Most of the problems are caused by the home
|
||||
* directory name being non-ASCII, so returning a short path in
|
||||
* path_home() would probably solve everything (but not pretty).
|
||||
* depends on std::iostream.
|
||||
*
|
||||
* @param path an utf-8 file path.
|
||||
* @param mode is an std::fstream mode (ios::in etc.) */
|
||||
|
||||
@ -66,7 +66,7 @@ extern void stringtolower(std::string& io);
|
||||
extern std::string stringtolower(const std::string& io);
|
||||
extern void stringtoupper(std::string& io);
|
||||
extern std::string stringtoupper(const std::string& io);
|
||||
extern bool beginswith(const std::string& big, const std::string& small);
|
||||
extern bool beginswith(const std::string& bg, const std::string& sml);
|
||||
|
||||
// Is one string the end part of the other ?
|
||||
extern int stringisuffcmp(const std::string& s1, const std::string& s2);
|
||||
|
||||
@ -16,21 +16,114 @@
|
||||
*/
|
||||
|
||||
#include "utf8iter.h"
|
||||
#include <string>
|
||||
|
||||
using std::string;
|
||||
#include <unordered_set>
|
||||
#include <iostream>
|
||||
|
||||
void utf8truncate(std::string& s, int maxlen)
|
||||
using namespace std;
|
||||
|
||||
void utf8truncate(std::string& s, int maxlen, int flags, string ellipsis,
|
||||
string ws)
|
||||
{
|
||||
if (s.size() <= string::size_type(maxlen)) {
|
||||
return;
|
||||
}
|
||||
unordered_set<int> wss;
|
||||
if (flags & UTF8T_ATWORD) {
|
||||
Utf8Iter iter(ws);
|
||||
for (; !iter.eof(); iter++) {
|
||||
unsigned int c = *iter;
|
||||
wss.insert(c);
|
||||
}
|
||||
}
|
||||
|
||||
if (flags & UTF8T_ELLIPSIS) {
|
||||
size_t ellen = utf8len(ellipsis);
|
||||
if (maxlen > int(ellen)) {
|
||||
maxlen -= ellen;
|
||||
} else {
|
||||
maxlen = 0;
|
||||
}
|
||||
}
|
||||
|
||||
Utf8Iter iter(s);
|
||||
string::size_type pos = 0;
|
||||
while (iter++ != string::npos)
|
||||
if (iter.getBpos() < string::size_type(maxlen)) {
|
||||
pos = iter.getBpos();
|
||||
string::size_type lastwspos = 0;
|
||||
for (; !iter.eof(); iter++) {
|
||||
unsigned int c = *iter;
|
||||
if (iter.getCpos() < string::size_type(maxlen)) {
|
||||
pos = iter.getBpos() + iter.getBlen();
|
||||
if ((flags & UTF8T_ATWORD) && wss.find(c) != wss.end()) {
|
||||
lastwspos = pos;
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
s.erase(pos);
|
||||
if (flags & UTF8T_ATWORD) {
|
||||
s.erase(lastwspos);
|
||||
for (;;) {
|
||||
Utf8Iter iter(s);
|
||||
unsigned int c = 0;
|
||||
for (; !iter.eof(); iter++) {
|
||||
c = *iter;
|
||||
pos = iter.getBpos();
|
||||
}
|
||||
if (wss.find(c) == wss.end()) {
|
||||
break;
|
||||
}
|
||||
s.erase(pos);
|
||||
}
|
||||
} else {
|
||||
s.erase(pos);
|
||||
}
|
||||
|
||||
if (flags & UTF8T_ELLIPSIS) {
|
||||
s += ellipsis;
|
||||
}
|
||||
}
|
||||
|
||||
size_t utf8len(const string& s)
|
||||
{
|
||||
size_t len = 0;
|
||||
Utf8Iter iter(s);
|
||||
while (iter++ != string::npos) {
|
||||
len++;
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
static const std::string replchar{"\xef\xbf\xbd"};
|
||||
|
||||
// Check utf-8 encoding, replacing errors with the ? char above
|
||||
int utf8check(const std::string& in, std::string& out, bool fixit, int maxrepl)
|
||||
{
|
||||
int cnt = 0;
|
||||
Utf8Iter it(in);
|
||||
for (;!it.eof(); it++) {
|
||||
if (it.error()) {
|
||||
if (!fixit) {
|
||||
return -1;
|
||||
}
|
||||
out += replchar;
|
||||
++cnt;
|
||||
for (; cnt < maxrepl; cnt++) {
|
||||
it.retryfurther();
|
||||
if (it.eof())
|
||||
return cnt;
|
||||
if (!it.error())
|
||||
break;
|
||||
out += replchar;
|
||||
}
|
||||
if (it.error()) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
// We have reached a good char and eof is false
|
||||
if (fixit) {
|
||||
it.appendchartostring(out);
|
||||
}
|
||||
}
|
||||
return cnt;
|
||||
}
|
||||
|
||||
@ -32,110 +32,120 @@
|
||||
class Utf8Iter {
|
||||
public:
|
||||
Utf8Iter(const std::string &in)
|
||||
: m_sp(&in) {
|
||||
update_cl();
|
||||
: m_sp(&in) {
|
||||
update_cl();
|
||||
}
|
||||
|
||||
const std::string& buffer() const {
|
||||
return *m_sp;
|
||||
}
|
||||
|
||||
|
||||
void rewind() {
|
||||
m_cl = 0;
|
||||
m_pos = 0;
|
||||
m_charpos = 0;
|
||||
update_cl();
|
||||
m_cl = 0;
|
||||
m_pos = 0;
|
||||
m_charpos = 0;
|
||||
update_cl();
|
||||
}
|
||||
|
||||
void retryfurther() {
|
||||
if (eof())
|
||||
return;
|
||||
m_pos++;
|
||||
if (eof()) {
|
||||
return;
|
||||
}
|
||||
update_cl();
|
||||
}
|
||||
|
||||
/** "Direct" access. Awfully inefficient as we skip from start or current
|
||||
* position at best. This can only be useful for a lookahead from the
|
||||
* current position */
|
||||
unsigned int operator[](std::string::size_type charpos) const {
|
||||
std::string::size_type mypos = 0;
|
||||
unsigned int mycp = 0;
|
||||
if (charpos >= m_charpos) {
|
||||
mypos = m_pos;
|
||||
mycp = m_charpos;
|
||||
}
|
||||
int l;
|
||||
while (mypos < m_sp->length() && mycp != charpos) {
|
||||
l = get_cl(mypos);
|
||||
if (l <= 0 || !poslok(mypos, l) || !checkvalidat(mypos, l))
|
||||
std::string::size_type mypos = 0;
|
||||
unsigned int mycp = 0;
|
||||
if (charpos >= m_charpos) {
|
||||
mypos = m_pos;
|
||||
mycp = m_charpos;
|
||||
}
|
||||
int l;
|
||||
while (mypos < m_sp->length() && mycp != charpos) {
|
||||
l = get_cl(mypos);
|
||||
if (l <= 0 || !poslok(mypos, l) || !checkvalidat(mypos, l))
|
||||
return (unsigned int)-1;
|
||||
mypos += l;
|
||||
++mycp;
|
||||
}
|
||||
if (mypos < m_sp->length() && mycp == charpos) {
|
||||
l = get_cl(mypos);
|
||||
if (poslok(mypos, l) && checkvalidat(mypos, l))
|
||||
return getvalueat(mypos, l);
|
||||
}
|
||||
return (unsigned int)-1;
|
||||
mypos += l;
|
||||
++mycp;
|
||||
}
|
||||
if (mypos < m_sp->length() && mycp == charpos) {
|
||||
l = get_cl(mypos);
|
||||
if (poslok(mypos, l) && checkvalidat(mypos, l))
|
||||
return getvalueat(mypos, l);
|
||||
}
|
||||
return (unsigned int)-1;
|
||||
}
|
||||
|
||||
/** Increment current position to next utf-8 char */
|
||||
std::string::size_type operator++(int) {
|
||||
// Note: m_cl may be zero at eof if user's test not right
|
||||
// this shouldn't crash the program until actual data access
|
||||
// Note: m_cl may be zero at eof if user's test not right
|
||||
// this shouldn't crash the program until actual data access
|
||||
#ifdef UTF8ITER_CHECK
|
||||
assert(m_cl != 0);
|
||||
assert(m_cl != 0);
|
||||
#endif
|
||||
if (m_cl == 0)
|
||||
return std::string::npos;
|
||||
if (m_cl == 0)
|
||||
return std::string::npos;
|
||||
|
||||
m_pos += m_cl;
|
||||
m_charpos++;
|
||||
update_cl();
|
||||
return m_pos;
|
||||
m_pos += m_cl;
|
||||
m_charpos++;
|
||||
update_cl();
|
||||
return m_pos;
|
||||
}
|
||||
|
||||
/** operator* returns the ucs4 value as a machine integer*/
|
||||
unsigned int operator*() {
|
||||
#ifdef UTF8ITER_CHECK
|
||||
assert(m_cl > 0);
|
||||
assert(m_cl > 0);
|
||||
#endif
|
||||
return m_cl == 0 ? (unsigned int)-1 : getvalueat(m_pos, m_cl);
|
||||
return m_cl == 0 ? (unsigned int)-1 : getvalueat(m_pos, m_cl);
|
||||
}
|
||||
|
||||
/** Append current utf-8 possibly multi-byte character to string param.
|
||||
This needs to be fast. No error checking. */
|
||||
This needs to be fast. No error checking. */
|
||||
unsigned int appendchartostring(std::string &out) const {
|
||||
#ifdef UTF8ITER_CHECK
|
||||
assert(m_cl != 0);
|
||||
assert(m_cl != 0);
|
||||
#endif
|
||||
out.append(&(*m_sp)[m_pos], m_cl);
|
||||
return m_cl;
|
||||
out.append(&(*m_sp)[m_pos], m_cl);
|
||||
return m_cl;
|
||||
}
|
||||
|
||||
/** Return current character as string */
|
||||
operator std::string() {
|
||||
#ifdef UTF8ITER_CHECK
|
||||
assert(m_cl != 0);
|
||||
assert(m_cl != 0);
|
||||
#endif
|
||||
return m_cl > 0 ? m_sp->substr(m_pos, m_cl) : std::string();
|
||||
return m_cl > 0 ? m_sp->substr(m_pos, m_cl) : std::string();
|
||||
}
|
||||
|
||||
bool eof() const {
|
||||
return m_pos == m_sp->length();
|
||||
return m_pos == m_sp->length();
|
||||
}
|
||||
|
||||
bool error() const {
|
||||
return m_cl == 0;
|
||||
return m_cl == 0;
|
||||
}
|
||||
|
||||
/** Return current byte offset in input string */
|
||||
std::string::size_type getBpos() const {
|
||||
return m_pos;
|
||||
return m_pos;
|
||||
}
|
||||
|
||||
/** Return current character length */
|
||||
std::string::size_type getBlen() const {
|
||||
return m_cl;
|
||||
return m_cl;
|
||||
}
|
||||
|
||||
/** Return current unicode character offset in input string */
|
||||
std::string::size_type getCpos() const {
|
||||
return m_charpos;
|
||||
return m_charpos;
|
||||
}
|
||||
|
||||
private:
|
||||
@ -151,128 +161,143 @@ private:
|
||||
|
||||
// Check position and cl against string length
|
||||
bool poslok(std::string::size_type p, int l) const {
|
||||
#ifdef UTF8ITER_CHECK
|
||||
assert(p != std::string::npos && l > 0 && p + l <= m_sp->length());
|
||||
#endif
|
||||
return p != std::string::npos && l > 0 && p + l <= m_sp->length();
|
||||
return p != std::string::npos && l > 0 && p + l <= m_sp->length();
|
||||
}
|
||||
|
||||
// Update current char length in object state, check
|
||||
// for errors
|
||||
inline void update_cl() {
|
||||
m_cl = 0;
|
||||
if (m_pos >= m_sp->length())
|
||||
return;
|
||||
m_cl = get_cl(m_pos);
|
||||
if (!poslok(m_pos, m_cl)) {
|
||||
// Used to set eof here for safety, but this is bad because it
|
||||
// basically prevents the caller to discriminate error and eof.
|
||||
// m_pos = m_sp->length();
|
||||
m_cl = 0;
|
||||
return;
|
||||
}
|
||||
if (!checkvalidat(m_pos, m_cl)) {
|
||||
m_cl = 0;
|
||||
}
|
||||
if (m_pos >= m_sp->length())
|
||||
return;
|
||||
m_cl = get_cl(m_pos);
|
||||
if (!poslok(m_pos, m_cl)) {
|
||||
// Used to set eof here for safety, but this is bad because it
|
||||
// basically prevents the caller to discriminate error and eof.
|
||||
// m_pos = m_sp->length();
|
||||
m_cl = 0;
|
||||
return;
|
||||
}
|
||||
if (!checkvalidat(m_pos, m_cl)) {
|
||||
m_cl = 0;
|
||||
}
|
||||
}
|
||||
|
||||
inline bool checkvalidat(std::string::size_type p, int l) const {
|
||||
switch (l) {
|
||||
case 1:
|
||||
return (unsigned char)(*m_sp)[p] < 128;
|
||||
case 2:
|
||||
return (((unsigned char)(*m_sp)[p]) & 224) == 192
|
||||
&& (((unsigned char)(*m_sp)[p+1]) & 192) == 128;
|
||||
case 3:
|
||||
return (((unsigned char)(*m_sp)[p]) & 240) == 224
|
||||
&& (((unsigned char)(*m_sp)[p+1]) & 192) == 128
|
||||
&& (((unsigned char)(*m_sp)[p+2]) & 192) == 128
|
||||
;
|
||||
case 4:
|
||||
return (((unsigned char)(*m_sp)[p]) & 248) == 240
|
||||
&& (((unsigned char)(*m_sp)[p+1]) & 192) == 128
|
||||
&& (((unsigned char)(*m_sp)[p+2]) & 192) == 128
|
||||
&& (((unsigned char)(*m_sp)[p+3]) & 192) == 128
|
||||
;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
switch (l) {
|
||||
case 1:
|
||||
return (unsigned char)(*m_sp)[p] < 128;
|
||||
case 2:
|
||||
return (((unsigned char)(*m_sp)[p]) & 224) == 192
|
||||
&& (((unsigned char)(*m_sp)[p+1]) & 192) == 128;
|
||||
case 3:
|
||||
return (((unsigned char)(*m_sp)[p]) & 240) == 224
|
||||
&& (((unsigned char)(*m_sp)[p+1]) & 192) == 128
|
||||
&& (((unsigned char)(*m_sp)[p+2]) & 192) == 128
|
||||
;
|
||||
case 4:
|
||||
return (((unsigned char)(*m_sp)[p]) & 248) == 240
|
||||
&& (((unsigned char)(*m_sp)[p+1]) & 192) == 128
|
||||
&& (((unsigned char)(*m_sp)[p+2]) & 192) == 128
|
||||
&& (((unsigned char)(*m_sp)[p+3]) & 192) == 128
|
||||
;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Get character byte length at specified position. Returns 0 for error.
|
||||
inline int get_cl(std::string::size_type p) const {
|
||||
unsigned int z = (unsigned char)(*m_sp)[p];
|
||||
if (z <= 127) {
|
||||
return 1;
|
||||
} else if ((z & 224) == 192) {
|
||||
return 2;
|
||||
} else if ((z & 240) == 224) {
|
||||
return 3;
|
||||
} else if ((z & 248) == 240) {
|
||||
return 4;
|
||||
}
|
||||
unsigned int z = (unsigned char)(*m_sp)[p];
|
||||
if (z <= 127) {
|
||||
return 1;
|
||||
} else if ((z & 224) == 192) {
|
||||
return 2;
|
||||
} else if ((z & 240) == 224) {
|
||||
return 3;
|
||||
} else if ((z & 248) == 240) {
|
||||
return 4;
|
||||
}
|
||||
#ifdef UTF8ITER_CHECK
|
||||
assert(z <= 127 || (z & 224) == 192 || (z & 240) == 224 ||
|
||||
(z & 248) == 240);
|
||||
assert(z <= 127 || (z & 224) == 192 || (z & 240) == 224 ||
|
||||
(z & 248) == 240);
|
||||
#endif
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Compute value at given position. No error checking.
|
||||
inline unsigned int getvalueat(std::string::size_type p, int l) const {
|
||||
switch (l) {
|
||||
case 1:
|
||||
switch (l) {
|
||||
case 1:
|
||||
#ifdef UTF8ITER_CHECK
|
||||
assert((unsigned char)(*m_sp)[p] < 128);
|
||||
assert((unsigned char)(*m_sp)[p] < 128);
|
||||
#endif
|
||||
return (unsigned char)(*m_sp)[p];
|
||||
case 2:
|
||||
return (unsigned char)(*m_sp)[p];
|
||||
case 2:
|
||||
#ifdef UTF8ITER_CHECK
|
||||
assert(
|
||||
((unsigned char)(*m_sp)[p] & 224) == 192
|
||||
&& ((unsigned char)(*m_sp)[p+1] & 192) == 128
|
||||
);
|
||||
assert(
|
||||
((unsigned char)(*m_sp)[p] & 224) == 192
|
||||
&& ((unsigned char)(*m_sp)[p+1] & 192) == 128
|
||||
);
|
||||
#endif
|
||||
return ((unsigned char)(*m_sp)[p] - 192) * 64 +
|
||||
(unsigned char)(*m_sp)[p+1] - 128 ;
|
||||
case 3:
|
||||
return ((unsigned char)(*m_sp)[p] - 192) * 64 +
|
||||
(unsigned char)(*m_sp)[p+1] - 128 ;
|
||||
case 3:
|
||||
#ifdef UTF8ITER_CHECK
|
||||
assert(
|
||||
(((unsigned char)(*m_sp)[p]) & 240) == 224
|
||||
&& (((unsigned char)(*m_sp)[p+1]) & 192) == 128
|
||||
&& (((unsigned char)(*m_sp)[p+2]) & 192) == 128
|
||||
);
|
||||
assert(
|
||||
(((unsigned char)(*m_sp)[p]) & 240) == 224
|
||||
&& (((unsigned char)(*m_sp)[p+1]) & 192) == 128
|
||||
&& (((unsigned char)(*m_sp)[p+2]) & 192) == 128
|
||||
);
|
||||
#endif
|
||||
|
||||
return ((unsigned char)(*m_sp)[p] - 224) * 4096 +
|
||||
((unsigned char)(*m_sp)[p+1] - 128) * 64 +
|
||||
(unsigned char)(*m_sp)[p+2] - 128;
|
||||
case 4:
|
||||
return ((unsigned char)(*m_sp)[p] - 224) * 4096 +
|
||||
((unsigned char)(*m_sp)[p+1] - 128) * 64 +
|
||||
(unsigned char)(*m_sp)[p+2] - 128;
|
||||
case 4:
|
||||
#ifdef UTF8ITER_CHECK
|
||||
assert(
|
||||
(((unsigned char)(*m_sp)[p]) & 248) == 240
|
||||
&& (((unsigned char)(*m_sp)[p+1]) & 192) == 128
|
||||
&& (((unsigned char)(*m_sp)[p+2]) & 192) == 128
|
||||
&& (((unsigned char)(*m_sp)[p+3]) & 192) == 128
|
||||
);
|
||||
assert(
|
||||
(((unsigned char)(*m_sp)[p]) & 248) == 240
|
||||
&& (((unsigned char)(*m_sp)[p+1]) & 192) == 128
|
||||
&& (((unsigned char)(*m_sp)[p+2]) & 192) == 128
|
||||
&& (((unsigned char)(*m_sp)[p+3]) & 192) == 128
|
||||
);
|
||||
#endif
|
||||
|
||||
return ((unsigned char)(*m_sp)[p]-240)*262144 +
|
||||
((unsigned char)(*m_sp)[p+1]-128)*4096 +
|
||||
((unsigned char)(*m_sp)[p+2]-128)*64 +
|
||||
(unsigned char)(*m_sp)[p+3]-128;
|
||||
return ((unsigned char)(*m_sp)[p]-240)*262144 +
|
||||
((unsigned char)(*m_sp)[p+1]-128)*4096 +
|
||||
((unsigned char)(*m_sp)[p+2]-128)*64 +
|
||||
(unsigned char)(*m_sp)[p+3]-128;
|
||||
|
||||
default:
|
||||
default:
|
||||
#ifdef UTF8ITER_CHECK
|
||||
assert(l <= 4);
|
||||
assert(l <= 4);
|
||||
#endif
|
||||
return (unsigned int)-1;
|
||||
}
|
||||
return (unsigned int)-1;
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
extern void utf8truncate(std::string& s, int maxlen);
|
||||
enum Utf8TruncateFlag {UTF8T_NONE, UTF8T_ATWORD, UTF8T_ELLIPSIS};
|
||||
// maxlen is in utf-8 chars.
|
||||
extern void utf8truncate(std::string& s, int maxlen, int flags = 0,
|
||||
std::string ellipsis = "...",
|
||||
std::string ws = " \t\n\r");
|
||||
extern size_t utf8len(const std::string& s);
|
||||
|
||||
/** @brief Check and possibly fix string by replacing badly encoded
|
||||
* characters with the standard question mark replacement character.
|
||||
*
|
||||
* @param in the string to check
|
||||
* @param[out] if fixit is true, the fixed output string
|
||||
* @param fixit if true, copy a fixed string to out
|
||||
* @param maxrepl maximum replacements before we bail out
|
||||
* @return -1 for failure (fixit false or maxrepl reached).
|
||||
* 0 or positive: replacement count.
|
||||
*/
|
||||
extern int utf8check(
|
||||
const std::string& in, std::string& out, bool fixit=false, int maxrepl=100);
|
||||
|
||||
#endif /* _UTF8ITER_H_INCLUDED_ */
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user