shared
This commit is contained in:
parent
4508b6b064
commit
476a3ba743
@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
PPA_KEYID=7808CE96D38B9201
|
PPA_KEYID=7808CE96D38B9201
|
||||||
|
|
||||||
RCLVERS=1.27.2
|
RCLVERS=1.27.3
|
||||||
SCOPEVERS=1.20.2.4
|
SCOPEVERS=1.20.2.4
|
||||||
GSSPVERS=1.0.0
|
GSSPVERS=1.0.0
|
||||||
PPAVERS=2
|
PPAVERS=2
|
||||||
@ -51,7 +51,7 @@ debdir=debian
|
|||||||
# 19.10 eoan 2020-07
|
# 19.10 eoan 2020-07
|
||||||
# 20.04LTS focal 2025-04
|
# 20.04LTS focal 2025-04
|
||||||
series="xenial bionic eoan focal"
|
series="xenial bionic eoan focal"
|
||||||
series=
|
#series=
|
||||||
|
|
||||||
if test "X$series" != X ; then
|
if test "X$series" != X ; then
|
||||||
check_recoll_orig
|
check_recoll_orig
|
||||||
@ -137,7 +137,7 @@ done
|
|||||||
# 19.10 eoan 2020-07
|
# 19.10 eoan 2020-07
|
||||||
# 20.04LTS focal 2025-04
|
# 20.04LTS focal 2025-04
|
||||||
series="xenial bionic eoan focal"
|
series="xenial bionic eoan focal"
|
||||||
series=focal
|
series=
|
||||||
|
|
||||||
debdir=debiangssp
|
debdir=debiangssp
|
||||||
if test ! -d ${debdir}/ ; then
|
if test ! -d ${debdir}/ ; then
|
||||||
|
|||||||
@ -132,7 +132,7 @@ int ConfTabsW::addForeignPanel(ConfPanelWIF* w, const QString& title)
|
|||||||
m_widgets.push_back(w);
|
m_widgets.push_back(w);
|
||||||
QWidget *qw = dynamic_cast<QWidget *>(w);
|
QWidget *qw = dynamic_cast<QWidget *>(w);
|
||||||
if (qw == 0) {
|
if (qw == 0) {
|
||||||
qDebug() << "Can't cast panel to QWidget";
|
qDebug() << "addForeignPanel: can't cast panel to QWidget";
|
||||||
abort();
|
abort();
|
||||||
}
|
}
|
||||||
return tabWidget->addTab(qw, title);
|
return tabWidget->addTab(qw, title);
|
||||||
@ -227,9 +227,11 @@ ConfParamW *ConfTabsW::findParamW(const QString& varname)
|
|||||||
}
|
}
|
||||||
void ConfTabsW::endOfList(int tabindex)
|
void ConfTabsW::endOfList(int tabindex)
|
||||||
{
|
{
|
||||||
ConfPanelW *panel = (ConfPanelW*)tabWidget->widget(tabindex);
|
ConfPanelW *panel = dynamic_cast<ConfPanelW*>(tabWidget->widget(tabindex));
|
||||||
if (nullptr == panel)
|
// panel may be null if this is a foreign panel (not a conftabsw)
|
||||||
|
if (nullptr == panel) {
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
panel->endOfList();
|
panel->endOfList();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -19,6 +19,11 @@
|
|||||||
|
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
|
#include <time.h>
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#define localtime_r(A,B) localtime_s(B,A)
|
||||||
|
#endif
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
@ -54,6 +59,17 @@ bool Logger::reopen(const std::string& fn)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const char *Logger::datestring()
|
||||||
|
{
|
||||||
|
time_t clk = time(0);
|
||||||
|
struct tm tmb;
|
||||||
|
localtime_r(&clk, &tmb);
|
||||||
|
if (strftime(m_datebuf, LOGGER_DATESIZE, m_datefmt.c_str(), &tmb)) {
|
||||||
|
return m_datebuf;
|
||||||
|
} else {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
}
|
||||||
static Logger *theLog;
|
static Logger *theLog;
|
||||||
|
|
||||||
Logger *Logger::getTheLog(const string& fn)
|
Logger *Logger::getTheLog(const string& fn)
|
||||||
|
|||||||
@ -18,11 +18,9 @@
|
|||||||
#define _LOG_H_X_INCLUDED_
|
#define _LOG_H_X_INCLUDED_
|
||||||
|
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <mutex>
|
|
||||||
|
|
||||||
#ifndef LOGGER_THREADSAFE
|
#ifndef LOGGER_THREADSAFE
|
||||||
#define LOGGER_THREADSAFE 1
|
#define LOGGER_THREADSAFE 1
|
||||||
@ -33,30 +31,74 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Can't use the symbolic Logger::LLXX names in preproc. 6 is LLDEB1
|
// Can't use the symbolic Logger::LLXX names in preproc. 6 is LLDEB1
|
||||||
|
// STATICVERBOSITY is the level above which logging statements are
|
||||||
|
// preproc'ed out (can't be dynamically turned on).
|
||||||
#ifndef LOGGER_STATICVERBOSITY
|
#ifndef LOGGER_STATICVERBOSITY
|
||||||
#define LOGGER_STATICVERBOSITY 5
|
#define LOGGER_STATICVERBOSITY 5
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#define LOGGER_DATESIZE 100
|
||||||
|
|
||||||
|
/** @brief This is a singleton class. The logger pointer is obtained
|
||||||
|
* when needed by calls to @ref getTheLog(), only the first of which
|
||||||
|
* actually creates the object and initializes the output. */
|
||||||
class Logger {
|
class Logger {
|
||||||
public:
|
public:
|
||||||
/** Initialize logging to file name. Use "stderr" for stderr
|
/** Initialize logging to file name. Use "stderr" for stderr
|
||||||
output. Creates the singleton logger object */
|
* output. Creates the singleton logger object. Only the first
|
||||||
|
* call changes the state, further ones just return the Logger
|
||||||
|
* pointer. */
|
||||||
static Logger *getTheLog(const std::string& fn);
|
static Logger *getTheLog(const std::string& fn);
|
||||||
|
|
||||||
|
/** Close and reopen the output file. For rotating the log: rename
|
||||||
|
* then reopen. */
|
||||||
bool reopen(const std::string& fn);
|
bool reopen(const std::string& fn);
|
||||||
|
|
||||||
|
/** Retrieve the output stream in case you need to write directly
|
||||||
|
* to it. In a multithreaded program, you probably also need to obtain
|
||||||
|
* the mutex with @ref getmutex, and lock it. */
|
||||||
std::ostream& getstream() {
|
std::ostream& getstream() {
|
||||||
return m_tocerr ? std::cerr : m_stream;
|
return m_tocerr ? std::cerr : m_stream;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** @brief Log level values. Messages at level above the current will
|
||||||
|
* not be printed. Messages at a level above
|
||||||
|
* LOGGER_STATICVERBOSITY will not even be compiled in. */
|
||||||
enum LogLevel {LLNON=0, LLFAT=1, LLERR=2, LLINF=3, LLDEB=4,
|
enum LogLevel {LLNON=0, LLFAT=1, LLERR=2, LLINF=3, LLDEB=4,
|
||||||
LLDEB0=5, LLDEB1=6, LLDEB2=7};
|
LLDEB0=5, LLDEB1=6, LLDEB2=7};
|
||||||
|
|
||||||
|
/** @brief Set the log dynamic verbosity level */
|
||||||
void setLogLevel(LogLevel level) {
|
void setLogLevel(LogLevel level) {
|
||||||
m_loglevel = level;
|
m_loglevel = level;
|
||||||
}
|
}
|
||||||
int getloglevel() {
|
/** @brief Set the log dynamic verbosity level */
|
||||||
|
void setloglevel(LogLevel level) {
|
||||||
|
m_loglevel = level;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @brief Retrieve the current log level */
|
||||||
|
int getloglevel() const {
|
||||||
return m_loglevel;
|
return m_loglevel;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** @brief turn date logging on or off (default is off) */
|
||||||
|
void logthedate(bool onoff) {
|
||||||
|
m_logdate = onoff;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool loggingdate() const {
|
||||||
|
return m_logdate;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @brief Set the date format, as an strftime() format string.
|
||||||
|
* Default: "%Y%m%d-%H%M%S" . */
|
||||||
|
void setdateformat(const std::string fmt) {
|
||||||
|
m_datefmt = fmt;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Call with log locked */
|
||||||
|
const char *datestring();
|
||||||
|
|
||||||
#if LOGGER_THREADSAFE
|
#if LOGGER_THREADSAFE
|
||||||
std::recursive_mutex& getmutex() {
|
std::recursive_mutex& getmutex() {
|
||||||
return m_mutex;
|
return m_mutex;
|
||||||
@ -65,13 +107,15 @@ public:
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
bool m_tocerr{false};
|
bool m_tocerr{false};
|
||||||
|
bool m_logdate{false};
|
||||||
int m_loglevel{LLERR};
|
int m_loglevel{LLERR};
|
||||||
|
std::string m_datefmt{"%Y%m%d-%H%M%S"};
|
||||||
std::string m_fn;
|
std::string m_fn;
|
||||||
std::ofstream m_stream;
|
std::ofstream m_stream;
|
||||||
#if LOGGER_THREADSAFE
|
#if LOGGER_THREADSAFE
|
||||||
std::recursive_mutex m_mutex;
|
std::recursive_mutex m_mutex;
|
||||||
#endif
|
#endif
|
||||||
|
char m_datebuf[LOGGER_DATESIZE];
|
||||||
Logger(const std::string& fn);
|
Logger(const std::string& fn);
|
||||||
Logger(const Logger &);
|
Logger(const Logger &);
|
||||||
Logger& operator=(const Logger &);
|
Logger& operator=(const Logger &);
|
||||||
@ -93,10 +137,14 @@ private:
|
|||||||
#define LOGGER_LEVEL (Logger::getTheLog("")->getloglevel() + \
|
#define LOGGER_LEVEL (Logger::getTheLog("")->getloglevel() + \
|
||||||
LOGGER_LOCAL_LOGINC)
|
LOGGER_LOCAL_LOGINC)
|
||||||
|
|
||||||
#define LOGGER_DOLOG(L,X) LOGGER_PRT << ":" << L << ":" << \
|
#define LOGGER_DATE (Logger::getTheLog("")->loggingdate() ? \
|
||||||
__FILE__ << ":" << __LINE__ << "::" << X \
|
Logger::getTheLog("")->datestring() : "")
|
||||||
|
|
||||||
|
#define LOGGER_DOLOG(L,X) LOGGER_PRT << LOGGER_DATE << ":" << L << ":" << \
|
||||||
|
__FILE__ << ":" << __LINE__ << "::" << X \
|
||||||
<< std::flush
|
<< std::flush
|
||||||
|
|
||||||
|
|
||||||
#if LOGGER_STATICVERBOSITY >= 7
|
#if LOGGER_STATICVERBOSITY >= 7
|
||||||
#define LOGDEB2(X) { \
|
#define LOGDEB2(X) { \
|
||||||
if (LOGGER_LEVEL >= Logger::LLDEB2) { \
|
if (LOGGER_LEVEL >= Logger::LLDEB2) { \
|
||||||
@ -142,6 +190,10 @@ private:
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if LOGGER_STATICVERBOSITY >= 3
|
#if LOGGER_STATICVERBOSITY >= 3
|
||||||
|
/** Log a message at level INFO. Other macros exist for other levels (LOGFAT,
|
||||||
|
* LOGERR, LOGINF, LOGDEB, LOGDEB0... Use as:
|
||||||
|
* LOGINF("some text" << other stuff << ... << "\n");
|
||||||
|
*/
|
||||||
#define LOGINF(X) { \
|
#define LOGINF(X) { \
|
||||||
if (LOGGER_LEVEL >= Logger::LLINF) { \
|
if (LOGGER_LEVEL >= Logger::LLINF) { \
|
||||||
LOGGER_LOCK; \
|
LOGGER_LOCK; \
|
||||||
|
|||||||
@ -56,7 +56,6 @@
|
|||||||
|
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <dirent.h>
|
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
@ -88,12 +87,12 @@
|
|||||||
#endif
|
#endif
|
||||||
#define WIN32_LEAN_AND_MEAN
|
#define WIN32_LEAN_AND_MEAN
|
||||||
#define NOGDI
|
#define NOGDI
|
||||||
#define MAXPATHLEN PATH_MAX
|
|
||||||
|
|
||||||
#include <windows.h>
|
#include <windows.h>
|
||||||
#include <io.h>
|
#include <io.h>
|
||||||
|
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
|
#include <direct.h>
|
||||||
|
#include <Shlobj.h>
|
||||||
|
#include <Stringapiset.h>
|
||||||
|
|
||||||
#if !defined(S_IFLNK)
|
#if !defined(S_IFLNK)
|
||||||
#define S_IFLNK 0
|
#define S_IFLNK 0
|
||||||
@ -104,11 +103,13 @@
|
|||||||
#ifndef S_ISREG
|
#ifndef S_ISREG
|
||||||
# define S_ISREG(ST_MODE) (((ST_MODE) & _S_IFMT) == _S_IFREG)
|
# define S_ISREG(ST_MODE) (((ST_MODE) & _S_IFMT) == _S_IFREG)
|
||||||
#endif
|
#endif
|
||||||
|
#define MAXPATHLEN PATH_MAX
|
||||||
#include <direct.h>
|
#ifndef PATH_MAX
|
||||||
|
#define PATH_MAX MAX_PATH
|
||||||
#include <Shlobj.h>
|
#endif
|
||||||
#include <Stringapiset.h>
|
#ifndef R_OK
|
||||||
|
#define R_OK 4
|
||||||
|
#endif
|
||||||
|
|
||||||
#define STAT _wstati64
|
#define STAT _wstati64
|
||||||
#define LSTAT _wstati64
|
#define LSTAT _wstati64
|
||||||
@ -130,7 +131,9 @@
|
|||||||
// For getpid
|
// For getpid
|
||||||
#include <process.h>
|
#include <process.h>
|
||||||
#define getpid _getpid
|
#define getpid _getpid
|
||||||
#endif
|
|
||||||
|
#define PATHUT_SSIZE_T int
|
||||||
|
#endif // _MSC_VER
|
||||||
|
|
||||||
#else /* !_WIN32 -> */
|
#else /* !_WIN32 -> */
|
||||||
|
|
||||||
@ -159,8 +162,19 @@
|
|||||||
|
|
||||||
#endif /* !_WIN32 */
|
#endif /* !_WIN32 */
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#include <msvc_dirent.h>
|
||||||
|
#else // !_MSC_VER
|
||||||
|
#include <dirent.h>
|
||||||
|
#endif // _MSC_VER
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
|
#ifndef PATHUT_SSIZE_T
|
||||||
|
#define PATHUT_SSIZE_T ssize_t
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
|
|
||||||
std::string wchartoutf8(const wchar_t *in, size_t len)
|
std::string wchartoutf8(const wchar_t *in, size_t len)
|
||||||
@ -1401,7 +1415,7 @@ int Pidfile::write_pid()
|
|||||||
char pidstr[20];
|
char pidstr[20];
|
||||||
sprintf(pidstr, "%u", int(getpid()));
|
sprintf(pidstr, "%u", int(getpid()));
|
||||||
lseek(m_fd, 0, 0);
|
lseek(m_fd, 0, 0);
|
||||||
if (::write(m_fd, pidstr, strlen(pidstr)) != (ssize_t)strlen(pidstr)) {
|
if (::write(m_fd, pidstr, strlen(pidstr)) != (PATHUT_SSIZE_T)strlen(pidstr)) {
|
||||||
m_reason = "write failed";
|
m_reason = "write failed";
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -143,11 +143,7 @@ extern bool path_unlink(const std::string& path);
|
|||||||
* extension. On other OSes, just builds the fstream. We'd need to
|
* extension. On other OSes, just builds the fstream. We'd need to
|
||||||
* find a way to make this work with g++. It would be easier in this
|
* find a way to make this work with g++. It would be easier in this
|
||||||
* case to use a FILE (_openw(), then fdopen()), but conftree really
|
* case to use a FILE (_openw(), then fdopen()), but conftree really
|
||||||
* depends on std::iostream. One possible workaround for g++ would be
|
* depends on std::iostream.
|
||||||
* to use shortpaths (which we already use to pass file names to
|
|
||||||
* xapian and aspell). Most of the problems are caused by the home
|
|
||||||
* directory name being non-ASCII, so returning a short path in
|
|
||||||
* path_home() would probably solve everything (but not pretty).
|
|
||||||
*
|
*
|
||||||
* @param path an utf-8 file path.
|
* @param path an utf-8 file path.
|
||||||
* @param mode is an std::fstream mode (ios::in etc.) */
|
* @param mode is an std::fstream mode (ios::in etc.) */
|
||||||
|
|||||||
@ -66,7 +66,7 @@ extern void stringtolower(std::string& io);
|
|||||||
extern std::string stringtolower(const std::string& io);
|
extern std::string stringtolower(const std::string& io);
|
||||||
extern void stringtoupper(std::string& io);
|
extern void stringtoupper(std::string& io);
|
||||||
extern std::string stringtoupper(const std::string& io);
|
extern std::string stringtoupper(const std::string& io);
|
||||||
extern bool beginswith(const std::string& big, const std::string& small);
|
extern bool beginswith(const std::string& bg, const std::string& sml);
|
||||||
|
|
||||||
// Is one string the end part of the other ?
|
// Is one string the end part of the other ?
|
||||||
extern int stringisuffcmp(const std::string& s1, const std::string& s2);
|
extern int stringisuffcmp(const std::string& s1, const std::string& s2);
|
||||||
|
|||||||
@ -16,21 +16,114 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "utf8iter.h"
|
#include "utf8iter.h"
|
||||||
#include <string>
|
|
||||||
|
|
||||||
using std::string;
|
#include <unordered_set>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
void utf8truncate(std::string& s, int maxlen)
|
using namespace std;
|
||||||
|
|
||||||
|
void utf8truncate(std::string& s, int maxlen, int flags, string ellipsis,
|
||||||
|
string ws)
|
||||||
{
|
{
|
||||||
if (s.size() <= string::size_type(maxlen)) {
|
if (s.size() <= string::size_type(maxlen)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
unordered_set<int> wss;
|
||||||
|
if (flags & UTF8T_ATWORD) {
|
||||||
|
Utf8Iter iter(ws);
|
||||||
|
for (; !iter.eof(); iter++) {
|
||||||
|
unsigned int c = *iter;
|
||||||
|
wss.insert(c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (flags & UTF8T_ELLIPSIS) {
|
||||||
|
size_t ellen = utf8len(ellipsis);
|
||||||
|
if (maxlen > int(ellen)) {
|
||||||
|
maxlen -= ellen;
|
||||||
|
} else {
|
||||||
|
maxlen = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Utf8Iter iter(s);
|
Utf8Iter iter(s);
|
||||||
string::size_type pos = 0;
|
string::size_type pos = 0;
|
||||||
while (iter++ != string::npos)
|
string::size_type lastwspos = 0;
|
||||||
if (iter.getBpos() < string::size_type(maxlen)) {
|
for (; !iter.eof(); iter++) {
|
||||||
pos = iter.getBpos();
|
unsigned int c = *iter;
|
||||||
|
if (iter.getCpos() < string::size_type(maxlen)) {
|
||||||
|
pos = iter.getBpos() + iter.getBlen();
|
||||||
|
if ((flags & UTF8T_ATWORD) && wss.find(c) != wss.end()) {
|
||||||
|
lastwspos = pos;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
s.erase(pos);
|
if (flags & UTF8T_ATWORD) {
|
||||||
|
s.erase(lastwspos);
|
||||||
|
for (;;) {
|
||||||
|
Utf8Iter iter(s);
|
||||||
|
unsigned int c = 0;
|
||||||
|
for (; !iter.eof(); iter++) {
|
||||||
|
c = *iter;
|
||||||
|
pos = iter.getBpos();
|
||||||
|
}
|
||||||
|
if (wss.find(c) == wss.end()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
s.erase(pos);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
s.erase(pos);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (flags & UTF8T_ELLIPSIS) {
|
||||||
|
s += ellipsis;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t utf8len(const string& s)
|
||||||
|
{
|
||||||
|
size_t len = 0;
|
||||||
|
Utf8Iter iter(s);
|
||||||
|
while (iter++ != string::npos) {
|
||||||
|
len++;
|
||||||
|
}
|
||||||
|
return len;
|
||||||
|
}
|
||||||
|
|
||||||
|
static const std::string replchar{"\xef\xbf\xbd"};
|
||||||
|
|
||||||
|
// Check utf-8 encoding, replacing errors with the ? char above
|
||||||
|
int utf8check(const std::string& in, std::string& out, bool fixit, int maxrepl)
|
||||||
|
{
|
||||||
|
int cnt = 0;
|
||||||
|
Utf8Iter it(in);
|
||||||
|
for (;!it.eof(); it++) {
|
||||||
|
if (it.error()) {
|
||||||
|
if (!fixit) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
out += replchar;
|
||||||
|
++cnt;
|
||||||
|
for (; cnt < maxrepl; cnt++) {
|
||||||
|
it.retryfurther();
|
||||||
|
if (it.eof())
|
||||||
|
return cnt;
|
||||||
|
if (!it.error())
|
||||||
|
break;
|
||||||
|
out += replchar;
|
||||||
|
}
|
||||||
|
if (it.error()) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// We have reached a good char and eof is false
|
||||||
|
if (fixit) {
|
||||||
|
it.appendchartostring(out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return cnt;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -32,110 +32,120 @@
|
|||||||
class Utf8Iter {
|
class Utf8Iter {
|
||||||
public:
|
public:
|
||||||
Utf8Iter(const std::string &in)
|
Utf8Iter(const std::string &in)
|
||||||
: m_sp(&in) {
|
: m_sp(&in) {
|
||||||
update_cl();
|
update_cl();
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::string& buffer() const {
|
const std::string& buffer() const {
|
||||||
return *m_sp;
|
return *m_sp;
|
||||||
}
|
}
|
||||||
|
|
||||||
void rewind() {
|
void rewind() {
|
||||||
m_cl = 0;
|
m_cl = 0;
|
||||||
m_pos = 0;
|
m_pos = 0;
|
||||||
m_charpos = 0;
|
m_charpos = 0;
|
||||||
update_cl();
|
update_cl();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void retryfurther() {
|
||||||
|
if (eof())
|
||||||
|
return;
|
||||||
|
m_pos++;
|
||||||
|
if (eof()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
update_cl();
|
||||||
|
}
|
||||||
|
|
||||||
/** "Direct" access. Awfully inefficient as we skip from start or current
|
/** "Direct" access. Awfully inefficient as we skip from start or current
|
||||||
* position at best. This can only be useful for a lookahead from the
|
* position at best. This can only be useful for a lookahead from the
|
||||||
* current position */
|
* current position */
|
||||||
unsigned int operator[](std::string::size_type charpos) const {
|
unsigned int operator[](std::string::size_type charpos) const {
|
||||||
std::string::size_type mypos = 0;
|
std::string::size_type mypos = 0;
|
||||||
unsigned int mycp = 0;
|
unsigned int mycp = 0;
|
||||||
if (charpos >= m_charpos) {
|
if (charpos >= m_charpos) {
|
||||||
mypos = m_pos;
|
mypos = m_pos;
|
||||||
mycp = m_charpos;
|
mycp = m_charpos;
|
||||||
}
|
}
|
||||||
int l;
|
int l;
|
||||||
while (mypos < m_sp->length() && mycp != charpos) {
|
while (mypos < m_sp->length() && mycp != charpos) {
|
||||||
l = get_cl(mypos);
|
l = get_cl(mypos);
|
||||||
if (l <= 0 || !poslok(mypos, l) || !checkvalidat(mypos, l))
|
if (l <= 0 || !poslok(mypos, l) || !checkvalidat(mypos, l))
|
||||||
|
return (unsigned int)-1;
|
||||||
|
mypos += l;
|
||||||
|
++mycp;
|
||||||
|
}
|
||||||
|
if (mypos < m_sp->length() && mycp == charpos) {
|
||||||
|
l = get_cl(mypos);
|
||||||
|
if (poslok(mypos, l) && checkvalidat(mypos, l))
|
||||||
|
return getvalueat(mypos, l);
|
||||||
|
}
|
||||||
return (unsigned int)-1;
|
return (unsigned int)-1;
|
||||||
mypos += l;
|
|
||||||
++mycp;
|
|
||||||
}
|
|
||||||
if (mypos < m_sp->length() && mycp == charpos) {
|
|
||||||
l = get_cl(mypos);
|
|
||||||
if (poslok(mypos, l) && checkvalidat(mypos, l))
|
|
||||||
return getvalueat(mypos, l);
|
|
||||||
}
|
|
||||||
return (unsigned int)-1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Increment current position to next utf-8 char */
|
/** Increment current position to next utf-8 char */
|
||||||
std::string::size_type operator++(int) {
|
std::string::size_type operator++(int) {
|
||||||
// Note: m_cl may be zero at eof if user's test not right
|
// Note: m_cl may be zero at eof if user's test not right
|
||||||
// this shouldn't crash the program until actual data access
|
// this shouldn't crash the program until actual data access
|
||||||
#ifdef UTF8ITER_CHECK
|
#ifdef UTF8ITER_CHECK
|
||||||
assert(m_cl != 0);
|
assert(m_cl != 0);
|
||||||
#endif
|
#endif
|
||||||
if (m_cl == 0)
|
if (m_cl == 0)
|
||||||
return std::string::npos;
|
return std::string::npos;
|
||||||
|
|
||||||
m_pos += m_cl;
|
m_pos += m_cl;
|
||||||
m_charpos++;
|
m_charpos++;
|
||||||
update_cl();
|
update_cl();
|
||||||
return m_pos;
|
return m_pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** operator* returns the ucs4 value as a machine integer*/
|
/** operator* returns the ucs4 value as a machine integer*/
|
||||||
unsigned int operator*() {
|
unsigned int operator*() {
|
||||||
#ifdef UTF8ITER_CHECK
|
#ifdef UTF8ITER_CHECK
|
||||||
assert(m_cl > 0);
|
assert(m_cl > 0);
|
||||||
#endif
|
#endif
|
||||||
return m_cl == 0 ? (unsigned int)-1 : getvalueat(m_pos, m_cl);
|
return m_cl == 0 ? (unsigned int)-1 : getvalueat(m_pos, m_cl);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Append current utf-8 possibly multi-byte character to string param.
|
/** Append current utf-8 possibly multi-byte character to string param.
|
||||||
This needs to be fast. No error checking. */
|
This needs to be fast. No error checking. */
|
||||||
unsigned int appendchartostring(std::string &out) const {
|
unsigned int appendchartostring(std::string &out) const {
|
||||||
#ifdef UTF8ITER_CHECK
|
#ifdef UTF8ITER_CHECK
|
||||||
assert(m_cl != 0);
|
assert(m_cl != 0);
|
||||||
#endif
|
#endif
|
||||||
out.append(&(*m_sp)[m_pos], m_cl);
|
out.append(&(*m_sp)[m_pos], m_cl);
|
||||||
return m_cl;
|
return m_cl;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Return current character as string */
|
/** Return current character as string */
|
||||||
operator std::string() {
|
operator std::string() {
|
||||||
#ifdef UTF8ITER_CHECK
|
#ifdef UTF8ITER_CHECK
|
||||||
assert(m_cl != 0);
|
assert(m_cl != 0);
|
||||||
#endif
|
#endif
|
||||||
return m_cl > 0 ? m_sp->substr(m_pos, m_cl) : std::string();
|
return m_cl > 0 ? m_sp->substr(m_pos, m_cl) : std::string();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool eof() const {
|
bool eof() const {
|
||||||
return m_pos == m_sp->length();
|
return m_pos == m_sp->length();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool error() const {
|
bool error() const {
|
||||||
return m_cl == 0;
|
return m_cl == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Return current byte offset in input string */
|
/** Return current byte offset in input string */
|
||||||
std::string::size_type getBpos() const {
|
std::string::size_type getBpos() const {
|
||||||
return m_pos;
|
return m_pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Return current character length */
|
/** Return current character length */
|
||||||
std::string::size_type getBlen() const {
|
std::string::size_type getBlen() const {
|
||||||
return m_cl;
|
return m_cl;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Return current unicode character offset in input string */
|
/** Return current unicode character offset in input string */
|
||||||
std::string::size_type getCpos() const {
|
std::string::size_type getCpos() const {
|
||||||
return m_charpos;
|
return m_charpos;
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@ -151,128 +161,143 @@ private:
|
|||||||
|
|
||||||
// Check position and cl against string length
|
// Check position and cl against string length
|
||||||
bool poslok(std::string::size_type p, int l) const {
|
bool poslok(std::string::size_type p, int l) const {
|
||||||
#ifdef UTF8ITER_CHECK
|
return p != std::string::npos && l > 0 && p + l <= m_sp->length();
|
||||||
assert(p != std::string::npos && l > 0 && p + l <= m_sp->length());
|
|
||||||
#endif
|
|
||||||
return p != std::string::npos && l > 0 && p + l <= m_sp->length();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update current char length in object state, check
|
// Update current char length in object state, check
|
||||||
// for errors
|
// for errors
|
||||||
inline void update_cl() {
|
inline void update_cl() {
|
||||||
m_cl = 0;
|
|
||||||
if (m_pos >= m_sp->length())
|
|
||||||
return;
|
|
||||||
m_cl = get_cl(m_pos);
|
|
||||||
if (!poslok(m_pos, m_cl)) {
|
|
||||||
// Used to set eof here for safety, but this is bad because it
|
|
||||||
// basically prevents the caller to discriminate error and eof.
|
|
||||||
// m_pos = m_sp->length();
|
|
||||||
m_cl = 0;
|
m_cl = 0;
|
||||||
return;
|
if (m_pos >= m_sp->length())
|
||||||
}
|
return;
|
||||||
if (!checkvalidat(m_pos, m_cl)) {
|
m_cl = get_cl(m_pos);
|
||||||
m_cl = 0;
|
if (!poslok(m_pos, m_cl)) {
|
||||||
}
|
// Used to set eof here for safety, but this is bad because it
|
||||||
|
// basically prevents the caller to discriminate error and eof.
|
||||||
|
// m_pos = m_sp->length();
|
||||||
|
m_cl = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (!checkvalidat(m_pos, m_cl)) {
|
||||||
|
m_cl = 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool checkvalidat(std::string::size_type p, int l) const {
|
inline bool checkvalidat(std::string::size_type p, int l) const {
|
||||||
switch (l) {
|
switch (l) {
|
||||||
case 1:
|
case 1:
|
||||||
return (unsigned char)(*m_sp)[p] < 128;
|
return (unsigned char)(*m_sp)[p] < 128;
|
||||||
case 2:
|
case 2:
|
||||||
return (((unsigned char)(*m_sp)[p]) & 224) == 192
|
return (((unsigned char)(*m_sp)[p]) & 224) == 192
|
||||||
&& (((unsigned char)(*m_sp)[p+1]) & 192) == 128;
|
&& (((unsigned char)(*m_sp)[p+1]) & 192) == 128;
|
||||||
case 3:
|
case 3:
|
||||||
return (((unsigned char)(*m_sp)[p]) & 240) == 224
|
return (((unsigned char)(*m_sp)[p]) & 240) == 224
|
||||||
&& (((unsigned char)(*m_sp)[p+1]) & 192) == 128
|
&& (((unsigned char)(*m_sp)[p+1]) & 192) == 128
|
||||||
&& (((unsigned char)(*m_sp)[p+2]) & 192) == 128
|
&& (((unsigned char)(*m_sp)[p+2]) & 192) == 128
|
||||||
;
|
;
|
||||||
case 4:
|
case 4:
|
||||||
return (((unsigned char)(*m_sp)[p]) & 248) == 240
|
return (((unsigned char)(*m_sp)[p]) & 248) == 240
|
||||||
&& (((unsigned char)(*m_sp)[p+1]) & 192) == 128
|
&& (((unsigned char)(*m_sp)[p+1]) & 192) == 128
|
||||||
&& (((unsigned char)(*m_sp)[p+2]) & 192) == 128
|
&& (((unsigned char)(*m_sp)[p+2]) & 192) == 128
|
||||||
&& (((unsigned char)(*m_sp)[p+3]) & 192) == 128
|
&& (((unsigned char)(*m_sp)[p+3]) & 192) == 128
|
||||||
;
|
;
|
||||||
default:
|
default:
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get character byte length at specified position. Returns 0 for error.
|
// Get character byte length at specified position. Returns 0 for error.
|
||||||
inline int get_cl(std::string::size_type p) const {
|
inline int get_cl(std::string::size_type p) const {
|
||||||
unsigned int z = (unsigned char)(*m_sp)[p];
|
unsigned int z = (unsigned char)(*m_sp)[p];
|
||||||
if (z <= 127) {
|
if (z <= 127) {
|
||||||
return 1;
|
return 1;
|
||||||
} else if ((z & 224) == 192) {
|
} else if ((z & 224) == 192) {
|
||||||
return 2;
|
return 2;
|
||||||
} else if ((z & 240) == 224) {
|
} else if ((z & 240) == 224) {
|
||||||
return 3;
|
return 3;
|
||||||
} else if ((z & 248) == 240) {
|
} else if ((z & 248) == 240) {
|
||||||
return 4;
|
return 4;
|
||||||
}
|
}
|
||||||
#ifdef UTF8ITER_CHECK
|
#ifdef UTF8ITER_CHECK
|
||||||
assert(z <= 127 || (z & 224) == 192 || (z & 240) == 224 ||
|
assert(z <= 127 || (z & 224) == 192 || (z & 240) == 224 ||
|
||||||
(z & 248) == 240);
|
(z & 248) == 240);
|
||||||
#endif
|
#endif
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Compute value at given position. No error checking.
|
// Compute value at given position. No error checking.
|
||||||
inline unsigned int getvalueat(std::string::size_type p, int l) const {
|
inline unsigned int getvalueat(std::string::size_type p, int l) const {
|
||||||
switch (l) {
|
switch (l) {
|
||||||
case 1:
|
case 1:
|
||||||
#ifdef UTF8ITER_CHECK
|
#ifdef UTF8ITER_CHECK
|
||||||
assert((unsigned char)(*m_sp)[p] < 128);
|
assert((unsigned char)(*m_sp)[p] < 128);
|
||||||
#endif
|
#endif
|
||||||
return (unsigned char)(*m_sp)[p];
|
return (unsigned char)(*m_sp)[p];
|
||||||
case 2:
|
case 2:
|
||||||
#ifdef UTF8ITER_CHECK
|
#ifdef UTF8ITER_CHECK
|
||||||
assert(
|
assert(
|
||||||
((unsigned char)(*m_sp)[p] & 224) == 192
|
((unsigned char)(*m_sp)[p] & 224) == 192
|
||||||
&& ((unsigned char)(*m_sp)[p+1] & 192) == 128
|
&& ((unsigned char)(*m_sp)[p+1] & 192) == 128
|
||||||
);
|
);
|
||||||
#endif
|
#endif
|
||||||
return ((unsigned char)(*m_sp)[p] - 192) * 64 +
|
return ((unsigned char)(*m_sp)[p] - 192) * 64 +
|
||||||
(unsigned char)(*m_sp)[p+1] - 128 ;
|
(unsigned char)(*m_sp)[p+1] - 128 ;
|
||||||
case 3:
|
case 3:
|
||||||
#ifdef UTF8ITER_CHECK
|
#ifdef UTF8ITER_CHECK
|
||||||
assert(
|
assert(
|
||||||
(((unsigned char)(*m_sp)[p]) & 240) == 224
|
(((unsigned char)(*m_sp)[p]) & 240) == 224
|
||||||
&& (((unsigned char)(*m_sp)[p+1]) & 192) == 128
|
&& (((unsigned char)(*m_sp)[p+1]) & 192) == 128
|
||||||
&& (((unsigned char)(*m_sp)[p+2]) & 192) == 128
|
&& (((unsigned char)(*m_sp)[p+2]) & 192) == 128
|
||||||
);
|
);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return ((unsigned char)(*m_sp)[p] - 224) * 4096 +
|
return ((unsigned char)(*m_sp)[p] - 224) * 4096 +
|
||||||
((unsigned char)(*m_sp)[p+1] - 128) * 64 +
|
((unsigned char)(*m_sp)[p+1] - 128) * 64 +
|
||||||
(unsigned char)(*m_sp)[p+2] - 128;
|
(unsigned char)(*m_sp)[p+2] - 128;
|
||||||
case 4:
|
case 4:
|
||||||
#ifdef UTF8ITER_CHECK
|
#ifdef UTF8ITER_CHECK
|
||||||
assert(
|
assert(
|
||||||
(((unsigned char)(*m_sp)[p]) & 248) == 240
|
(((unsigned char)(*m_sp)[p]) & 248) == 240
|
||||||
&& (((unsigned char)(*m_sp)[p+1]) & 192) == 128
|
&& (((unsigned char)(*m_sp)[p+1]) & 192) == 128
|
||||||
&& (((unsigned char)(*m_sp)[p+2]) & 192) == 128
|
&& (((unsigned char)(*m_sp)[p+2]) & 192) == 128
|
||||||
&& (((unsigned char)(*m_sp)[p+3]) & 192) == 128
|
&& (((unsigned char)(*m_sp)[p+3]) & 192) == 128
|
||||||
);
|
);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return ((unsigned char)(*m_sp)[p]-240)*262144 +
|
return ((unsigned char)(*m_sp)[p]-240)*262144 +
|
||||||
((unsigned char)(*m_sp)[p+1]-128)*4096 +
|
((unsigned char)(*m_sp)[p+1]-128)*4096 +
|
||||||
((unsigned char)(*m_sp)[p+2]-128)*64 +
|
((unsigned char)(*m_sp)[p+2]-128)*64 +
|
||||||
(unsigned char)(*m_sp)[p+3]-128;
|
(unsigned char)(*m_sp)[p+3]-128;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
#ifdef UTF8ITER_CHECK
|
#ifdef UTF8ITER_CHECK
|
||||||
assert(l <= 4);
|
assert(l <= 4);
|
||||||
#endif
|
#endif
|
||||||
return (unsigned int)-1;
|
return (unsigned int)-1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
extern void utf8truncate(std::string& s, int maxlen);
|
enum Utf8TruncateFlag {UTF8T_NONE, UTF8T_ATWORD, UTF8T_ELLIPSIS};
|
||||||
|
// maxlen is in utf-8 chars.
|
||||||
|
extern void utf8truncate(std::string& s, int maxlen, int flags = 0,
|
||||||
|
std::string ellipsis = "...",
|
||||||
|
std::string ws = " \t\n\r");
|
||||||
|
extern size_t utf8len(const std::string& s);
|
||||||
|
|
||||||
|
/** @brief Check and possibly fix string by replacing badly encoded
|
||||||
|
* characters with the standard question mark replacement character.
|
||||||
|
*
|
||||||
|
* @param in the string to check
|
||||||
|
* @param[out] if fixit is true, the fixed output string
|
||||||
|
* @param fixit if true, copy a fixed string to out
|
||||||
|
* @param maxrepl maximum replacements before we bail out
|
||||||
|
* @return -1 for failure (fixit false or maxrepl reached).
|
||||||
|
* 0 or positive: replacement count.
|
||||||
|
*/
|
||||||
|
extern int utf8check(
|
||||||
|
const std::string& in, std::string& out, bool fixit=false, int maxrepl=100);
|
||||||
|
|
||||||
#endif /* _UTF8ITER_H_INCLUDED_ */
|
#endif /* _UTF8ITER_H_INCLUDED_ */
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user