Moved some recoll-specific code from smallut to rclutil
This commit is contained in:
parent
960a4649d3
commit
225b59e5ee
@ -41,6 +41,7 @@
|
|||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#include <list>
|
#include <list>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include <numeric>
|
||||||
|
|
||||||
#include "rclutil.h"
|
#include "rclutil.h"
|
||||||
#include "pathut.h"
|
#include "pathut.h"
|
||||||
@ -648,9 +649,72 @@ bool thumbPathForUrl(const string& url, int size, string& path)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Compare charset names, ignoring the more common spelling variations:
// letter case and the '_' and '-' separators are not significant, so that
// "UTF-8", "utf_8" and "utf8" all compare equal.
bool samecharset(const string& cs1, const string& cs2)
{
    // Canonical form of a charset name: lowercased, '_' and '-' dropped.
    auto normalized = [](const string& cs) {
        string out;
        out.reserve(cs.size());
        // Iterate as unsigned char: tolower() is undefined for negative
        // values other than EOF.
        for (const unsigned char c : cs) {
            if (c != '_' && c != '-') {
                out.push_back(static_cast<char>(::tolower(c)));
            }
        }
        return out;
    };
    return normalized(cs1) == normalized(cs2);
}
|
||||||
|
|
||||||
|
// Lookup table used by langtocode(): language code -> name of the 8-bit
// charset to assume for that language. Languages which are not listed get
// the default chosen by langtocode().
static const std::unordered_map<string, string> lang_to_code {
    {"be", "cp1251"},      {"bg", "cp1251"},
    {"cs", "iso-8859-2"},  {"el", "iso-8859-7"},
    {"he", "iso-8859-8"},  {"hr", "iso-8859-2"},
    {"hu", "iso-8859-2"},  {"ja", "eucjp"},
    {"kk", "pt154"},       {"ko", "euckr"},
    {"lt", "iso-8859-13"}, {"lv", "iso-8859-13"},
    {"pl", "iso-8859-2"},  {"rs", "iso-8859-2"},
    {"ro", "iso-8859-2"},  {"ru", "koi8-r"},
    {"sk", "iso-8859-2"},  {"sl", "iso-8859-2"},
    {"sr", "iso-8859-2"},  {"th", "iso-8859-11"},
    {"tr", "iso-8859-9"},  {"uk", "koi8-u"},
};
|
||||||
|
|
||||||
|
string langtocode(const string& lang)
|
||||||
|
{
|
||||||
|
const auto it = lang_to_code.find(lang);
|
||||||
|
|
||||||
|
// Use cp1252 by default...
|
||||||
|
if (it == lang_to_code.end()) {
|
||||||
|
return cstr_cp1252;
|
||||||
|
}
|
||||||
|
|
||||||
|
return it->second;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Divine the language from the locale: return the language part of the LANG
// environment variable (e.g. "fr" for "fr_FR.UTF-8"). Returns "en" when LANG
// is unset or empty, or when it designates the "C" or "POSIX" locale.
string localelang()
{
    const char *lang = getenv("LANG");
    if (lang == nullptr) {
        return "en";
    }

    // A locale name has the form language[_territory][.codeset][@modifier],
    // so the language ends at the first separator, whichever it is. Cutting
    // at '_' only would leave the codeset or modifier attached for values
    // such as "ru.KOI8-R", "sr@latin" or "C.UTF-8".
    string language(lang);
    const string::size_type sep = language.find_first_of("_.@");
    if (sep != string::npos) {
        language.erase(sep);
    }

    // Tested after the cut so that "C.UTF-8" is handled like "C".
    if (language.empty() || language == "C" || language == "POSIX") {
        return "en";
    }
    return language;
}
|
||||||
|
|
||||||
void rclutil_init_mt()
|
void rclutil_init_mt()
|
||||||
{
|
{
|
||||||
path_pkgdatadir();
|
path_pkgdatadir();
|
||||||
tmplocation();
|
tmplocation();
|
||||||
thumbnailsdir();
|
thumbnailsdir();
|
||||||
|
// Init langtocode() static table
|
||||||
|
langtocode("");
|
||||||
}
|
}
|
||||||
|
|||||||
@ -122,4 +122,11 @@ template <class T> void map_ss_cp_noshr(T s, T *d);
|
|||||||
template <class T> void addmeta(T& store, const std::string& nm,
|
template <class T> void addmeta(T& store, const std::string& nm,
|
||||||
const std::string& value);
|
const std::string& value);
|
||||||
|
|
||||||
|
// Compare charset names, removing the more common spelling variations
|
||||||
|
extern bool samecharset(const std::string& cs1, const std::string& cs2);
|
||||||
|
// Divine language from locale
|
||||||
|
extern std::string localelang();
|
||||||
|
// Divine 8bit charset from language
|
||||||
|
extern std::string langtocode(const std::string& lang);
|
||||||
|
|
||||||
#endif /* _RCLUTIL_H_INCLUDED_ */
|
#endif /* _RCLUTIL_H_INCLUDED_ */
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
/* Copyright (C) 2006-2016 J.F.Dockes
|
/* Copyright (C) 2006-2020 J.F.Dockes
|
||||||
*
|
*
|
||||||
* This library is free software; you can redistribute it and/or
|
* This library is free software; you can redistribute it and/or
|
||||||
* modify it under the terms of the GNU Lesser General Public
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
@ -15,10 +15,24 @@
|
|||||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||||
* 02110-1301 USA
|
* 02110-1301 USA
|
||||||
*/
|
*/
|
||||||
|
#include "smallut.h"
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <cctype>
|
||||||
|
#include <cerrno>
|
||||||
|
#include <cinttypes>
|
||||||
|
#include <cmath>
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
#include <cinttypes>
|
#include <cstring>
|
||||||
|
#include <ctime>
|
||||||
|
#include <iostream>
|
||||||
|
#include <list>
|
||||||
|
#include <numeric>
|
||||||
|
#include <set>
|
||||||
|
#include <string>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <unordered_set>
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
// needed for localtime_r under mingw?
|
// needed for localtime_r under mingw?
|
||||||
@ -28,11 +42,6 @@
|
|||||||
#endif /* _MSC_VER */
|
#endif /* _MSC_VER */
|
||||||
#endif /* _WIN32 */
|
#endif /* _WIN32 */
|
||||||
|
|
||||||
#include <ctime>
|
|
||||||
#include <cctype>
|
|
||||||
#include <cerrno>
|
|
||||||
#include <cstring>
|
|
||||||
#include <cmath>
|
|
||||||
|
|
||||||
// Older compilers don't support stdc++ regex, but Windows does not
|
// Older compilers don't support stdc++ regex, but Windows does not
|
||||||
// have the Linux one. Have a simple class to solve the simple cases.
|
// have the Linux one. Have a simple class to solve the simple cases.
|
||||||
@ -44,48 +53,13 @@
|
|||||||
#include <regex.h>
|
#include <regex.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include <string>
|
|
||||||
#include <iostream>
|
|
||||||
#include <list>
|
|
||||||
#include <numeric>
|
|
||||||
#include <unordered_map>
|
|
||||||
#include <unordered_set>
|
|
||||||
|
|
||||||
#include "smallut.h"
|
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
// Case-insensitive comparison of two strings, delegated to strcasecmp() on
// their C string representations. The result is negative, zero or positive
// depending on whether s1 sorts before, equal to, or after s2.
int stringicmp(const string& s1, const string& s2)
{
    const char *lhs = s1.c_str();
    const char *rhs = s2.c_str();
    return strcasecmp(lhs, rhs);
}
|
||||||
|
|
||||||
void stringtolower(string& io)
|
void stringtolower(string& io)
|
||||||
{
|
{
|
||||||
std::transform(io.begin(), io.end(), io.begin(), [](unsigned char c) { return std::tolower(c); });
|
std::transform(io.begin(), io.end(), io.begin(), [](unsigned char c) { return std::tolower(c); });
|
||||||
@ -110,22 +84,6 @@ string stringtoupper(const string& i)
|
|||||||
return o;
|
return o;
|
||||||
}
|
}
|
||||||
|
|
||||||
extern int stringisuffcmp(const string& s1, const string& s2)
|
|
||||||
{
|
|
||||||
string::const_reverse_iterator r1 = s1.rbegin(), re1 = s1.rend(),
|
|
||||||
r2 = s2.rbegin(), re2 = s2.rend();
|
|
||||||
while (r1 != re1 && r2 != re2) {
|
|
||||||
char c1 = ::toupper(*r1);
|
|
||||||
char c2 = ::toupper(*r2);
|
|
||||||
if (c1 != c2) {
|
|
||||||
return c1 > c2 ? 1 : -1;
|
|
||||||
}
|
|
||||||
++r1;
|
|
||||||
++r2;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// s1 is already lowercase
|
// s1 is already lowercase
|
||||||
int stringlowercmp(const string& s1, const string& s2)
|
int stringlowercmp(const string& s1, const string& s2)
|
||||||
{
|
{
|
||||||
@ -193,14 +151,6 @@ bool beginswith(const std::string& big, const std::string& small)
|
|||||||
return big.compare(0, small.size(), small) == 0;
|
return big.compare(0, small.size(), small) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Compare charset names, removing the more common spelling variations
|
|
||||||
bool samecharset(const string& cs1, const string& cs2)
|
|
||||||
{
|
|
||||||
auto mcs1 = std::accumulate(cs1.begin(), cs1.end(), "", [](const char* m, char i) { return (i != '_' && i != '-') ? m + ::tolower(i) : m; });
|
|
||||||
auto mcs2 = std::accumulate(cs2.begin(), cs2.end(), "", [](const char* m, char i) { return (i != '_' && i != '-') ? m + ::tolower(i) : m; });
|
|
||||||
return mcs1 == mcs2;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <class T> bool stringToStrings(const string& s, T& tokens,
|
template <class T> bool stringToStrings(const string& s, T& tokens,
|
||||||
const string& addseps)
|
const string& addseps)
|
||||||
{
|
{
|
||||||
@ -310,15 +260,6 @@ template <class T> bool stringToStrings(const string& s, T& tokens,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
template bool stringToStrings<list<string> >(const string&,
|
|
||||||
list<string>&, const string&);
|
|
||||||
template bool stringToStrings<vector<string> >(const string&,
|
|
||||||
vector<string>&, const string&);
|
|
||||||
template bool stringToStrings<set<string> >(const string&,
|
|
||||||
set<string>&, const string&);
|
|
||||||
template bool stringToStrings<std::unordered_set<string> >
|
|
||||||
(const string&, std::unordered_set<string>&, const string&);
|
|
||||||
|
|
||||||
template <class T> void stringsToString(const T& tokens, string& s)
|
template <class T> void stringsToString(const T& tokens, string& s)
|
||||||
{
|
{
|
||||||
for (auto it = tokens.begin();
|
for (auto it = tokens.begin();
|
||||||
@ -347,20 +288,13 @@ template <class T> void stringsToString(const T& tokens, string& s)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
template void stringsToString<list<string> >(const list<string>&, string&);
|
|
||||||
template void stringsToString<vector<string> >(const vector<string>&, string&);
|
|
||||||
template void stringsToString<set<string> >(const set<string>&, string&);
|
|
||||||
template void stringsToString<unordered_set<string> >(const unordered_set<string>&, string&);
|
|
||||||
// Convenience overload: same conversion as the two-argument
// stringsToString(), but the result is returned by value instead of being
// stored into a caller-supplied string.
template <class T> string stringsToString(const T& tokens)
{
    string result;
    stringsToString<T>(tokens, result);
    return result;
}
|
||||||
template string stringsToString<list<string> >(const list<string>&);
|
|
||||||
template string stringsToString<vector<string> >(const vector<string>&);
|
|
||||||
template string stringsToString<set<string> >(const set<string>&);
|
|
||||||
template string stringsToString<unordered_set<string> >(const unordered_set<string>&);
|
|
||||||
|
|
||||||
template <class T> void stringsToCSV(const T& tokens, string& s,
|
template <class T> void stringsToCSV(const T& tokens, string& s,
|
||||||
char sep)
|
char sep)
|
||||||
@ -392,9 +326,30 @@ template <class T> void stringsToCSV(const T& tokens, string& s,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef SMALLUT_EXTERNAL_INSTANTIATIONS
|
||||||
|
#include "smallut_instantiate.h"
|
||||||
|
#else
|
||||||
|
template bool stringToStrings<list<string> >(const string&,
|
||||||
|
list<string>&, const string&);
|
||||||
|
template bool stringToStrings<vector<string> >(const string&,
|
||||||
|
vector<string>&, const string&);
|
||||||
|
template bool stringToStrings<set<string> >(const string&,
|
||||||
|
set<string>&, const string&);
|
||||||
|
template bool stringToStrings<std::unordered_set<string> >
|
||||||
|
(const string&, std::unordered_set<string>&, const string&);
|
||||||
|
template void stringsToString<list<string> >(const list<string>&, string&);
|
||||||
|
template void stringsToString<vector<string> >(const vector<string>&, string&);
|
||||||
|
template void stringsToString<set<string> >(const set<string>&, string&);
|
||||||
|
template void stringsToString<unordered_set<string> >(const unordered_set<string>&, string&);
|
||||||
|
template string stringsToString<list<string> >(const list<string>&);
|
||||||
|
template string stringsToString<vector<string> >(const vector<string>&);
|
||||||
|
template string stringsToString<set<string> >(const set<string>&);
|
||||||
|
template string stringsToString<unordered_set<string> >(const unordered_set<string>&);
|
||||||
template void stringsToCSV<list<string> >(const list<string>&, string&, char);
|
template void stringsToCSV<list<string> >(const list<string>&, string&, char);
|
||||||
template void stringsToCSV<vector<string> >(const vector<string>&, string&,
|
template void stringsToCSV<vector<string> >(const vector<string>&, string&,
|
||||||
char);
|
char);
|
||||||
|
#endif
|
||||||
|
|
||||||
void stringToTokens(const string& str, vector<string>& tokens,
|
void stringToTokens(const string& str, vector<string>& tokens,
|
||||||
const string& delims, bool skipinit)
|
const string& delims, bool skipinit)
|
||||||
@ -1203,61 +1158,7 @@ void catstrerror(string *reason, const char *what, int _errno)
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef SMALLUT_NO_REGEX
|
||||||
static const std::unordered_map<string, string> lang_to_code {
|
|
||||||
{"be", "cp1251"},
|
|
||||||
{"bg", "cp1251"},
|
|
||||||
{"cs", "iso-8859-2"},
|
|
||||||
{"el", "iso-8859-7"},
|
|
||||||
{"he", "iso-8859-8"},
|
|
||||||
{"hr", "iso-8859-2"},
|
|
||||||
{"hu", "iso-8859-2"},
|
|
||||||
{"ja", "eucjp"},
|
|
||||||
{"kk", "pt154"},
|
|
||||||
{"ko", "euckr"},
|
|
||||||
{"lt", "iso-8859-13"},
|
|
||||||
{"lv", "iso-8859-13"},
|
|
||||||
{"pl", "iso-8859-2"},
|
|
||||||
{"rs", "iso-8859-2"},
|
|
||||||
{"ro", "iso-8859-2"},
|
|
||||||
{"ru", "koi8-r"},
|
|
||||||
{"sk", "iso-8859-2"},
|
|
||||||
{"sl", "iso-8859-2"},
|
|
||||||
{"sr", "iso-8859-2"},
|
|
||||||
{"th", "iso-8859-11"},
|
|
||||||
{"tr", "iso-8859-9"},
|
|
||||||
{"uk", "koi8-u"},
|
|
||||||
};
|
|
||||||
static const string cstr_cp1252("CP1252");
|
|
||||||
|
|
||||||
string langtocode(const string& lang)
|
|
||||||
{
|
|
||||||
const auto it = lang_to_code.find(lang);
|
|
||||||
|
|
||||||
// Use cp1252 by default...
|
|
||||||
if (it == lang_to_code.end()) {
|
|
||||||
return cstr_cp1252;
|
|
||||||
}
|
|
||||||
|
|
||||||
return it->second;
|
|
||||||
}
|
|
||||||
|
|
||||||
string localelang()
|
|
||||||
{
|
|
||||||
const char *lang = getenv("LANG");
|
|
||||||
|
|
||||||
if (lang == nullptr || *lang == 0 || !strcmp(lang, "C") ||
|
|
||||||
!strcmp(lang, "POSIX")) {
|
|
||||||
return "en";
|
|
||||||
}
|
|
||||||
string locale(lang);
|
|
||||||
string::size_type under = locale.find_first_of('_');
|
|
||||||
if (under == string::npos) {
|
|
||||||
return locale;
|
|
||||||
}
|
|
||||||
return locale.substr(0, under);
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef USE_STD_REGEX
|
#ifdef USE_STD_REGEX
|
||||||
|
|
||||||
class SimpleRegexp::Internal {
|
class SimpleRegexp::Internal {
|
||||||
@ -1283,6 +1184,17 @@ bool SimpleRegexp::simpleMatch(const string& val) const
|
|||||||
return regex_search(val, m->res, m->expr);
|
return regex_search(val, m->res, m->expr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Substitute one instance of regular expression
|
||||||
|
std::string SimpleRegexp::simpleSub(
|
||||||
|
const std::string& in, const std::string& repl)
|
||||||
|
{
|
||||||
|
if (!ok()) {
|
||||||
|
return std::string();
|
||||||
|
}
|
||||||
|
return regex_replace(
|
||||||
|
in, m->expr, repl, std::regex_constants::format_first_only);
|
||||||
|
}
|
||||||
|
|
||||||
string SimpleRegexp::getMatch(const string&, int i) const
|
string SimpleRegexp::getMatch(const string&, int i) const
|
||||||
{
|
{
|
||||||
return m->res.str(i);
|
return m->res.str(i);
|
||||||
@ -1309,6 +1221,36 @@ public:
|
|||||||
vector<regmatch_t> matches;
|
vector<regmatch_t> matches;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Substitute one instance of regular expression
|
||||||
|
std::string SimpleRegexp::simpleSub(
|
||||||
|
const std::string& in, const std::string& repl)
|
||||||
|
{
|
||||||
|
if (!ok()) {
|
||||||
|
return std::string();
|
||||||
|
}
|
||||||
|
|
||||||
|
int err;
|
||||||
|
if ((err = regexec(&m->expr, in.c_str(),
|
||||||
|
m->nmatch + 1, &m->matches[0], 0))) {
|
||||||
|
#if SIMPLESUB_DBG
|
||||||
|
const int ERRSIZE = 200;
|
||||||
|
char errbuf[ERRSIZE + 1];
|
||||||
|
regerror(err, &expr, errbuf, ERRSIZE);
|
||||||
|
std::cerr << "simpleSub: regexec(" << sexp << ") failed: "
|
||||||
|
<< errbuf << "\n";
|
||||||
|
#endif
|
||||||
|
return in;
|
||||||
|
}
|
||||||
|
if (m->matches[0].rm_so == -1) {
|
||||||
|
// No match
|
||||||
|
return in;
|
||||||
|
}
|
||||||
|
string out = in.substr(0, m->matches[0].rm_so);
|
||||||
|
out += repl;
|
||||||
|
out += in.substr(m->matches[0].rm_eo);
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
bool SimpleRegexp::simpleMatch(const string& val) const
|
bool SimpleRegexp::simpleMatch(const string& val) const
|
||||||
{
|
{
|
||||||
if (!ok())
|
if (!ok())
|
||||||
@ -1325,7 +1267,7 @@ string SimpleRegexp::getMatch(const string& val, int i) const
|
|||||||
m->matches[i].rm_eo - m->matches[i].rm_so);
|
m->matches[i].rm_eo - m->matches[i].rm_so);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // win/notwinf
|
#endif // !windows, using C regexps
|
||||||
|
|
||||||
SimpleRegexp::SimpleRegexp(const string& exp, int flags, int nmatch)
|
SimpleRegexp::SimpleRegexp(const string& exp, int flags, int nmatch)
|
||||||
: m(new Internal(exp, flags, nmatch))
|
: m(new Internal(exp, flags, nmatch))
|
||||||
@ -1346,6 +1288,7 @@ bool SimpleRegexp::operator() (const string& val) const
|
|||||||
{
|
{
|
||||||
return simpleMatch(val);
|
return simpleMatch(val);
|
||||||
}
|
}
|
||||||
|
#endif // SMALLUT_NO_REGEX
|
||||||
|
|
||||||
string flagsToString(const vector<CharFlags>& flags, unsigned int val)
|
string flagsToString(const vector<CharFlags>& flags, unsigned int val)
|
||||||
{
|
{
|
||||||
@ -1386,29 +1329,8 @@ string valToString(const vector<CharFlags>& flags, unsigned int val)
|
|||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned int stringToFlags(const vector<CharFlags>& flags,
|
|
||||||
const string& input, const char *sep)
|
|
||||||
{
|
|
||||||
unsigned int out = 0;
|
|
||||||
|
|
||||||
vector<string> toks;
|
|
||||||
stringToTokens(input, toks, sep);
|
|
||||||
for (auto& tok: toks) {
|
|
||||||
trimstring(tok);
|
|
||||||
out = std::accumulate(
|
|
||||||
flags.begin(), flags.end(), out,
|
|
||||||
[&](unsigned int o, CharFlags flag) {
|
|
||||||
return tok == flag.yesname ? o | flag.value : o;
|
|
||||||
});
|
|
||||||
}
|
|
||||||
return out;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// Initialization for static stuff to be called from main thread before going
|
// Initialization for static stuff to be called from main thread before going
|
||||||
// multithreaded
|
// multithreaded
|
||||||
void smallut_init_mt()
|
void smallut_init_mt()
|
||||||
{
|
{
|
||||||
// Init langtocode() static table
|
|
||||||
langtocode("");
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -23,7 +23,6 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <set>
|
|
||||||
|
|
||||||
// Miscellaneous mostly string-oriented small utilities
|
// Miscellaneous mostly string-oriented small utilities
|
||||||
// Note that none of the following code knows about utf-8.
|
// Note that none of the following code knows about utf-8.
|
||||||
@ -71,17 +70,6 @@ extern void stringtoupper(std::string& io);
|
|||||||
extern std::string stringtoupper(const std::string& io);
|
extern std::string stringtoupper(const std::string& io);
|
||||||
extern bool beginswith(const std::string& bg, const std::string& sml);
|
extern bool beginswith(const std::string& bg, const std::string& sml);
|
||||||
|
|
||||||
// Is one string the end part of the other ?
|
|
||||||
extern int stringisuffcmp(const std::string& s1, const std::string& s2);
|
|
||||||
|
|
||||||
// Divine language from locale
|
|
||||||
extern std::string localelang();
|
|
||||||
// Divine 8bit charset from language
|
|
||||||
extern std::string langtocode(const std::string& lang);
|
|
||||||
|
|
||||||
// Compare charset names, removing the more common spelling variations
|
|
||||||
extern bool samecharset(const std::string& cs1, const std::string& cs2);
|
|
||||||
|
|
||||||
// Parse date interval specifier into pair of y,m,d dates. The format
|
// Parse date interval specifier into pair of y,m,d dates. The format
|
||||||
// for the time interval is based on a subset of iso 8601 with
|
// for the time interval is based on a subset of iso 8601 with
|
||||||
// the addition of open intervals, and removal of all time indications.
|
// the addition of open intervals, and removal of all time indications.
|
||||||
@ -104,8 +92,18 @@ struct DateInterval {
|
|||||||
extern bool parsedateinterval(const std::string& s, DateInterval *di);
|
extern bool parsedateinterval(const std::string& s, DateInterval *di);
|
||||||
extern int monthdays(int mon, int year);
|
extern int monthdays(int mon, int year);
|
||||||
|
|
||||||
|
|
||||||
|
/** Note for all templated functions:
|
||||||
|
* By default, smallut.cpp has explicit instantiations for common
|
||||||
|
* containers (list, vector, set, etc.). If this is not enough, or
|
||||||
|
* conversely, if you want to minimize the module size, you can choose
|
||||||
|
* the instantiations by defining the SMALLUT_EXTERNAL_INSTANTIATIONS
|
||||||
|
* compilation flag, and defining the instances in a file named
|
||||||
|
* smallut_instantiate.h
|
||||||
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parse input string into list of strings.
|
* Parse input string into list of strings. See instantiation note above.
|
||||||
*
|
*
|
||||||
* Token delimiter is " \t\n" except inside dquotes. dquote inside
|
* Token delimiter is " \t\n" except inside dquotes. dquote inside
|
||||||
* dquotes can be escaped with \ etc...
|
* dquotes can be escaped with \ etc...
|
||||||
@ -118,7 +116,7 @@ template <class T> bool stringToStrings(const std::string& s, T& tokens,
|
|||||||
const std::string& addseps = "");
|
const std::string& addseps = "");
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Inverse operation:
|
* Inverse operation. See instantiation note above.
|
||||||
*/
|
*/
|
||||||
template <class T> void stringsToString(const T& tokens, std::string& s);
|
template <class T> void stringsToString(const T& tokens, std::string& s);
|
||||||
template <class T> std::string stringsToString(const T& tokens);
|
template <class T> std::string stringsToString(const T& tokens);
|
||||||
@ -126,12 +124,13 @@ template <class T> std::string stringsToString(const T& tokens);
|
|||||||
/**
|
/**
|
||||||
* Strings to CSV string. tokens containing the separator are quoted (")
|
* Strings to CSV string. tokens containing the separator are quoted (")
|
||||||
* " inside tokens is escaped as "" ([word "quote"] =>["word ""quote"""]
|
* " inside tokens is escaped as "" ([word "quote"] =>["word ""quote"""]
|
||||||
|
* See instantiation note above.
|
||||||
*/
|
*/
|
||||||
template <class T> void stringsToCSV(const T& tokens, std::string& s,
|
template <class T> void stringsToCSV(const T& tokens, std::string& s,
|
||||||
char sep = ',');
|
char sep = ',');
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Split input string. No handling of quoting
|
* Split input string. No handling of quoting.
|
||||||
*/
|
*/
|
||||||
extern void stringToTokens(const std::string& s,
|
extern void stringToTokens(const std::string& s,
|
||||||
std::vector<std::string>& tokens,
|
std::vector<std::string>& tokens,
|
||||||
@ -211,6 +210,7 @@ inline void leftzeropad(std::string& s, unsigned len)
|
|||||||
// (e.g. ac:23:0c:4f:46:fd)
|
// (e.g. ac:23:0c:4f:46:fd)
|
||||||
extern std::string hexprint(const std::string& in, char separ= 0);
|
extern std::string hexprint(const std::string& in, char separ= 0);
|
||||||
|
|
||||||
|
#ifndef SMALLUT_NO_REGEX
|
||||||
// A class to solve platform/compiler issues for simple regex
|
// A class to solve platform/compiler issues for simple regex
|
||||||
// matches. Uses the appropriate native lib under the hood.
|
// matches. Uses the appropriate native lib under the hood.
|
||||||
// This always uses extended regexp syntax.
|
// This always uses extended regexp syntax.
|
||||||
@ -227,13 +227,19 @@ public:
|
|||||||
std::string getMatch(const std::string& val, int i) const;
|
std::string getMatch(const std::string& val, int i) const;
|
||||||
/// Calls simpleMatch()
|
/// Calls simpleMatch()
|
||||||
bool operator() (const std::string& val) const;
|
bool operator() (const std::string& val) const;
|
||||||
|
|
||||||
|
/// Replace the first occurrence of regexp.
|
||||||
|
std::string simpleSub(const std::string& input, const std::string& repl);
|
||||||
|
|
||||||
/// Check after construction
|
/// Check after construction
|
||||||
bool ok() const;
|
bool ok() const;
|
||||||
|
|
||||||
|
|
||||||
class Internal;
|
class Internal;
|
||||||
private:
|
private:
|
||||||
Internal *m;
|
Internal *m;
|
||||||
};
|
};
|
||||||
|
#endif // SMALLUT_NO_REGEX
|
||||||
|
|
||||||
/// Utilities for printing names for defined values (Ex: O_RDONLY->"O_RDONLY")
|
/// Utilities for printing names for defined values (Ex: O_RDONLY->"O_RDONLY")
|
||||||
|
|
||||||
@ -257,9 +263,4 @@ extern std::string flagsToString(const std::vector<CharFlags>&,
|
|||||||
/// Translate a value into a name
|
/// Translate a value into a name
|
||||||
extern std::string valToString(const std::vector<CharFlags>&, unsigned int val);
|
extern std::string valToString(const std::vector<CharFlags>&, unsigned int val);
|
||||||
|
|
||||||
/// Reverse operation: translate string into bitfield
|
|
||||||
extern unsigned int
|
|
||||||
stringToFlags(const std::vector<CharFlags>&, const std::string& input,
|
|
||||||
const char *sep = "|");
|
|
||||||
|
|
||||||
#endif /* _SMALLUT_H_INCLUDED_ */
|
#endif /* _SMALLUT_H_INCLUDED_ */
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user