diff --git a/src/utils/rclutil.cpp b/src/utils/rclutil.cpp index 5faf6e00..5840a078 100644 --- a/src/utils/rclutil.cpp +++ b/src/utils/rclutil.cpp @@ -41,6 +41,7 @@ #include #include #include +#include #include "rclutil.h" #include "pathut.h" @@ -648,9 +649,72 @@ bool thumbPathForUrl(const string& url, int size, string& path) return false; } +// Compare charset names, removing the more common spelling variations +bool samecharset(const string& cs1, const string& cs2) +{ + auto mcs1 = std::accumulate(cs1.begin(), cs1.end(), "", [](const char* m, char i) { return (i != '_' && i != '-') ? m + ::tolower(i) : m; }); + auto mcs2 = std::accumulate(cs2.begin(), cs2.end(), "", [](const char* m, char i) { return (i != '_' && i != '-') ? m + ::tolower(i) : m; }); + return mcs1 == mcs2; +} + +static const std::unordered_map lang_to_code { + {"be", "cp1251"}, + {"bg", "cp1251"}, + {"cs", "iso-8859-2"}, + {"el", "iso-8859-7"}, + {"he", "iso-8859-8"}, + {"hr", "iso-8859-2"}, + {"hu", "iso-8859-2"}, + {"ja", "eucjp"}, + {"kk", "pt154"}, + {"ko", "euckr"}, + {"lt", "iso-8859-13"}, + {"lv", "iso-8859-13"}, + {"pl", "iso-8859-2"}, + {"rs", "iso-8859-2"}, + {"ro", "iso-8859-2"}, + {"ru", "koi8-r"}, + {"sk", "iso-8859-2"}, + {"sl", "iso-8859-2"}, + {"sr", "iso-8859-2"}, + {"th", "iso-8859-11"}, + {"tr", "iso-8859-9"}, + {"uk", "koi8-u"}, + }; + +string langtocode(const string& lang) +{ + const auto it = lang_to_code.find(lang); + + // Use cp1252 by default... + if (it == lang_to_code.end()) { + return cstr_cp1252; + } + + return it->second; +} + +string localelang() +{ + const char *lang = getenv("LANG"); + + if (lang == nullptr || *lang == 0 || !strcmp(lang, "C") || + !strcmp(lang, "POSIX")) { + return "en"; + } + string locale(lang); + string::size_type under = locale.find_first_of('_'); + if (under == string::npos) { + return locale; + } + return locale.substr(0, under); +} + void rclutil_init_mt() { path_pkgdatadir(); tmplocation(); thumbnailsdir(); + // Init langtocode() static table + langtocode(""); } diff --git a/src/utils/rclutil.h b/src/utils/rclutil.h index de6abd3a..8f8866da 100644 --- a/src/utils/rclutil.h +++ b/src/utils/rclutil.h @@ -122,4 +122,11 @@ template void map_ss_cp_noshr(T s, T *d); template void addmeta(T& store, const std::string& nm, const std::string& value); +// Compare charset names, removing the more common spelling variations +extern bool samecharset(const std::string& cs1, const std::string& cs2); +// Divine language from locale +extern std::string localelang(); +// Divine 8bit charset from language +extern std::string langtocode(const std::string& lang); + #endif /* _RCLUTIL_H_INCLUDED_ */ diff --git a/src/utils/smallut.cpp b/src/utils/smallut.cpp index e16c8c23..2f14a06a 100644 --- a/src/utils/smallut.cpp +++ b/src/utils/smallut.cpp @@ -1,4 +1,4 @@ -/* Copyright (C) 2006-2016 J.F.Dockes +/* Copyright (C) 2006-2020 J.F.Dockes * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -15,10 +15,24 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301 USA */ +#include "smallut.h" + #include +#include +#include +#include +#include #include #include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #ifdef _WIN32 // needed for localtime_r under mingw? @@ -28,11 +42,6 @@ #endif /* _MSC_VER */ #endif /* _WIN32 */ -#include -#include -#include -#include -#include // Older compilers don't support stdc++ regex, but Windows does not // have the Linux one. Have a simple class to solve the simple cases. @@ -44,48 +53,13 @@ #include #endif -#include -#include -#include -#include -#include -#include - -#include "smallut.h" - using namespace std; int stringicmp(const string& s1, const string& s2) { - string::const_iterator it1 = s1.begin(); - string::const_iterator it2 = s2.begin(); - string::size_type size1 = s1.length(), size2 = s2.length(); - char c1, c2; - - if (size1 < size2) { - while (it1 != s1.end()) { - c1 = ::toupper(*it1); - c2 = ::toupper(*it2); - if (c1 != c2) { - return c1 > c2 ? 1 : -1; - } - ++it1; - ++it2; - } - return size1 == size2 ? 0 : -1; - } - - while (it2 != s2.end()) { - c1 = ::toupper(*it1); - c2 = ::toupper(*it2); - if (c1 != c2) { - return c1 > c2 ? 1 : -1; - } - ++it1; - ++it2; - } - return size1 == size2 ? 0 : 1; + return strcasecmp(s1.c_str(), s2.c_str()); } + void stringtolower(string& io) { std::transform(io.begin(), io.end(), io.begin(), [](unsigned char c) { return std::tolower(c); }); @@ -110,22 +84,6 @@ string stringtoupper(const string& i) return o; } -extern int stringisuffcmp(const string& s1, const string& s2) -{ - string::const_reverse_iterator r1 = s1.rbegin(), re1 = s1.rend(), - r2 = s2.rbegin(), re2 = s2.rend(); - while (r1 != re1 && r2 != re2) { - char c1 = ::toupper(*r1); - char c2 = ::toupper(*r2); - if (c1 != c2) { - return c1 > c2 ? 1 : -1; - } - ++r1; - ++r2; - } - return 0; -} - // s1 is already lowercase int stringlowercmp(const string& s1, const string& s2) { @@ -193,14 +151,6 @@ bool beginswith(const std::string& big, const std::string& small) return big.compare(0, small.size(), small) == 0; } -// Compare charset names, removing the more common spelling variations -bool samecharset(const string& cs1, const string& cs2) -{ - auto mcs1 = std::accumulate(cs1.begin(), cs1.end(), "", [](const char* m, char i) { return (i != '_' && i != '-') ? m + ::tolower(i) : m; }); - auto mcs2 = std::accumulate(cs2.begin(), cs2.end(), "", [](const char* m, char i) { return (i != '_' && i != '-') ? m + ::tolower(i) : m; }); - return mcs1 == mcs2; -} - template bool stringToStrings(const string& s, T& tokens, const string& addseps) { @@ -310,15 +260,6 @@ template bool stringToStrings(const string& s, T& tokens, return true; } -template bool stringToStrings >(const string&, - list&, const string&); -template bool stringToStrings >(const string&, - vector&, const string&); -template bool stringToStrings >(const string&, - set&, const string&); -template bool stringToStrings > -(const string&, std::unordered_set&, const string&); - template void stringsToString(const T& tokens, string& s) { for (auto it = tokens.begin(); @@ -347,20 +288,13 @@ template void stringsToString(const T& tokens, string& s) } } } -template void stringsToString >(const list&, string&); -template void stringsToString >(const vector&, string&); -template void stringsToString >(const set&, string&); -template void stringsToString >(const unordered_set&, string&); + template string stringsToString(const T& tokens) { string out; stringsToString(tokens, out); return out; } -template string stringsToString >(const list&); -template string stringsToString >(const vector&); -template string stringsToString >(const set&); -template string stringsToString >(const unordered_set&); template void stringsToCSV(const T& tokens, string& s, char sep) @@ -392,9 +326,30 @@ template void stringsToCSV(const T& tokens, string& s, } } } + +#ifdef SMALLUT_EXTERNAL_INSTANTIATIONS +#include "smallut_instantiate.h" +#else +template bool stringToStrings >(const string&, + list&, const string&); +template bool stringToStrings >(const string&, + vector&, const string&); +template bool stringToStrings >(const string&, + set&, const string&); +template bool stringToStrings > +(const string&, std::unordered_set&, const string&); +template void stringsToString >(const list&, string&); +template void stringsToString >(const vector&, string&); +template void stringsToString >(const set&, string&); +template void stringsToString >(const unordered_set&, string&); +template string stringsToString >(const list&); +template string stringsToString >(const vector&); +template string stringsToString >(const set&); +template string stringsToString >(const unordered_set&); template void stringsToCSV >(const list&, string&, char); template void stringsToCSV >(const vector&, string&, char); +#endif void stringToTokens(const string& str, vector& tokens, const string& delims, bool skipinit) @@ -1203,61 +1158,7 @@ void catstrerror(string *reason, const char *what, int _errno) #endif } - -static const std::unordered_map lang_to_code { - {"be", "cp1251"}, - {"bg", "cp1251"}, - {"cs", "iso-8859-2"}, - {"el", "iso-8859-7"}, - {"he", "iso-8859-8"}, - {"hr", "iso-8859-2"}, - {"hu", "iso-8859-2"}, - {"ja", "eucjp"}, - {"kk", "pt154"}, - {"ko", "euckr"}, - {"lt", "iso-8859-13"}, - {"lv", "iso-8859-13"}, - {"pl", "iso-8859-2"}, - {"rs", "iso-8859-2"}, - {"ro", "iso-8859-2"}, - {"ru", "koi8-r"}, - {"sk", "iso-8859-2"}, - {"sl", "iso-8859-2"}, - {"sr", "iso-8859-2"}, - {"th", "iso-8859-11"}, - {"tr", "iso-8859-9"}, - {"uk", "koi8-u"}, - }; -static const string cstr_cp1252("CP1252"); - -string langtocode(const string& lang) -{ - const auto it = lang_to_code.find(lang); - - // Use cp1252 by default... - if (it == lang_to_code.end()) { - return cstr_cp1252; - } - - return it->second; -} - -string localelang() -{ - const char *lang = getenv("LANG"); - - if (lang == nullptr || *lang == 0 || !strcmp(lang, "C") || - !strcmp(lang, "POSIX")) { - return "en"; - } - string locale(lang); - string::size_type under = locale.find_first_of('_'); - if (under == string::npos) { - return locale; - } - return locale.substr(0, under); -} - +#ifndef SMALLUT_NO_REGEX #ifdef USE_STD_REGEX class SimpleRegexp::Internal { @@ -1283,6 +1184,17 @@ bool SimpleRegexp::simpleMatch(const string& val) const return regex_search(val, m->res, m->expr); } +// Substitute one instance of regular expression +std::string SimpleRegexp::simpleSub( + const std::string& in, const std::string& repl) +{ + if (!ok()) { + return std::string(); + } + return regex_replace( + in, m->expr, repl, std::regex_constants::format_first_only); +} + string SimpleRegexp::getMatch(const string&, int i) const { return m->res.str(i); @@ -1309,6 +1221,36 @@ public: vector matches; }; +// Substitute one instance of regular expression +std::string SimpleRegexp::simpleSub( + const std::string& in, const std::string& repl) +{ + if (!ok()) { + return std::string(); + } + + int err; + if ((err = regexec(&m->expr, in.c_str(), + m->nmatch + 1, &m->matches[0], 0))) { +#if SIMPLESUB_DBG + const int ERRSIZE = 200; + char errbuf[ERRSIZE + 1]; + regerror(err, &expr, errbuf, ERRSIZE); + std::cerr << "simpleSub: regexec(" << sexp << ") failed: " + << errbuf << "\n"; +#endif + return in; + } + if (m->matches[0].rm_so == -1) { + // No match + return in; + } + string out = in.substr(0, m->matches[0].rm_so); + out += repl; + out += in.substr(m->matches[0].rm_eo); + return out; +} + bool SimpleRegexp::simpleMatch(const string& val) const { if (!ok()) @@ -1325,7 +1267,7 @@ string SimpleRegexp::getMatch(const string& val, int i) const m->matches[i].rm_eo - m->matches[i].rm_so); } -#endif // win/notwinf +#endif // !windows, using C regexps SimpleRegexp::SimpleRegexp(const string& exp, int flags, int nmatch) : m(new Internal(exp, flags, nmatch)) @@ -1346,6 +1288,7 @@ bool SimpleRegexp::operator() (const string& val) const { return simpleMatch(val); } +#endif // SMALLUT_NO_REGEX string flagsToString(const vector& flags, unsigned int val) { @@ -1386,29 +1329,8 @@ string valToString(const vector& flags, unsigned int val) return out; } -unsigned int stringToFlags(const vector& flags, - const string& input, const char *sep) -{ - unsigned int out = 0; - - vector toks; - stringToTokens(input, toks, sep); - for (auto& tok: toks) { - trimstring(tok); - out = std::accumulate( - flags.begin(), flags.end(), out, - [&](unsigned int o, CharFlags flag) { - return tok == flag.yesname ? o | flag.value : o; - }); - } - return out; -} - - // Initialization for static stuff to be called from main thread before going // multiple void smallut_init_mt() { - // Init langtocode() static table - langtocode(""); } diff --git a/src/utils/smallut.h b/src/utils/smallut.h index d2f0f416..e025e811 100644 --- a/src/utils/smallut.h +++ b/src/utils/smallut.h @@ -23,7 +23,6 @@ #include #include #include -#include // Miscellaneous mostly string-oriented small utilities // Note that none of the following code knows about utf-8. @@ -71,17 +70,6 @@ extern void stringtoupper(std::string& io); extern std::string stringtoupper(const std::string& io); extern bool beginswith(const std::string& bg, const std::string& sml); -// Is one string the end part of the other ? -extern int stringisuffcmp(const std::string& s1, const std::string& s2); - -// Divine language from locale -extern std::string localelang(); -// Divine 8bit charset from language -extern std::string langtocode(const std::string& lang); - -// Compare charset names, removing the more common spelling variations -extern bool samecharset(const std::string& cs1, const std::string& cs2); - // Parse date interval specifier into pair of y,m,d dates. The format // for the time interval is based on a subset of iso 8601 with // the addition of open intervals, and removal of all time indications. @@ -104,8 +92,18 @@ struct DateInterval { extern bool parsedateinterval(const std::string& s, DateInterval *di); extern int monthdays(int mon, int year); + +/** Note for all templated functions: + * By default, smallut.cpp has explicit instantiations for common + * containers (list, vector, set, etc.). If this is not enough, or + * conversely, if you want to minimize the module size, you can chose + * the instantiations by defining the SMALLUT_EXTERNAL_INSTANTIATIONS + * compilation flag, and defining the instances in a file named + * smallut_instantiations.h + */ + /** - * Parse input string into list of strings. + * Parse input string into list of strings. See instantiation note above. * * Token delimiter is " \t\n" except inside dquotes. dquote inside * dquotes can be escaped with \ etc... @@ -118,7 +116,7 @@ template bool stringToStrings(const std::string& s, T& tokens, const std::string& addseps = ""); /** - * Inverse operation: + * Inverse operation. See instantiation note above. */ template void stringsToString(const T& tokens, std::string& s); template std::string stringsToString(const T& tokens); @@ -126,12 +124,13 @@ template std::string stringsToString(const T& tokens); /** * Strings to CSV string. tokens containing the separator are quoted (") * " inside tokens is escaped as "" ([word "quote"] =>["word ""quote"""] + * See instantiation note above. */ template void stringsToCSV(const T& tokens, std::string& s, char sep = ','); /** - * Split input string. No handling of quoting + * Split input string. No handling of quoting. */ extern void stringToTokens(const std::string& s, std::vector& tokens, @@ -211,6 +210,7 @@ inline void leftzeropad(std::string& s, unsigned len) // (e.g. ac:23:0c:4f:46:fd) extern std::string hexprint(const std::string& in, char separ= 0); +#ifndef SMALLUT_NO_REGEX // A class to solve platorm/compiler issues for simple regex // matches. Uses the appropriate native lib under the hood. // This always uses extended regexp syntax. @@ -227,13 +227,19 @@ public: std::string getMatch(const std::string& val, int i) const; /// Calls simpleMatch() bool operator() (const std::string& val) const; + + /// Replace the first occurrence of regexp. + std::string simpleSub(const std::string& input, const std::string& repl); + /// Check after construction bool ok() const; + class Internal; private: Internal *m; }; +#endif // SMALLUT_NO_REGEX /// Utilities for printing names for defined values (Ex: O_RDONLY->"O_RDONLY") @@ -257,9 +263,4 @@ extern std::string flagsToString(const std::vector&, /// Translate a value into a name extern std::string valToString(const std::vector&, unsigned int val); -/// Reverse operation: translate string into bitfield -extern unsigned int -stringToFlags(const std::vector&, const std::string& input, - const char *sep = "|"); - #endif /* _SMALLUT_H_INCLUDED_ */