From a4fd4ee5bec20ca038670803484a5d5792c28250 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Mon, 21 Mar 2016 12:55:31 +0100 Subject: [PATCH] moved code around to make smallut and pathut less recoll-specific and reusable. No actual changes --- src/Makefile.am | 3 + src/common/rclconfig.cpp | 3 +- src/common/rclinit.cpp | 4 +- src/common/rclinit.h | 27 +- src/index/beaglequeue.cpp | 1 + src/index/fsindexer.cpp | 1 + src/index/recollindex.cpp | 1 + src/internfile/uncomp.h | 1 + src/python/recoll/pyrecoll.cpp | 1 + src/qtgui/preview_load.h | 1 + src/qtgui/recoll.h | 1 + src/query/reslistpager.cpp | 1 + src/rcldb/rcldb.cpp | 16 + src/rcldb/rcldoc.cpp | 28 + src/rcldb/rcldoc.h | 28 +- src/unac/unac.c | 4 +- src/utils/circache.cpp | 22 + src/utils/cpuconf.cpp | 6 + src/utils/hldata.cpp | 78 +++ src/utils/hldata.h | 1 + src/utils/pathut.cpp | 964 +++++++++++------------------ src/utils/pathut.h | 132 +--- src/utils/rclutil.cpp | 411 +++++++++++++ src/utils/rclutil.h | 112 ++++ src/utils/smallut.cpp | 1058 ++++++++++++++++---------------- src/utils/smallut.h | 409 ++++++------ 26 files changed, 1821 insertions(+), 1493 deletions(-) create mode 100644 src/utils/hldata.cpp create mode 100644 src/utils/rclutil.cpp create mode 100644 src/utils/rclutil.h diff --git a/src/Makefile.am b/src/Makefile.am index e32b4e66..6b0f25b7 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -207,6 +207,7 @@ utils/fileudi.h \ utils/fstreewalk.cpp \ utils/fstreewalk.h \ utils/hldata.h \ +utils/hldata.cpp \ utils/idfile.cpp \ utils/idfile.h \ utils/md5.cpp \ @@ -224,6 +225,8 @@ utils/pxattr.cpp \ utils/pxattr.h \ utils/rclionice.cpp \ utils/rclionice.h \ +utils/rclutil.h \ +utils/rclutil.cpp \ utils/readfile.cpp \ utils/readfile.h \ utils/refcntr.h \ diff --git a/src/common/rclconfig.cpp b/src/common/rclconfig.cpp index 638308ed..a26de74b 100644 --- a/src/common/rclconfig.cpp +++ b/src/common/rclconfig.cpp @@ -39,6 +39,7 @@ #include "cstr.h" #include "pathut.h" +#include "rclutil.h" #include "rclconfig.h" #include "conftree.h" #include "debuglog.h" @@ -144,7 +145,7 @@ RclConfig::RclConfig(const string *argcnf) } // Compute our data dir name, typically /usr/local/share/recoll - m_datadir = path_sharedatadir(); + m_datadir = path_pkgdatadir(); // We only do the automatic configuration creation thing for the default // config dir, not if it was specified through -c or RECOLL_CONFDIR bool autoconfdir = false; diff --git a/src/common/rclinit.cpp b/src/common/rclinit.cpp index 145a7520..97a08fed 100644 --- a/src/common/rclinit.cpp +++ b/src/common/rclinit.cpp @@ -32,6 +32,7 @@ #include "rclconfig.h" #include "rclinit.h" #include "pathut.h" +#include "rclutil.h" #include "unac.h" #include "smallut.h" #include "execmd.h" @@ -318,6 +319,8 @@ RclConfig *recollinit(RclInitFlags flags, // Init smallut and pathut static values pathut_init_mt(); smallut_init_mt(); + rclutil_init_mt(); + // Init execmd.h static PATH and PATHELT splitting {string bogus; ExecCmd::which("nosuchcmd", bogus); @@ -389,4 +392,3 @@ bool recoll_ismainthread() return pthread_equal(pthread_self(), mainthread_id); } - diff --git a/src/common/rclinit.h b/src/common/rclinit.h index 5a41f67e..470da903 100644 --- a/src/common/rclinit.h +++ b/src/common/rclinit.h @@ -18,40 +18,39 @@ #define _RCLINIT_H_INCLUDED_ #include -#ifndef NO_NAMESPACES -using std::string; -#endif class RclConfig; /** * Initialize by reading configuration, opening log file, etc. - * + * * This must be called from the main thread before starting any others. It sets * up the global signal handling. other threads must call recoll_threadinit() * when starting. * - * @param flags misc modifiers. These are currently only used to customize + * @param flags misc modifiers. These are currently only used to customize * the log file and verbosity. * @param cleanup function to call before exiting (atexit) - * @param sigcleanup function to call on terminal signal (INT/HUP...) This - * should typically set a flag which tells the program (recoll, - * recollindex etc.. to exit as soon as possible (after closing the db, + * @param sigcleanup function to call on terminal signal (INT/HUP...) This + * should typically set a flag which tells the program (recoll, + * recollindex etc.. to exit as soon as possible (after closing the db, * etc.). cleanup will then be called by exit(). * @param reason in case of error: output string explaining things * @param argcnf Configuration directory name from the command line (overriding * default and environment * @return the parsed configuration. */ -enum RclInitFlags {RCLINIT_NONE=0, RCLINIT_DAEMON=1, RCLINIT_IDX=2}; +enum RclInitFlags {RCLINIT_NONE = 0, RCLINIT_DAEMON = 1, RCLINIT_IDX = 2}; extern RclConfig *recollinit(RclInitFlags flags, - void (*cleanup)(void), void (*sigcleanup)(int), - string &reason, const string *argcnf = 0); -inline RclConfig *recollinit(void (*cleanup)(void), void (*sigcleanup)(int), - string &reason, const string *argcnf = 0) { + void (*cleanup)(void), void (*sigcleanup)(int), + std::string& reason, const string *argcnf = 0); +inline RclConfig *recollinit(void (*cleanup)(void), void (*sigcleanup)(int), + std::string& reason, + const std::string *argcnf = 0) +{ return recollinit(RCLINIT_NONE, cleanup, sigcleanup, reason, argcnf); } -// Threads need to call this to block signals. +// Threads need to call this to block signals. // The main thread handles all signals. extern void recoll_threadinit(); diff --git a/src/index/beaglequeue.cpp b/src/index/beaglequeue.cpp index f6217f70..cf78b717 100644 --- a/src/index/beaglequeue.cpp +++ b/src/index/beaglequeue.cpp @@ -23,6 +23,7 @@ #include "cstr.h" #include "pathut.h" +#include "rclutil.h" #include "debuglog.h" #include "fstreewalk.h" #include "beaglequeue.h" diff --git a/src/index/fsindexer.cpp b/src/index/fsindexer.cpp index 55fc338f..6b6ea995 100644 --- a/src/index/fsindexer.cpp +++ b/src/index/fsindexer.cpp @@ -28,6 +28,7 @@ #include "cstr.h" #include "pathut.h" +#include "rclutil.h" #include "conftree.h" #include "rclconfig.h" #include "fstreewalk.h" diff --git a/src/index/recollindex.cpp b/src/index/recollindex.cpp index 977c9cce..6f9a452f 100644 --- a/src/index/recollindex.cpp +++ b/src/index/recollindex.cpp @@ -42,6 +42,7 @@ using namespace std; #include "smallut.h" #include "chrono.h" #include "pathut.h" +#include "rclutil.h" #include "rclmon.h" #include "x11mon.h" #include "cancelcheck.h" diff --git a/src/internfile/uncomp.h b/src/internfile/uncomp.h index 1f2f4d6b..ab7c55a4 100644 --- a/src/internfile/uncomp.h +++ b/src/internfile/uncomp.h @@ -21,6 +21,7 @@ #include #include "pathut.h" +#include "rclutil.h" #include "ptmutex.h" /// Uncompression script interface. diff --git a/src/python/recoll/pyrecoll.cpp b/src/python/recoll/pyrecoll.cpp index e414dbeb..56466d22 100644 --- a/src/python/recoll/pyrecoll.cpp +++ b/src/python/recoll/pyrecoll.cpp @@ -32,6 +32,7 @@ using namespace std; #include "searchdata.h" #include "rclquery.h" #include "pathut.h" +#include "rclutil.h" #include "wasatorcl.h" #include "debuglog.h" #include "pathut.h" diff --git a/src/qtgui/preview_load.h b/src/qtgui/preview_load.h index d1e8dad4..328c4ba1 100644 --- a/src/qtgui/preview_load.h +++ b/src/qtgui/preview_load.h @@ -23,6 +23,7 @@ #include "rcldoc.h" #include "pathut.h" +#include "rclutil.h" #include "rclconfig.h" /* diff --git a/src/qtgui/recoll.h b/src/qtgui/recoll.h index 429dab88..b4f035a3 100644 --- a/src/qtgui/recoll.h +++ b/src/qtgui/recoll.h @@ -21,6 +21,7 @@ #include "rclconfig.h" #include "rcldb.h" +#include "rclutil.h" #include "ptmutex.h" #include diff --git a/src/query/reslistpager.cpp b/src/query/reslistpager.cpp index fdcfad14..7468a5ab 100644 --- a/src/query/reslistpager.cpp +++ b/src/query/reslistpager.cpp @@ -34,6 +34,7 @@ using std::list; #include "debuglog.h" #include "rclconfig.h" #include "smallut.h" +#include "rclutil.h" #include "plaintorich.h" #include "mimehandler.h" diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index c9c412c9..6a50ffa4 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -42,6 +42,7 @@ using namespace std; #include "unacpp.h" #include "conftree.h" #include "pathut.h" +#include "rclutil.h" #include "smallut.h" #include "chrono.h" #include "utf8iter.h" @@ -126,6 +127,21 @@ static inline string make_parentterm(const string& udi) return pterm; } +static void utf8truncate(string& s, int maxlen) +{ + if (s.size() <= string::size_type(maxlen)) { + return; + } + Utf8Iter iter(s); + string::size_type pos = 0; + while (iter++ != string::npos) + if (iter.getBpos() < string::size_type(maxlen)) { + pos = iter.getBpos(); + } + + s.erase(pos); +} + Db::Native::Native(Db *db) : m_rcldb(db), m_isopen(false), m_iswritable(false), m_noversionwrite(false) diff --git a/src/rcldb/rcldoc.cpp b/src/rcldb/rcldoc.cpp index 2dcaa05f..3858529f 100644 --- a/src/rcldb/rcldoc.cpp +++ b/src/rcldb/rcldoc.cpp @@ -14,9 +14,11 @@ * Free Software Foundation, Inc., * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +#include "autoconfig.h" #include "rcldoc.h" #include "debuglog.h" +#include "rclutil.h" namespace Rcl { const string Doc::keyabs("abstract"); @@ -70,5 +72,31 @@ namespace Rcl { if (dotext) LOGDEB(("Rcl::Doc::dump: text: \n[%s]\n", text.c_str())); } + + // Copy ensuring no shared string data, for threading issues. + void Doc::copyto(Doc *d) const + { + d->url.assign(url.begin(), url.end()); + d->idxurl.assign(idxurl.begin(), idxurl.end()); + d->idxi = idxi; + d->ipath.assign(ipath.begin(), ipath.end()); + d->mimetype.assign(mimetype.begin(), mimetype.end()); + d->fmtime.assign(fmtime.begin(), fmtime.end()); + d->dmtime.assign(dmtime.begin(), dmtime.end()); + d->origcharset.assign(origcharset.begin(), origcharset.end()); + map_ss_cp_noshr(meta, &d->meta); + d->syntabs = syntabs; + d->pcbytes.assign(pcbytes.begin(), pcbytes.end()); + d->fbytes.assign(fbytes.begin(), fbytes.end()); + d->dbytes.assign(dbytes.begin(), dbytes.end()); + d->sig.assign(sig.begin(), sig.end()); + d->text.assign(text.begin(), text.end()); + d->pc = pc; + d->xdocid = xdocid; + d->idxi = idxi; + d->haspages = haspages; + d->haschildren = haschildren; + d->onlyxattr = onlyxattr; + } } diff --git a/src/rcldb/rcldoc.h b/src/rcldb/rcldoc.h index 120d9ae8..08a08dd7 100644 --- a/src/rcldb/rcldoc.h +++ b/src/rcldb/rcldoc.h @@ -163,33 +163,11 @@ class Doc { onlyxattr = false; } // Copy ensuring no shared string data, for threading issues. - void copyto(Doc *d) const { - d->url.assign(url.begin(), url.end()); - d->idxurl.assign(idxurl.begin(), idxurl.end()); - d->idxi = idxi; - d->ipath.assign(ipath.begin(), ipath.end()); - d->mimetype.assign(mimetype.begin(), mimetype.end()); - d->fmtime.assign(fmtime.begin(), fmtime.end()); - d->dmtime.assign(dmtime.begin(), dmtime.end()); - d->origcharset.assign(origcharset.begin(), origcharset.end()); - map_ss_cp_noshr(meta, &d->meta); - d->syntabs = syntabs; - d->pcbytes.assign(pcbytes.begin(), pcbytes.end()); - d->fbytes.assign(fbytes.begin(), fbytes.end()); - d->dbytes.assign(dbytes.begin(), dbytes.end()); - d->sig.assign(sig.begin(), sig.end()); - d->text.assign(text.begin(), text.end()); - d->pc = pc; - d->xdocid = xdocid; - d->idxi = idxi; - d->haspages = haspages; - d->haschildren = haschildren; - d->onlyxattr = onlyxattr; - } + void copyto(Doc *d) const; + Doc() : idxi(0), syntabs(false), pc(0), xdocid(0), - haspages(false), haschildren(false), onlyxattr(false) - { + haspages(false), haschildren(false), onlyxattr(false) { } /** Get value for named field. If value pointer is 0, just test existence */ bool getmeta(const string& nm, string *value = 0) const diff --git a/src/unac/unac.c b/src/unac/unac.c index 1c310831..d1c1bd44 100644 --- a/src/unac/unac.c +++ b/src/unac/unac.c @@ -32,10 +32,12 @@ #include #include UNORDERED_MAP_INCLUDE -using std::string; #include "smallut.h" +using std::string; +using std::vector; + /* Storage for the exception translations. These are chars which should not be translated according to what UnicodeData says, but diff --git a/src/utils/circache.cpp b/src/utils/circache.cpp index e3dc8fe0..5a54038e 100644 --- a/src/utils/circache.cpp +++ b/src/utils/circache.cpp @@ -71,6 +71,28 @@ typedef unsigned char UCHAR; typedef unsigned int UINT; typedef unsigned long ULONG; +/** Temp buffer with automatic deallocation */ +struct TempBuf { + TempBuf() + : m_buf(0) { + } + TempBuf(int n) { + m_buf = (char *)malloc(n); + } + ~TempBuf() { + if (m_buf) { + free(m_buf); + } + } + char *setsize(int n) { + return (m_buf = (char *)realloc(m_buf, n)); + } + char *buf() { + return m_buf; + } + char *m_buf; +}; + static bool inflateToDynBuf(void *inp, UINT inlen, void **outpp, UINT *outlenp); /* diff --git a/src/utils/cpuconf.cpp b/src/utils/cpuconf.cpp index 378d0c38..c715a1b5 100644 --- a/src/utils/cpuconf.cpp +++ b/src/utils/cpuconf.cpp @@ -18,10 +18,16 @@ #ifndef TEST_CPUCONF #include "autoconfig.h" + +#include + #include "cpuconf.h" #include "execmd.h" #include "smallut.h" +using std::string; +using std::vector; + #if defined(__gnu_linux__) bool getCpuConf(CpuConf& conf) { diff --git a/src/utils/hldata.cpp b/src/utils/hldata.cpp new file mode 100644 index 00000000..44fcef94 --- /dev/null +++ b/src/utils/hldata.cpp @@ -0,0 +1,78 @@ +/* Copyright (C) 2016 J.F.Dockes + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the + * Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ +#include "autoconfig.h" + +#include + +#include "hldata.h" + +using std::string; +using std::map; + +void HighlightData::toString(string& out) +{ + out.append("\nUser terms (orthograph): "); + for (std::set::const_iterator it = uterms.begin(); + it != uterms.end(); it++) { + out.append(" [").append(*it).append("]"); + } + out.append("\nUser terms to Query terms:"); + for (map::const_iterator it = terms.begin(); + it != terms.end(); it++) { + out.append("[").append(it->first).append("]->["); + out.append(it->second).append("] "); + } + out.append("\nGroups: "); + char cbuf[200]; + sprintf(cbuf, "Groups size %d grpsugidx size %d ugroups size %d", + int(groups.size()), int(grpsugidx.size()), int(ugroups.size())); + out.append(cbuf); + + size_t ugidx = (size_t) - 1; + for (unsigned int i = 0; i < groups.size(); i++) { + if (ugidx != grpsugidx[i]) { + ugidx = grpsugidx[i]; + out.append("\n("); + for (unsigned int j = 0; j < ugroups[ugidx].size(); j++) { + out.append("[").append(ugroups[ugidx][j]).append("] "); + } + out.append(") ->"); + } + out.append(" {"); + for (unsigned int j = 0; j < groups[i].size(); j++) { + out.append("[").append(groups[i][j]).append("]"); + } + sprintf(cbuf, "%d", slacks[i]); + out.append("}").append(cbuf); + } + out.append("\n"); +} + +void HighlightData::append(const HighlightData& hl) +{ + uterms.insert(hl.uterms.begin(), hl.uterms.end()); + terms.insert(hl.terms.begin(), hl.terms.end()); + size_t ugsz0 = ugroups.size(); + ugroups.insert(ugroups.end(), hl.ugroups.begin(), hl.ugroups.end()); + + groups.insert(groups.end(), hl.groups.begin(), hl.groups.end()); + slacks.insert(slacks.end(), hl.slacks.begin(), hl.slacks.end()); + for (std::vector::const_iterator it = hl.grpsugidx.begin(); + it != hl.grpsugidx.end(); it++) { + grpsugidx.push_back(*it + ugsz0); + } +} diff --git a/src/utils/hldata.h b/src/utils/hldata.h index 168e6896..93766d8a 100644 --- a/src/utils/hldata.h +++ b/src/utils/hldata.h @@ -4,6 +4,7 @@ #include #include #include +#include /** Store data about user search terms and their expansions. This is used * mostly for highlighting result text and walking the matches, generating diff --git a/src/utils/pathut.cpp b/src/utils/pathut.cpp index 468efd79..fd4d4466 100644 --- a/src/utils/pathut.cpp +++ b/src/utils/pathut.cpp @@ -16,38 +16,44 @@ */ #ifndef TEST_PATHUT +#ifdef BUILDING_RECOLL #include "autoconfig.h" +#else +#include "config.h" +#endif #include +#ifdef _WIN32 +#include "dirent.h" #include "safefcntl.h" #include "safeunistd.h" -#include "dirent.h" -#include "cstr.h" -#ifdef _WIN32 #include "safewindows.h" +#include "safesysstat.h" #else +#include +#include #include #include #include +#include +#include #endif #include #include #include -#include "safesysstat.h" -#include "ptmutex.h" // Let's include all files where statfs can be defined and hope for no // conflict... -#ifdef HAVE_SYS_MOUNT_H +#ifdef HAVE_SYS_MOUNT_H #include #endif -#ifdef HAVE_SYS_STATFS_H +#ifdef HAVE_SYS_STATFS_H #include #endif -#ifdef HAVE_SYS_STATVFS_H +#ifdef HAVE_SYS_STATVFS_H #include #endif -#ifdef HAVE_SYS_VFS_H +#ifdef HAVE_SYS_VFS_H #include #endif @@ -60,9 +66,6 @@ #include #include "pathut.h" -#include "transcode.h" -#include "wipedir.h" -#include "md5ut.h" using namespace std; @@ -71,8 +74,9 @@ using namespace std; void path_slashize(string& s) { for (string::size_type i = 0; i < s.size(); i++) { - if (s[i] == '\\') + if (s[i] == '\\') { s[i] = '/'; + } } } static bool path_strlookslikedrive(const string& s) @@ -82,14 +86,16 @@ static bool path_strlookslikedrive(const string& s) static bool path_hasdrive(const string& s) { - if (s.size() >= 2 && isalpha(s[0]) && s[1] == ':') + if (s.size() >= 2 && isalpha(s[0]) && s[1] == ':') { return true; + } return false; } static bool path_isdriveabs(const string& s) { - if (s.size() >= 3 && isalpha(s[0]) && s[1] == ':' && s[2] == '/') + if (s.size() >= 3 && isalpha(s[0]) && s[1] == ':' && s[2] == '/') { return true; + } return false; } @@ -109,7 +115,7 @@ string path_tchartoutf8(TCHAR *text) int size = WideCharToMultiByte(CP_UTF8, 0, text, -1, NULL, 0, NULL, NULL); if (size > 0) { buffer.resize(size); - WideCharToMultiByte(CP_UTF8, 0, text, -1, + WideCharToMultiByte(CP_UTF8, 0, text, -1, &buffer[0], int(buffer.size()), NULL, NULL); } else { return string(); @@ -130,17 +136,18 @@ string path_thisexecpath() PathRemoveFileSpec(text); #endif string path = path_tchartoutf8(text); - if (path.empty()) + if (path.empty()) { path = "c:/"; + } return path; } string path_wingettempfilename(TCHAR *pref) { - TCHAR buf[(MAX_PATH +1)*sizeof(TCHAR)]; - TCHAR dbuf[(MAX_PATH +1)*sizeof(TCHAR)]; - GetTempPath(MAX_PATH+1, dbuf); + TCHAR buf[(MAX_PATH + 1)*sizeof(TCHAR)]; + TCHAR dbuf[(MAX_PATH + 1)*sizeof(TCHAR)]; + GetTempPath(MAX_PATH + 1, dbuf); GetTempFileName(dbuf, pref, 0, buf); // Windows will have created a temp file, we delete it. string filename = path_tchartoutf8(buf); @@ -150,10 +157,11 @@ string path_wingettempfilename(TCHAR *pref) } #endif - -bool fsocc(const string &path, int *pc, long long *avmbs) +#if defined(HAVE_SYS_MOUNT_H) || defined(HAVE_SYS_STATFS_H) || \ + defined(HAVE_SYS_STATVFS_H) || defined(HAVE_SYS_VFS_H) +bool fsocc(const string& path, int *pc, long long *avmbs) { - static const int FSOCC_MB = 1024*1024; + static const int FSOCC_MB = 1024 * 1024; #ifdef _WIN32 ULARGE_INTEGER freebytesavail; ULARGE_INTEGER totalbytes; @@ -161,21 +169,23 @@ bool fsocc(const string &path, int *pc, long long *avmbs) &totalbytes, NULL)) { return false; } - if (pc) + if (pc) { *pc = int((100 * freebytesavail.QuadPart) / totalbytes.QuadPart); - if (avmbs) + } + if (avmbs) { *avmbs = int(totalbytes.QuadPart / FSOCC_MB); + } return true; #else #ifdef sun struct statvfs buf; if (statvfs(path.c_str(), &buf) != 0) { - return false; + return false; } #else struct statfs buf; if (statfs(path.c_str(), &buf) != 0) { - return false; + return false; } #endif @@ -184,72 +194,26 @@ bool fsocc(const string &path, int *pc, long long *avmbs) #define FSOCC_USED (double(buf.f_blocks - buf.f_bfree)) #define FSOCC_TOTAVAIL (FSOCC_USED + double(buf.f_bavail)) if (FSOCC_TOTAVAIL > 0) { - fpc = 100.0 * FSOCC_USED / FSOCC_TOTAVAIL; + fpc = 100.0 * FSOCC_USED / FSOCC_TOTAVAIL; } - if (pc) + if (pc) { *pc = int(fpc); + } if (avmbs) { - *avmbs = 0; - if (buf.f_bsize > 0) { - int ratio = buf.f_bsize > FSOCC_MB ? buf.f_bsize / FSOCC_MB : - FSOCC_MB / buf.f_bsize; + *avmbs = 0; + if (buf.f_bsize > 0) { + int ratio = buf.f_bsize > FSOCC_MB ? buf.f_bsize / FSOCC_MB : + FSOCC_MB / buf.f_bsize; - *avmbs = buf.f_bsize > FSOCC_MB ? - ((long long)buf.f_bavail) * ratio : - ((long long)buf.f_bavail) / ratio; - } + *avmbs = buf.f_bsize > FSOCC_MB ? + ((long long)buf.f_bavail) * ratio : + ((long long)buf.f_bavail) / ratio; + } } return true; #endif } - -const string& tmplocation() -{ - static string stmpdir; - if (stmpdir.empty()) { - const char *tmpdir = getenv("RECOLL_TMPDIR"); - if (tmpdir == 0) - tmpdir = getenv("TMPDIR"); - if (tmpdir == 0) - tmpdir = getenv("TMP"); - if (tmpdir == 0) - tmpdir = getenv("TEMP"); - if (tmpdir == 0) { -#ifdef _WIN32 - TCHAR bufw[(MAX_PATH+1)*sizeof(TCHAR)]; - GetTempPath(MAX_PATH+1, bufw); - stmpdir = path_tchartoutf8(bufw); -#else - stmpdir = "/tmp"; -#endif - } else { - stmpdir = tmpdir; - } - stmpdir = path_canon(stmpdir); - } - - return stmpdir; -} - -// Location for sample config, filters, etc. (e.g. /usr/share/recoll/) -const string& path_sharedatadir() -{ - static string datadir; - if (datadir.empty()) { -#ifdef _WIN32 - datadir = path_cat(path_thisexecpath(), "Share"); -#else - const char *cdatadir = getenv("RECOLL_DATADIR"); - if (cdatadir == 0) { - // If not in environment, use the compiled-in constant. - datadir = RECOLL_DATADIR; - } else { - datadir = cdatadir; - } -#endif - } - return datadir; -} +#endif // we have found an appropriate include file string path_PATHsep() { @@ -262,150 +226,17 @@ string path_PATHsep() #endif } -bool maketmpdir(string& tdir, string& reason) -{ -#ifndef _WIN32 - tdir = path_cat(tmplocation(), "rcltmpXXXXXX"); - - char *cp = strdup(tdir.c_str()); - if (!cp) { - reason = "maketmpdir: out of memory (for file name !)\n"; - tdir.erase(); - return false; - } - - // There is a race condition between name computation and - // mkdir. try to make sure that we at least don't shoot ourselves - // in the foot -#if !defined(HAVE_MKDTEMP) || defined(_WIN32) - static PTMutexInit mlock; - PTMutexLocker lock(mlock); -#endif - - if (! -#ifdef HAVE_MKDTEMP - mkdtemp(cp) -#else - mktemp(cp) -#endif // HAVE_MKDTEMP - ) { - free(cp); - reason = "maketmpdir: mktemp failed for [" + tdir + "] : " + - strerror(errno); - tdir.erase(); - return false; - } - tdir = cp; - free(cp); -#else // _WIN32 - // There is a race condition between name computation and - // mkdir. try to make sure that we at least don't shoot ourselves - // in the foot - static PTMutexInit mlock; - PTMutexLocker lock(mlock); - tdir = path_wingettempfilename(TEXT("rcltmp")); -#endif - - // At this point the directory does not exist yet except if we used - // mkdtemp - -#if !defined(HAVE_MKDTEMP) || defined(_WIN32) - if (mkdir(tdir.c_str(), 0700) < 0) { - reason = string("maketmpdir: mkdir ") + tdir + " failed"; - tdir.erase(); - return false; - } -#endif - - return true; -} - -TempFileInternal::TempFileInternal(const string& suffix) - : m_noremove(false) -{ - // Because we need a specific suffix, can't use mkstemp - // well. There is a race condition between name computation and - // file creation. try to make sure that we at least don't shoot - // our own selves in the foot. maybe we'll use mkstemps one day. - static PTMutexInit mlock; - PTMutexLocker lock(mlock); - -#ifndef _WIN32 - string filename = path_cat(tmplocation(), "rcltmpfXXXXXX"); - char *cp = strdup(filename.c_str()); - if (!cp) { - m_reason = "Out of memory (for file name !)\n"; - return; - } - - // Using mkstemp this way is awful (bot the suffix adding and - // using mkstemp() instead of mktemp just to avoid the warnings) - int fd; - if ((fd = mkstemp(cp)) < 0) { - free(cp); - m_reason = "TempFileInternal: mkstemp failed\n"; - return; - } - close(fd); - unlink(cp); - filename = cp; - free(cp); -#else - string filename = path_wingettempfilename(TEXT("recoll")); -#endif - - m_filename = filename + suffix; - if (close(open(m_filename.c_str(), O_CREAT|O_EXCL, 0600)) != 0) { - m_reason = string("Could not open/create") + m_filename; - m_filename.erase(); - } -} - -TempFileInternal::~TempFileInternal() -{ - if (!m_filename.empty() && !m_noremove) - unlink(m_filename.c_str()); -} - -TempDir::TempDir() -{ - if (!maketmpdir(m_dirname, m_reason)) { - m_dirname.erase(); - return; - } -} - -TempDir::~TempDir() -{ - if (!m_dirname.empty()) { - (void)wipedir(m_dirname, true, true); - m_dirname.erase(); - } -} - -bool TempDir::wipe() -{ - if (m_dirname.empty()) { - m_reason = "TempDir::wipe: no directory !\n"; - return false; - } - if (wipedir(m_dirname, false, true)) { - m_reason = "TempDir::wipe: wipedir failed\n"; - return false; - } - return true; -} - -void path_catslash(string &s) +void path_catslash(string& s) { #ifdef _WIN32 path_slashize(s); #endif - if (s.empty() || s[s.length() - 1] != '/') - s += '/'; + if (s.empty() || s[s.length() - 1] != '/') { + s += '/'; + } } -string path_cat(const string &s1, const string &s2) +string path_cat(const string& s1, const string& s2) { string res = s1; path_catslash(res); @@ -413,7 +244,7 @@ string path_cat(const string &s1, const string &s2) return res; } -string path_getfather(const string &s) +string path_getfather(const string& s) { string father = s; #ifdef _WIN32 @@ -421,62 +252,69 @@ string path_getfather(const string &s) #endif // ?? - if (father.empty()) - return "./"; + if (father.empty()) { + return "./"; + } - if (path_isroot(father)) + if (path_isroot(father)) { return father; - + } + if (father[father.length() - 1] == '/') { - // Input ends with /. Strip it, root special case was tested above - father.erase(father.length()-1); + // Input ends with /. Strip it, root special case was tested above + father.erase(father.length() - 1); } string::size_type slp = father.rfind('/'); - if (slp == string::npos) - return "./"; + if (slp == string::npos) { + return "./"; + } father.erase(slp); path_catslash(father); return father; } -string path_getsimple(const string &s) +string path_getsimple(const string& s) { string simple = s; #ifdef _WIN32 path_slashize(simple); #endif - if (simple.empty()) - return simple; + if (simple.empty()) { + return simple; + } string::size_type slp = simple.rfind('/'); - if (slp == string::npos) - return simple; + if (slp == string::npos) { + return simple; + } - simple.erase(0, slp+1); + simple.erase(0, slp + 1); return simple; } -string path_basename(const string &s, const string &suff) +string path_basename(const string& s, const string& suff) { string simple = path_getsimple(s); string::size_type pos = string::npos; if (suff.length() && simple.length() > suff.length()) { - pos = simple.rfind(suff); - if (pos != string::npos && pos + suff.length() == simple.length()) - return simple.substr(0, pos); - } + pos = simple.rfind(suff); + if (pos != string::npos && pos + suff.length() == simple.length()) { + return simple.substr(0, pos); + } + } return simple; } string path_suffix(const string& s) { string::size_type dotp = s.rfind('.'); - if (dotp == string::npos) - return string(); - return s.substr(dotp+1); + if (dotp == string::npos) { + return string(); + } + return s.substr(dotp + 1); } string path_home() @@ -507,11 +345,12 @@ string path_home() struct passwd *entry = getpwuid(uid); if (entry == 0) { - const char *cp = getenv("HOME"); - if (cp) - return cp; - else - return "/"; + const char *cp = getenv("HOME"); + if (cp) { + return cp; + } else { + return "/"; + } } string homedir = entry->pw_dir; @@ -539,29 +378,31 @@ string path_homedata() #endif } -string path_tildexpand(const string &s) +string path_tildexpand(const string& s) { - if (s.empty() || s[0] != '~') - return s; + if (s.empty() || s[0] != '~') { + return s; + } string o = s; #ifdef _WIN32 path_slashize(o); #endif - + if (s.length() == 1) { - o.replace(0, 1, path_home()); - } else if (s[1] == '/') { - o.replace(0, 2, path_home()); + o.replace(0, 1, path_home()); + } else if (s[1] == '/') { + o.replace(0, 2, path_home()); } else { - string::size_type pos = s.find('/'); + string::size_type pos = s.find('/'); string::size_type l = (pos == string::npos) ? s.length() - 1 : pos - 1; #ifdef _WIN32 // Dont know what this means. Just replace with HOME - o.replace(0, l+1, path_home()); + o.replace(0, l + 1, path_home()); #else - struct passwd *entry = getpwnam(s.substr(1, l).c_str()); - if (entry) - o.replace(0, l+1, entry->pw_dir); + struct passwd *entry = getpwnam(s.substr(1, l).c_str()); + if (entry) { + o.replace(0, l + 1, entry->pw_dir); + } #endif } return o; @@ -569,39 +410,42 @@ string path_tildexpand(const string &s) bool path_isroot(const string& path) { - if (path.size() == 1 && path[0] == '/') + if (path.size() == 1 && path[0] == '/') { return true; + } #ifdef _WIN32 if (path.size() == 3 && isalpha(path[0]) && path[1] == ':' && - (path[2] == '/' || path[2] == '\\')) + (path[2] == '/' || path[2] == '\\')) { return true; + } #endif return false; } -bool path_isabsolute(const string &path) +bool path_isabsolute(const string& path) { if (!path.empty() && (path[0] == '/' #ifdef _WIN32 || path_isdriveabs(path) #endif - )) { + )) { return true; - } + } return false; } - -string path_absolute(const string &is) + +string path_absolute(const string& is) { - if (is.length() == 0) - return is; + if (is.length() == 0) { + return is; + } string s = is; if (!path_isabsolute(s)) { - char buf[MAXPATHLEN]; - if (!getcwd(buf, MAXPATHLEN)) { - return string(); - } - s = path_cat(string(buf), s); + char buf[MAXPATHLEN]; + if (!getcwd(buf, MAXPATHLEN)) { + return string(); + } + s = path_cat(string(buf), s); #ifdef _WIN32 path_slashize(s); #endif @@ -610,10 +454,11 @@ string path_absolute(const string &is) } #include -string path_canon(const string &is, const string* cwd) +string path_canon(const string& is, const string* cwd) { - if (is.length() == 0) - return is; + if (is.length() == 0) { + return is; + } string s = is; #ifdef _WIN32 path_slashize(s); @@ -624,34 +469,35 @@ string path_canon(const string &is, const string* cwd) #endif if (!path_isabsolute(s)) { - char buf[MAXPATHLEN]; - const char *cwdp = buf; - if (cwd) { - cwdp = cwd->c_str(); - } else { - if (!getcwd(buf, MAXPATHLEN)) { - return string(); - } - } - s = path_cat(string(cwdp), s); + char buf[MAXPATHLEN]; + const char *cwdp = buf; + if (cwd) { + cwdp = cwd->c_str(); + } else { + if (!getcwd(buf, MAXPATHLEN)) { + return string(); + } + } + s = path_cat(string(cwdp), s); } vector elems; stringToTokens(s, elems, "/"); vector cleaned; - for (vector::const_iterator it = elems.begin(); - it != elems.end(); it++){ - if (*it == "..") { - if (!cleaned.empty()) - cleaned.pop_back(); - } else if (it->empty() || *it == ".") { - } else { - cleaned.push_back(*it); - } + for (vector::const_iterator it = elems.begin(); + it != elems.end(); it++) { + if (*it == "..") { + if (!cleaned.empty()) { + cleaned.pop_back(); + } + } else if (it->empty() || *it == ".") { + } else { + cleaned.push_back(*it); + } } string ret; if (!cleaned.empty()) { - for (vector::const_iterator it = cleaned.begin(); - it != cleaned.end(); it++) { + for (vector::const_iterator it = cleaned.begin(); + it != cleaned.end(); it++) { ret += "/"; #ifdef _WIN32 if (it == cleaned.begin() && path_strlookslikedrive(*it)) { @@ -659,10 +505,10 @@ string path_canon(const string &is, const string* cwd) ret.clear(); } #endif - ret += *it; - } + ret += *it; + } } else { - ret = "/"; + ret = "/"; } return ret; } @@ -673,21 +519,22 @@ bool makepath(const string& ipath) vector elems; stringToTokens(path, elems, "/"); path = "/"; - for (vector::const_iterator it = elems.begin(); - it != elems.end(); it++){ + for (vector::const_iterator it = elems.begin(); + it != elems.end(); it++) { #ifdef _WIN32 - if (it == elems.begin() && path_strlookslikedrive(*it)) + if (it == elems.begin() && path_strlookslikedrive(*it)) { path = ""; + } #endif - path += *it; - // Not using path_isdir() here, because this cant grok symlinks - // If we hit an existing file, no worry, mkdir will just fail. - if (access(path.c_str(), 0) != 0) { - if (mkdir(path.c_str(), 0700) != 0) { - return false; - } - } - path += "/"; + path += *it; + // Not using path_isdir() here, because this cant grok symlinks + // If we hit an existing file, no worry, mkdir will just fail. + if (access(path.c_str(), 0) != 0) { + if (mkdir(path.c_str(), 0700) != 0) { + return false; + } + } + path += "/"; } return true; } @@ -695,30 +542,35 @@ bool makepath(const string& ipath) bool path_isdir(const string& path) { struct stat st; - if (lstat(path.c_str(), &st) < 0) - return false; - if (S_ISDIR(st.st_mode)) - return true; + if (lstat(path.c_str(), &st) < 0) { + return false; + } + if (S_ISDIR(st.st_mode)) { + return true; + } return false; } long long path_filesize(const string& path) { struct stat st; - if (stat(path.c_str(), &st) < 0) - return -1; + if (stat(path.c_str(), &st) < 0) { + return -1; + } return (long long)st.st_size; } int path_fileprops(const std::string path, struct stat *stp, bool follow) { - if (!stp) + if (!stp) { return -1; + } memset(stp, 0, sizeof(struct stat)); struct stat mst; int ret = follow ? stat(path.c_str(), &mst) : lstat(path.c_str(), &mst); - if (ret != 0) + if (ret != 0) { return ret; + } stp->st_size = mst.st_size; stp->st_mode = mst.st_mode; stp->st_mtime = mst.st_mtime; @@ -751,7 +603,7 @@ bool path_exists(const string& path) 29 ) 2A * 2B + -2C , +2C , 2D - 2E . 2F / @@ -788,32 +640,32 @@ string url_encode(const string& url, string::size_type offs) string out = url.substr(0, offs); const char *cp = url.c_str(); for (string::size_type i = offs; i < url.size(); i++) { - unsigned int c; - const char *h = "0123456789ABCDEF"; - c = cp[i]; - if (c <= 0x20 || - c >= 0x7f || - c == '"' || - c == '#' || - c == '%' || - c == ';' || - c == '<' || - c == '>' || - c == '?' || - c == '[' || - c == '\\' || - c == ']' || - c == '^' || - c == '`' || - c == '{' || - c == '|' || - c == '}' ) { - out += '%'; - out += h[(c >> 4) & 0xf]; - out += h[c & 0xf]; - } else { - out += char(c); - } + unsigned int c; + const char *h = "0123456789ABCDEF"; + c = cp[i]; + if (c <= 0x20 || + c >= 0x7f || + c == '"' || + c == '#' || + c == '%' || + c == ';' || + c == '<' || + c == '>' || + c == '?' || + c == '[' || + c == '\\' || + c == ']' || + c == '^' || + c == '`' || + c == '{' || + c == '|' || + c == '}') { + out += '%'; + out += h[(c >> 4) & 0xf]; + out += h[c & 0xf]; + } else { + out += char(c); + } } return out; } @@ -822,45 +674,24 @@ string url_gpath(const string& url) { // Remove the access schema part (or whatever it's called) string::size_type colon = url.find_first_of(":"); - if (colon == string::npos || colon == url.size() - 1) + if (colon == string::npos || colon == url.size() - 1) { return url; + } // If there are non-alphanum chars before the ':', then there // probably is no scheme. Whatever... for (string::size_type i = 0; i < colon; i++) { - if (!isalnum(url.at(i))) + if (!isalnum(url.at(i))) { return url; + } } // In addition we canonize the path to remove empty host parts // (for compatibility with older versions of recoll where file:// // was hardcoded, but the local path was used for doc // identification. - return path_canon(url.substr(colon+1)); + return path_canon(url.substr(colon + 1)); } -string url_gpathS(const string& url) -{ -#ifdef _WIN32 - string u = url_gpath(url); - string nu; - if (path_hasdrive(u)) { - nu.append(1, '/'); - nu.append(1, u[0]); - if (path_isdriveabs(u)) { - nu.append(u.substr(2)); - } else { - // This should be an error really - nu.append(1, '/'); - nu.append(u.substr(2)); - } - } - return nu; -#else - return url_gpath(url); -#endif -} - - string url_parentfolder(const string& url) { // In general, the parent is the directory above the full path @@ -872,28 +703,20 @@ string url_parentfolder(const string& url) parenturl = url_gpath(url); } return isfileurl ? string("file://") + parenturl : - string("http://") + parenturl; + string("http://") + parenturl; } -string path_defaultrecollconfsubdir() -{ -#ifdef _WIN32 - return "Recoll"; -#else - return ".recoll"; -#endif -} - // Convert to file path if url is like file: // Note: this only works with our internal pseudo-urls which are not // encoded/escaped string fileurltolocalpath(string url) { - if (url.find("file://") == 0) + if (url.find("file://") == 0) { url = url.substr(7, string::npos); - else + } else { return string(); + } #ifdef _WIN32 // Absolute file urls are like: file:///c:/mydir/... @@ -908,23 +731,26 @@ string fileurltolocalpath(string url) // part after # if it is preceded by .html string::size_type pos; if ((pos = url.rfind(".html#")) != string::npos) { - url.erase(pos+5); + url.erase(pos + 5); } else if ((pos = url.rfind(".htm#")) != string::npos) { - url.erase(pos+4); + url.erase(pos + 4); } return url; } +static const string cstr_fileu("file://"); + string path_pathtofileurl(const string& path) { - // We're supposed to receive a canonic absolute path, but on windows we - // may need to add a '/' in front of the drive spec - string url(cstr_fileu); - if (path.empty() || path[0] != '/') - url.push_back('/'); - url += path; - return url; + // We're supposed to receive a canonic absolute path, but on windows we + // may need to add a '/' in front of the drive spec + string url(cstr_fileu); + if (path.empty() || path[0] != '/') { + url.push_back('/'); + } + url += path; + return url; } bool urlisfileurl(const string& url) @@ -932,17 +758,6 @@ bool urlisfileurl(const string& url) return url.find("file://") == 0; } -// Printable url: this is used to transcode from the system charset -// into either utf-8 if transcoding succeeds, or url-encoded -bool printableUrl(const string &fcharset, const string &in, string &out) -{ - int ecnt = 0; - if (!transcode(in, out, fcharset, "UTF-8", &ecnt) || ecnt) { - out = url_encode(in, 7); - } - return true; -} - bool readdir(const string& dir, string& reason, set& entries) { struct stat st; @@ -951,37 +766,40 @@ bool readdir(const string& dir, string& reason, set& entries) DIR *d = 0; statret = lstat(dir.c_str(), &st); if (statret == -1) { - msg << "readdir: cant stat " << dir << " errno " << errno; - goto out; + msg << "readdir: cant stat " << dir << " errno " << errno; + goto out; } if (!S_ISDIR(st.st_mode)) { - msg << "readdir: " << dir << " not a directory"; - goto out; + msg << "readdir: " << dir << " not a directory"; + goto out; } if (access(dir.c_str(), R_OK) < 0) { - msg << "readdir: no read access to " << dir; - goto out; + msg << "readdir: no read access to " << dir; + goto out; } d = opendir(dir.c_str()); if (d == 0) { - msg << "readdir: cant opendir " << dir << ", errno " << errno; - goto out; + msg << "readdir: cant opendir " << dir << ", errno " << errno; + goto out; } struct dirent *ent; while ((ent = readdir(d)) != 0) { - if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, "..")) - continue; - entries.insert(ent->d_name); + if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, "..")) { + continue; + } + entries.insert(ent->d_name); } out: - if (d) - closedir(d); + if (d) { + closedir(d); + } reason = msg.str(); - if (reason.empty()) - return true; + if (reason.empty()) { + return true; + } return false; } @@ -992,36 +810,40 @@ out: // alone. Pidfile::~Pidfile() { - if (m_fd >= 0) - ::close(m_fd); + if (m_fd >= 0) { + ::close(m_fd); + } m_fd = -1; } pid_t Pidfile::read_pid() { int fd = ::open(m_path.c_str(), O_RDONLY); - if (fd == -1) - return (pid_t)-1; + if (fd == -1) { + return (pid_t) - 1; + } char buf[16]; int i = read(fd, buf, sizeof(buf) - 1); ::close(fd); - if (i <= 0) - return (pid_t)-1; + if (i <= 0) { + return (pid_t) - 1; + } buf[i] = '\0'; char *endptr; pid_t pid = strtol(buf, &endptr, 10); - if (endptr != &buf[i]) - return (pid_t)-1; + if (endptr != &buf[i]) { + return (pid_t) - 1; + } return pid; } int Pidfile::flopen() { const char *path = m_path.c_str(); - if ((m_fd = ::open(path, O_RDWR|O_CREAT, 0644)) == -1) { - m_reason = "Open failed: [" + m_path + "]: " + strerror(errno); - return -1; + if ((m_fd = ::open(path, O_RDWR | O_CREAT, 0644)) == -1) { + m_reason = "Open failed: [" + m_path + "]: " + strerror(errno); + return -1; } #ifdef sun @@ -1031,34 +853,34 @@ int Pidfile::flopen() lockdata.l_type = F_WRLCK; lockdata.l_whence = SEEK_SET; if (fcntl(m_fd, F_SETLK, &lockdata) != 0) { - int serrno = errno; - (void)::close(m_fd); - errno = serrno; - m_reason = "fcntl lock failed"; - return -1; + int serrno = errno; + (void)::close(m_fd); + errno = serrno; + m_reason = "fcntl lock failed"; + return -1; } #else #ifdef _WIN32 - return 0; + return 0; #else int operation = LOCK_EX | LOCK_NB; if (flock(m_fd, operation) == -1) { - int serrno = errno; - (void)::close(m_fd); - errno = serrno; - m_reason = "flock failed"; - return -1; + int serrno = errno; + (void)::close(m_fd); + errno = serrno; + m_reason = "flock failed"; + return -1; } #endif // ! win32 #endif // ! sun if (ftruncate(m_fd, 0) != 0) { - /* can't happen [tm] */ - int serrno = errno; - (void)::close(m_fd); - errno = serrno; - m_reason = "ftruncate failed"; - return -1; + /* can't happen [tm] */ + int serrno = errno; + (void)::close(m_fd); + errno = serrno; + m_reason = "ftruncate failed"; + return -1; } return 0; } @@ -1066,7 +888,7 @@ int Pidfile::flopen() pid_t Pidfile::open() { if (flopen() < 0) { - return read_pid(); + return read_pid(); } return (pid_t)0; } @@ -1075,15 +897,15 @@ int Pidfile::write_pid() { /* truncate to allow multiple calls */ if (ftruncate(m_fd, 0) == -1) { - m_reason = "ftruncate failed"; - return -1; + m_reason = "ftruncate failed"; + return -1; } char pidstr[20]; sprintf(pidstr, "%u", int(getpid())); lseek(m_fd, 0, 0); if (::write(m_fd, pidstr, strlen(pidstr)) != (ssize_t)strlen(pidstr)) { - m_reason = "write failed"; - return -1; + m_reason = "write failed"; + return -1; } return 0; } @@ -1098,79 +920,10 @@ int Pidfile::remove() return unlink(m_path.c_str()); } - -// Freedesktop standard paths for cache directory (thumbnails are now in there) -static const string& xdgcachedir() -{ - static string xdgcache; - if (xdgcache.empty()) { - const char *cp = getenv("XDG_CACHE_HOME"); - if (cp == 0) - xdgcache = path_cat(path_home(), ".cache"); - else - xdgcache = string(cp); - } - return xdgcache; -} -static const string& thumbnailsdir() -{ - static string thumbnailsd; - if (thumbnailsd.empty()) { - thumbnailsd = path_cat(xdgcachedir(), "thumbnails"); - if (access(thumbnailsd.c_str(), 0) != 0) { - thumbnailsd = path_cat(path_home(), ".thumbnails"); - } - } - return thumbnailsd; -} - -// Place for 256x256 files -static const string thmbdirlarge = "large"; -// 128x128 -static const string thmbdirnormal = "normal"; - -static void thumbname(const string& url, string& name) -{ - string digest; - string l_url = url_encode(url); - MD5String(l_url, digest); - MD5HexPrint(digest, name); - name += ".png"; -} - -bool thumbPathForUrl(const string& url, int size, string& path) -{ - string name; - thumbname(url, name); - if (size <= 128) { - path = path_cat(thumbnailsdir(), thmbdirnormal); - path = path_cat(path, name); - if (access(path.c_str(), R_OK) == 0) { - return true; - } - } - path = path_cat(thumbnailsdir(), thmbdirlarge); - path = path_cat(path, name); - if (access(path.c_str(), R_OK) == 0) { - return true; - } - - // File does not exist. Path corresponds to the large version at this point, - // fix it if needed. - if (size <= 128) { - path = path_cat(path_home(), thmbdirnormal); - path = path_cat(path, name); - } - return false; -} - // Call funcs that need static init (not initially reentrant) void pathut_init_mt() { path_home(); - tmplocation(); - thumbnailsdir(); - path_sharedatadir(); } @@ -1185,8 +938,9 @@ void path_to_thumb(const string& _input) { string input(_input); // Make absolute path if needed - if (input[0] != '/') + if (input[0] != '/') { input = path_absolute(input); + } input = string("file://") + path_canon(input); @@ -1197,126 +951,133 @@ void path_to_thumb(const string& _input) } const char *tstvec[] = {"", "/", "/dir", "/dir/", "/dir1/dir2", - "/dir1/dir2", - "./dir", "./dir1/", "dir", "../dir", "/dir/toto.c", - "/dir/.c", "/dir/toto.txt", "toto.txt1" -}; + "/dir1/dir2", + "./dir", "./dir1/", "dir", "../dir", "/dir/toto.c", + "/dir/.c", "/dir/toto.txt", "toto.txt1" + }; const string ttvec[] = {"/dir", "", "~", "~/sub", "~root", "~root/sub", - "~nosuch", "~nosuch/sub"}; + "~nosuch", "~nosuch/sub" + }; int nttvec = sizeof(ttvec) / sizeof(string); const char *thisprog; int main(int argc, const char **argv) { - thisprog = *argv++;argc--; + thisprog = *argv++; + argc--; string s; vector::const_iterator it; #if 0 - for (unsigned int i = 0;i < sizeof(tstvec) / sizeof(char *); i++) { - cout << tstvec[i] << " Father " << path_getfather(tstvec[i]) << endl; + for (unsigned int i = 0; i < sizeof(tstvec) / sizeof(char *); i++) { + cout << tstvec[i] << " Father " << path_getfather(tstvec[i]) << endl; } - for (unsigned int i = 0;i < sizeof(tstvec) / sizeof(char *); i++) { - cout << tstvec[i] << " Simple " << path_getsimple(tstvec[i]) << endl; + for (unsigned int i = 0; i < sizeof(tstvec) / sizeof(char *); i++) { + cout << tstvec[i] << " Simple " << path_getsimple(tstvec[i]) << endl; } - for (unsigned int i = 0;i < sizeof(tstvec) / sizeof(char *); i++) { - cout << tstvec[i] << " Basename " << - path_basename(tstvec[i], ".txt") << endl; + for (unsigned int i = 0; i < sizeof(tstvec) / sizeof(char *); i++) { + cout << tstvec[i] << " Basename " << + path_basename(tstvec[i], ".txt") << endl; } #endif #if 0 for (int i = 0; i < nttvec; i++) { - cout << "tildexp: '" << ttvec[i] << "' -> '" << - path_tildexpand(ttvec[i]) << "'" << endl; + cout << "tildexp: '" << ttvec[i] << "' -> '" << + path_tildexpand(ttvec[i]) << "'" << endl; } #endif #if 0 - const string canontst[] = {"/dir1/../../..", "/////", "", - "/dir1/../../.././/////dir2///////", - "../../", - "../../../../../../../../../../" - }; + const string canontst[] = {"/dir1/../../..", "/////", "", + "/dir1/../../.././/////dir2///////", + "../../", + "../../../../../../../../../../" + }; unsigned int nttvec = sizeof(canontst) / sizeof(string); for (unsigned int i = 0; i < nttvec; i++) { - cout << "canon: '" << canontst[i] << "' -> '" << - path_canon(canontst[i]) << "'" << endl; + cout << "canon: '" << canontst[i] << "' -> '" << + path_canon(canontst[i]) << "'" << endl; } -#endif +#endif #if 0 if (argc != 2) { - cerr << "Usage: trpathut " << endl; - exit(1); + cerr << "Usage: trpathut " << endl; + exit(1); } - string dir = *argv++;argc--; - string pattern = *argv++;argc--; + string dir = *argv++; + argc--; + string pattern = *argv++; + argc--; vector matched = path_dirglob(dir, pattern); - for (it = matched.begin(); it != matched.end();it++) { - cout << *it << endl; + for (it = matched.begin(); it != matched.end(); it++) { + cout << *it << endl; } #endif #if 0 if (argc != 1) { - fprintf(stderr, "Usage: fsocc: trpathut \n"); - exit(1); + fprintf(stderr, "Usage: fsocc: trpathut \n"); + exit(1); } - string path = *argv++;argc--; + string path = *argv++; + argc--; - int pc; - long long blocks; - if (!fsocc(path, &pc, &blocks)) { - fprintf(stderr, "fsocc failed\n"); - return 1; - } - printf("pc %d, megabytes %ld\n", pc, blocks); + int pc; + long long blocks; + if (!fsocc(path, &pc, &blocks)) { + fprintf(stderr, "fsocc failed\n"); + return 1; + } + printf("pc %d, megabytes %ld\n", pc, blocks); #endif #if 0 - Pidfile pidfile("/tmp/pathutpidfile"); - pid_t pid; - if ((pid = pidfile.open()) != 0) { - cerr << "open failed. reason: " << pidfile.getreason() << - " return " << pid << endl; - exit(1); - } - pidfile.write_pid(); - sleep(10); - pidfile.close(); - pidfile.remove(); + Pidfile pidfile("/tmp/pathutpidfile"); + pid_t pid; + if ((pid = pidfile.open()) != 0) { + cerr << "open failed. reason: " << pidfile.getreason() << + " return " << pid << endl; + exit(1); + } + pidfile.write_pid(); + sleep(10); + pidfile.close(); + pidfile.remove(); #endif #if 0 - if (argc > 1) { - cerr << "Usage: thumbpath " << endl; - exit(1); - } - string input; - if (argc == 1) { - input = *argv++; - if (input.empty()) { - cerr << "Usage: thumbpath " << endl; - exit(1); - } - path_to_thumb(input); - } else { - while (getline(cin, input)) - path_to_thumb(input); - } + if (argc > 1) { + cerr << "Usage: thumbpath " << endl; + exit(1); + } + string input; + if (argc == 1) { + input = *argv++; + if (input.empty()) { + cerr << "Usage: thumbpath " << endl; + exit(1); + } + path_to_thumb(input); + } else { + while (getline(cin, input)) { + path_to_thumb(input); + } + } - - exit(0); + + exit(0); #endif #if 0 if (argc != 1) { - cerr << "Usage: trpathut " << endl; - exit(1); + cerr << "Usage: trpathut " << endl; + exit(1); } - string fn = *argv++;argc--; + string fn = *argv++; + argc--; string ext = path_suffix(fn); cout << "Suffix: [" << ext << "]" << endl; return 0; @@ -1324,10 +1085,11 @@ int main(int argc, const char **argv) #if 1 if (argc != 1) { - cerr << "Usage: trpathut url" << endl; - exit(1); + cerr << "Usage: trpathut url" << endl; + exit(1); } - string url = *argv++;argc--; + string url = *argv++; + argc--; cout << "File: [" << fileurltolocalpath(url) << "]\n"; return 0; diff --git a/src/utils/pathut.h b/src/utils/pathut.h index 10991edf..cf5ecd8d 100644 --- a/src/utils/pathut.h +++ b/src/utils/pathut.h @@ -16,63 +16,54 @@ */ #ifndef _PATHUT_H_INCLUDED_ #define _PATHUT_H_INCLUDED_ -#include "autoconfig.h" #include #include #include -#include MEMORY_INCLUDE +// Must be called in main thread before starting other threads +extern void pathut_init_mt(); /// Add a / at the end if none there yet. -extern void path_catslash(std::string &s); +extern void path_catslash(std::string& s); /// Concatenate 2 paths -extern std::string path_cat(const std::string &s1, const std::string &s2); +extern std::string path_cat(const std::string& s1, const std::string& s2); /// Get the simple file name (get rid of any directory path prefix -extern std::string path_getsimple(const std::string &s); +extern std::string path_getsimple(const std::string& s); /// Simple file name + optional suffix stripping -extern std::string path_basename(const std::string &s, - const std::string &suff = std::string()); +extern std::string path_basename(const std::string& s, + const std::string& suff = std::string()); /// Component after last '.' -extern std::string path_suffix(const std::string &s); +extern std::string path_suffix(const std::string& s); /// Get the father directory -extern std::string path_getfather(const std::string &s); +extern std::string path_getfather(const std::string& s); /// Get the current user's home directory extern std::string path_home(); -/// Expand ~ at the beginning of std::string -extern std::string path_tildexpand(const std::string &s); +/// Expand ~ at the beginning of std::string +extern std::string path_tildexpand(const std::string& s); /// Use getcwd() to make absolute path if needed. Beware: ***this can fail*** /// we return an empty path in this case. -extern std::string path_absolute(const std::string &s); +extern std::string path_absolute(const std::string& s); /// Clean up path by removing duplicated / and resolving ../ + make it absolute -extern std::string path_canon(const std::string &s, const std::string *cwd=0); +extern std::string path_canon(const std::string& s, const std::string *cwd = 0); /// Use glob(3) to return the file names matching pattern inside dir -extern std::vector path_dirglob(const std::string &dir, - const std::string pattern); +extern std::vector path_dirglob(const std::string& dir, + const std::string pattern); /// Encode according to rfc 1738 -extern std::string url_encode(const std::string& url, - std::string::size_type offs = 0); -/// Transcode to utf-8 if possible or url encoding, for display. -extern bool printableUrl(const std::string &fcharset, - const std::string &in, std::string &out); +extern std::string url_encode(const std::string& url, + std::string::size_type offs = 0); //// Convert to file path if url is like file://. This modifies the //// input (and returns a copy for convenience) extern std::string fileurltolocalpath(std::string url); /// Test for file:/// url extern bool urlisfileurl(const std::string& url); -/// +/// extern std::string url_parentfolder(const std::string& url); /// Return the host+path part of an url. This is not a general /// routine, it does the right thing only in the recoll context extern std::string url_gpath(const std::string& url); -/// Same but, in the case of a Windows local path, also turn "c:/" into -/// "/c/" This should be used only for splitting the path in rcldb, it -/// would better be local in there, but I prefer to keep all the -/// system-specific path stuff in pathut -extern std::string url_gpathS(const std::string& url); - /// Stat parameter and check if it's a directory extern bool path_isdir(const std::string& path); @@ -92,37 +83,28 @@ extern int path_fileprops(const std::string path, struct stat *stp, /// Check that path is traversable and last element exists /// Returns true if last elt could be checked to exist. False may mean that -/// the file/dir does not exist or that an error occurred. +/// the file/dir does not exist or that an error occurred. extern bool path_exists(const std::string& path); /// Return separator for PATH environment variable extern std::string path_PATHsep(); /// Dump directory -extern bool readdir(const std::string& dir, std::string& reason, - std::set& entries); +extern bool readdir(const std::string& dir, std::string& reason, + std::set& entries); /** A small wrapper around statfs et al, to return percentage of disk - occupation */ -bool fsocc(const std::string &path, int *pc, // Percent occupied - long long *avmbs = 0 // Mbs available to non-superuser. Mb=1024*1024 - ); - -/// Retrieve the temp dir location: $RECOLL_TMPDIR else $TMPDIR else /tmp -extern const std::string& tmplocation(); - -/// Create temporary directory (inside the temp location) -extern bool maketmpdir(std::string& tdir, std::string& reason); + occupation + @param[output] pc percent occupied + @param[output] avmbs Mbs available to non-superuser. Mb=1024*1024 +*/ +bool fsocc(const std::string& path, int *pc, long long *avmbs = 0); /// mkdir -p extern bool makepath(const std::string& path); -/// Sub-directory for default recoll config (e.g: .recoll) -extern std::string path_defaultrecollconfsubdir(); /// Where we create the user data subdirs extern std::string path_homedata(); -/// e.g. /usr/share/recoll. Depends on OS and config -extern const std::string& path_sharedatadir(); /// Test if path is absolute extern bool path_isabsolute(const std::string& s); @@ -137,58 +119,12 @@ extern std::string path_pathtofileurl(const std::string& path); void path_slashize(std::string& s); #endif -/// Temporary file class -class TempFileInternal { -public: - TempFileInternal(const std::string& suffix); - ~TempFileInternal(); - const char *filename() - { - return m_filename.c_str(); - } - const std::string &getreason() - { - return m_reason; - } - void setnoremove(bool onoff) - { - m_noremove = onoff; - } - bool ok() - { - return !m_filename.empty(); - } -private: - std::string m_filename; - std::string m_reason; - bool m_noremove; -}; - -typedef STD_SHARED_PTR TempFile; - -/// Temporary directory class. Recursively deleted by destructor. -class TempDir { -public: - TempDir(); - ~TempDir(); - const char *dirname() {return m_dirname.c_str();} - const std::string &getreason() {return m_reason;} - bool ok() {return !m_dirname.empty();} - /// Recursively delete contents but not self. - bool wipe(); -private: - std::string m_dirname; - std::string m_reason; - TempDir(const TempDir &) {} - TempDir& operator=(const TempDir &) {return *this;}; -}; - /// Lock/pid file class. This is quite close to the pidfile_xxx /// utilities in FreeBSD with a bit more encapsulation. I'd have used /// the freebsd code if it was available elsewhere class Pidfile { public: - Pidfile(const std::string& path) : m_path(path), m_fd(-1) {} + Pidfile(const std::string& path) : m_path(path), m_fd(-1) {} ~Pidfile(); /// Open/create the pid file. /// @return 0 if ok, > 0 for pid of existing process, -1 for other error. @@ -200,7 +136,9 @@ public: int close(); /// Delete the pid file int remove(); - const std::string& getreason() {return m_reason;} + const std::string& getreason() { + return m_reason; + } private: std::string m_path; int m_fd; @@ -209,14 +147,4 @@ private: int flopen(); }; - - -// Freedesktop thumbnail standard path routine -// On return, path will have the appropriate value in all cases, -// returns true if the file already exists -extern bool thumbPathForUrl(const std::string& url, int size, std::string& path); - -// Must be called in main thread before starting other threads -extern void pathut_init_mt(); - #endif /* _PATHUT_H_INCLUDED_ */ diff --git a/src/utils/rclutil.cpp b/src/utils/rclutil.cpp new file mode 100644 index 00000000..cd4778b8 --- /dev/null +++ b/src/utils/rclutil.cpp @@ -0,0 +1,411 @@ +/* Copyright (C) 2016 J.F.Dockes + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the + * Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ +#ifndef TEST_RCLUTIL +#include "autoconfig.h" + +#include +#include +#include +#include "safefcntl.h" +#include "safeunistd.h" +#include "dirent.h" +#include "cstr.h" +#ifdef _WIN32 +#include "safewindows.h" +#else +#include +#include +#include +#endif +#include +#include +#include +#include "safesysstat.h" +#include "ptmutex.h" + +#include "rclutil.h" +#include "pathut.h" +#include "wipedir.h" +#include "transcode.h" +#include "md5ut.h" + +using namespace std; + + +void map_ss_cp_noshr(const map s, map *d) +{ + for (map::const_iterator it = s.begin(); + it != s.end(); it++) { + d->insert( + pair(string(it->first.begin(), it->first.end()), + string(it->second.begin(), it->second.end()))); + } +} + +string path_defaultrecollconfsubdir() +{ +#ifdef _WIN32 + return "Recoll"; +#else + return ".recoll"; +#endif +} + +// Location for sample config, filters, etc. (e.g. /usr/share/recoll/) +const string& path_pkgdatadir() +{ + static string datadir; + if (datadir.empty()) { +#ifdef _WIN32 + datadir = path_cat(path_thisexecpath(), "Share"); +#else + const char *cdatadir = getenv("RECOLL_DATADIR"); + if (cdatadir == 0) { + // If not in environment, use the compiled-in constant. + datadir = RECOLL_DATADIR; + } else { + datadir = cdatadir; + } +#endif + } + return datadir; +} + +// Printable url: this is used to transcode from the system charset +// into either utf-8 if transcoding succeeds, or url-encoded +bool printableUrl(const string& fcharset, const string& in, string& out) +{ + int ecnt = 0; + if (!transcode(in, out, fcharset, "UTF-8", &ecnt) || ecnt) { + out = url_encode(in, 7); + } + return true; +} + +string url_gpathS(const string& url) +{ +#ifdef _WIN32 + string u = url_gpath(url); + string nu; + if (path_hasdrive(u)) { + nu.append(1, '/'); + nu.append(1, u[0]); + if (path_isdriveabs(u)) { + nu.append(u.substr(2)); + } else { + // This should be an error really + nu.append(1, '/'); + nu.append(u.substr(2)); + } + } + return nu; +#else + return url_gpath(url); +#endif +} + +const string& tmplocation() +{ + static string stmpdir; + if (stmpdir.empty()) { + const char *tmpdir = getenv("RECOLL_TMPDIR"); + if (tmpdir == 0) { + tmpdir = getenv("TMPDIR"); + } + if (tmpdir == 0) { + tmpdir = getenv("TMP"); + } + if (tmpdir == 0) { + tmpdir = getenv("TEMP"); + } + if (tmpdir == 0) { +#ifdef _WIN32 + TCHAR bufw[(MAX_PATH + 1)*sizeof(TCHAR)]; + GetTempPath(MAX_PATH + 1, bufw); + stmpdir = path_tchartoutf8(bufw); +#else + stmpdir = "/tmp"; +#endif + } else { + stmpdir = tmpdir; + } + stmpdir = path_canon(stmpdir); + } + + return stmpdir; +} + +bool maketmpdir(string& tdir, string& reason) +{ +#ifndef _WIN32 + tdir = path_cat(tmplocation(), "rcltmpXXXXXX"); + + char *cp = strdup(tdir.c_str()); + if (!cp) { + reason = "maketmpdir: out of memory (for file name !)\n"; + tdir.erase(); + return false; + } + + // There is a race condition between name computation and + // mkdir. try to make sure that we at least don't shoot ourselves + // in the foot +#if !defined(HAVE_MKDTEMP) || defined(_WIN32) + static PTMutexInit mlock; + PTMutexLocker lock(mlock); +#endif + + if (! +#ifdef HAVE_MKDTEMP + mkdtemp(cp) +#else + mktemp(cp) +#endif // HAVE_MKDTEMP + ) { + free(cp); + reason = "maketmpdir: mktemp failed for [" + tdir + "] : " + + strerror(errno); + tdir.erase(); + return false; + } + tdir = cp; + free(cp); +#else // _WIN32 + // There is a race condition between name computation and + // mkdir. try to make sure that we at least don't shoot ourselves + // in the foot + static PTMutexInit mlock; + PTMutexLocker lock(mlock); + tdir = path_wingettempfilename(TEXT("rcltmp")); +#endif + + // At this point the directory does not exist yet except if we used + // mkdtemp + +#if !defined(HAVE_MKDTEMP) || defined(_WIN32) + if (mkdir(tdir.c_str(), 0700) < 0) { + reason = string("maketmpdir: mkdir ") + tdir + " failed"; + tdir.erase(); + return false; + } +#endif + + return true; +} + +TempFileInternal::TempFileInternal(const string& suffix) + : m_noremove(false) +{ + // Because we need a specific suffix, can't use mkstemp + // well. There is a race condition between name computation and + // file creation. try to make sure that we at least don't shoot + // our own selves in the foot. maybe we'll use mkstemps one day. + static PTMutexInit mlock; + PTMutexLocker lock(mlock); + +#ifndef _WIN32 + string filename = path_cat(tmplocation(), "rcltmpfXXXXXX"); + char *cp = strdup(filename.c_str()); + if (!cp) { + m_reason = "Out of memory (for file name !)\n"; + return; + } + + // Using mkstemp this way is awful (bot the suffix adding and + // using mkstemp() instead of mktemp just to avoid the warnings) + int fd; + if ((fd = mkstemp(cp)) < 0) { + free(cp); + m_reason = "TempFileInternal: mkstemp failed\n"; + return; + } + close(fd); + unlink(cp); + filename = cp; + free(cp); +#else + string filename = path_wingettempfilename(TEXT("recoll")); +#endif + + m_filename = filename + suffix; + if (close(open(m_filename.c_str(), O_CREAT | O_EXCL, 0600)) != 0) { + m_reason = string("Could not open/create") + m_filename; + m_filename.erase(); + } +} + +TempFileInternal::~TempFileInternal() +{ + if (!m_filename.empty() && !m_noremove) { + unlink(m_filename.c_str()); + } +} + +TempDir::TempDir() +{ + if (!maketmpdir(m_dirname, m_reason)) { + m_dirname.erase(); + return; + } +} + +TempDir::~TempDir() +{ + if (!m_dirname.empty()) { + (void)wipedir(m_dirname, true, true); + m_dirname.erase(); + } +} + +bool TempDir::wipe() +{ + if (m_dirname.empty()) { + m_reason = "TempDir::wipe: no directory !\n"; + return false; + } + if (wipedir(m_dirname, false, true)) { + m_reason = "TempDir::wipe: wipedir failed\n"; + return false; + } + return true; +} + +// Freedesktop standard paths for cache directory (thumbnails are now in there) +static const string& xdgcachedir() +{ + static string xdgcache; + if (xdgcache.empty()) { + const char *cp = getenv("XDG_CACHE_HOME"); + if (cp == 0) { + xdgcache = path_cat(path_home(), ".cache"); + } else { + xdgcache = string(cp); + } + } + return xdgcache; +} +static const string& thumbnailsdir() +{ + static string thumbnailsd; + if (thumbnailsd.empty()) { + thumbnailsd = path_cat(xdgcachedir(), "thumbnails"); + if (access(thumbnailsd.c_str(), 0) != 0) { + thumbnailsd = path_cat(path_home(), ".thumbnails"); + } + } + return thumbnailsd; +} + +// Place for 256x256 files +static const string thmbdirlarge = "large"; +// 128x128 +static const string thmbdirnormal = "normal"; + +static void thumbname(const string& url, string& name) +{ + string digest; + string l_url = url_encode(url); + MD5String(l_url, digest); + MD5HexPrint(digest, name); + name += ".png"; +} + +bool thumbPathForUrl(const string& url, int size, string& path) +{ + string name; + thumbname(url, name); + if (size <= 128) { + path = path_cat(thumbnailsdir(), thmbdirnormal); + path = path_cat(path, name); + if (access(path.c_str(), R_OK) == 0) { + return true; + } + } + path = path_cat(thumbnailsdir(), thmbdirlarge); + path = path_cat(path, name); + if (access(path.c_str(), R_OK) == 0) { + return true; + } + + // File does not exist. Path corresponds to the large version at this point, + // fix it if needed. + if (size <= 128) { + path = path_cat(path_home(), thmbdirnormal); + path = path_cat(path, name); + } + return false; +} + +void rclutil_init_mt() +{ + path_pkgdatadir(); + tmplocation(); + thumbnailsdir(); +} + +#else // TEST_RCLUTIL + +void path_to_thumb(const string& _input) +{ + string input(_input); + // Make absolute path if needed + if (input[0] != '/') { + input = path_absolute(input); + } + + input = string("file://") + path_canon(input); + + string path; + //path = url_encode(input, 7); + thumbPathForUrl(input, 7, path); + cout << path << endl; +} + +const char *thisprog; + +int main(int argc, const char **argv) +{ + thisprog = *argv++; + argc--; + + string s; + vector::const_iterator it; + +#if 0 + if (argc > 1) { + cerr << "Usage: thumbpath " << endl; + exit(1); + } + string input; + if (argc == 1) { + input = *argv++; + if (input.empty()) { + cerr << "Usage: thumbpath " << endl; + exit(1); + } + path_to_thumb(input); + } else { + while (getline(cin, input)) { + path_to_thumb(input); + } + } + exit(0); +#endif +} + +#endif // TEST_RCLUTIL + diff --git a/src/utils/rclutil.h b/src/utils/rclutil.h new file mode 100644 index 00000000..3084b9a3 --- /dev/null +++ b/src/utils/rclutil.h @@ -0,0 +1,112 @@ +/* Copyright (C) 2016 J.F.Dockes + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the + * Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef _RCLUTIL_H_INCLUDED_ +#define _RCLUTIL_H_INCLUDED_ +#include "autoconfig.h" + +// Misc stuff not generic enough to get into smallut or pathut + +#include +#include +#include MEMORY_INCLUDE + + +extern void rclutil_init_mt(); + +/// Sub-directory for default recoll config (e.g: .recoll) +extern std::string path_defaultrecollconfsubdir(); + +/// e.g. /usr/share/recoll. Depends on OS and config +extern const std::string& path_pkgdatadir(); + +/// Transcode to utf-8 if possible or url encoding, for display. +extern bool printableUrl(const std::string& fcharset, + const std::string& in, std::string& out); +/// Same but, in the case of a Windows local path, also turn "c:/" into +/// "/c/" This should be used only for splitting the path in rcldb. +extern std::string url_gpathS(const std::string& url); + +/// Retrieve the temp dir location: $RECOLL_TMPDIR else $TMPDIR else /tmp +extern const std::string& tmplocation(); + +/// Create temporary directory (inside the temp location) +extern bool maketmpdir(std::string& tdir, std::string& reason); + +/// Temporary file class +class TempFileInternal { +public: + TempFileInternal(const std::string& suffix); + ~TempFileInternal(); + const char *filename() { + return m_filename.c_str(); + } + const std::string& getreason() { + return m_reason; + } + void setnoremove(bool onoff) { + m_noremove = onoff; + } + bool ok() { + return !m_filename.empty(); + } +private: + std::string m_filename; + std::string m_reason; + bool m_noremove; +}; + +typedef STD_SHARED_PTR TempFile; + +/// Temporary directory class. Recursively deleted by destructor. +class TempDir { +public: + TempDir(); + ~TempDir(); + const char *dirname() { + return m_dirname.c_str(); + } + const std::string& getreason() { + return m_reason; + } + bool ok() { + return !m_dirname.empty(); + } + /// Recursively delete contents but not self. + bool wipe(); +private: + std::string m_dirname; + std::string m_reason; + TempDir(const TempDir&) {} + TempDir& operator=(const TempDir&) { + return *this; + }; +}; + +// Freedesktop thumbnail standard path routine +// On return, path will have the appropriate value in all cases, +// returns true if the file already exists +extern bool thumbPathForUrl(const std::string& url, int size, + std::string& path); + +// Duplicate map while ensuring no shared string data (to pass +// to other thread): +void map_ss_cp_noshr(const std::map s, + std::map *d); + + +#endif /* _RCLUTIL_H_INCLUDED_ */ diff --git a/src/utils/smallut.cpp b/src/utils/smallut.cpp index 8a4bb550..0700ffc7 100644 --- a/src/utils/smallut.cpp +++ b/src/utils/smallut.cpp @@ -1,4 +1,4 @@ -/* Copyright (C) 2004 J.F.Dockes +/* Copyright (C) 2004-2016 J.F.Dockes * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -16,7 +16,11 @@ */ #ifndef TEST_SMALLUT +#ifdef BUILDING_RECOLL #include "autoconfig.h" +#else +#include "config.h" +#endif #include #include @@ -34,23 +38,10 @@ #include UNORDERED_SET_INCLUDE #include "smallut.h" -#include "utf8iter.h" -#include "hldata.h" -#include "cstr.h" using namespace std; -void map_ss_cp_noshr(const map s, map *d) -{ - for (map::const_iterator it= s.begin(); - it != s.end(); it++) { - d->insert( - pair(string(it->first.begin(), it->first.end()), - string(it->second.begin(), it->second.end()))); - } -} - -int stringicmp(const string & s1, const string& s2) +int stringicmp(const string& s1, const string& s2) { string::const_iterator it1 = s1.begin(); string::const_iterator it2 = s2.begin(); @@ -58,25 +49,27 @@ int stringicmp(const string & s1, const string& s2) char c1, c2; if (size1 < size2) { - while (it1 != s1.end()) { - c1 = ::toupper(*it1); - c2 = ::toupper(*it2); - if (c1 != c2) { - return c1 > c2 ? 1 : -1; - } - ++it1; ++it2; - } - return size1 == size2 ? 0 : -1; + while (it1 != s1.end()) { + c1 = ::toupper(*it1); + c2 = ::toupper(*it2); + if (c1 != c2) { + return c1 > c2 ? 1 : -1; + } + ++it1; + ++it2; + } + return size1 == size2 ? 0 : -1; } else { - while (it2 != s2.end()) { - c1 = ::toupper(*it1); - c2 = ::toupper(*it2); - if (c1 != c2) { - return c1 > c2 ? 1 : -1; - } - ++it1; ++it2; - } - return size1 == size2 ? 0 : 1; + while (it2 != s2.end()) { + c1 = ::toupper(*it1); + c2 = ::toupper(*it2); + if (c1 != c2) { + return c1 > c2 ? 1 : -1; + } + ++it1; + ++it2; + } + return size1 == size2 ? 0 : 1; } } void stringtolower(string& io) @@ -84,8 +77,8 @@ void stringtolower(string& io) string::iterator it = io.begin(); string::iterator ite = io.end(); while (it != ite) { - *it = ::tolower(*it); - it++; + *it = ::tolower(*it); + it++; } } string stringtolower(const string& i) @@ -97,20 +90,21 @@ string stringtolower(const string& i) extern int stringisuffcmp(const string& s1, const string& s2) { string::const_reverse_iterator r1 = s1.rbegin(), re1 = s1.rend(), - r2 = s2.rbegin(), re2 = s2.rend(); + r2 = s2.rbegin(), re2 = s2.rend(); while (r1 != re1 && r2 != re2) { - char c1 = ::toupper(*r1); - char c2 = ::toupper(*r2); - if (c1 != c2) { - return c1 > c2 ? 1 : -1; - } - ++r1; ++r2; + char c1 = ::toupper(*r1); + char c2 = ::toupper(*r2); + if (c1 != c2) { + return c1 > c2 ? 1 : -1; + } + ++r1; + ++r2; } return 0; } // s1 is already lowercase -int stringlowercmp(const string & s1, const string& s2) +int stringlowercmp(const string& s1, const string& s2) { string::const_iterator it1 = s1.begin(); string::const_iterator it2 = s2.begin(); @@ -118,28 +112,30 @@ int stringlowercmp(const string & s1, const string& s2) char c2; if (size1 < size2) { - while (it1 != s1.end()) { - c2 = ::tolower(*it2); - if (*it1 != c2) { - return *it1 > c2 ? 1 : -1; - } - ++it1; ++it2; - } - return size1 == size2 ? 0 : -1; + while (it1 != s1.end()) { + c2 = ::tolower(*it2); + if (*it1 != c2) { + return *it1 > c2 ? 1 : -1; + } + ++it1; + ++it2; + } + return size1 == size2 ? 0 : -1; } else { - while (it2 != s2.end()) { - c2 = ::tolower(*it2); - if (*it1 != c2) { - return *it1 > c2 ? 1 : -1; - } - ++it1; ++it2; - } - return size1 == size2 ? 0 : 1; + while (it2 != s2.end()) { + c2 = ::tolower(*it2); + if (*it1 != c2) { + return *it1 > c2 ? 1 : -1; + } + ++it1; + ++it2; + } + return size1 == size2 ? 0 : 1; } } // s1 is already uppercase -int stringuppercmp(const string & s1, const string& s2) +int stringuppercmp(const string& s1, const string& s2) { string::const_iterator it1 = s1.begin(); string::const_iterator it2 = s2.begin(); @@ -147,45 +143,47 @@ int stringuppercmp(const string & s1, const string& s2) char c2; if (size1 < size2) { - while (it1 != s1.end()) { - c2 = ::toupper(*it2); - if (*it1 != c2) { - return *it1 > c2 ? 1 : -1; - } - ++it1; ++it2; - } - return size1 == size2 ? 0 : -1; + while (it1 != s1.end()) { + c2 = ::toupper(*it2); + if (*it1 != c2) { + return *it1 > c2 ? 1 : -1; + } + ++it1; + ++it2; + } + return size1 == size2 ? 0 : -1; } else { - while (it2 != s2.end()) { - c2 = ::toupper(*it2); - if (*it1 != c2) { - return *it1 > c2 ? 1 : -1; - } - ++it1; ++it2; - } - return size1 == size2 ? 0 : 1; + while (it2 != s2.end()) { + c2 = ::toupper(*it2); + if (*it1 != c2) { + return *it1 > c2 ? 1 : -1; + } + ++it1; + ++it2; + } + return size1 == size2 ? 0 : 1; } } // Compare charset names, removing the more common spelling variations -bool samecharset(const string &cs1, const string &cs2) +bool samecharset(const string& cs1, const string& cs2) { string mcs1, mcs2; // Remove all - and _, turn to lowecase - for (unsigned int i = 0; i < cs1.length();i++) { - if (cs1[i] != '_' && cs1[i] != '-') { - mcs1 += ::tolower(cs1[i]); - } + for (unsigned int i = 0; i < cs1.length(); i++) { + if (cs1[i] != '_' && cs1[i] != '-') { + mcs1 += ::tolower(cs1[i]); + } } - for (unsigned int i = 0; i < cs2.length();i++) { - if (cs2[i] != '_' && cs2[i] != '-') { - mcs2 += ::tolower(cs2[i]); - } + for (unsigned int i = 0; i < cs2.length(); i++) { + if (cs2[i] != '_' && cs2[i] != '-') { + mcs2 += ::tolower(cs2[i]); + } } return mcs1 == mcs2; } -template bool stringToStrings(const string &s, T &tokens, +template bool stringToStrings(const string& s, T& tokens, const string& addseps) { string current; @@ -193,350 +191,351 @@ template bool stringToStrings(const string &s, T &tokens, enum states {SPACE, TOKEN, INQUOTE, ESCAPE}; states state = SPACE; for (unsigned int i = 0; i < s.length(); i++) { - switch (s[i]) { - case '"': - switch(state) { - case SPACE: - state=INQUOTE; continue; - case TOKEN: - current += '"'; - continue; - case INQUOTE: + switch (s[i]) { + case '"': + switch (state) { + case SPACE: + state = INQUOTE; + continue; + case TOKEN: + current += '"'; + continue; + case INQUOTE: tokens.insert(tokens.end(), current); - current.clear(); - state = SPACE; - continue; + current.clear(); + state = SPACE; + continue; case ESCAPE: - current += '"'; - state = INQUOTE; + current += '"'; + state = INQUOTE; continue; - } - break; - case '\\': - switch(state) { - case SPACE: - case TOKEN: + } + break; + case '\\': + switch (state) { + case SPACE: + case TOKEN: current += '\\'; - state=TOKEN; + state = TOKEN; continue; - case INQUOTE: + case INQUOTE: state = ESCAPE; continue; case ESCAPE: current += '\\'; state = INQUOTE; continue; - } - break; + } + break; - case ' ': - case '\t': - case '\n': - case '\r': - switch(state) { - case SPACE: + case ' ': + case '\t': + case '\n': + case '\r': + switch (state) { + case SPACE: continue; - case TOKEN: - tokens.insert(tokens.end(), current); - current.clear(); - state = SPACE; - continue; - case INQUOTE: + case TOKEN: + tokens.insert(tokens.end(), current); + current.clear(); + state = SPACE; + continue; + case INQUOTE: case ESCAPE: current += s[i]; continue; - } - break; + } + break; default: if (!addseps.empty() && addseps.find(s[i]) != string::npos) { - switch(state) { + switch (state) { case ESCAPE: state = INQUOTE; break; - case INQUOTE: + case INQUOTE: break; - case SPACE: + case SPACE: tokens.insert(tokens.end(), string(1, s[i])); continue; - case TOKEN: + case TOKEN: tokens.insert(tokens.end(), current); current.erase(); tokens.insert(tokens.end(), string(1, s[i])); state = SPACE; continue; } - } else switch(state) { + } else switch (state) { case ESCAPE: state = INQUOTE; break; - case SPACE: + case SPACE: state = TOKEN; break; - case TOKEN: - case INQUOTE: + case TOKEN: + case INQUOTE: break; } - current += s[i]; - } + current += s[i]; + } } - switch(state) { - case SPACE: - break; - case TOKEN: - tokens.insert(tokens.end(), current); - break; - case INQUOTE: + switch (state) { + case SPACE: + break; + case TOKEN: + tokens.insert(tokens.end(), current); + break; + case INQUOTE: case ESCAPE: - return false; + return false; } return true; } -template bool stringToStrings >(const string &, - list &, const string&); -template bool stringToStrings >(const string &, - vector &,const string&); -template bool stringToStrings >(const string &, - set &, const string&); +template bool stringToStrings >(const string&, + list&, const string&); +template bool stringToStrings >(const string&, + vector&, const string&); +template bool stringToStrings >(const string&, + set&, const string&); template bool stringToStrings > -(const string &, STD_UNORDERED_SET &, const string&); +(const string&, STD_UNORDERED_SET&, const string&); -template void stringsToString(const T &tokens, string &s) +template void stringsToString(const T& tokens, string& s) { for (typename T::const_iterator it = tokens.begin(); - it != tokens.end(); it++) { - bool hasblanks = false; - if (it->find_first_of(" \t\n") != string::npos) - hasblanks = true; - if (it != tokens.begin()) - s.append(1, ' '); - if (hasblanks) - s.append(1, '"'); - for (unsigned int i = 0; i < it->length(); i++) { - char car = it->at(i); - if (car == '"') { - s.append(1, '\\'); - s.append(1, car); - } else { - s.append(1, car); - } - } - if (hasblanks) - s.append(1, '"'); + it != tokens.end(); it++) { + bool hasblanks = false; + if (it->find_first_of(" \t\n") != string::npos) { + hasblanks = true; + } + if (it != tokens.begin()) { + s.append(1, ' '); + } + if (hasblanks) { + s.append(1, '"'); + } + for (unsigned int i = 0; i < it->length(); i++) { + char car = it->at(i); + if (car == '"') { + s.append(1, '\\'); + s.append(1, car); + } else { + s.append(1, car); + } + } + if (hasblanks) { + s.append(1, '"'); + } } } -template void stringsToString >(const list &, string &); -template void stringsToString >(const vector &,string &); -template void stringsToString >(const set &, string &); -template string stringsToString(const T &tokens) +template void stringsToString >(const list&, string&); +template void stringsToString >(const vector&, string&); +template void stringsToString >(const set&, string&); +template string stringsToString(const T& tokens) { string out; stringsToString(tokens, out); return out; } -template string stringsToString >(const list &); -template string stringsToString >(const vector &); -template string stringsToString >(const set &); +template string stringsToString >(const list&); +template string stringsToString >(const vector&); +template string stringsToString >(const set&); -template void stringsToCSV(const T &tokens, string &s, - char sep) +template void stringsToCSV(const T& tokens, string& s, + char sep) { s.erase(); for (typename T::const_iterator it = tokens.begin(); - it != tokens.end(); it++) { - bool needquotes = false; - if (it->empty() || - it->find_first_of(string(1, sep) + "\"\n") != string::npos) - needquotes = true; - if (it != tokens.begin()) - s.append(1, sep); - if (needquotes) - s.append(1, '"'); - for (unsigned int i = 0; i < it->length(); i++) { - char car = it->at(i); - if (car == '"') { - s.append(2, '"'); - } else { - s.append(1, car); - } - } - if (needquotes) - s.append(1, '"'); + it != tokens.end(); it++) { + bool needquotes = false; + if (it->empty() || + it->find_first_of(string(1, sep) + "\"\n") != string::npos) { + needquotes = true; + } + if (it != tokens.begin()) { + s.append(1, sep); + } + if (needquotes) { + s.append(1, '"'); + } + for (unsigned int i = 0; i < it->length(); i++) { + char car = it->at(i); + if (car == '"') { + s.append(2, '"'); + } else { + s.append(1, car); + } + } + if (needquotes) { + s.append(1, '"'); + } } } -template void stringsToCSV >(const list &, string &, char); -template void stringsToCSV >(const vector &,string &, - char); +template void stringsToCSV >(const list&, string&, char); +template void stringsToCSV >(const vector&, string&, + char); void stringToTokens(const string& str, vector& tokens, - const string& delims, bool skipinit) + const string& delims, bool skipinit) { string::size_type startPos = 0, pos; // Skip initial delims, return empty if this eats all. - if (skipinit && - (startPos = str.find_first_not_of(delims, 0)) == string::npos) { - return; + if (skipinit && + (startPos = str.find_first_not_of(delims, 0)) == string::npos) { + return; } - while (startPos < str.size()) { + while (startPos < str.size()) { // Find next delimiter or end of string (end of token) pos = str.find_first_of(delims, startPos); // Add token to the vector and adjust start - if (pos == string::npos) { - tokens.push_back(str.substr(startPos)); - break; - } else if (pos == startPos) { - // Dont' push empty tokens after first - if (tokens.empty()) - tokens.push_back(string()); - startPos = ++pos; - } else { - tokens.push_back(str.substr(startPos, pos - startPos)); - startPos = ++pos; - } + if (pos == string::npos) { + tokens.push_back(str.substr(startPos)); + break; + } else if (pos == startPos) { + // Dont' push empty tokens after first + if (tokens.empty()) { + tokens.push_back(string()); + } + startPos = ++pos; + } else { + tokens.push_back(str.substr(startPos, pos - startPos)); + startPos = ++pos; + } } } -bool stringToBool(const string &s) +bool stringToBool(const string& s) { - if (s.empty()) - return false; - if (isdigit(s[0])) { - int val = atoi(s.c_str()); - return val ? true : false; + if (s.empty()) { + return false; + } + if (isdigit(s[0])) { + int val = atoi(s.c_str()); + return val ? true : false; + } + if (s.find_first_of("yYtT") == 0) { + return true; } - if (s.find_first_of("yYtT") == 0) - return true; return false; } -void trimstring(string &s, const char *ws) +void trimstring(string& s, const char *ws) { string::size_type pos = s.find_first_not_of(ws); if (pos == string::npos) { - s.clear(); - return; + s.clear(); + return; } s.replace(0, pos, string()); pos = s.find_last_not_of(ws); - if (pos != string::npos && pos != s.length()-1) - s.replace(pos+1, string::npos, string()); + if (pos != string::npos && pos != s.length() - 1) { + s.replace(pos + 1, string::npos, string()); + } } // Remove some chars and replace them with spaces -string neutchars(const string &str, const string &chars) +string neutchars(const string& str, const string& chars) { string out; neutchars(str, out, chars); return out; } -void neutchars(const string &str, string &out, const string& chars) +void neutchars(const string& str, string& out, const string& chars) { string::size_type startPos, pos; - for (pos = 0;;) { + for (pos = 0;;) { // Skip initial chars, break if this eats all. - if ((startPos = str.find_first_not_of(chars, pos)) == string::npos) - break; + if ((startPos = str.find_first_not_of(chars, pos)) == string::npos) { + break; + } // Find next delimiter or end of string (end of token) pos = str.find_first_of(chars, startPos); // Add token to the output. Note: token cant be empty here - if (pos == string::npos) { - out += str.substr(startPos); - } else { - out += str.substr(startPos, pos - startPos) + " "; - } + if (pos == string::npos) { + out += str.substr(startPos); + } else { + out += str.substr(startPos, pos - startPos) + " "; + } } } /* Truncate a string to a given maxlength, avoiding cutting off midword * if reasonably possible. Note: we could also use textsplit, stopping when - * we have enough, this would be cleanly utf8-aware but would remove + * we have enough, this would be cleanly utf8-aware but would remove * punctuation */ static const string cstr_SEPAR = " \t\n\r-:.;,/[]{}"; -string truncate_to_word(const string &input, string::size_type maxlen) +string truncate_to_word(const string& input, string::size_type maxlen) { string output; if (input.length() <= maxlen) { - output = input; + output = input; } else { - output = input.substr(0, maxlen); - string::size_type space = output.find_last_of(cstr_SEPAR); - // Original version only truncated at space if space was found after - // maxlen/2. But we HAVE to truncate at space, else we'd need to do - // utf8 stuff to avoid truncating at multibyte char. In any case, - // not finding space means that the text probably has no value. - // Except probably for Asian languages, so we may want to fix this - // one day - if (space == string::npos) { - output.erase(); - } else { - output.erase(space); - } + output = input.substr(0, maxlen); + string::size_type space = output.find_last_of(cstr_SEPAR); + // Original version only truncated at space if space was found after + // maxlen/2. But we HAVE to truncate at space, else we'd need to do + // utf8 stuff to avoid truncating at multibyte char. In any case, + // not finding space means that the text probably has no value. + // Except probably for Asian languages, so we may want to fix this + // one day + if (space == string::npos) { + output.erase(); + } else { + output.erase(space); + } } return output; } -void utf8truncate(string &s, int maxlen) -{ - if (s.size() <= string::size_type(maxlen)) - return; - Utf8Iter iter(s); - string::size_type pos = 0; - while (iter++ != string::npos) - if (iter.getBpos() < string::size_type(maxlen)) - pos = iter.getBpos(); - - s.erase(pos); -} - // Escape things that would look like markup -string escapeHtml(const string &in) +string escapeHtml(const string& in) { string out; for (string::size_type pos = 0; pos < in.length(); pos++) { - switch(in.at(pos)) { - case '<': - out += "<"; - break; - case '&': - out += "&"; - break; - default: - out += in.at(pos); - } + switch (in.at(pos)) { + case '<': + out += "<"; + break; + case '&': + out += "&"; + break; + default: + out += in.at(pos); + } } return out; } -string escapeShell(const string &in) +string escapeShell(const string& in) { string out; out += "\""; for (string::size_type pos = 0; pos < in.length(); pos++) { - switch(in.at(pos)) { - case '$': - out += "\\$"; - break; - case '`': - out += "\\`"; - break; - case '"': - out += "\\\""; - break; - case '\n': - out += "\\\n"; - break; - case '\\': - out += "\\\\"; - break; - default: - out += in.at(pos); - } + switch (in.at(pos)) { + case '$': + out += "\\$"; + break; + case '`': + out += "\\`"; + break; + case '"': + out += "\\\""; + break; + case '\n': + out += "\\\n"; + break; + case '\\': + out += "\\\\"; + break; + default: + out += in.at(pos); + } } out += "\""; return out; @@ -547,26 +546,26 @@ string escapeShell(const string &in) bool pcSubst(const string& in, string& out, const map& subs) { string::const_iterator it; - for (it = in.begin(); it != in.end();it++) { - if (*it == '%') { - if (++it == in.end()) { - out += '%'; - break; - } - if (*it == '%') { - out += '%'; - continue; - } - map::const_iterator tr; - if ((tr = subs.find(*it)) != subs.end()) { - out += tr->second; - } else { - // We used to do "out += *it;" here but this does not make + for (it = in.begin(); it != in.end(); it++) { + if (*it == '%') { + if (++it == in.end()) { + out += '%'; + break; + } + if (*it == '%') { + out += '%'; + continue; + } + map::const_iterator tr; + if ((tr = subs.find(*it)) != subs.end()) { + out += tr->second; + } else { + // We used to do "out += *it;" here but this does not make // sense - } - } else { - out += *it; - } + } + } else { + out += *it; + } } return true; } @@ -576,15 +575,15 @@ bool pcSubst(const string& in, string& out, const map& subs) out.erase(); string::size_type i; for (i = 0; i < in.size(); i++) { - if (in[i] == '%') { - if (++i == in.size()) { - out += '%'; - break; - } - if (in[i] == '%') { - out += '%'; - continue; - } + if (in[i] == '%') { + if (++i == in.size()) { + out += '%'; + break; + } + if (in[i] == '%') { + out += '%'; + continue; + } string key = ""; if (in[i] == '(') { if (++i == in.size()) { @@ -594,25 +593,25 @@ bool pcSubst(const string& in, string& out, const map& subs) string::size_type j = in.find_first_of(")", i); if (j == string::npos) { // ??concatenate remaining part and stop - out += in.substr(i-2); + out += in.substr(i - 2); break; } - key = in.substr(i, j-i); + key = in.substr(i, j - i); i = j; } else { key = in[i]; } - map::const_iterator tr; - if ((tr = subs.find(key)) != subs.end()) { - out += tr->second; - } else { + map::const_iterator tr; + if ((tr = subs.find(key)) != subs.end()) { + out += tr->second; + } else { // Substitute to nothing, that's the reasonable thing to do // instead of keeping the %(key) // out += key.size()==1? key : string("(") + key + string(")"); - } - } else { - out += in[i]; - } + } + } else { + out += in[i]; + } } return true; } @@ -622,14 +621,15 @@ inline static int ulltorbuf(unsigned long long val, char *rbuf) for (idx = 0; val; idx++) { rbuf[idx] = '0' + val % 10; val /= 10; - } while (val); + } + while (val); rbuf[idx] = 0; return idx; } inline static void ullcopyreverse(const char *rbuf, string& buf, int idx) { - buf.reserve(idx+1); + buf.reserve(idx + 1); for (int i = idx - 1; i >= 0; i--) { buf.push_back(rbuf[i]); } @@ -659,14 +659,16 @@ void lltodecstr(long long val, string& buf) } bool neg = val < 0; - if (neg) + if (neg) { val = -val; + } char rbuf[30]; int idx = ulltorbuf(val, rbuf); - if (neg) + if (neg) { rbuf[idx++] = '-'; + } rbuf[idx] = 0; ullcopyreverse(rbuf, buf, idx); @@ -691,93 +693,96 @@ string ulltodecstr(unsigned long long val) string displayableBytes(off_t size) { const char *unit; - + double roundable = 0; if (size < 1000) { - unit = " B "; - roundable = double(size); + unit = " B "; + roundable = double(size); } else if (size < 1E6) { - unit = " KB "; - roundable = double(size) / 1E3; + unit = " KB "; + roundable = double(size) / 1E3; } else if (size < 1E9) { - unit = " MB "; - roundable = double(size) / 1E6; + unit = " MB "; + roundable = double(size) / 1E6; } else { - unit = " GB "; - roundable = double(size) / 1E9; + unit = " GB "; + roundable = double(size) / 1E9; } size = off_t(round(roundable)); return lltodecstr(size).append(unit); } -string breakIntoLines(const string& in, unsigned int ll, - unsigned int maxlines) +string breakIntoLines(const string& in, unsigned int ll, + unsigned int maxlines) { string query = in; string oq; unsigned int nlines = 0; while (query.length() > 0) { - string ss = query.substr(0, ll); - if (ss.length() == ll) { - string::size_type pos = ss.find_last_of(" "); - if (pos == string::npos) { - pos = query.find_first_of(" "); - if (pos != string::npos) - ss = query.substr(0, pos+1); - else - ss = query; - } else { - ss = ss.substr(0, pos+1); - } - } - // This cant happen, but anyway. Be very sure to avoid an infinite loop - if (ss.length() == 0) { - oq = query; - break; - } - oq += ss + "\n"; - if (nlines++ >= maxlines) { - oq += " ... \n"; - break; - } - query= query.substr(ss.length()); + string ss = query.substr(0, ll); + if (ss.length() == ll) { + string::size_type pos = ss.find_last_of(" "); + if (pos == string::npos) { + pos = query.find_first_of(" "); + if (pos != string::npos) { + ss = query.substr(0, pos + 1); + } else { + ss = query; + } + } else { + ss = ss.substr(0, pos + 1); + } + } + // This cant happen, but anyway. Be very sure to avoid an infinite loop + if (ss.length() == 0) { + oq = query; + break; + } + oq += ss + "\n"; + if (nlines++ >= maxlines) { + oq += " ... \n"; + break; + } + query = query.substr(ss.length()); } return oq; } // Date is Y[-M[-D]] -static bool parsedate(vector::const_iterator& it, - vector::const_iterator end, DateInterval *dip) +static bool parsedate(vector::const_iterator& it, + vector::const_iterator end, DateInterval *dip) { dip->y1 = dip->m1 = dip->d1 = dip->y2 = dip->m2 = dip->d2 = 0; - if (it->length() > 4 || !it->length() || - it->find_first_not_of("0123456789") != string::npos) { + if (it->length() > 4 || !it->length() || + it->find_first_not_of("0123456789") != string::npos) { return false; } if (it == end || sscanf(it++->c_str(), "%d", &dip->y1) != 1) { return false; } - if (it == end || *it == "/") + if (it == end || *it == "/") { return true; + } if (*it++ != "-") { return false; } - if (it->length() > 2 || !it->length() || - it->find_first_not_of("0123456789") != string::npos) { + if (it->length() > 2 || !it->length() || + it->find_first_not_of("0123456789") != string::npos) { return false; } if (it == end || sscanf(it++->c_str(), "%d", &dip->m1) != 1) { return false; } - if (it == end || *it == "/") + if (it == end || *it == "/") { return true; + } if (*it++ != "-") { return false; } - if (it->length() > 2 || !it->length() || - it->find_first_not_of("0123456789") != string::npos) { + if (it->length() > 2 || !it->length() || + it->find_first_not_of("0123456789") != string::npos) { return false; } if (it == end || sscanf(it++->c_str(), "%d", &dip->d1) != 1) { @@ -790,7 +795,7 @@ static bool parsedate(vector::const_iterator& it, // Called with the 'P' already processed. Period ends at end of string // or at '/'. We dont' do a lot effort at validation and will happily // accept 10Y1Y4Y (the last wins) -static bool parseperiod(vector::const_iterator& it, +static bool parseperiod(vector::const_iterator& it, vector::const_iterator end, DateInterval *dip) { dip->y1 = dip->m1 = dip->d1 = dip->y2 = dip->m2 = dip->d2 = 0; @@ -802,17 +807,29 @@ static bool parseperiod(vector::const_iterator& it, if (sscanf(it++->c_str(), "%d", &value) != 1) { return false; } - if (it == end || it->empty()) + if (it == end || it->empty()) { return false; + } switch (it->at(0)) { - case 'Y': case 'y': dip->y1 = value;break; - case 'M': case 'm': dip->m1 = value;break; - case 'D': case 'd': dip->d1 = value;break; - default: return false; + case 'Y': + case 'y': + dip->y1 = value; + break; + case 'M': + case 'm': + dip->m1 = value; + break; + case 'D': + case 'd': + dip->d1 = value; + break; + default: + return false; } it++; - if (it == end) + if (it == end) { return true; + } if (*it == "/") { return true; } @@ -823,10 +840,11 @@ static bool parseperiod(vector::const_iterator& it, #ifdef _WIN32 int setenv(const char *name, const char *value, int overwrite) { - if(!overwrite) { + if (!overwrite) { const char *cp = getenv(name); - if (cp) + if (cp) { return -1; + } } return _putenv_s(name, value); } @@ -845,10 +863,11 @@ time_t portable_timegm(struct tm *tm) setenv("TZ", "", 1); tzset(); ret = mktime(tm); - if (tz) + if (tz) { setenv("TZ", tz, 1); - else + } else { unsetenv("TZ"); + } tzset(); return ret; } @@ -857,12 +876,12 @@ time_t portable_timegm(struct tm *tm) static void cerrdip(const string& s, DateInterval *dip) { cerr << s << dip->y1 << "-" << dip->m1 << "-" << dip->d1 << "/" - << dip->y2 << "-" << dip->m2 << "-" << dip->d2 + << dip->y2 << "-" << dip->m2 << "-" << dip->d2 << endl; } #endif -// Compute date + period. Won't work out of the unix era. +// Compute date + period. Won't work out of the unix era. // or pre-1970 dates. Just convert everything to unixtime and // seconds (with average durations for months/years), add and convert // back @@ -873,7 +892,7 @@ static bool addperiod(DateInterval *dp, DateInterval *pp) // timegm sort it out memset(&tm, 0, sizeof(tm)); tm.tm_year = dp->y1 - 1900 + pp->y1; - tm.tm_mon = dp->m1 + pp->m1 -1; + tm.tm_mon = dp->m1 + pp->m1 - 1; tm.tm_mday = dp->d1 + pp->d1; time_t tres = mktime(&tm); localtime_r(&tres, &tm); @@ -886,10 +905,19 @@ static bool addperiod(DateInterval *dp, DateInterval *pp) int monthdays(int mon, int year) { switch (mon) { - // We are returning a few two many 29 days februaries, no problem - case 2: return (year % 4) == 0 ? 29 : 28; - case 1:case 3:case 5:case 7: case 8:case 10:case 12: return 31; - default: return 30; + // We are returning a few too many 29 days februaries, no problem + case 2: + return (year % 4) == 0 ? 29 : 28; + case 1: + case 3: + case 5: + case 7: + case 8: + case 10: + case 12: + return 31; + default: + return 30; } } bool parsedateinterval(const string& s, DateInterval *dip) @@ -898,14 +926,15 @@ bool parsedateinterval(const string& s, DateInterval *dip) dip->y1 = dip->m1 = dip->d1 = dip->y2 = dip->m2 = dip->d2 = 0; DateInterval p1, p2, d1, d2; p1 = p2 = d1 = d2 = *dip; - bool hasp1 = false, hasp2 = false, hasd1 = false, hasd2 = false, - hasslash = false; + bool hasp1 = false, hasp2 = false, hasd1 = false, hasd2 = false, + hasslash = false; if (!stringToStrings(s, vs, "PYMDpymd-/")) { return false; } - if (vs.empty()) + if (vs.empty()) { return false; + } vector::const_iterator it = vs.begin(); if (*it == "P" || *it == "p") { @@ -943,7 +972,7 @@ secondelt: if (!parseperiod(it, vs.end(), &p2)) { return false; } - hasp2 = true; + hasp2 = true; } else { if (!parsedate(it, vs.end(), &d2)) { return false; @@ -983,7 +1012,7 @@ secondelt: // If there is no explicit period, an incomplete date indicates a // period of the size of the uncompleted elements. Ex: 1999 // actually means 1999/P12M - // + // // If there is a period, the incomplete date should be extended // to the beginning or end of the unspecified portion. Ex: 1999/ // means 1999-01-01/ and /1999 means /1999-12-31 @@ -1042,10 +1071,12 @@ secondelt: void catstrerror(string *reason, const char *what, int _errno) { - if (!reason) - return; - if (what) - reason->append(what); + if (!reason) { + return; + } + if (what) { + reason->append(what); + } reason->append(": errno: "); @@ -1059,16 +1090,16 @@ void catstrerror(string *reason, const char *what, int _errno) // Note: sun strerror is noted mt-safe ?? reason->append(strerror(_errno)); #else -#define ERRBUFSZ 200 +#define ERRBUFSZ 200 char errbuf[ERRBUFSZ]; - // There are 2 versions of strerror_r. + // There are 2 versions of strerror_r. // - The GNU one returns a pointer to the message (maybe // static storage or supplied buffer). // - The POSIX one always stores in supplied buffer and // returns 0 on success. As the possibility of error and // error code are not specified, we're basically doomed // cause we can't use a test on the 0 value to know if we - // were returned a pointer... + // were returned a pointer... // Also couldn't find an easy way to disable the gnu version without // changing the cxxflags globally, so forget it. Recent gnu lib versions // normally default to the posix version. @@ -1080,59 +1111,6 @@ void catstrerror(string *reason, const char *what, int _errno) #endif } -void HighlightData::toString(std::string& out) -{ - out.append("\nUser terms (orthograph): "); - for (std::set::const_iterator it = uterms.begin(); - it != uterms.end(); it++) { - out.append(" [").append(*it).append("]"); - } - out.append("\nUser terms to Query terms:"); - for (map::const_iterator it = terms.begin(); - it != terms.end(); it++) { - out.append("[").append(it->first).append("]->["); - out.append(it->second).append("] "); - } - out.append("\nGroups: "); - char cbuf[200]; - sprintf(cbuf, "Groups size %d grpsugidx size %d ugroups size %d", - int(groups.size()), int(grpsugidx.size()), int(ugroups.size())); - out.append(cbuf); - - size_t ugidx = (size_t)-1; - for (unsigned int i = 0; i < groups.size(); i++) { - if (ugidx != grpsugidx[i]) { - ugidx = grpsugidx[i]; - out.append("\n("); - for (unsigned int j = 0; j < ugroups[ugidx].size(); j++) { - out.append("[").append(ugroups[ugidx][j]).append("] "); - } - out.append(") ->"); - } - out.append(" {"); - for (unsigned int j = 0; j < groups[i].size(); j++) { - out.append("[").append(groups[i][j]).append("]"); - } - sprintf(cbuf, "%d", slacks[i]); - out.append("}").append(cbuf); - } - out.append("\n"); -} - -void HighlightData::append(const HighlightData& hl) -{ - uterms.insert(hl.uterms.begin(), hl.uterms.end()); - terms.insert(hl.terms.begin(), hl.terms.end()); - size_t ugsz0 = ugroups.size(); - ugroups.insert(ugroups.end(), hl.ugroups.begin(), hl.ugroups.end()); - - groups.insert(groups.end(), hl.groups.begin(), hl.groups.end()); - slacks.insert(slacks.end(), hl.slacks.begin(), hl.slacks.end()); - for (std::vector::const_iterator it = hl.grpsugidx.begin(); - it != hl.grpsugidx.end(); it++) { - grpsugidx.push_back(*it + ugsz0); - } -} static const char *vlang_to_code[] = { "be", "cp1251", @@ -1159,21 +1137,24 @@ static const char *vlang_to_code[] = { "uk", "koi8-u", }; +static const string cstr_cp1252("CP1252"); + string langtocode(const string& lang) { static STD_UNORDERED_MAP lang_to_code; if (lang_to_code.empty()) { - for (unsigned int i = 0; - i < sizeof(vlang_to_code) / sizeof(char *); i += 2) { - lang_to_code[vlang_to_code[i]] = vlang_to_code[i+1]; - } + for (unsigned int i = 0; + i < sizeof(vlang_to_code) / sizeof(char *); i += 2) { + lang_to_code[vlang_to_code[i]] = vlang_to_code[i + 1]; + } } - STD_UNORDERED_MAP::const_iterator it = - lang_to_code.find(lang); + STD_UNORDERED_MAP::const_iterator it = + lang_to_code.find(lang); // Use cp1252 by default... - if (it == lang_to_code.end()) - return cstr_cp1252; + if (it == lang_to_code.end()) { + return cstr_cp1252; + } return it->second; } @@ -1182,16 +1163,19 @@ string localelang() { const char *lang = getenv("LANG"); - if (lang == 0 || *lang == 0 || !strcmp(lang, "C") || !strcmp(lang, "POSIX")) - return "en"; + if (lang == 0 || *lang == 0 || !strcmp(lang, "C") || + !strcmp(lang, "POSIX")) { + return "en"; + } string locale(lang); string::size_type under = locale.find_first_of("_"); - if (under == string::npos) - return locale; + if (under == string::npos) { + return locale; + } return locale.substr(0, under); } -// Initialization for static stuff to be called from main thread before going +// Initialization for static stuff to be called from main thread before going // multiple void smallut_init_mt() { @@ -1242,7 +1226,7 @@ int nsuffpairs = sizeof(suffpairs) / sizeof(struct spair); // Periods test strings const char* periods[] = { "2001", // Year 2001 - "2001/", // 2001 or later + "2001/", // 2001 or later "2001/P3Y", // 2001 -> 2004 or 2005, ambiguous "2001-01-01/P3Y", // 01-2001 -> 01 2004 "2001-03-03/2001-05-01", // Explicit one @@ -1256,47 +1240,53 @@ const char *thisprog; static void cerrdip(const string& s, DateInterval *dip) { cerr << s << dip->y1 << "-" << dip->m1 << "-" << dip->d1 << "/" - << dip->y2 << "-" << dip->m2 << "-" << dip->d2 + << dip->y2 << "-" << dip->m2 << "-" << dip->d2 << endl; } int main(int argc, char **argv) { - thisprog = *argv++;argc--; + thisprog = *argv++; + argc--; #if 1 - if (argc <=0 ) { + if (argc <= 0) { cerr << "Usage: smallut " << endl; exit(1); } - string s = *argv++;argc--; + string s = *argv++; + argc--; vector vs; stringToTokens(s, vs, "/"); - for (vector::const_iterator it = vs.begin(); it != vs.end(); it++) + for (vector::const_iterator it = vs.begin(); it != vs.end(); it++) { cerr << "[" << *it << "] "; + } cerr << endl; exit(0); #elif 0 - if (argc <=0 ) { + if (argc <= 0) { cerr << "Usage: smallut " << endl; exit(1); } - string s = *argv++;argc--; + string s = *argv++; + argc--; vector vs; if (!stringToStrings(s, vs, ":-()")) { cerr << "Bad entry" << endl; exit(1); } - for (vector::const_iterator it = vs.begin(); it != vs.end(); it++) + for (vector::const_iterator it = vs.begin(); it != vs.end(); it++) { cerr << "[" << *it << "] "; + } cerr << endl; exit(0); #elif 0 - if (argc <=0 ) { + if (argc <= 0) { cerr << "Usage: smallut " << endl; exit(1); } - string s = *argv++;argc--; + string s = *argv++; + argc--; DateInterval di; if (!parsedateinterval(s, &di)) { cerr << "Parse failed" << endl; @@ -1316,34 +1306,34 @@ int main(int argc, char **argv) exit(0); #elif 0 for (int i = 0; i < npairs; i++) { - { - int c = stringicmp(pairs[i].s1, pairs[i].s2); - printf("'%s' %s '%s' ", pairs[i].s1, - c == 0 ? "==" : c < 0 ? "<" : ">", pairs[i].s2); - } - { - int cl = stringlowercmp(pairs[i].s1, pairs[i].s2); - printf("L '%s' %s '%s' ", pairs[i].s1, - cl == 0 ? "==" : cl < 0 ? "<" : ">", pairs[i].s2); - } - { - int cu = stringuppercmp(pairs[i].s1, pairs[i].s2); - printf("U '%s' %s '%s' ", pairs[i].s1, - cu == 0 ? "==" : cu < 0 ? "<" : ">", pairs[i].s2); - } - printf("\n"); + { + int c = stringicmp(pairs[i].s1, pairs[i].s2); + printf("'%s' %s '%s' ", pairs[i].s1, + c == 0 ? "==" : c < 0 ? "<" : ">", pairs[i].s2); + } + { + int cl = stringlowercmp(pairs[i].s1, pairs[i].s2); + printf("L '%s' %s '%s' ", pairs[i].s1, + cl == 0 ? "==" : cl < 0 ? "<" : ">", pairs[i].s2); + } + { + int cu = stringuppercmp(pairs[i].s1, pairs[i].s2); + printf("U '%s' %s '%s' ", pairs[i].s1, + cu == 0 ? "==" : cu < 0 ? "<" : ">", pairs[i].s2); + } + printf("\n"); } #elif 0 for (int i = 0; i < nsuffpairs; i++) { - int c = stringisuffcmp(suffpairs[i].s1, suffpairs[i].s2); - printf("[%s] %s [%s] \n", suffpairs[i].s1, - c == 0 ? "matches" : c < 0 ? "<" : ">", suffpairs[i].s2); + int c = stringisuffcmp(suffpairs[i].s1, suffpairs[i].s2); + printf("[%s] %s [%s] \n", suffpairs[i].s1, + c == 0 ? "matches" : c < 0 ? "<" : ">", suffpairs[i].s2); } #elif 0 std::string testit("\303\251l\303\251gant"); for (int sz = 10; sz >= 0; sz--) { - utf8truncate(testit, sz); - cout << testit << endl; + utf8truncate(testit, sz); + cout << testit << endl; } #elif 0 std::string testit("ligne\ndeuxieme ligne\r3eme ligne\r\n"); @@ -1385,30 +1375,30 @@ int main(int argc, char **argv) string sshort("ABC"); string slong("ABCD"); string sshortsmaller("ABB"); - - vector > cmps; - cmps.push_back(pair(sshort,sshort)); - cmps.push_back(pair(sshort,slong)); - cmps.push_back(pair(slong,sshort)); - cmps.push_back(pair(sshortsmaller,sshort)); - cmps.push_back(pair(sshort, sshortsmaller)); - for (vector >::const_iterator it = cmps.begin(); - it != cmps.end(); it++) { - cout << it->first << " " << it->second << " " << - stringicmp(it->first, it->second) << endl; + vector > cmps; + cmps.push_back(pair(sshort, sshort)); + cmps.push_back(pair(sshort, slong)); + cmps.push_back(pair(slong, sshort)); + cmps.push_back(pair(sshortsmaller, sshort)); + cmps.push_back(pair(sshort, sshortsmaller)); + + for (vector >::const_iterator it = cmps.begin(); + it != cmps.end(); it++) { + cout << it->first << " " << it->second << " " << + stringicmp(it->first, it->second) << endl; } cout << endl; - for (vector >::const_iterator it = cmps.begin(); - it != cmps.end(); it++) { - cout << it->first << " " << it->second << " " << - stringlowercmp(stringtolower(it->first), it->second) << endl; + for (vector >::const_iterator it = cmps.begin(); + it != cmps.end(); it++) { + cout << it->first << " " << it->second << " " << + stringlowercmp(stringtolower(it->first), it->second) << endl; } cout << endl; - for (vector >::const_iterator it = cmps.begin(); - it != cmps.end(); it++) { - cout << it->first << " " << it->second << " " << - stringuppercmp(it->first, it->second) << endl; + for (vector >::const_iterator it = cmps.begin(); + it != cmps.end(); it++) { + cout << it->first << " " << it->second << " " << + stringuppercmp(it->first, it->second) << endl; } #endif diff --git a/src/utils/smallut.h b/src/utils/smallut.h index 9b2a4b04..976d26ca 100644 --- a/src/utils/smallut.h +++ b/src/utils/smallut.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2004 J.F.Dockes +/* Copyright (C) 2004-2016 J.F.Dockes * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -17,226 +17,20 @@ #ifndef _SMALLUT_H_INCLUDED_ #define _SMALLUT_H_INCLUDED_ -#include +#include #include #include #include #include -using std::string; -using std::vector; -using std::map; -using std::set; +// Miscellaneous mostly string-oriented small utilities +// Note that none of the following code knows about utf-8. -// Note these are all ascii routines -extern int stringicmp(const string& s1, const string& s2); -// For find_if etc. -struct StringIcmpPred { - StringIcmpPred(const string& s1) - : m_s1(s1) - {} - bool operator()(const string& s2) { - return stringicmp(m_s1, s2) == 0; - } - const string& m_s1; -}; - -extern int stringlowercmp(const string& alreadylower, const string& s2); -extern int stringuppercmp(const string& alreadyupper, const string& s2); - -extern void stringtolower(string& io); -extern string stringtolower(const string& io); - -// Is one string the end part of the other ? -extern int stringisuffcmp(const string& s1, const string& s2); - -// Divine language from locale -extern std::string localelang(); -// Divine 8bit charset from language -extern std::string langtocode(const string& lang); - -// Compare charset names, removing the more common spelling variations -extern bool samecharset(const string &cs1, const string &cs2); - -// Parse date interval specifier into pair of y,m,d dates. The format -// for the time interval is based on a subset of iso 8601 with -// the addition of open intervals, and removal of all time indications. -// 'P' is the Period indicator, it's followed by a length in -// years/months/days (or any subset thereof) -// Dates: YYYY-MM-DD YYYY-MM YYYY -// Periods: P[nY][nM][nD] where n is an integer value. -// At least one of YMD must be specified -// The separator for the interval is /. Interval examples -// YYYY/ (from YYYY) YYYY-MM-DD/P3Y (3 years after date) etc. -// This returns a pair of y,m,d dates. -struct DateInterval { - int y1;int m1;int d1; int y2;int m2;int d2; -}; -extern bool parsedateinterval(const string&s, DateInterval *di); -extern int monthdays(int mon, int year); - -/** - * Parse input string into list of strings. - * - * Token delimiter is " \t\n" except inside dquotes. dquote inside - * dquotes can be escaped with \ etc... - * Input is handled a byte at a time, things will work as long as space tab etc. - * have the ascii values and can't appear as part of a multibyte char. utf-8 ok - * but so are the iso-8859-x and surely others. addseps do have to be - * single-bytes - */ -template bool stringToStrings(const string& s, T &tokens, - const string& addseps = ""); - -/** - * Inverse operation: - */ -template void stringsToString(const T &tokens, string &s); -template std::string stringsToString(const T &tokens); - -/** - * Strings to CSV string. tokens containing the separator are quoted (") - * " inside tokens is escaped as "" ([word "quote"] =>["word ""quote"""] - */ -template void stringsToCSV(const T &tokens, string &s, - char sep = ','); - -/** - * Split input string. No handling of quoting - */ -extern void stringToTokens(const string &s, vector &tokens, - const string &delims = " \t", bool skipinit=true); - -/** Convert string to boolean */ -extern bool stringToBool(const string &s); - -/** Remove instances of characters belonging to set (default {space, - tab}) at beginning and end of input string */ -extern void trimstring(string &s, const char *ws = " \t"); - -/** Escape things like < or & by turning them into entities */ -extern string escapeHtml(const string &in); - -/** Replace some chars with spaces (ie: newline chars). This is not utf8-aware - * so chars should only contain ascii */ -extern string neutchars(const string &str, const string &chars); -extern void neutchars(const string &str, string& out, const string &chars); - -/** Turn string into something that won't be expanded by a shell. In practise - * quote with double-quotes and escape $`\ */ -extern string escapeShell(const string &str); - -/** Truncate a string to a given maxlength, avoiding cutting off midword - * if reasonably possible. */ -extern string truncate_to_word(const string &input, string::size_type maxlen); - -/** Truncate in place in an utf8-legal way */ -extern void utf8truncate(string &s, int maxlen); - -void ulltodecstr(unsigned long long val, string& buf); -void lltodecstr(long long val, string& buf); -string lltodecstr(long long val); -string ulltodecstr(unsigned long long val); - -/** Convert byte count into unit (KB/MB...) appropriate for display */ -string displayableBytes(off_t size); - -/** Break big string into lines */ -string breakIntoLines(const string& in, unsigned int ll = 100, - unsigned int maxlines= 50); -/** Small utility to substitute printf-like percents cmds in a string */ -bool pcSubst(const string& in, string& out, const map& subs); -/** Substitute printf-like percents and also %(key) */ -bool pcSubst(const string& in, string& out, const map& subs); - -/** Append system error message */ -void catstrerror(string *reason, const char *what, int _errno); - -/** Portable timegm. MS C has _mkgmtime, but there is a bug in Gminw which - * makes it inaccessible */ -struct tm; -time_t portable_timegm(struct tm *tm); - -/** Temp buffer with automatic deallocation */ -struct TempBuf { - TempBuf() - : m_buf(0) - {} - TempBuf(int n) - { - m_buf = (char *)malloc(n); - } - ~TempBuf() - { - if (m_buf) - free(m_buf); - } - char *setsize(int n) { return (m_buf = (char *)realloc(m_buf, n)); } - char *buf() {return m_buf;} - char *m_buf; -}; - -inline void leftzeropad(string& s, unsigned len) -{ - if (s.length() && s.length() < len) - s = s.insert(0, len - s.length(), '0'); -} - -// Duplicate map while ensuring no shared string data (to pass -// to other thread): -void map_ss_cp_noshr(const std::map s, - std::map *d); - -// Code for static initialization of an stl map. Somewhat like Boost.assign. -// Ref: http://stackoverflow.com/questions/138600/initializing-a-static-stdmapint-int-in-c -// Example use: map m = create_map (1,2) (3,4) (5,6) (7,8); - -template -class create_map -{ -private: - std::map m_map; -public: - create_map(const T& key, const U& val) - { - m_map[key] = val; - } - - create_map& operator()(const T& key, const U& val) - { - m_map[key] = val; - return *this; - } - - operator std::map() - { - return m_map; - } -}; -template -class create_vector -{ -private: - std::vector m_vector; -public: - create_vector(const T& val) - { - m_vector.push_back(val); - } - - create_vector& operator()(const T& val) - { - m_vector.push_back(val); - return *this; - } - - operator std::vector() - { - return m_vector; - } -}; +// Call this before going multithread. +void smallut_init_mt(); +#ifndef SMALLUT_DISABLE_MACROS #ifndef MIN #define MIN(A,B) (((A)<(B)) ? (A) : (B)) #endif @@ -246,7 +40,194 @@ public: #ifndef deleteZ #define deleteZ(X) {delete X;X = 0;} #endif +#endif /* SMALLUT_DISABLE_MACROS */ -void smallut_init_mt(); +// Case-insensitive compare. ASCII ONLY ! +extern int stringicmp(const std::string& s1, const std::string& s2); + +// For find_if etc. +struct StringIcmpPred { + StringIcmpPred(const std::string& s1) + : m_s1(s1) { + } + bool operator()(const std::string& s2) { + return stringicmp(m_s1, s2) == 0; + } + const std::string& m_s1; +}; + +extern int stringlowercmp(const std::string& alreadylower, + const std::string& s2); +extern int stringuppercmp(const std::string& alreadyupper, + const std::string& s2); + +extern void stringtolower(std::string& io); +extern std::string stringtolower(const std::string& io); + +// Is one string the end part of the other ? +extern int stringisuffcmp(const std::string& s1, const std::string& s2); + +// Divine language from locale +extern std::string localelang(); +// Divine 8bit charset from language +extern std::string langtocode(const std::string& lang); + +// Compare charset names, removing the more common spelling variations +extern bool samecharset(const std::string& cs1, const std::string& cs2); + +// Parse date interval specifier into pair of y,m,d dates. The format +// for the time interval is based on a subset of iso 8601 with +// the addition of open intervals, and removal of all time indications. +// 'P' is the Period indicator, it's followed by a length in +// years/months/days (or any subset thereof) +// Dates: YYYY-MM-DD YYYY-MM YYYY +// Periods: P[nY][nM][nD] where n is an integer value. +// At least one of YMD must be specified +// The separator for the interval is /. Interval examples +// YYYY/ (from YYYY) YYYY-MM-DD/P3Y (3 years after date) etc. +// This returns a pair of y,m,d dates. +struct DateInterval { + int y1; + int m1; + int d1; + int y2; + int m2; + int d2; +}; +extern bool parsedateinterval(const std::string& s, DateInterval *di); +extern int monthdays(int mon, int year); + +/** + * Parse input string into list of strings. + * + * Token delimiter is " \t\n" except inside dquotes. dquote inside + * dquotes can be escaped with \ etc... + * Input is handled a byte at a time, things will work as long as + * space tab etc. have the ascii values and can't appear as part of a + * multibyte char. utf-8 ok but so are the iso-8859-x and surely + * others. addseps do have to be single-bytes + */ +template bool stringToStrings(const std::string& s, T& tokens, + const std::string& addseps = ""); + +/** + * Inverse operation: + */ +template void stringsToString(const T& tokens, std::string& s); +template std::string stringsToString(const T& tokens); + +/** + * Strings to CSV string. tokens containing the separator are quoted (") + * " inside tokens is escaped as "" ([word "quote"] =>["word ""quote"""] + */ +template void stringsToCSV(const T& tokens, std::string& s, + char sep = ','); + +/** + * Split input string. No handling of quoting + */ +extern void stringToTokens(const std::string& s, + std::vector& tokens, + const std::string& delims = " \t", + bool skipinit = true); + +/** Convert string to boolean */ +extern bool stringToBool(const std::string& s); + +/** Remove instances of characters belonging to set (default {space, + tab}) at beginning and end of input string */ +extern void trimstring(std::string& s, const char *ws = " \t"); + +/** Escape things like < or & by turning them into entities */ +extern std::string escapeHtml(const std::string& in); + +/** Replace some chars with spaces (ie: newline chars). */ +extern std::string neutchars(const std::string& str, const std::string& chars); +extern void neutchars(const std::string& str, std::string& out, + const std::string& chars); + +/** Turn string into something that won't be expanded by a shell. In practise + * quote with double-quotes and escape $`\ */ +extern std::string escapeShell(const std::string& str); + +/** Truncate a string to a given maxlength, avoiding cutting off midword + * if reasonably possible. */ +extern std::string truncate_to_word(const std::string& input, + std::string::size_type maxlen); + +void ulltodecstr(unsigned long long val, std::string& buf); +void lltodecstr(long long val, std::string& buf); +std::string lltodecstr(long long val); +std::string ulltodecstr(unsigned long long val); + +/** Convert byte count into unit (KB/MB...) appropriate for display */ +std::string displayableBytes(off_t size); + +/** Break big string into lines */ +std::string breakIntoLines(const std::string& in, unsigned int ll = 100, + unsigned int maxlines = 50); + +/** Small utility to substitute printf-like percents cmds in a string */ +bool pcSubst(const std::string& in, std::string& out, + const std::map& subs); +/** Substitute printf-like percents and also %(key) */ +bool pcSubst(const std::string& in, std::string& out, + const std::map& subs); + +/** Append system error message */ +void catstrerror(std::string *reason, const char *what, int _errno); + +/** Portable timegm. MS C has _mkgmtime, but there is a bug in Gminw which + * makes it inaccessible */ +struct tm; +time_t portable_timegm(struct tm *tm); + +inline void leftzeropad(std::string& s, unsigned len) +{ + if (s.length() && s.length() < len) { + s = s.insert(0, len - s.length(), '0'); + } +} + +// Code for static initialization of an stl map. Somewhat like Boost.assign. +// Ref: http://stackoverflow.com/questions/138600/initializing-a-static-stdmapint-int-in-c +// Example use: map m = create_map (1,2) (3,4) (5,6) (7,8); + +template +class create_map { +private: + std::map m_map; +public: + create_map(const T& key, const U& val) { + m_map[key] = val; + } + + create_map& operator()(const T& key, const U& val) { + m_map[key] = val; + return *this; + } + + operator std::map() { + return m_map; + } +}; +template +class create_vector { +private: + std::vector m_vector; +public: + create_vector(const T& val) { + m_vector.push_back(val); + } + + create_vector& operator()(const T& val) { + m_vector.push_back(val); + return *this; + } + + operator std::vector() { + return m_vector; + } +}; #endif /* _SMALLUT_H_INCLUDED_ */