diff --git a/src/common/utf8fn.cpp b/src/common/utf8fn.cpp index e427f27c..9c2f4140 100644 --- a/src/common/utf8fn.cpp +++ b/src/common/utf8fn.cpp @@ -7,6 +7,11 @@ using namespace std; string compute_utf8fn(const RclConfig *config, const string& ifn, bool simple) { +#ifdef _WIN32 + // On windows file names are read as UTF16 wchar_t and converted to UTF-8 + // while scanning directories + return ifn; +#else string charset = config->getDefCharset(true); string utf8fn; int ercnt; @@ -21,4 +26,5 @@ string compute_utf8fn(const RclConfig *config, const string& ifn, bool simple) LOGDEB1("compute_utf8fn: transcoded from [" << lfn << "] to [" << utf8fn << "] (" << charset << "->" << "UTF-8)\n"); return utf8fn; +#endif } diff --git a/src/utils/fstreewalk.cpp b/src/utils/fstreewalk.cpp index 532f3ae6..7573247a 100644 --- a/src/utils/fstreewalk.cpp +++ b/src/utils/fstreewalk.cpp @@ -36,6 +36,7 @@ #include "log.h" #include "pathut.h" #include "fstreewalk.h" +#include "transcode.h" using namespace std; @@ -310,6 +311,20 @@ FsTreeWalker::Status FsTreeWalker::walk(const string& _top, return FtwOk; } +#ifdef _WIN32 +#define DIRENT _wdirent +#define DIRHDL _WDIR +#define OPENDIR _wopendir +#define CLOSEDIR _wclosedir +#define READDIR _wreaddir +#else +#define DIRENT dirent +#define DIRHDL DIR +#define OPENDIR opendir +#define CLOSEDIR closedir +#define READDIR readdir +#endif + // Note that the 'norecurse' flag is handled as part of the directory read. // This means that we always go into the top 'walk()' parameter if it is a // directory, even if norecurse is set. Bug or Feature ? @@ -341,24 +356,25 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top, // This is a directory, read it and process entries: +#ifndef _WIN32 // Detect if directory already seen. This could just be several // symlinks pointing to the same place (if FtwFollow is set), it // could also be some other kind of cycle. In any case, there is // no point in entering again. // For now, we'll ignore the "other kind of cycle" part and only monitor // this is FtwFollow is set -#ifndef _WIN32 if (data->options & FtwFollow) { DirId dirid(stp->st_dev, stp->st_ino); if (data->donedirs.find(dirid) != data->donedirs.end()) { - LOGINFO("Not processing [" << (top) << "] (already seen as other path)\n" ); + LOGINFO("Not processing [" << top << + "] (already seen as other path)\n"); return status; } data->donedirs.insert(dirid); } #endif - - DIR *d = opendir(top.c_str()); + SYSPATH(top, systop); + DIRHDL *d = OPENDIR(systop); if (d == 0) { data->logsyserr("opendir", top); switch (errno) { @@ -376,42 +392,38 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top, } } - struct dirent *ent; - while ((ent = readdir(d)) != 0) { + struct DIRENT *ent; + while ((ent = READDIR(d)) != 0) { string fn; struct stat st; +#ifdef _WIN32 + string sdname; + if (!wchartoutf8(ent->d_name, sdname)) { + LOGERR("wchartoutf8 failed in " << top << endl); + continue; + } + const char *dname = sdname.c_str(); +#else + const char *dname = ent->d_name; +#endif // Maybe skip dotfiles - if ((data->options & FtwSkipDotFiles) && ent->d_name[0] == '.') + if ((data->options & FtwSkipDotFiles) && dname[0] == '.') continue; // Skip . and .. - if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, "..")) + if (!strcmp(dname, ".") || !strcmp(dname, "..")) continue; // Skipped file names match ? if (!data->skippedNames.empty()) { - if (inSkippedNames(ent->d_name)) + if (inSkippedNames(dname)) continue; } - - fn = path_cat(top, ent->d_name); -#ifdef _WIN32 - // readdir gets the useful attrs, no inode indirection on windows, - // spare the path_fileprops() call, but make sure we mimick it. - memset(&st, 0, sizeof(st)); - st.st_mtime = ent->d_mtime; - st.st_size = ent->d_size; - st.st_mode = ent->d_mode; - // ctime is really creation time on Windows. Just use mtime - // for all. We only use ctime on Unix to catch xattr changes - // anyway. - st.st_ctime = st.st_mtime; -#else + fn = path_cat(top, dname); int statret = path_fileprops(fn.c_str(), &st, data->options&FtwFollow); if (statret == -1) { data->logsyserr("stat", fn); continue; } -#endif if (!data->skippedPaths.empty()) { // We do not check the ancestors. This means that you can have @@ -461,7 +473,7 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top, out: if (d) - closedir(d); + CLOSEDIR(d); return status; } diff --git a/src/utils/pathut.cpp b/src/utils/pathut.cpp index b32b7dc7..aaa25790 100644 --- a/src/utils/pathut.cpp +++ b/src/utils/pathut.cpp @@ -24,13 +24,19 @@ #include #include #include +#include #ifdef _WIN32 -#include "dirent.h" #include "safefcntl.h" #include "safeunistd.h" #include "safewindows.h" #include "safesysstat.h" +#include "transcode.h" + +#define STAT _wstat +#define LSTAT _wstat +#define STATBUF _stat +#define ACCESS _waccess #else // Not windows -> #include @@ -39,10 +45,13 @@ #include #include #include -#include #include #include +#define STAT stat +#define LSTAT lstat +#define STATBUF stat +#define ACCESS access #endif #include @@ -56,6 +65,7 @@ #include "pathut.h" #include "smallut.h" +#include "log.h" using namespace std; @@ -506,8 +516,9 @@ bool path_makepath(const string& ipath, int mode) bool path_isdir(const string& path) { - struct stat st; - if (lstat(path.c_str(), &st) < 0) { + struct STATBUF st; + SYSPATH(path, syspath); + if (LSTAT(syspath, &st) < 0) { return false; } if (S_ISDIR(st.st_mode)) { @@ -518,8 +529,9 @@ bool path_isdir(const string& path) long long path_filesize(const string& path) { - struct stat st; - if (stat(path.c_str(), &st) < 0) { + struct STATBUF st; + SYSPATH(path, syspath); + if (STAT(syspath, &st) < 0) { return -1; } return (long long)st.st_size; @@ -531,8 +543,9 @@ int path_fileprops(const std::string path, struct stat *stp, bool follow) return -1; } memset(stp, 0, sizeof(struct stat)); - struct stat mst; - int ret = follow ? stat(path.c_str(), &mst) : lstat(path.c_str(), &mst); + struct STATBUF mst; + SYSPATH(path, syspath); + int ret = follow ? STAT(syspath, &mst) : LSTAT(syspath, &mst); if (ret != 0) { return ret; } @@ -551,7 +564,8 @@ int path_fileprops(const std::string path, struct stat *stp, bool follow) bool path_exists(const string& path) { - return access(path.c_str(), 0) == 0; + SYSPATH(path, syspath); + return ACCESS(syspath, 0) == 0; } // Allowed punctuation in the path part of an URI according to RFC2396 diff --git a/src/utils/pathut.h b/src/utils/pathut.h index 3e6c73d0..986811f2 100644 --- a/src/utils/pathut.h +++ b/src/utils/pathut.h @@ -91,6 +91,14 @@ extern bool path_exists(const std::string& path); /// Return separator for PATH environment variable extern std::string path_PATHsep(); +#ifdef _WIN32 +#define SYSPATH(PATH, SPATH) wchar_t PATH ## _buf[2048]; \ + utf8towchar(PATH, PATH ## _buf, 2048); \ + wchar_t *SPATH = PATH ## _buf; +#else +#define SYSPATH(PATH, SPATH) const char *SPATH = PATH.c_str() +#endif + /// Dump directory extern bool readdir(const std::string& dir, std::string& reason, std::set& entries); diff --git a/src/utils/readfile.cpp b/src/utils/readfile.cpp index d51da8b2..7866d31c 100644 --- a/src/utils/readfile.cpp +++ b/src/utils/readfile.cpp @@ -20,22 +20,31 @@ #include "config.h" #endif +#include "readfile.h" + #include #include + #ifdef _WIN32 #include "safefcntl.h" #include "safesysstat.h" #include "safeunistd.h" +#include "transcode.h" +#define OPEN _wopen + #else #define O_BINARY 0 #include #include #include +#define OPEN open + #endif + #include -#include "readfile.h" #include "smallut.h" +#include "pathut.h" #include "md5.h" #ifdef MDU_INCLUDE_LOG @@ -295,7 +304,8 @@ public: // If we have a file name, open it, else use stdin. if (!m_fn.empty()) { - fd = open(m_fn.c_str(), O_RDONLY | O_BINARY); + SYSPATH(m_fn, realpath); + fd = OPEN(realpath, O_RDONLY | O_BINARY); if (fd < 0 || fstat(fd, &st) < 0) { catstrerror(m_reason, "open/stat", errno); return false; diff --git a/src/utils/transcode.cpp b/src/utils/transcode.cpp index a4e7c611..76e6d359 100644 --- a/src/utils/transcode.cpp +++ b/src/utils/transcode.cpp @@ -21,14 +21,16 @@ #include #include #include -using std::string; #include #include +#include #include "transcode.h" #include "log.h" +using namespace std; + // We gain approximately 25% exec time for word at a time conversions by // caching the iconv_open thing. // @@ -42,7 +44,7 @@ using std::string; bool transcode(const string &in, string &out, const string &icode, const string &ocode, int *ecnt) { - LOGDEB2("Transcode: " << (icode) << " -> " << (ocode) << "\n" ); + LOGDEB2("Transcode: " << icode << " -> " << ocode << "\n"); #ifdef ICONV_CACHE_OPEN static iconv_t ic = (iconv_t)-1; static string cachedicode; @@ -100,8 +102,9 @@ bool transcode(const string &in, string &out, const string &icode, " : " + strerror(errno); #endif if (errno == EILSEQ) { - LOGDEB1("transcode:iconv: bad input seq.: shift, retry\n" ); - LOGDEB1(" Input consumed " << (ip - in) << " output produced " << (out.length() + OBSIZ - osiz) << "\n" ); + LOGDEB1("transcode:iconv: bad input seq.: shift, retry\n"); + LOGDEB1(" Input consumed " << ip - in << " output produced " << + out.length() + OBSIZ - osiz << "\n"); out.append(obuf, OBSIZ - osiz); out += "?"; mecnt++; @@ -144,14 +147,67 @@ error: } if (mecnt) - LOGDEB("transcode: [" << (icode) << "]->[" << (ocode) << "] " << (mecnt) << " errors\n" ); + LOGDEB("transcode: [" << icode << "]->[" << ocode << "] " << + mecnt << " errors\n"); if (ecnt) *ecnt = mecnt; return ret; } +bool wchartoutf8(const wchar_t *in, std::string& out) +{ + static iconv_t ic = (iconv_t)-1; + if (ic == (iconv_t)-1) { + if((ic = iconv_open("UTF-8", "WCHAR_T")) == (iconv_t)-1) { + LOGERR("wchartoutf8: iconv_open failed\n"); + return false; + } + } + const int OBSIZ = 8192; + char obuf[OBSIZ], *op; + out.erase(); + size_t isiz = 2 * wcslen(in); + out.reserve(isiz); + const char *ip = (const char *)in; -#else + while (isiz > 0) { + size_t osiz; + op = obuf; + osiz = OBSIZ; + + if(iconv(ic, (ICONV_CONST char **)&ip, &isiz, &op, &osiz) == (size_t)-1 + && errno != E2BIG) { + LOGERR("wchartoutf8: iconv error, errno: " << errno << endl); + return false; + } + out.append(obuf, OBSIZ - osiz); + } + return true; +} + +bool utf8towchar(const std::string& in, wchar_t *out, size_t obytescap) +{ + static iconv_t ic = (iconv_t)-1; + if (ic == (iconv_t)-1) { + if((ic = iconv_open("WCHAR_T", "UTF-8")) == (iconv_t)-1) { + LOGERR("utf8towchar: iconv_open failed\n"); + return false; + } + } + size_t isiz = in.size(); + const char *ip = in.c_str(); + size_t osiz = (size_t)obytescap-2; + char *op = (char *)out; + if (iconv(ic, (ICONV_CONST char **)&ip, &isiz, &op, &osiz) == (size_t)-1) { + LOGERR("utf8towchar: iconv error, errno: " << errno << endl); + return false; + } + *op++ = 0; + *op = 0; + return true; +} + +#else // -> TEST #include #include @@ -222,4 +278,3 @@ int main(int argc, char **argv) exit(0); } #endif - diff --git a/src/utils/transcode.h b/src/utils/transcode.h index 41e9139a..10b436d6 100644 --- a/src/utils/transcode.h +++ b/src/utils/transcode.h @@ -36,4 +36,9 @@ extern bool transcode(const std::string &in, std::string &out, const std::string &ocode, int *ecnt = 0); +#ifdef _WIN32 +extern bool wchartoutf8(const wchar_t *in, std::string& out); +extern bool utf8towchar(const std::string& in, wchar_t *out, size_t obytescap); +#endif + #endif /* _TRANSCODE_H_INCLUDED_ */