Windows: use wide char interfaces to read directories and access files

This commit is contained in:
Jean-Francois Dockes 2019-01-23 15:05:11 +01:00
parent 5365490d94
commit 096ab4454b
7 changed files with 153 additions and 43 deletions

View File

@ -7,6 +7,11 @@ using namespace std;
string compute_utf8fn(const RclConfig *config, const string& ifn, bool simple)
{
#ifdef _WIN32
// On Windows, file names are read as UTF-16 wchar_t strings and converted to
// UTF-8 while scanning directories, so the input is already UTF-8 here.
return ifn;
#else
string charset = config->getDefCharset(true);
string utf8fn;
int ercnt;
@ -21,4 +26,5 @@ string compute_utf8fn(const RclConfig *config, const string& ifn, bool simple)
LOGDEB1("compute_utf8fn: transcoded from [" << lfn << "] to [" <<
utf8fn << "] (" << charset << "->" << "UTF-8)\n");
return utf8fn;
#endif
}

View File

@ -36,6 +36,7 @@
#include "log.h"
#include "pathut.h"
#include "fstreewalk.h"
#include "transcode.h"
using namespace std;
@ -310,6 +311,20 @@ FsTreeWalker::Status FsTreeWalker::walk(const string& _top,
return FtwOk;
}
// Directory reading interface used by the tree walker. The Windows build
// goes through the wide character variants (_wopendir() and friends), whose
// entry names are wchar_t strings which need converting to UTF-8 before use.
// Other systems use the regular dirent calls and char names.
#ifdef _WIN32
#define DIRENT _wdirent
#define DIRHDL _WDIR
#define OPENDIR _wopendir
#define CLOSEDIR _wclosedir
#define READDIR _wreaddir
#else
#define DIRENT dirent
#define DIRHDL DIR
#define OPENDIR opendir
#define CLOSEDIR closedir
#define READDIR readdir
#endif
// Note that the 'norecurse' flag is handled as part of the directory read.
// This means that we always go into the top 'walk()' parameter if it is a
// directory, even if norecurse is set. Bug or Feature ?
@ -341,24 +356,25 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top,
// This is a directory, read it and process entries:
#ifndef _WIN32
// Detect if directory already seen. This could just be several
// symlinks pointing to the same place (if FtwFollow is set), it
// could also be some other kind of cycle. In any case, there is
// no point in entering again.
// For now, we'll ignore the "other kind of cycle" part and only monitor
// this if FtwFollow is set
#ifndef _WIN32
if (data->options & FtwFollow) {
DirId dirid(stp->st_dev, stp->st_ino);
if (data->donedirs.find(dirid) != data->donedirs.end()) {
LOGINFO("Not processing [" << (top) << "] (already seen as other path)\n" );
LOGINFO("Not processing [" << top <<
"] (already seen as other path)\n");
return status;
}
data->donedirs.insert(dirid);
}
#endif
DIR *d = opendir(top.c_str());
SYSPATH(top, systop);
DIRHDL *d = OPENDIR(systop);
if (d == 0) {
data->logsyserr("opendir", top);
switch (errno) {
@ -376,42 +392,38 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top,
}
}
struct dirent *ent;
while ((ent = readdir(d)) != 0) {
struct DIRENT *ent;
while ((ent = READDIR(d)) != 0) {
string fn;
struct stat st;
#ifdef _WIN32
string sdname;
if (!wchartoutf8(ent->d_name, sdname)) {
LOGERR("wchartoutf8 failed in " << top << endl);
continue;
}
const char *dname = sdname.c_str();
#else
const char *dname = ent->d_name;
#endif
// Maybe skip dotfiles
if ((data->options & FtwSkipDotFiles) && ent->d_name[0] == '.')
if ((data->options & FtwSkipDotFiles) && dname[0] == '.')
continue;
// Skip . and ..
if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, ".."))
if (!strcmp(dname, ".") || !strcmp(dname, ".."))
continue;
// Skipped file names match ?
if (!data->skippedNames.empty()) {
if (inSkippedNames(ent->d_name))
if (inSkippedNames(dname))
continue;
}
fn = path_cat(top, ent->d_name);
#ifdef _WIN32
// readdir gets the useful attrs, no inode indirection on windows,
// spare the path_fileprops() call, but make sure we mimick it.
memset(&st, 0, sizeof(st));
st.st_mtime = ent->d_mtime;
st.st_size = ent->d_size;
st.st_mode = ent->d_mode;
// ctime is really creation time on Windows. Just use mtime
// for all. We only use ctime on Unix to catch xattr changes
// anyway.
st.st_ctime = st.st_mtime;
#else
fn = path_cat(top, dname);
int statret = path_fileprops(fn.c_str(), &st, data->options&FtwFollow);
if (statret == -1) {
data->logsyserr("stat", fn);
continue;
}
#endif
if (!data->skippedPaths.empty()) {
// We do not check the ancestors. This means that you can have
@ -461,7 +473,7 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top,
out:
if (d)
closedir(d);
CLOSEDIR(d);
return status;
}

View File

@ -24,13 +24,19 @@
#include <stdio.h>
#include <math.h>
#include <errno.h>
#include <dirent.h>
#ifdef _WIN32
#include "dirent.h"
#include "safefcntl.h"
#include "safeunistd.h"
#include "safewindows.h"
#include "safesysstat.h"
#include "transcode.h"
#define STAT _wstat
#define LSTAT _wstat
#define STATBUF _stat
#define ACCESS _waccess
#else // Not windows ->
#include <fcntl.h>
@ -39,10 +45,13 @@
#include <pwd.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <dirent.h>
#include <sys/statvfs.h>
#include <sys/types.h>
#define STAT stat
#define LSTAT lstat
#define STATBUF stat
#define ACCESS access
#endif
#include <cstdlib>
@ -56,6 +65,7 @@
#include "pathut.h"
#include "smallut.h"
#include "log.h"
using namespace std;
@ -506,8 +516,9 @@ bool path_makepath(const string& ipath, int mode)
bool path_isdir(const string& path)
{
struct stat st;
if (lstat(path.c_str(), &st) < 0) {
struct STATBUF st;
SYSPATH(path, syspath);
if (LSTAT(syspath, &st) < 0) {
return false;
}
if (S_ISDIR(st.st_mode)) {
@ -518,8 +529,9 @@ bool path_isdir(const string& path)
long long path_filesize(const string& path)
{
struct stat st;
if (stat(path.c_str(), &st) < 0) {
struct STATBUF st;
SYSPATH(path, syspath);
if (STAT(syspath, &st) < 0) {
return -1;
}
return (long long)st.st_size;
@ -531,8 +543,9 @@ int path_fileprops(const std::string path, struct stat *stp, bool follow)
return -1;
}
memset(stp, 0, sizeof(struct stat));
struct stat mst;
int ret = follow ? stat(path.c_str(), &mst) : lstat(path.c_str(), &mst);
struct STATBUF mst;
SYSPATH(path, syspath);
int ret = follow ? STAT(syspath, &mst) : LSTAT(syspath, &mst);
if (ret != 0) {
return ret;
}
@ -551,7 +564,8 @@ int path_fileprops(const std::string path, struct stat *stp, bool follow)
/// Test if something exists at the given location.
///
/// The path goes through SYSPATH() so that the check uses the wide character
/// interface on Windows (ACCESS is _waccess there) and plain access()
/// elsewhere. Mode 0 only tests for existence, not for any permission.
///
/// @param path UTF-8 path to check.
/// @return true if the access check succeeds, else false.
bool path_exists(const string& path)
{
    SYSPATH(path, syspath);
    return ACCESS(syspath, 0) == 0;
}
// Allowed punctuation in the path part of an URI according to RFC2396

View File

@ -91,6 +91,14 @@ extern bool path_exists(const std::string& path);
/// Return separator for PATH environment variable
extern std::string path_PATHsep();
/// SYSPATH(PATH, SPATH): declare SPATH as a pointer to the form of PATH
/// which can be passed to the system file access calls.
///
/// PATH must be the name of a std::string variable (it is used to build the
/// name of a local buffer) and the macro must be used where declarations and
/// statements are allowed.
///
/// On Windows, PATH is converted from UTF-8 into a local wchar_t buffer. If
/// the conversion fails (bad input, or result too long for the buffer), the
/// buffer is set to an empty string so that the following system call fails
/// cleanly instead of working on uninitialized or truncated data.
/// On other systems, SPATH just points to the std::string data.
#ifdef _WIN32
#define SYSPATH(PATH, SPATH) wchar_t PATH ## _buf[2048];              \
    if (!utf8towchar(PATH, PATH ## _buf, sizeof(PATH ## _buf)))       \
        PATH ## _buf[0] = 0;                                          \
    wchar_t *SPATH = PATH ## _buf;
#else
#define SYSPATH(PATH, SPATH) const char *SPATH = PATH.c_str()
#endif
/// Dump directory
extern bool readdir(const std::string& dir, std::string& reason,
std::set<std::string>& entries);

View File

@ -20,22 +20,31 @@
#include "config.h"
#endif
#include "readfile.h"
#include <errno.h>
#include <sys/types.h>
#ifdef _WIN32
#include "safefcntl.h"
#include "safesysstat.h"
#include "safeunistd.h"
#include "transcode.h"
#define OPEN _wopen
#else
#define O_BINARY 0
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#define OPEN open
#endif
#include <string>
#include "readfile.h"
#include "smallut.h"
#include "pathut.h"
#include "md5.h"
#ifdef MDU_INCLUDE_LOG
@ -295,7 +304,8 @@ public:
// If we have a file name, open it, else use stdin.
if (!m_fn.empty()) {
fd = open(m_fn.c_str(), O_RDONLY | O_BINARY);
SYSPATH(m_fn, realpath);
fd = OPEN(realpath, O_RDONLY | O_BINARY);
if (fd < 0 || fstat(fd, &st) < 0) {
catstrerror(m_reason, "open/stat", errno);
return false;

View File

@ -21,14 +21,16 @@
#include <string>
#include <iostream>
#include <mutex>
using std::string;
#include <errno.h>
#include <iconv.h>
#include <wchar.h>
#include "transcode.h"
#include "log.h"
using namespace std;
// We gain approximately 25% exec time for word at a time conversions by
// caching the iconv_open thing.
//
@ -42,7 +44,7 @@ using std::string;
bool transcode(const string &in, string &out, const string &icode,
const string &ocode, int *ecnt)
{
LOGDEB2("Transcode: " << (icode) << " -> " << (ocode) << "\n" );
LOGDEB2("Transcode: " << icode << " -> " << ocode << "\n");
#ifdef ICONV_CACHE_OPEN
static iconv_t ic = (iconv_t)-1;
static string cachedicode;
@ -100,8 +102,9 @@ bool transcode(const string &in, string &out, const string &icode,
" : " + strerror(errno);
#endif
if (errno == EILSEQ) {
LOGDEB1("transcode:iconv: bad input seq.: shift, retry\n" );
LOGDEB1(" Input consumed " << (ip - in) << " output produced " << (out.length() + OBSIZ - osiz) << "\n" );
LOGDEB1("transcode:iconv: bad input seq.: shift, retry\n");
LOGDEB1(" Input consumed " << ip - in << " output produced " <<
out.length() + OBSIZ - osiz << "\n");
out.append(obuf, OBSIZ - osiz);
out += "?";
mecnt++;
@ -144,14 +147,67 @@ error:
}
if (mecnt)
LOGDEB("transcode: [" << (icode) << "]->[" << (ocode) << "] " << (mecnt) << " errors\n" );
LOGDEB("transcode: [" << icode << "]->[" << ocode << "] " <<
mecnt << " errors\n");
if (ecnt)
*ecnt = mecnt;
return ret;
}
/// Convert a NUL-terminated wide character string to UTF-8.
///
/// The conversion uses an iconv descriptor ("WCHAR_T" to "UTF-8") which is
/// opened on the first call and kept for the following ones.
/// NOTE(review): the cached descriptor is used without any locking, confirm
/// that this is never called concurrently from several threads.
///
/// @param in  wide character input, ended by a NUL wide character.
/// @param out receives the UTF-8 text. It is emptied first in all cases.
/// @return true for success. false if the input pointer is null, if the
///     converter can't be opened, or if the input can't be converted.
bool wchartoutf8(const wchar_t *in, std::string& out)
{
    static iconv_t ic = (iconv_t)-1;

    out.erase();
    if (in == nullptr) {
        LOGERR("wchartoutf8: null input\n");
        return false;
    }
    if (ic == (iconv_t)-1) {
        if ((ic = iconv_open("UTF-8", "WCHAR_T")) == (iconv_t)-1) {
            LOGERR("wchartoutf8: iconv_open failed\n");
            return false;
        }
    }

    const int OBSIZ = 8192;
    char obuf[OBSIZ];
    // iconv counts bytes, not characters. Do not assume a 2 bytes wchar_t.
    size_t isiz = sizeof(wchar_t) * wcslen(in);
    out.reserve(isiz);
    const char *ip = (const char *)in;

    while (isiz > 0) {
        char *op = obuf;
        size_t osiz = OBSIZ;
        // E2BIG just means that obuf is full: flush it and go on.
        if (iconv(ic, (ICONV_CONST char **)&ip, &isiz, &op, &osiz) == (size_t)-1
            && errno != E2BIG) {
            LOGERR("wchartoutf8: iconv error, errno: " << errno << "\n");
            // The descriptor is reused by later calls: bring it back to
            // its initial state.
            iconv(ic, nullptr, nullptr, nullptr, nullptr);
            return false;
        }
        out.append(obuf, OBSIZ - osiz);
    }
    return true;
}
/// Convert an UTF-8 string to a NUL-terminated wide character string,
/// stored in a caller-supplied buffer.
///
/// The conversion uses an iconv descriptor ("UTF-8" to "WCHAR_T") which is
/// opened on the first call and kept for the following ones.
/// NOTE(review): the cached descriptor is used without any locking, confirm
/// that this is never called concurrently from several threads.
///
/// @param in  UTF-8 input.
/// @param out output buffer.
/// @param obytescap size of the output buffer in BYTES (not in wchar_t
///     units). Room for the terminating NUL is taken out of it.
/// @return true for success. On failure (no usable buffer, converter can't
///     be opened, bad input, output too long for the buffer) the function
///     returns false and, if there is a buffer at all, it holds an empty
///     string.
bool utf8towchar(const std::string& in, wchar_t *out, size_t obytescap)
{
    // We need room for the terminator at least. Checking this also avoids
    // wrapping around when computing the usable size below.
    if (out == nullptr || obytescap < sizeof(wchar_t)) {
        LOGERR("utf8towchar: no room in output buffer\n");
        return false;
    }
    // Whatever happens, leave a valid string for callers which do not
    // check the return value.
    out[0] = 0;

    static iconv_t ic = (iconv_t)-1;
    if (ic == (iconv_t)-1) {
        if ((ic = iconv_open("WCHAR_T", "UTF-8")) == (iconv_t)-1) {
            LOGERR("utf8towchar: iconv_open failed\n");
            return false;
        }
    }

    size_t isiz = in.size();
    const char *ip = in.c_str();
    const size_t ocap = obytescap - sizeof(wchar_t);
    size_t osiz = ocap;
    char *op = (char *)out;
    if (iconv(ic, (ICONV_CONST char **)&ip, &isiz, &op, &osiz) == (size_t)-1) {
        LOGERR("utf8towchar: iconv error, errno: " << errno << "\n");
        // The descriptor is reused by later calls: bring it back to its
        // initial state, and do not leave a partial result behind.
        iconv(ic, nullptr, nullptr, nullptr, nullptr);
        out[0] = 0;
        return false;
    }
    // Terminate right after the last converted character.
    out[(ocap - osiz) / sizeof(wchar_t)] = 0;
    return true;
}
#else // -> TEST
#include <stdio.h>
#include <stdlib.h>
@ -222,4 +278,3 @@ int main(int argc, char **argv)
exit(0);
}
#endif

View File

@ -36,4 +36,9 @@ extern bool transcode(const std::string &in, std::string &out,
const std::string &ocode,
int *ecnt = 0);
#ifdef _WIN32
/// Convert a NUL-terminated wide character string to UTF-8.
/// @param in wide character input.
/// @param out receives the UTF-8 result, replacing previous contents.
/// @return false if the conversion could not be performed.
extern bool wchartoutf8(const wchar_t *in, std::string& out);
/// Convert an UTF-8 string to a NUL-terminated wide character string.
/// @param in UTF-8 input.
/// @param out caller-supplied output buffer.
/// @param obytescap size of the output buffer, in bytes. Room for the
///    terminating NUL is taken out of this value.
/// @return false if the conversion could not be performed.
extern bool utf8towchar(const std::string& in, wchar_t *out, size_t obytescap);
#endif
#endif /* _TRANSCODE_H_INCLUDED_ */