Windows: use wide char interfaces to read directories and access files
This commit is contained in:
parent
5365490d94
commit
096ab4454b
@ -7,6 +7,11 @@ using namespace std;
|
||||
|
||||
string compute_utf8fn(const RclConfig *config, const string& ifn, bool simple)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
// On windows file names are read as UTF16 wchar_t and converted to UTF-8
|
||||
// while scanning directories
|
||||
return ifn;
|
||||
#else
|
||||
string charset = config->getDefCharset(true);
|
||||
string utf8fn;
|
||||
int ercnt;
|
||||
@ -21,4 +26,5 @@ string compute_utf8fn(const RclConfig *config, const string& ifn, bool simple)
|
||||
LOGDEB1("compute_utf8fn: transcoded from [" << lfn << "] to [" <<
|
||||
utf8fn << "] (" << charset << "->" << "UTF-8)\n");
|
||||
return utf8fn;
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -36,6 +36,7 @@
|
||||
#include "log.h"
|
||||
#include "pathut.h"
|
||||
#include "fstreewalk.h"
|
||||
#include "transcode.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
@ -310,6 +311,20 @@ FsTreeWalker::Status FsTreeWalker::walk(const string& _top,
|
||||
return FtwOk;
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
#define DIRENT _wdirent
|
||||
#define DIRHDL _WDIR
|
||||
#define OPENDIR _wopendir
|
||||
#define CLOSEDIR _wclosedir
|
||||
#define READDIR _wreaddir
|
||||
#else
|
||||
#define DIRENT dirent
|
||||
#define DIRHDL DIR
|
||||
#define OPENDIR opendir
|
||||
#define CLOSEDIR closedir
|
||||
#define READDIR readdir
|
||||
#endif
|
||||
|
||||
// Note that the 'norecurse' flag is handled as part of the directory read.
|
||||
// This means that we always go into the top 'walk()' parameter if it is a
|
||||
// directory, even if norecurse is set. Bug or Feature ?
|
||||
@ -341,24 +356,25 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top,
|
||||
|
||||
// This is a directory, read it and process entries:
|
||||
|
||||
#ifndef _WIN32
|
||||
// Detect if directory already seen. This could just be several
|
||||
// symlinks pointing to the same place (if FtwFollow is set), it
|
||||
// could also be some other kind of cycle. In any case, there is
|
||||
// no point in entering again.
|
||||
// For now, we'll ignore the "other kind of cycle" part and only monitor
|
||||
// this if FtwFollow is set
|
||||
#ifndef _WIN32
|
||||
if (data->options & FtwFollow) {
|
||||
DirId dirid(stp->st_dev, stp->st_ino);
|
||||
if (data->donedirs.find(dirid) != data->donedirs.end()) {
|
||||
LOGINFO("Not processing [" << (top) << "] (already seen as other path)\n" );
|
||||
LOGINFO("Not processing [" << top <<
|
||||
"] (already seen as other path)\n");
|
||||
return status;
|
||||
}
|
||||
data->donedirs.insert(dirid);
|
||||
}
|
||||
#endif
|
||||
|
||||
DIR *d = opendir(top.c_str());
|
||||
SYSPATH(top, systop);
|
||||
DIRHDL *d = OPENDIR(systop);
|
||||
if (d == 0) {
|
||||
data->logsyserr("opendir", top);
|
||||
switch (errno) {
|
||||
@ -376,42 +392,38 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top,
|
||||
}
|
||||
}
|
||||
|
||||
struct dirent *ent;
|
||||
while ((ent = readdir(d)) != 0) {
|
||||
struct DIRENT *ent;
|
||||
while ((ent = READDIR(d)) != 0) {
|
||||
string fn;
|
||||
struct stat st;
|
||||
#ifdef _WIN32
|
||||
string sdname;
|
||||
if (!wchartoutf8(ent->d_name, sdname)) {
|
||||
LOGERR("wchartoutf8 failed in " << top << endl);
|
||||
continue;
|
||||
}
|
||||
const char *dname = sdname.c_str();
|
||||
#else
|
||||
const char *dname = ent->d_name;
|
||||
#endif
|
||||
// Maybe skip dotfiles
|
||||
if ((data->options & FtwSkipDotFiles) && ent->d_name[0] == '.')
|
||||
if ((data->options & FtwSkipDotFiles) && dname[0] == '.')
|
||||
continue;
|
||||
// Skip . and ..
|
||||
if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, ".."))
|
||||
if (!strcmp(dname, ".") || !strcmp(dname, ".."))
|
||||
continue;
|
||||
|
||||
// Skipped file names match ?
|
||||
if (!data->skippedNames.empty()) {
|
||||
if (inSkippedNames(ent->d_name))
|
||||
if (inSkippedNames(dname))
|
||||
continue;
|
||||
}
|
||||
|
||||
fn = path_cat(top, ent->d_name);
|
||||
#ifdef _WIN32
|
||||
// readdir gets the useful attrs, no inode indirection on windows,
|
||||
// spare the path_fileprops() call, but make sure we mimic it.
|
||||
memset(&st, 0, sizeof(st));
|
||||
st.st_mtime = ent->d_mtime;
|
||||
st.st_size = ent->d_size;
|
||||
st.st_mode = ent->d_mode;
|
||||
// ctime is really creation time on Windows. Just use mtime
|
||||
// for all. We only use ctime on Unix to catch xattr changes
|
||||
// anyway.
|
||||
st.st_ctime = st.st_mtime;
|
||||
#else
|
||||
fn = path_cat(top, dname);
|
||||
int statret = path_fileprops(fn.c_str(), &st, data->options&FtwFollow);
|
||||
if (statret == -1) {
|
||||
data->logsyserr("stat", fn);
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!data->skippedPaths.empty()) {
|
||||
// We do not check the ancestors. This means that you can have
|
||||
@ -461,7 +473,7 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top,
|
||||
|
||||
out:
|
||||
if (d)
|
||||
closedir(d);
|
||||
CLOSEDIR(d);
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
@ -24,13 +24,19 @@
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <errno.h>
|
||||
#include <dirent.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
#include "dirent.h"
|
||||
#include "safefcntl.h"
|
||||
#include "safeunistd.h"
|
||||
#include "safewindows.h"
|
||||
#include "safesysstat.h"
|
||||
#include "transcode.h"
|
||||
|
||||
#define STAT _wstat
|
||||
#define LSTAT _wstat
|
||||
#define STATBUF _stat
|
||||
#define ACCESS _waccess
|
||||
|
||||
#else // Not windows ->
|
||||
#include <fcntl.h>
|
||||
@ -39,10 +45,13 @@
|
||||
#include <pwd.h>
|
||||
#include <sys/file.h>
|
||||
#include <sys/stat.h>
|
||||
#include <dirent.h>
|
||||
#include <sys/statvfs.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#define STAT stat
|
||||
#define LSTAT lstat
|
||||
#define STATBUF stat
|
||||
#define ACCESS access
|
||||
#endif
|
||||
|
||||
#include <cstdlib>
|
||||
@ -56,6 +65,7 @@
|
||||
|
||||
#include "pathut.h"
|
||||
#include "smallut.h"
|
||||
#include "log.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
@ -506,8 +516,9 @@ bool path_makepath(const string& ipath, int mode)
|
||||
|
||||
bool path_isdir(const string& path)
|
||||
{
|
||||
struct stat st;
|
||||
if (lstat(path.c_str(), &st) < 0) {
|
||||
struct STATBUF st;
|
||||
SYSPATH(path, syspath);
|
||||
if (LSTAT(syspath, &st) < 0) {
|
||||
return false;
|
||||
}
|
||||
if (S_ISDIR(st.st_mode)) {
|
||||
@ -518,8 +529,9 @@ bool path_isdir(const string& path)
|
||||
|
||||
long long path_filesize(const string& path)
|
||||
{
|
||||
struct stat st;
|
||||
if (stat(path.c_str(), &st) < 0) {
|
||||
struct STATBUF st;
|
||||
SYSPATH(path, syspath);
|
||||
if (STAT(syspath, &st) < 0) {
|
||||
return -1;
|
||||
}
|
||||
return (long long)st.st_size;
|
||||
@ -531,8 +543,9 @@ int path_fileprops(const std::string path, struct stat *stp, bool follow)
|
||||
return -1;
|
||||
}
|
||||
memset(stp, 0, sizeof(struct stat));
|
||||
struct stat mst;
|
||||
int ret = follow ? stat(path.c_str(), &mst) : lstat(path.c_str(), &mst);
|
||||
struct STATBUF mst;
|
||||
SYSPATH(path, syspath);
|
||||
int ret = follow ? STAT(syspath, &mst) : LSTAT(syspath, &mst);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
@ -551,7 +564,8 @@ int path_fileprops(const std::string path, struct stat *stp, bool follow)
|
||||
|
||||
bool path_exists(const string& path)
|
||||
{
|
||||
return access(path.c_str(), 0) == 0;
|
||||
SYSPATH(path, syspath);
|
||||
return ACCESS(syspath, 0) == 0;
|
||||
}
|
||||
|
||||
// Allowed punctuation in the path part of an URI according to RFC2396
|
||||
|
||||
@ -91,6 +91,14 @@ extern bool path_exists(const std::string& path);
|
||||
/// Return separator for PATH environment variable
|
||||
extern std::string path_PATHsep();
|
||||
|
||||
/// SYSPATH(PATH, SPATH): declare a local variable SPATH holding PATH in the
/// form expected by the system file interfaces.
///
/// - PATH must be a plain identifier naming a std::string (it is
///   token-pasted to build the name of a local buffer on Windows).
/// - On Windows, SPATH is a wchar_t* pointing to a local 2048-element
///   buffer filled by utf8towchar(). The capacity is passed in bytes, as
///   the third utf8towchar() parameter expects. If the conversion fails,
///   the buffer is set to the empty string so that the following system
///   call fails cleanly instead of reading uninitialised memory.
/// - Elsewhere, SPATH is simply PATH.c_str().
///
/// The Windows variant expands to several statements: do not use it as the
/// unbraced body of an if/for/while.
#ifdef _WIN32
#define SYSPATH(PATH, SPATH) wchar_t PATH ## _buf[2048];                \
    if (!utf8towchar(PATH, PATH ## _buf, sizeof(PATH ## _buf)))         \
        PATH ## _buf[0] = 0;                                            \
    wchar_t *SPATH = PATH ## _buf;
#else
#define SYSPATH(PATH, SPATH) const char *SPATH = PATH.c_str()
#endif
|
||||
|
||||
/// Dump directory
|
||||
extern bool readdir(const std::string& dir, std::string& reason,
|
||||
std::set<std::string>& entries);
|
||||
|
||||
@ -20,22 +20,31 @@
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "readfile.h"
|
||||
|
||||
#include <errno.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
#include "safefcntl.h"
|
||||
#include "safesysstat.h"
|
||||
#include "safeunistd.h"
|
||||
#include "transcode.h"
|
||||
#define OPEN _wopen
|
||||
|
||||
#else
|
||||
#define O_BINARY 0
|
||||
#include <fcntl.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
#define OPEN open
|
||||
|
||||
#endif
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "readfile.h"
|
||||
#include "smallut.h"
|
||||
#include "pathut.h"
|
||||
#include "md5.h"
|
||||
|
||||
#ifdef MDU_INCLUDE_LOG
|
||||
@ -295,7 +304,8 @@ public:
|
||||
|
||||
// If we have a file name, open it, else use stdin.
|
||||
if (!m_fn.empty()) {
|
||||
fd = open(m_fn.c_str(), O_RDONLY | O_BINARY);
|
||||
SYSPATH(m_fn, realpath);
|
||||
fd = OPEN(realpath, O_RDONLY | O_BINARY);
|
||||
if (fd < 0 || fstat(fd, &st) < 0) {
|
||||
catstrerror(m_reason, "open/stat", errno);
|
||||
return false;
|
||||
|
||||
@ -21,14 +21,16 @@
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
using std::string;
|
||||
|
||||
#include <errno.h>
|
||||
#include <iconv.h>
|
||||
#include <wchar.h>
|
||||
|
||||
#include "transcode.h"
|
||||
#include "log.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// We gain approximately 25% exec time for word at a time conversions by
|
||||
// caching the iconv_open thing.
|
||||
//
|
||||
@ -42,7 +44,7 @@ using std::string;
|
||||
bool transcode(const string &in, string &out, const string &icode,
|
||||
const string &ocode, int *ecnt)
|
||||
{
|
||||
LOGDEB2("Transcode: " << (icode) << " -> " << (ocode) << "\n" );
|
||||
LOGDEB2("Transcode: " << icode << " -> " << ocode << "\n");
|
||||
#ifdef ICONV_CACHE_OPEN
|
||||
static iconv_t ic = (iconv_t)-1;
|
||||
static string cachedicode;
|
||||
@ -100,8 +102,9 @@ bool transcode(const string &in, string &out, const string &icode,
|
||||
" : " + strerror(errno);
|
||||
#endif
|
||||
if (errno == EILSEQ) {
|
||||
LOGDEB1("transcode:iconv: bad input seq.: shift, retry\n" );
|
||||
LOGDEB1(" Input consumed " << (ip - in) << " output produced " << (out.length() + OBSIZ - osiz) << "\n" );
|
||||
LOGDEB1("transcode:iconv: bad input seq.: shift, retry\n");
|
||||
LOGDEB1(" Input consumed " << ip - in << " output produced " <<
|
||||
out.length() + OBSIZ - osiz << "\n");
|
||||
out.append(obuf, OBSIZ - osiz);
|
||||
out += "?";
|
||||
mecnt++;
|
||||
@ -144,14 +147,67 @@ error:
|
||||
}
|
||||
|
||||
if (mecnt)
|
||||
LOGDEB("transcode: [" << (icode) << "]->[" << (ocode) << "] " << (mecnt) << " errors\n" );
|
||||
LOGDEB("transcode: [" << icode << "]->[" << ocode << "] " <<
|
||||
mecnt << " errors\n");
|
||||
if (ecnt)
|
||||
*ecnt = mecnt;
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool wchartoutf8(const wchar_t *in, std::string& out)
|
||||
{
|
||||
static iconv_t ic = (iconv_t)-1;
|
||||
if (ic == (iconv_t)-1) {
|
||||
if((ic = iconv_open("UTF-8", "WCHAR_T")) == (iconv_t)-1) {
|
||||
LOGERR("wchartoutf8: iconv_open failed\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
const int OBSIZ = 8192;
|
||||
char obuf[OBSIZ], *op;
|
||||
out.erase();
|
||||
size_t isiz = 2 * wcslen(in);
|
||||
out.reserve(isiz);
|
||||
const char *ip = (const char *)in;
|
||||
|
||||
#else
|
||||
while (isiz > 0) {
|
||||
size_t osiz;
|
||||
op = obuf;
|
||||
osiz = OBSIZ;
|
||||
|
||||
if(iconv(ic, (ICONV_CONST char **)&ip, &isiz, &op, &osiz) == (size_t)-1
|
||||
&& errno != E2BIG) {
|
||||
LOGERR("wchartoutf8: iconv error, errno: " << errno << endl);
|
||||
return false;
|
||||
}
|
||||
out.append(obuf, OBSIZ - osiz);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool utf8towchar(const std::string& in, wchar_t *out, size_t obytescap)
|
||||
{
|
||||
static iconv_t ic = (iconv_t)-1;
|
||||
if (ic == (iconv_t)-1) {
|
||||
if((ic = iconv_open("WCHAR_T", "UTF-8")) == (iconv_t)-1) {
|
||||
LOGERR("utf8towchar: iconv_open failed\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
size_t isiz = in.size();
|
||||
const char *ip = in.c_str();
|
||||
size_t osiz = (size_t)obytescap-2;
|
||||
char *op = (char *)out;
|
||||
if (iconv(ic, (ICONV_CONST char **)&ip, &isiz, &op, &osiz) == (size_t)-1) {
|
||||
LOGERR("utf8towchar: iconv error, errno: " << errno << endl);
|
||||
return false;
|
||||
}
|
||||
*op++ = 0;
|
||||
*op = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
#else // -> TEST
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
@ -222,4 +278,3 @@ int main(int argc, char **argv)
|
||||
exit(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@ -36,4 +36,9 @@ extern bool transcode(const std::string &in, std::string &out,
|
||||
const std::string &ocode,
|
||||
int *ecnt = 0);
|
||||
|
||||
#ifdef _WIN32
|
||||
extern bool wchartoutf8(const wchar_t *in, std::string& out);
|
||||
extern bool utf8towchar(const std::string& in, wchar_t *out, size_t obytescap);
|
||||
#endif
|
||||
|
||||
#endif /* _TRANSCODE_H_INCLUDED_ */
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user