Windows: use wide char interfaces

Exchange file names and command line parameters with the system using
wchar_t interfaces: this preserves values which cannot be reversibly
transcoded to the current multibyte charset (which can't be UTF-8). Store
all file paths internally in UTF-8.
This commit is contained in:
Jean-Francois Dockes 2019-01-25 15:28:24 +01:00
parent 969b351813
commit 7ea3936420
12 changed files with 78 additions and 25 deletions

View File

@ -61,7 +61,7 @@ class Executor(RclBaseHandler):
return True, postproc.wrapData() return True, postproc.wrapData()
else: else:
try: try:
fullcmd = cmd + [filename] fullcmd = cmd + [rclexecm.subprocfile(filename)]
proc = subprocess.Popen(fullcmd, proc = subprocess.Popen(fullcmd,
stdout = subprocess.PIPE) stdout = subprocess.PIPE)
stdout = proc.stdout stdout = proc.stdout

View File

@ -29,13 +29,25 @@ import shutil
import getopt import getopt
import rclconfig import rclconfig
PY3 = sys.version > '3' PY3 = (sys.version > '3')
_mswindows = (sys.platform == "win32")
def makebytes(data): def makebytes(data):
if type(data) == type(u''): if type(data) == type(u''):
return data.encode("UTF-8") return data.encode("UTF-8")
return data return data
def subprocfile(fn):
    """Return file name `fn` in a form suitable for a subprocess argument list.

    On Windows with Python 3, the list2cmdline() method in subprocess
    assumes that all args are str, while we receive file names as UTF-8
    bytes, so a bytes value is decoded to str there.
    On Unix all list elements get converted to bytes in the C
    _posixsubprocess module, so the value is returned unchanged.

    A value which is already a str is always returned as is: it has no
    decode() method on Python 3, and needs no conversion anyway.
    """
    if PY3 and _mswindows and isinstance(fn, bytes):
        return fn.decode('UTF-8')
    return fn
my_config = rclconfig.RclConfig() my_config = rclconfig.RclConfig()
############################################ ############################################

View File

@ -479,7 +479,8 @@ class PDFExtractor:
print("RECFILTERROR HELPERNOTFOUND pdftotext") print("RECFILTERROR HELPERNOTFOUND pdftotext")
sys.exit(1); sys.exit(1);
self.filename = params["filename:"] self.filename = rclexecm.subprocfile(params["filename:"])
#self.em.rclog("openfile: [%s]" % self.filename) #self.em.rclog("openfile: [%s]" % self.filename)
self.currentindex = -1 self.currentindex = -1
self.attextractdone = False self.attextractdone = False

View File

@ -341,7 +341,7 @@ void RclMain::startNativeViewer(Rcl::Doc doc, int pagenum, QString term)
// If using an actual file, check that it exists, and if it is // If using an actual file, check that it exists, and if it is
// compressed, we may need an uncompressed version // compressed, we may need an uncompressed version
if (!fn.empty() && theconfig->mimeViewerNeedsUncomp(doc.mimetype)) { if (!fn.empty() && theconfig->mimeViewerNeedsUncomp(doc.mimetype)) {
if (access(fn.c_str(), R_OK) != 0) { if (!path_readable(fn)) {
QMessageBox::warning(0, "Recoll", QMessageBox::warning(0, "Recoll",
tr("Can't access file: ") + u8s2qs(fn)); tr("Can't access file: ") + u8s2qs(fn));
return; return;
@ -445,9 +445,13 @@ void RclMain::execViewer(const map<string, string>& subs, bool enterHistory,
#endif #endif
QStatusBar *stb = statusBar(); QStatusBar *stb = statusBar();
if (stb) { if (stb) {
string fcharset = theconfig->getDefCharset(true);
string prcmd; string prcmd;
#ifdef _WIN32
prcmd = ncmd;
#else
string fcharset = theconfig->getDefCharset(true);
transcode(ncmd, prcmd, fcharset, "UTF-8"); transcode(ncmd, prcmd, fcharset, "UTF-8");
#endif
QString msg = tr("Executing: [") + QString msg = tr("Executing: [") +
QString::fromUtf8(prcmd.c_str()) + "]"; QString::fromUtf8(prcmd.c_str()) + "]";
stb->showMessage(msg, 10000); stb->showMessage(msg, 10000);

View File

@ -2970,6 +2970,7 @@ static FILE *mz_freopen(const char *pPath, const char *pMode, FILE *pStream)
#ifndef MINIZ_NO_TIME #ifndef MINIZ_NO_TIME
#include <sys/utime.h> #include <sys/utime.h>
#endif #endif
#define MZ_FOPENREAD(f, m) _wfopen(f, m)
#define MZ_FOPEN(f, m) fopen(f, m) #define MZ_FOPEN(f, m) fopen(f, m)
#define MZ_FCLOSE fclose #define MZ_FCLOSE fclose
#define MZ_FREAD fread #define MZ_FREAD fread
@ -3845,20 +3846,23 @@ static size_t mz_zip_file_read_func(void *pOpaque, mz_uint64 file_ofs, void *pBu
return MZ_FREAD(pBuf, 1, n, pZip->m_pState->m_pFile); return MZ_FREAD(pBuf, 1, n, pZip->m_pState->m_pFile);
} }
mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint32 flags) mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const WCHAR_TYPE *pFilename, mz_uint32 flags)
{ {
return mz_zip_reader_init_file_v2(pZip, pFilename, flags, 0, 0); return mz_zip_reader_init_file_v2(pZip, pFilename, flags, 0, 0);
} }
mz_bool mz_zip_reader_init_file_v2(mz_zip_archive *pZip, const char *pFilename, mz_uint flags, mz_uint64 file_start_ofs, mz_uint64 archive_size) mz_bool mz_zip_reader_init_file_v2(mz_zip_archive *pZip, const WCHAR_TYPE *pFilename, mz_uint flags, mz_uint64 file_start_ofs, mz_uint64 archive_size)
{ {
mz_uint64 file_size; mz_uint64 file_size;
MZ_FILE *pFile; MZ_FILE *pFile;
if ((!pZip) || (!pFilename) || ((archive_size) && (archive_size < MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE))) if ((!pZip) || (!pFilename) || ((archive_size) && (archive_size < MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE)))
return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);
#ifdef _WIN32
pFile = MZ_FOPEN(pFilename, "rb"); pFile = MZ_FOPENREAD(pFilename, L"rb");
#else
pFile = MZ_FOPENREAD(pFilename, "rb");
#endif
if (!pFile) if (!pFile)
return mz_zip_set_error(pZip, MZ_ZIP_FILE_OPEN_FAILED); return mz_zip_set_error(pZip, MZ_ZIP_FILE_OPEN_FAILED);
@ -5400,7 +5404,7 @@ mz_bool mz_zip_validate_mem_archive(const void *pMem, size_t size, mz_uint flags
} }
#ifndef MINIZ_NO_STDIO #ifndef MINIZ_NO_STDIO
mz_bool mz_zip_validate_file_archive(const char *pFilename, mz_uint flags, mz_zip_error *pErr) mz_bool mz_zip_validate_file_archive(const WCHAR_TYPE *pFilename, mz_uint flags, mz_zip_error *pErr)
{ {
mz_bool success = MZ_TRUE; mz_bool success = MZ_TRUE;
mz_zip_archive zip; mz_zip_archive zip;

View File

@ -1097,9 +1097,13 @@ mz_bool mz_zip_reader_init_mem(mz_zip_archive *pZip, const void *pMem, size_t si
/* Read a archive from a disk file. */ /* Read a archive from a disk file. */
/* file_start_ofs is the file offset where the archive actually begins, or 0. */ /* file_start_ofs is the file offset where the archive actually begins, or 0. */
/* actual_archive_size is the true total size of the archive, which may be smaller than the file's actual size on disk. If zero the entire file is treated as the archive. */ /* actual_archive_size is the true total size of the archive, which may be smaller than the file's actual size on disk. If zero the entire file is treated as the archive. */
mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint32 flags); #ifdef _WIN32
mz_bool mz_zip_reader_init_file_v2(mz_zip_archive *pZip, const char *pFilename, mz_uint flags, mz_uint64 file_start_ofs, mz_uint64 archive_size); #define WCHAR_TYPE wchar_t
#else
#define WCHAR_TYPE char
#endif
mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const WCHAR_TYPE *pFilename, mz_uint32 flags);
mz_bool mz_zip_reader_init_file_v2(mz_zip_archive *pZip, const WCHAR_TYPE *pFilename, mz_uint flags, mz_uint64 file_start_ofs, mz_uint64 archive_size);
/* Read an archive from an already opened FILE, beginning at the current file position. */ /* Read an archive from an already opened FILE, beginning at the current file position. */
/* The archive is assumed to be archive_size bytes long. If archive_size is < 0, then the entire rest of the file is assumed to contain the archive. */ /* The archive is assumed to be archive_size bytes long. If archive_size is < 0, then the entire rest of the file is assumed to contain the archive. */
/* The FILE will NOT be closed when mz_zip_reader_end() is called. */ /* The FILE will NOT be closed when mz_zip_reader_end() is called. */
@ -1222,7 +1226,7 @@ mz_bool mz_zip_validate_archive(mz_zip_archive *pZip, mz_uint flags);
/* Misc utils/helpers, valid for ZIP reading or writing */ /* Misc utils/helpers, valid for ZIP reading or writing */
mz_bool mz_zip_validate_mem_archive(const void *pMem, size_t size, mz_uint flags, mz_zip_error *pErr); mz_bool mz_zip_validate_mem_archive(const void *pMem, size_t size, mz_uint flags, mz_zip_error *pErr);
mz_bool mz_zip_validate_file_archive(const char *pFilename, mz_uint flags, mz_zip_error *pErr); mz_bool mz_zip_validate_file_archive(const WCHAR_TYPE *pFilename, mz_uint flags, mz_zip_error *pErr);
/* Universal end function - calls either mz_zip_reader_end() or mz_zip_writer_end(). */ /* Universal end function - calls either mz_zip_reader_end() or mz_zip_writer_end(). */
mz_bool mz_zip_end(mz_zip_archive *pZip); mz_bool mz_zip_end(mz_zip_archive *pZip);

View File

@ -567,6 +567,11 @@ bool path_exists(const string& path)
SYSPATH(path, syspath); SYSPATH(path, syspath);
return ACCESS(syspath, 0) == 0; return ACCESS(syspath, 0) == 0;
} }
// Check that the file system object designated by path can be read.
// Returns true only if ACCESS() with the R_OK mode returns 0 for the path.
bool path_readable(const string& path)
{
// NOTE(review): SYSPATH presumably declares syspath as the system-native
// form of path (wide chars on Windows) -- confirm against the macro.
SYSPATH(path, syspath);
// R_OK requests a read permission test; 0 from ACCESS() means success.
return ACCESS(syspath, R_OK) == 0;
}
// Allowed punctuation in the path part of an URI according to RFC2396 // Allowed punctuation in the path part of an URI according to RFC2396
// -_.!~*'():@&=+$, // -_.!~*'():@&=+$,

View File

@ -87,6 +87,8 @@ extern int path_fileprops(const std::string path, struct stat *stp,
/// Returns true if last elt could be checked to exist. False may mean that /// Returns true if last elt could be checked to exist. False may mean that
/// the file/dir does not exist or that an error occurred. /// the file/dir does not exist or that an error occurred.
extern bool path_exists(const std::string& path); extern bool path_exists(const std::string& path);
/// Same as path_exists(), but the path must also be readable.
extern bool path_readable(const std::string& path);
/// Return separator for PATH environment variable /// Return separator for PATH environment variable
extern std::string path_PATHsep(); extern std::string path_PATHsep();

View File

@ -164,10 +164,15 @@ const string& path_pkgdatadir()
// into either utf-8 if transcoding succeeds, or url-encoded // into either utf-8 if transcoding succeeds, or url-encoded
bool printableUrl(const string& fcharset, const string& in, string& out) bool printableUrl(const string& fcharset, const string& in, string& out)
{ {
#ifdef _WIN32
// On windows our paths are always utf-8
out = in;
#else
int ecnt = 0; int ecnt = 0;
if (!transcode(in, out, fcharset, "UTF-8", &ecnt) || ecnt) { if (!transcode(in, out, fcharset, "UTF-8", &ecnt) || ecnt) {
out = url_encode(in, 7); out = url_encode(in, 7);
} }
#endif
return true; return true;
} }

View File

@ -411,7 +411,8 @@ public:
if (m_fn.empty()) { if (m_fn.empty()) {
ret1 = mz_zip_reader_init_mem(&zip, m_data, m_cnt, 0); ret1 = mz_zip_reader_init_mem(&zip, m_data, m_cnt, 0);
} else { } else {
ret1 = mz_zip_reader_init_file(&zip, m_fn.c_str(), 0); SYSPATH(m_fn, realpath);
ret1 = mz_zip_reader_init_file(&zip, realpath, 0);
} }
if (!ret1) { if (!ret1) {
if (m_reason) { if (m_reason) {

View File

@ -32,6 +32,7 @@
#include <psapi.h> #include <psapi.h>
#include "smallut.h" #include "smallut.h"
#include "pathut.h" #include "pathut.h"
#include "transcode.h"
using namespace std; using namespace std;
@ -752,7 +753,7 @@ int ExecCmd::startExec(const string &cmd, const vector<string>& args,
return false; return false;
} }
STARTUPINFO siStartInfo; STARTUPINFOW siStartInfo;
BOOL bSuccess = FALSE; BOOL bSuccess = FALSE;
// Set up members of the PROCESS_INFORMATION structure. // Set up members of the PROCESS_INFORMATION structure.
@ -760,8 +761,8 @@ int ExecCmd::startExec(const string &cmd, const vector<string>& args,
// Set up members of the STARTUPINFO structure. // Set up members of the STARTUPINFO structure.
// This structure specifies the STDIN and STDOUT handles for redirection. // This structure specifies the STDIN and STDOUT handles for redirection.
ZeroMemory(&siStartInfo, sizeof(STARTUPINFO)); ZeroMemory(&siStartInfo, sizeof(siStartInfo));
siStartInfo.cb = sizeof(STARTUPINFO); siStartInfo.cb = sizeof(siStartInfo);
if (m->m_flags & EXF_SHOWWINDOW) { if (m->m_flags & EXF_SHOWWINDOW) {
siStartInfo.dwFlags |= STARTF_USESTDHANDLES; siStartInfo.dwFlags |= STARTF_USESTDHANDLES;
if (m->m_flags & EXF_MAXIMIZED) { if (m->m_flags & EXF_MAXIMIZED) {
@ -782,12 +783,15 @@ int ExecCmd::startExec(const string &cmd, const vector<string>& args,
// Create the child process. // Create the child process.
// Need a writable buffer for the command line, for some reason. // Need a writable buffer for the command line, for some reason.
LOGDEB1("ExecCmd:startExec: cmdline [" << (cmdline) << "]\n" ); LOGDEB("ExecCmd:startExec: cmdline [" << cmdline << "]\n");
#if 0
LPSTR buf = (LPSTR)malloc(cmdline.size() + 1); LPSTR buf = (LPSTR)malloc(cmdline.size() + 1);
memcpy(buf, cmdline.c_str(), cmdline.size()); memcpy(buf, cmdline.c_str(), cmdline.size());
buf[cmdline.size()] = 0; buf[cmdline.size()] = 0;
bSuccess = CreateProcess(NULL, #endif
buf, // command line SYSPATH(cmdline, wcmdline);
bSuccess = CreateProcessW(NULL,
wcmdline, // command line
NULL, // process security attributes NULL, // process security attributes
NULL, // primary thread security attrs NULL, // primary thread security attrs
TRUE, // handles are inherited TRUE, // handles are inherited
@ -798,9 +802,10 @@ int ExecCmd::startExec(const string &cmd, const vector<string>& args,
&m->m_piProcInfo); // PROCESS_INFORMATION &m->m_piProcInfo); // PROCESS_INFORMATION
if (!bSuccess) { if (!bSuccess) {
printError("ExecCmd::doexec: CreateProcess"); printError("ExecCmd::doexec: CreateProcess");
} }
free(envir); free(envir);
free(buf); // free(buf);
// Close child-side handles else we'll never see eofs // Close child-side handles else we'll never see eofs
if (!CloseHandle(hOutputWrite)) if (!CloseHandle(hOutputWrite))
printError("CloseHandle"); printError("CloseHandle");

View File

@ -19,6 +19,9 @@
#include <shellapi.h> #include <shellapi.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include "safewindows.h"
#include "pathut.h"
#include "transcode.h"
using namespace std; using namespace std;
@ -41,6 +44,10 @@ int op_flags;
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
int wargc;
wchar_t **wargv = CommandLineToArgvW(GetCommandLineW(), &wargc);
// Yes we could use wargv
thisprog = argv[0]; thisprog = argv[0];
argc--; argv++; argc--; argv++;
int imode = 0; int imode = 0;
@ -62,7 +69,9 @@ int main(int argc, char *argv[])
if (argc != 1) { if (argc != 1) {
Usage(); Usage();
} }
char *fn = strdup(argv[0]);
wchar_t *wfn = wargv[1];
// Do we need this ? // Do we need this ?
//https://msdn.microsoft.com/en-us/library/windows/desktop/bb762153%28v=vs.85%29.aspx //https://msdn.microsoft.com/en-us/library/windows/desktop/bb762153%28v=vs.85%29.aspx
//CoInitializeEx(NULL, COINIT_APARTMENTTHREADED | COINIT_DISABLE_OLE1DDE); //CoInitializeEx(NULL, COINIT_APARTMENTTHREADED | COINIT_DISABLE_OLE1DDE);
@ -73,9 +82,10 @@ int main(int argc, char *argv[])
default: wmode = SW_SHOWNORMAL; break; default: wmode = SW_SHOWNORMAL; break;
} }
int ret = (int)ShellExecute(NULL, "open", fn, NULL, NULL, wmode); int ret = (int)ShellExecuteW(NULL, L"open", wfn, NULL, NULL, wmode);
if (ret) { if (ret) {
fprintf(stderr, "ShellExecute returned %d\n", ret); fprintf(stderr, "ShellExecute returned %d\n", ret);
} }
LocalFree(wargv);
return ret; return ret;
} }