ptrans: do not transform the translations input by the user (no path_canon()),
as these could be foreign (Windows vs. Unix). fileurltolocalpath: remove the '/' in front of a Windows drive letter path even when not built on Windows. Move all the dubious recoll-specific url code from the generic pathut to rclutil.
This commit is contained in:
parent
fbfa818a3a
commit
be12db218b
@ -1,4 +1,4 @@
|
|||||||
/* Copyright (C) 2006 J.F.Dockes
|
/* Copyright (C) 2006-2022 J.F.Dockes
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
* the Free Software Foundation; either version 2 of the License, or
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
@ -44,7 +44,7 @@ void EditTrans::init(const string& dbdir)
|
|||||||
{
|
{
|
||||||
m_dbdir = path_canon(dbdir);
|
m_dbdir = path_canon(dbdir);
|
||||||
connect(transTW, SIGNAL(itemDoubleClicked(QTableWidgetItem *)),
|
connect(transTW, SIGNAL(itemDoubleClicked(QTableWidgetItem *)),
|
||||||
this, SLOT(onItemDoubleClicked(QTableWidgetItem *)));
|
this, SLOT(onItemDoubleClicked(QTableWidgetItem *)));
|
||||||
connect(cancelPB, SIGNAL(clicked()), this, SLOT(close()));
|
connect(cancelPB, SIGNAL(clicked()), this, SLOT(close()));
|
||||||
|
|
||||||
QString lab = whatIdxLA->text();
|
QString lab = whatIdxLA->text();
|
||||||
@ -57,18 +57,17 @@ void EditTrans::init(const string& dbdir)
|
|||||||
|
|
||||||
ConfSimple *conftrans = theconfig->getPTrans();
|
ConfSimple *conftrans = theconfig->getPTrans();
|
||||||
if (!conftrans)
|
if (!conftrans)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
int row = 0;
|
int row = 0;
|
||||||
vector<string> opaths = conftrans->getNames(m_dbdir);
|
vector<string> opaths = conftrans->getNames(m_dbdir);
|
||||||
for (vector<string>::const_iterator it = opaths.begin();
|
for (const auto& opath : opaths) {
|
||||||
it != opaths.end(); it++) {
|
transTW->setRowCount(row+1);
|
||||||
transTW->setRowCount(row+1);
|
transTW->setItem(row, 0, new QTableWidgetItem(path2qs(opath)));
|
||||||
transTW->setItem(row, 0, new QTableWidgetItem(path2qs(*it)));
|
string npath;
|
||||||
string npath;
|
conftrans->get(opath, npath, m_dbdir);
|
||||||
conftrans->get(*it, npath, m_dbdir);
|
transTW->setItem(row, 1, new QTableWidgetItem(path2qs(npath)));
|
||||||
transTW->setItem(row, 1, new QTableWidgetItem(path2qs(npath)));
|
row++;
|
||||||
row++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
resize(QSize(640, 300).expandedTo(minimumSizeHint()));
|
resize(QSize(640, 300).expandedTo(minimumSizeHint()));
|
||||||
@ -83,18 +82,18 @@ void EditTrans::on_savePB_clicked()
|
|||||||
{
|
{
|
||||||
ConfSimple *conftrans = theconfig->getPTrans();
|
ConfSimple *conftrans = theconfig->getPTrans();
|
||||||
if (!conftrans) {
|
if (!conftrans) {
|
||||||
QMessageBox::warning(0, "Recoll", tr("Config error"));
|
QMessageBox::warning(0, "Recoll", tr("Config error"));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
conftrans->holdWrites(true);
|
conftrans->holdWrites(true);
|
||||||
conftrans->eraseKey(m_dbdir);
|
conftrans->eraseKey(m_dbdir);
|
||||||
|
|
||||||
for (int row = 0; row < transTW->rowCount(); row++) {
|
for (int row = 0; row < transTW->rowCount(); row++) {
|
||||||
QTableWidgetItem *item0 = transTW->item(row, 0);
|
QTableWidgetItem *item0 = transTW->item(row, 0);
|
||||||
string from = path_canon(qs2path(item0->text()));
|
string from = qs2path(item0->text());
|
||||||
QTableWidgetItem *item1 = transTW->item(row, 1);
|
QTableWidgetItem *item1 = transTW->item(row, 1);
|
||||||
string to = path_canon(qs2path(item1->text()));
|
string to = qs2path(item1->text());
|
||||||
conftrans->set(from, to, m_dbdir);
|
conftrans->set(from, to, m_dbdir);
|
||||||
}
|
}
|
||||||
conftrans->holdWrites(false);
|
conftrans->holdWrites(false);
|
||||||
// The rcldb does not use the same configuration object, but a
|
// The rcldb does not use the same configuration object, but a
|
||||||
@ -118,12 +117,12 @@ void EditTrans::on_delPB_clicked()
|
|||||||
QModelIndexList indexes = transTW->selectionModel()->selectedIndexes();
|
QModelIndexList indexes = transTW->selectionModel()->selectedIndexes();
|
||||||
vector<int> rows;
|
vector<int> rows;
|
||||||
for (int i = 0; i < indexes.size(); i++) {
|
for (int i = 0; i < indexes.size(); i++) {
|
||||||
rows.push_back(indexes.at(i).row());
|
rows.push_back(indexes.at(i).row());
|
||||||
}
|
}
|
||||||
sort(rows.begin(), rows.end());
|
sort(rows.begin(), rows.end());
|
||||||
rows.resize(unique(rows.begin(), rows.end()) - rows.begin());
|
rows.resize(unique(rows.begin(), rows.end()) - rows.begin());
|
||||||
for (int i = rows.size()-1; i >= 0; i--) {
|
for (int i = rows.size()-1; i >= 0; i--) {
|
||||||
transTW->removeRow(rows[i]);
|
transTW->removeRow(rows[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -131,8 +130,8 @@ void EditTrans::on_transTW_itemSelectionChanged()
|
|||||||
{
|
{
|
||||||
QModelIndexList indexes = transTW->selectionModel()->selectedIndexes();
|
QModelIndexList indexes = transTW->selectionModel()->selectedIndexes();
|
||||||
if(indexes.size() < 1)
|
if(indexes.size() < 1)
|
||||||
delPB->setEnabled(0);
|
delPB->setEnabled(0);
|
||||||
else
|
else
|
||||||
delPB->setEnabled(1);
|
delPB->setEnabled(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -39,7 +39,7 @@ AM_CPPFLAGS = -Wall -Wno-unused -std=c++11 \
|
|||||||
$(DEFS)
|
$(DEFS)
|
||||||
|
|
||||||
noinst_PROGRAMS = plaintorich textsplit fstreewalk rclconfig hldata unac mbox \
|
noinst_PROGRAMS = plaintorich textsplit fstreewalk rclconfig hldata unac mbox \
|
||||||
circache wipedir mimetype fileudi x11mon trqrstore ecrontab rcldb
|
circache wipedir mimetype fileudi x11mon trqrstore ecrontab rcldb rclutil
|
||||||
|
|
||||||
ecrontab_SOURCES = trecrontab.cpp
|
ecrontab_SOURCES = trecrontab.cpp
|
||||||
ecrontab_LDADD = ../librecoll.la
|
ecrontab_LDADD = ../librecoll.la
|
||||||
@ -68,6 +68,9 @@ rclconfig_LDADD = ../librecoll.la
|
|||||||
rcldb_SOURCES = trrcldb.cpp
|
rcldb_SOURCES = trrcldb.cpp
|
||||||
rcldb_LDADD = ../librecoll.la
|
rcldb_LDADD = ../librecoll.la
|
||||||
|
|
||||||
|
rclutil_SOURCES = trrclutil.cpp
|
||||||
|
rclutil_LDADD = ../librecoll.la
|
||||||
|
|
||||||
textsplit_SOURCES = trtextsplit.cpp
|
textsplit_SOURCES = trtextsplit.cpp
|
||||||
textsplit_LDADD = ../librecoll.la
|
textsplit_LDADD = ../librecoll.la
|
||||||
|
|
||||||
|
|||||||
@ -1,6 +1,30 @@
|
|||||||
|
|
||||||
#include "rclutil.h"
|
#include "rclutil.h"
|
||||||
|
|
||||||
|
#include <getopt.h>
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <map>
|
||||||
|
|
||||||
|
#include "pathut.h"
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
static std::map<std::string, int> options {
|
||||||
|
{"path_to_thumb", 0},
|
||||||
|
{"url_encode", 0},
|
||||||
|
};
|
||||||
|
|
||||||
|
static const char *thisprog;
|
||||||
|
static void Usage(void)
|
||||||
|
{
|
||||||
|
string sopts;
|
||||||
|
for (const auto& opt: options) {
|
||||||
|
sopts += "--" + opt.first + "\n";
|
||||||
|
}
|
||||||
|
fprintf(stderr, "%s: usage: %s\n%s", thisprog, thisprog, sopts.c_str());
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
void path_to_thumb(const string& _input)
|
void path_to_thumb(const string& _input)
|
||||||
{
|
{
|
||||||
@ -17,35 +41,52 @@ void path_to_thumb(const string& _input)
|
|||||||
thumbPathForUrl(input, 7, path);
|
thumbPathForUrl(input, 7, path);
|
||||||
cout << path << endl;
|
cout << path << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
const char *thisprog;
|
int main(int argc, char **argv)
|
||||||
|
|
||||||
int main(int argc, const char **argv)
|
|
||||||
{
|
{
|
||||||
thisprog = *argv++;
|
thisprog = *argv;
|
||||||
argc--;
|
std::vector<struct option> long_options;
|
||||||
|
|
||||||
string s;
|
for (auto& entry : options) {
|
||||||
vector<string>::const_iterator it;
|
struct option opt;
|
||||||
|
opt.name = entry.first.c_str();
|
||||||
#if 0
|
opt.has_arg = 0;
|
||||||
if (argc > 1) {
|
opt.flag = &entry.second;
|
||||||
cerr << "Usage: thumbpath <filepath>" << endl;
|
opt.val = 1;
|
||||||
exit(1);
|
long_options.push_back(opt);
|
||||||
}
|
}
|
||||||
string input;
|
long_options.push_back({0, 0, 0, 0});
|
||||||
if (argc == 1) {
|
|
||||||
input = *argv++;
|
while (getopt_long(argc, argv, "", &long_options[0], nullptr) != -1) {
|
||||||
if (input.empty()) {
|
}
|
||||||
cerr << "Usage: thumbpath <filepath>" << endl;
|
if (options["path_to_thumb"]) {
|
||||||
exit(1);
|
if (optind >= argc) {
|
||||||
|
cerr << "Usage: trrcutil --path_to_thumb <filepath>" << "\n";
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
string input = argv[optind];
|
||||||
|
optind++;
|
||||||
|
if (optind != argc) {
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
path_to_thumb(input);
|
path_to_thumb(input);
|
||||||
} else {
|
} else if (options["url_encode"]) {
|
||||||
while (getline(cin, input)) {
|
if (optind >= argc) {
|
||||||
path_to_thumb(input);
|
cerr << "Usage: trsmallut --url_encode <arg> [offs=0]\n";
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
string s = argv[optind];
|
||||||
|
optind++;
|
||||||
|
int offs = 0;
|
||||||
|
if (optind != argc) {
|
||||||
|
offs = atoi(argv[optind]);
|
||||||
|
optind++;
|
||||||
|
}
|
||||||
|
if (optind != argc) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
cout << "url_encode(" << s << ", " << offs << ") -> [" << url_encode(s, offs) << "]\n";
|
||||||
|
} else {
|
||||||
|
Usage();
|
||||||
}
|
}
|
||||||
exit(0);
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1107,179 +1107,6 @@ bool path_access(const std::string& path, int mode)
|
|||||||
return ACCESS(syspath, mode) == 0;
|
return ACCESS(syspath, mode) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* There is a lot of vagueness about what should be percent-encoded or
|
|
||||||
* not in a file:// url. The constraint that we have is that we may use
|
|
||||||
* the encoded URL to compute (MD5) a thumbnail path according to the
|
|
||||||
* freedesktop.org thumbnail spec, which itself does not define what
|
|
||||||
* should be escaped. We choose to exactly escape what gio does, as
|
|
||||||
* implemented in glib/gconvert.c:g_escape_uri_string(uri, UNSAFE_PATH).
|
|
||||||
* Hopefully, the other desktops have the same set of escaped chars.
|
|
||||||
* Note that $ is not encoded, so the value is not shell-safe.
|
|
||||||
*/
|
|
||||||
string url_encode(const string& url, string::size_type offs)
|
|
||||||
{
|
|
||||||
string out = url.substr(0, offs);
|
|
||||||
const char *cp = url.c_str();
|
|
||||||
for (string::size_type i = offs; i < url.size(); i++) {
|
|
||||||
unsigned int c;
|
|
||||||
const char *h = "0123456789ABCDEF";
|
|
||||||
c = cp[i];
|
|
||||||
if (c <= 0x20 ||
|
|
||||||
c >= 0x7f ||
|
|
||||||
c == '"' ||
|
|
||||||
c == '#' ||
|
|
||||||
c == '%' ||
|
|
||||||
c == ';' ||
|
|
||||||
c == '<' ||
|
|
||||||
c == '>' ||
|
|
||||||
c == '?' ||
|
|
||||||
c == '[' ||
|
|
||||||
c == '\\' ||
|
|
||||||
c == ']' ||
|
|
||||||
c == '^' ||
|
|
||||||
c == '`' ||
|
|
||||||
c == '{' ||
|
|
||||||
c == '|' ||
|
|
||||||
c == '}') {
|
|
||||||
out += '%';
|
|
||||||
out += h[(c >> 4) & 0xf];
|
|
||||||
out += h[c & 0xf];
|
|
||||||
} else {
|
|
||||||
out += char(c);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return out;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline int h2d(int c) {
|
|
||||||
if ('0' <= c && c <= '9')
|
|
||||||
return c - '0';
|
|
||||||
else if ('A' <= c && c <= 'F')
|
|
||||||
return 10 + c - 'A';
|
|
||||||
else if ('a' <= c && c <= 'f')
|
|
||||||
return 10 + c - 'a';
|
|
||||||
else
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
string url_decode(const string &in)
|
|
||||||
{
|
|
||||||
if (in.size() <= 2)
|
|
||||||
return in;
|
|
||||||
string out;
|
|
||||||
out.reserve(in.size());
|
|
||||||
const char *cp = in.c_str();
|
|
||||||
string::size_type i = 0;
|
|
||||||
for (; i < in.size() - 2; i++) {
|
|
||||||
if (cp[i] == '%') {
|
|
||||||
int d1 = h2d(cp[i+1]);
|
|
||||||
int d2 = h2d(cp[i+2]);
|
|
||||||
if (d1 != -1 && d2 != -1) {
|
|
||||||
out += (d1 << 4) + d2;
|
|
||||||
} else {
|
|
||||||
out += '%';
|
|
||||||
out += cp[i+1];
|
|
||||||
out += cp[i+2];
|
|
||||||
}
|
|
||||||
i += 2;
|
|
||||||
} else {
|
|
||||||
out += cp[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
while (i < in.size()) {
|
|
||||||
out += cp[i++];
|
|
||||||
}
|
|
||||||
return out;
|
|
||||||
}
|
|
||||||
|
|
||||||
string url_gpath(const string& url)
|
|
||||||
{
|
|
||||||
// Remove the access schema part (or whatever it's called)
|
|
||||||
string::size_type colon = url.find_first_of(":");
|
|
||||||
if (colon == string::npos || colon == url.size() - 1) {
|
|
||||||
return url;
|
|
||||||
}
|
|
||||||
// If there are non-alphanum chars before the ':', then there
|
|
||||||
// probably is no scheme. Whatever...
|
|
||||||
for (string::size_type i = 0; i < colon; i++) {
|
|
||||||
if (!isalnum(url.at(i))) {
|
|
||||||
return url;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// In addition we canonize the path to remove empty host parts
|
|
||||||
// (for compatibility with older versions of recoll where file://
|
|
||||||
// was hardcoded, but the local path was used for doc
|
|
||||||
// identification.
|
|
||||||
return path_canon(url.substr(colon + 1));
|
|
||||||
}
|
|
||||||
|
|
||||||
string url_parentfolder(const string& url)
|
|
||||||
{
|
|
||||||
// In general, the parent is the directory above the full path
|
|
||||||
string parenturl = path_getfather(url_gpath(url));
|
|
||||||
// But if this is http, make sure to keep the host part. Recoll
|
|
||||||
// only has file or http urls for now.
|
|
||||||
bool isfileurl = urlisfileurl(url);
|
|
||||||
if (!isfileurl && parenturl == "/") {
|
|
||||||
parenturl = url_gpath(url);
|
|
||||||
}
|
|
||||||
return isfileurl ? string("file://") + parenturl :
|
|
||||||
string("http://") + parenturl;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// Convert to file path if url is like file:
|
|
||||||
// Note: this only works with our internal pseudo-urls which are not
|
|
||||||
// encoded/escaped
|
|
||||||
string fileurltolocalpath(string url)
|
|
||||||
{
|
|
||||||
if (url.find("file://") == 0) {
|
|
||||||
url = url.substr(7, string::npos);
|
|
||||||
} else {
|
|
||||||
return string();
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef _WIN32
|
|
||||||
// Absolute file urls are like: file:///c:/mydir/...
|
|
||||||
// Get rid of the initial '/'
|
|
||||||
if (url.size() >= 3 && url[0] == '/' && isalpha(url[1]) && url[2] == ':') {
|
|
||||||
url = url.substr(1);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Removing the fragment part. This is exclusively used when
|
|
||||||
// executing a viewer for the recoll manual, and we only strip the
|
|
||||||
// part after # if it is preceded by .html
|
|
||||||
string::size_type pos;
|
|
||||||
if ((pos = url.rfind(".html#")) != string::npos) {
|
|
||||||
url.erase(pos + 5);
|
|
||||||
} else if ((pos = url.rfind(".htm#")) != string::npos) {
|
|
||||||
url.erase(pos + 4);
|
|
||||||
}
|
|
||||||
|
|
||||||
return url;
|
|
||||||
}
|
|
||||||
|
|
||||||
static const string cstr_fileu("file://");
|
|
||||||
|
|
||||||
string path_pathtofileurl(const string& path)
|
|
||||||
{
|
|
||||||
// We're supposed to receive a canonic absolute path, but on windows we
|
|
||||||
// may need to add a '/' in front of the drive spec
|
|
||||||
string url(cstr_fileu);
|
|
||||||
if (path.empty() || path[0] != '/') {
|
|
||||||
url.push_back('/');
|
|
||||||
}
|
|
||||||
url += path;
|
|
||||||
return url;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool urlisfileurl(const string& url)
|
|
||||||
{
|
|
||||||
return url.find("file://") == 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifndef NO_STD_REGEX
|
#ifndef NO_STD_REGEX
|
||||||
static std::regex
|
static std::regex
|
||||||
re_uriparse("^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?",
|
re_uriparse("^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?",
|
||||||
|
|||||||
@ -213,22 +213,6 @@ bool path_utimes(const std::string& path, struct path_timeval times[2]);
|
|||||||
* @param mode is an std::fstream mode (ios::in etc.) */
|
* @param mode is an std::fstream mode (ios::in etc.) */
|
||||||
extern bool path_streamopen(const std::string& path, int mode, std::fstream& outstream);
|
extern bool path_streamopen(const std::string& path, int mode, std::fstream& outstream);
|
||||||
|
|
||||||
/// Encode according to rfc 1738
|
|
||||||
extern std::string url_encode(const std::string& url, std::string::size_type offs = 0);
|
|
||||||
extern std::string url_decode(const std::string& encoded);
|
|
||||||
//// Convert to file path if url is like file://. This modifies the
|
|
||||||
//// input (and returns a copy for convenience)
|
|
||||||
extern std::string fileurltolocalpath(std::string url);
|
|
||||||
/// Test for file:/// url
|
|
||||||
extern bool urlisfileurl(const std::string& url);
|
|
||||||
///
|
|
||||||
extern std::string url_parentfolder(const std::string& url);
|
|
||||||
/// Return the host+path part of an url. This is not a general
|
|
||||||
/// routine, it does the right thing only in the recoll context
|
|
||||||
extern std::string url_gpath(const std::string& url);
|
|
||||||
/// Turn absolute path into file:// url
|
|
||||||
extern std::string path_pathtofileurl(const std::string& path);
|
|
||||||
|
|
||||||
/// URI parser, loosely from rfc2396.txt
|
/// URI parser, loosely from rfc2396.txt
|
||||||
class ParsedUri {
|
class ParsedUri {
|
||||||
public:
|
public:
|
||||||
|
|||||||
@ -293,6 +293,175 @@ const string& path_pkgdatadir()
|
|||||||
return datadir;
|
return datadir;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* There is a lot of vagueness about what should be percent-encoded or
|
||||||
|
* not in a file:// url. The constraint that we have is that we may use
|
||||||
|
* the encoded URL to compute (MD5) a thumbnail path according to the
|
||||||
|
* freedesktop.org thumbnail spec, which itself does not define what
|
||||||
|
* should be escaped. We choose to exactly escape what gio does, as
|
||||||
|
* implemented in glib/gconvert.c:g_escape_uri_string(uri, UNSAFE_PATH).
|
||||||
|
* Hopefully, the other desktops have the same set of escaped chars.
|
||||||
|
* Note that $ is not encoded, so the value is not shell-safe.
|
||||||
|
*/
|
||||||
|
string url_encode(const string& url, string::size_type offs)
|
||||||
|
{
|
||||||
|
string out = url.substr(0, offs);
|
||||||
|
const char *cp = url.c_str();
|
||||||
|
for (string::size_type i = offs; i < url.size(); i++) {
|
||||||
|
unsigned int c;
|
||||||
|
const char *h = "0123456789ABCDEF";
|
||||||
|
c = cp[i];
|
||||||
|
if (c <= 0x20 ||
|
||||||
|
c >= 0x7f ||
|
||||||
|
c == '"' ||
|
||||||
|
c == '#' ||
|
||||||
|
c == '%' ||
|
||||||
|
c == ';' ||
|
||||||
|
c == '<' ||
|
||||||
|
c == '>' ||
|
||||||
|
c == '?' ||
|
||||||
|
c == '[' ||
|
||||||
|
c == '\\' ||
|
||||||
|
c == ']' ||
|
||||||
|
c == '^' ||
|
||||||
|
c == '`' ||
|
||||||
|
c == '{' ||
|
||||||
|
c == '|' ||
|
||||||
|
c == '}') {
|
||||||
|
out += '%';
|
||||||
|
out += h[(c >> 4) & 0xf];
|
||||||
|
out += h[c & 0xf];
|
||||||
|
} else {
|
||||||
|
out += char(c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int h2d(int c) {
|
||||||
|
if ('0' <= c && c <= '9')
|
||||||
|
return c - '0';
|
||||||
|
else if ('A' <= c && c <= 'F')
|
||||||
|
return 10 + c - 'A';
|
||||||
|
else if ('a' <= c && c <= 'f')
|
||||||
|
return 10 + c - 'a';
|
||||||
|
else
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
string url_decode(const string &in)
|
||||||
|
{
|
||||||
|
if (in.size() <= 2)
|
||||||
|
return in;
|
||||||
|
string out;
|
||||||
|
out.reserve(in.size());
|
||||||
|
const char *cp = in.c_str();
|
||||||
|
string::size_type i = 0;
|
||||||
|
for (; i < in.size() - 2; i++) {
|
||||||
|
if (cp[i] == '%') {
|
||||||
|
int d1 = h2d(cp[i+1]);
|
||||||
|
int d2 = h2d(cp[i+2]);
|
||||||
|
if (d1 != -1 && d2 != -1) {
|
||||||
|
out += (d1 << 4) + d2;
|
||||||
|
} else {
|
||||||
|
out += '%';
|
||||||
|
out += cp[i+1];
|
||||||
|
out += cp[i+2];
|
||||||
|
}
|
||||||
|
i += 2;
|
||||||
|
} else {
|
||||||
|
out += cp[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
while (i < in.size()) {
|
||||||
|
out += cp[i++];
|
||||||
|
}
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
string url_gpath(const string& url)
|
||||||
|
{
|
||||||
|
// Remove the access schema part (or whatever it's called)
|
||||||
|
string::size_type colon = url.find_first_of(":");
|
||||||
|
if (colon == string::npos || colon == url.size() - 1) {
|
||||||
|
return url;
|
||||||
|
}
|
||||||
|
// If there are non-alphanum chars before the ':', then there
|
||||||
|
// probably is no scheme. Whatever...
|
||||||
|
for (string::size_type i = 0; i < colon; i++) {
|
||||||
|
if (!isalnum(url.at(i))) {
|
||||||
|
return url;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// In addition we canonize the path to remove empty host parts
|
||||||
|
// (for compatibility with older versions of recoll where file://
|
||||||
|
// was hardcoded, but the local path was used for doc
|
||||||
|
// identification.
|
||||||
|
return path_canon(url.substr(colon + 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
string url_parentfolder(const string& url)
|
||||||
|
{
|
||||||
|
// In general, the parent is the directory above the full path
|
||||||
|
string parenturl = path_getfather(url_gpath(url));
|
||||||
|
// But if this is http, make sure to keep the host part. Recoll
|
||||||
|
// only has file or http urls for now.
|
||||||
|
bool isfileurl = urlisfileurl(url);
|
||||||
|
if (!isfileurl && parenturl == "/") {
|
||||||
|
parenturl = url_gpath(url);
|
||||||
|
}
|
||||||
|
return isfileurl ? string("file://") + parenturl :
|
||||||
|
string("http://") + parenturl;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Convert to file path if url is like file:
|
||||||
|
// Note: this only works with our internal pseudo-urls which are not
|
||||||
|
// encoded/escaped
|
||||||
|
string fileurltolocalpath(string url)
|
||||||
|
{
|
||||||
|
if (url.find("file://") == 0) {
|
||||||
|
url = url.substr(7, string::npos);
|
||||||
|
} else {
|
||||||
|
return string();
|
||||||
|
}
|
||||||
|
|
||||||
|
// If this looks like a Windows path: absolute file urls are like: file:///c:/mydir/...
|
||||||
|
// Get rid of the initial '/'
|
||||||
|
if (url.size() >= 3 && url[0] == '/' && isalpha(url[1]) && url[2] == ':') {
|
||||||
|
url = url.substr(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Removing the fragment part. This is exclusively used when
|
||||||
|
// executing a viewer for the recoll manual, and we only strip the
|
||||||
|
// part after # if it is preceded by .html
|
||||||
|
string::size_type pos;
|
||||||
|
if ((pos = url.rfind(".html#")) != string::npos) {
|
||||||
|
url.erase(pos + 5);
|
||||||
|
} else if ((pos = url.rfind(".htm#")) != string::npos) {
|
||||||
|
url.erase(pos + 4);
|
||||||
|
}
|
||||||
|
|
||||||
|
return url;
|
||||||
|
}
|
||||||
|
|
||||||
|
string path_pathtofileurl(const string& path)
|
||||||
|
{
|
||||||
|
// We're supposed to receive a canonic absolute path, but on windows we
|
||||||
|
// may need to add a '/' in front of the drive spec
|
||||||
|
string url(cstr_fileu);
|
||||||
|
if (path.empty() || path[0] != '/') {
|
||||||
|
url.push_back('/');
|
||||||
|
}
|
||||||
|
url += path;
|
||||||
|
return url;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool urlisfileurl(const string& url)
|
||||||
|
{
|
||||||
|
return url.find("file://") == 0;
|
||||||
|
}
|
||||||
|
|
||||||
// Printable url: this is used to transcode from the system charset
|
// Printable url: this is used to transcode from the system charset
|
||||||
// into either utf-8 if transcoding succeeds, or url-encoded
|
// into either utf-8 if transcoding succeeds, or url-encoded
|
||||||
bool printableUrl(const string& fcharset, const string& in, string& out)
|
bool printableUrl(const string& fcharset, const string& in, string& out)
|
||||||
|
|||||||
@ -43,6 +43,22 @@ extern const std::string& path_pkgdatadir();
|
|||||||
extern std::string path_thisexecpath();
|
extern std::string path_thisexecpath();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/// Encode according to rfc 1738
|
||||||
|
extern std::string url_encode(const std::string& url, std::string::size_type offs = 0);
|
||||||
|
extern std::string url_decode(const std::string& encoded);
|
||||||
|
//// Convert to file path if url is like file://. This modifies the
|
||||||
|
//// input (and returns a copy for convenience)
|
||||||
|
extern std::string fileurltolocalpath(std::string url);
|
||||||
|
/// Test for file:/// url
|
||||||
|
extern bool urlisfileurl(const std::string& url);
|
||||||
|
///
|
||||||
|
extern std::string url_parentfolder(const std::string& url);
|
||||||
|
/// Return the host+path part of an url. This is not a general
|
||||||
|
/// routine, it does the right thing only in the recoll context
|
||||||
|
extern std::string url_gpath(const std::string& url);
|
||||||
|
/// Turn absolute path into file:// url
|
||||||
|
extern std::string path_pathtofileurl(const std::string& path);
|
||||||
|
|
||||||
/// Transcode to utf-8 if possible or url encoding, for display.
|
/// Transcode to utf-8 if possible or url encoding, for display.
|
||||||
extern bool printableUrl(const std::string& fcharset,
|
extern bool printableUrl(const std::string& fcharset,
|
||||||
const std::string& in, std::string& out);
|
const std::string& in, std::string& out);
|
||||||
|
|||||||
@ -26,7 +26,7 @@
|
|||||||
|
|
||||||
#include "cstr.h"
|
#include "cstr.h"
|
||||||
#include "log.h"
|
#include "log.h"
|
||||||
#include "pathut.h"
|
#include "rclutil.h"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user