diff --git a/src/qtgui/ptrans_w.cpp b/src/qtgui/ptrans_w.cpp index 8fa27111..b1feba6d 100644 --- a/src/qtgui/ptrans_w.cpp +++ b/src/qtgui/ptrans_w.cpp @@ -1,4 +1,4 @@ -/* Copyright (C) 2006 J.F.Dockes +/* Copyright (C) 2006-2022 J.F.Dockes * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -44,7 +44,7 @@ void EditTrans::init(const string& dbdir) { m_dbdir = path_canon(dbdir); connect(transTW, SIGNAL(itemDoubleClicked(QTableWidgetItem *)), - this, SLOT(onItemDoubleClicked(QTableWidgetItem *))); + this, SLOT(onItemDoubleClicked(QTableWidgetItem *))); connect(cancelPB, SIGNAL(clicked()), this, SLOT(close())); QString lab = whatIdxLA->text(); @@ -57,18 +57,17 @@ void EditTrans::init(const string& dbdir) ConfSimple *conftrans = theconfig->getPTrans(); if (!conftrans) - return; + return; int row = 0; vector opaths = conftrans->getNames(m_dbdir); - for (vector::const_iterator it = opaths.begin(); - it != opaths.end(); it++) { - transTW->setRowCount(row+1); - transTW->setItem(row, 0, new QTableWidgetItem(path2qs(*it))); - string npath; - conftrans->get(*it, npath, m_dbdir); - transTW->setItem(row, 1, new QTableWidgetItem(path2qs(npath))); - row++; + for (const auto& opath : opaths) { + transTW->setRowCount(row+1); + transTW->setItem(row, 0, new QTableWidgetItem(path2qs(opath))); + string npath; + conftrans->get(opath, npath, m_dbdir); + transTW->setItem(row, 1, new QTableWidgetItem(path2qs(npath))); + row++; } resize(QSize(640, 300).expandedTo(minimumSizeHint())); @@ -83,18 +82,18 @@ void EditTrans::on_savePB_clicked() { ConfSimple *conftrans = theconfig->getPTrans(); if (!conftrans) { - QMessageBox::warning(0, "Recoll", tr("Config error")); - return; + QMessageBox::warning(0, "Recoll", tr("Config error")); + return; } conftrans->holdWrites(true); conftrans->eraseKey(m_dbdir); for (int row = 0; row < transTW->rowCount(); row++) { - QTableWidgetItem *item0 = transTW->item(row, 0); - string from = path_canon(qs2path(item0->text())); - QTableWidgetItem *item1 = transTW->item(row, 1); - string to = path_canon(qs2path(item1->text())); - conftrans->set(from, to, m_dbdir); + QTableWidgetItem *item0 = transTW->item(row, 0); + string from = qs2path(item0->text()); + QTableWidgetItem *item1 = transTW->item(row, 1); + string to = qs2path(item1->text()); + conftrans->set(from, to, m_dbdir); } conftrans->holdWrites(false); // The rcldb does not use the same configuration object, but a @@ -118,12 +117,12 @@ void EditTrans::on_delPB_clicked() QModelIndexList indexes = transTW->selectionModel()->selectedIndexes(); vector rows; for (int i = 0; i < indexes.size(); i++) { - rows.push_back(indexes.at(i).row()); + rows.push_back(indexes.at(i).row()); } sort(rows.begin(), rows.end()); rows.resize(unique(rows.begin(), rows.end()) - rows.begin()); for (int i = rows.size()-1; i >= 0; i--) { - transTW->removeRow(rows[i]); + transTW->removeRow(rows[i]); } } @@ -131,8 +130,8 @@ void EditTrans::on_transTW_itemSelectionChanged() { QModelIndexList indexes = transTW->selectionModel()->selectedIndexes(); if(indexes.size() < 1) - delPB->setEnabled(0); + delPB->setEnabled(0); else - delPB->setEnabled(1); + delPB->setEnabled(1); } diff --git a/src/testmains/Makefile.am b/src/testmains/Makefile.am index 74a6185b..74f8dee8 100644 --- a/src/testmains/Makefile.am +++ b/src/testmains/Makefile.am @@ -39,7 +39,7 @@ AM_CPPFLAGS = -Wall -Wno-unused -std=c++11 \ $(DEFS) noinst_PROGRAMS = plaintorich textsplit fstreewalk rclconfig hldata unac mbox \ - circache wipedir mimetype fileudi x11mon trqrstore ecrontab rcldb + circache wipedir mimetype fileudi x11mon trqrstore ecrontab rcldb rclutil ecrontab_SOURCES = trecrontab.cpp ecrontab_LDADD = ../librecoll.la @@ -68,6 +68,9 @@ rclconfig_LDADD = ../librecoll.la rcldb_SOURCES = trrcldb.cpp rcldb_LDADD = ../librecoll.la +rclutil_SOURCES = trrclutil.cpp +rclutil_LDADD = ../librecoll.la + textsplit_SOURCES = trtextsplit.cpp textsplit_LDADD = ../librecoll.la diff --git a/src/testmains/trrclutil.cpp b/src/testmains/trrclutil.cpp index 6a180236..bd45ec4a 100644 --- a/src/testmains/trrclutil.cpp +++ b/src/testmains/trrclutil.cpp @@ -1,6 +1,30 @@ #include "rclutil.h" +#include + +#include +#include + +#include "pathut.h" + +using namespace std; + +static std::map options { + {"path_to_thumb", 0}, + {"url_encode", 0}, + }; + +static const char *thisprog; +static void Usage(void) +{ + string sopts; + for (const auto& opt: options) { + sopts += "--" + opt.first + "\n"; + } + fprintf(stderr, "%s: usage: %s\n%s", thisprog, thisprog, sopts.c_str()); + exit(1); +} void path_to_thumb(const string& _input) { @@ -17,35 +41,52 @@ void path_to_thumb(const string& _input) thumbPathForUrl(input, 7, path); cout << path << endl; } - -const char *thisprog; - -int main(int argc, const char **argv) + +int main(int argc, char **argv) { - thisprog = *argv++; - argc--; + thisprog = *argv; + std::vector long_options; - string s; - vector::const_iterator it; - -#if 0 - if (argc > 1) { - cerr << "Usage: thumbpath " << endl; - exit(1); + for (auto& entry : options) { + struct option opt; + opt.name = entry.first.c_str(); + opt.has_arg = 0; + opt.flag = &entry.second; + opt.val = 1; + long_options.push_back(opt); } - string input; - if (argc == 1) { - input = *argv++; - if (input.empty()) { - cerr << "Usage: thumbpath " << endl; - exit(1); + long_options.push_back({0, 0, 0, 0}); + + while (getopt_long(argc, argv, "", &long_options[0], nullptr) != -1) { + } + if (options["path_to_thumb"]) { + if (optind >= argc) { + cerr << "Usage: trrcutil --path_to_thumb " << "\n"; + return 1; + } + string input = argv[optind]; + optind++; + if (optind != argc) { + return 1; } path_to_thumb(input); - } else { - while (getline(cin, input)) { - path_to_thumb(input); + } else if (options["url_encode"]) { + if (optind >= argc) { + cerr << "Usage: trsmallut --url_encode [offs=0]\n"; + return 1; } + string s = argv[optind]; + optind++; + int offs = 0; + if (optind != argc) { + offs = atoi(argv[optind]); + optind++; + } + if (optind != argc) { + return 1; + } + cout << "url_encode(" << s << ", " << offs << ") -> [" << url_encode(s, offs) << "]\n"; + } else { + Usage(); } - exit(0); -#endif } diff --git a/src/utils/pathut.cpp b/src/utils/pathut.cpp index 9874c92b..689fa307 100644 --- a/src/utils/pathut.cpp +++ b/src/utils/pathut.cpp @@ -1107,179 +1107,6 @@ bool path_access(const std::string& path, int mode) return ACCESS(syspath, mode) == 0; } -/* There is a lot of vagueness about what should be percent-encoded or - * not in a file:// url. The constraint that we have is that we may use - * the encoded URL to compute (MD5) a thumbnail path according to the - * freedesktop.org thumbnail spec, which itself does not define what - * should be escaped. We choose to exactly escape what gio does, as - * implemented in glib/gconvert.c:g_escape_uri_string(uri, UNSAFE_PATH). - * Hopefully, the other desktops have the same set of escaped chars. - * Note that $ is not encoded, so the value is not shell-safe. - */ -string url_encode(const string& url, string::size_type offs) -{ - string out = url.substr(0, offs); - const char *cp = url.c_str(); - for (string::size_type i = offs; i < url.size(); i++) { - unsigned int c; - const char *h = "0123456789ABCDEF"; - c = cp[i]; - if (c <= 0x20 || - c >= 0x7f || - c == '"' || - c == '#' || - c == '%' || - c == ';' || - c == '<' || - c == '>' || - c == '?' || - c == '[' || - c == '\\' || - c == ']' || - c == '^' || - c == '`' || - c == '{' || - c == '|' || - c == '}') { - out += '%'; - out += h[(c >> 4) & 0xf]; - out += h[c & 0xf]; - } else { - out += char(c); - } - } - return out; -} - -static inline int h2d(int c) { - if ('0' <= c && c <= '9') - return c - '0'; - else if ('A' <= c && c <= 'F') - return 10 + c - 'A'; - else if ('a' <= c && c <= 'f') - return 10 + c - 'a'; - else - return -1; -} - -string url_decode(const string &in) -{ - if (in.size() <= 2) - return in; - string out; - out.reserve(in.size()); - const char *cp = in.c_str(); - string::size_type i = 0; - for (; i < in.size() - 2; i++) { - if (cp[i] == '%') { - int d1 = h2d(cp[i+1]); - int d2 = h2d(cp[i+2]); - if (d1 != -1 && d2 != -1) { - out += (d1 << 4) + d2; - } else { - out += '%'; - out += cp[i+1]; - out += cp[i+2]; - } - i += 2; - } else { - out += cp[i]; - } - } - while (i < in.size()) { - out += cp[i++]; - } - return out; -} - -string url_gpath(const string& url) -{ - // Remove the access schema part (or whatever it's called) - string::size_type colon = url.find_first_of(":"); - if (colon == string::npos || colon == url.size() - 1) { - return url; - } - // If there are non-alphanum chars before the ':', then there - // probably is no scheme. Whatever... - for (string::size_type i = 0; i < colon; i++) { - if (!isalnum(url.at(i))) { - return url; - } - } - - // In addition we canonize the path to remove empty host parts - // (for compatibility with older versions of recoll where file:// - // was hardcoded, but the local path was used for doc - // identification. - return path_canon(url.substr(colon + 1)); -} - -string url_parentfolder(const string& url) -{ - // In general, the parent is the directory above the full path - string parenturl = path_getfather(url_gpath(url)); - // But if this is http, make sure to keep the host part. Recoll - // only has file or http urls for now. - bool isfileurl = urlisfileurl(url); - if (!isfileurl && parenturl == "/") { - parenturl = url_gpath(url); - } - return isfileurl ? string("file://") + parenturl : - string("http://") + parenturl; -} - - -// Convert to file path if url is like file: -// Note: this only works with our internal pseudo-urls which are not -// encoded/escaped -string fileurltolocalpath(string url) -{ - if (url.find("file://") == 0) { - url = url.substr(7, string::npos); - } else { - return string(); - } - -#ifdef _WIN32 - // Absolute file urls are like: file:///c:/mydir/... - // Get rid of the initial '/' - if (url.size() >= 3 && url[0] == '/' && isalpha(url[1]) && url[2] == ':') { - url = url.substr(1); - } -#endif - - // Removing the fragment part. This is exclusively used when - // executing a viewer for the recoll manual, and we only strip the - // part after # if it is preceded by .html - string::size_type pos; - if ((pos = url.rfind(".html#")) != string::npos) { - url.erase(pos + 5); - } else if ((pos = url.rfind(".htm#")) != string::npos) { - url.erase(pos + 4); - } - - return url; -} - -static const string cstr_fileu("file://"); - -string path_pathtofileurl(const string& path) -{ - // We're supposed to receive a canonic absolute path, but on windows we - // may need to add a '/' in front of the drive spec - string url(cstr_fileu); - if (path.empty() || path[0] != '/') { - url.push_back('/'); - } - url += path; - return url; -} - -bool urlisfileurl(const string& url) -{ - return url.find("file://") == 0; -} - #ifndef NO_STD_REGEX static std::regex re_uriparse("^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?", diff --git a/src/utils/pathut.h b/src/utils/pathut.h index 17cf3172..0ac5a67a 100644 --- a/src/utils/pathut.h +++ b/src/utils/pathut.h @@ -213,22 +213,6 @@ bool path_utimes(const std::string& path, struct path_timeval times[2]); * @param mode is an std::fstream mode (ios::in etc.) */ extern bool path_streamopen(const std::string& path, int mode, std::fstream& outstream); -/// Encode according to rfc 1738 -extern std::string url_encode(const std::string& url, std::string::size_type offs = 0); -extern std::string url_decode(const std::string& encoded); -//// Convert to file path if url is like file://. This modifies the -//// input (and returns a copy for convenience) -extern std::string fileurltolocalpath(std::string url); -/// Test for file:/// url -extern bool urlisfileurl(const std::string& url); -/// -extern std::string url_parentfolder(const std::string& url); -/// Return the host+path part of an url. This is not a general -/// routine, it does the right thing only in the recoll context -extern std::string url_gpath(const std::string& url); -/// Turn absolute path into file:// url -extern std::string path_pathtofileurl(const std::string& path); - /// URI parser, loosely from rfc2396.txt class ParsedUri { public: diff --git a/src/utils/rclutil.cpp b/src/utils/rclutil.cpp index 07e161f6..60ff5fb3 100644 --- a/src/utils/rclutil.cpp +++ b/src/utils/rclutil.cpp @@ -293,6 +293,175 @@ const string& path_pkgdatadir() return datadir; } +/* There is a lot of vagueness about what should be percent-encoded or + * not in a file:// url. The constraint that we have is that we may use + * the encoded URL to compute (MD5) a thumbnail path according to the + * freedesktop.org thumbnail spec, which itself does not define what + * should be escaped. We choose to exactly escape what gio does, as + * implemented in glib/gconvert.c:g_escape_uri_string(uri, UNSAFE_PATH). + * Hopefully, the other desktops have the same set of escaped chars. + * Note that $ is not encoded, so the value is not shell-safe. + */ +string url_encode(const string& url, string::size_type offs) +{ + string out = url.substr(0, offs); + const char *cp = url.c_str(); + for (string::size_type i = offs; i < url.size(); i++) { + unsigned int c; + const char *h = "0123456789ABCDEF"; + c = cp[i]; + if (c <= 0x20 || + c >= 0x7f || + c == '"' || + c == '#' || + c == '%' || + c == ';' || + c == '<' || + c == '>' || + c == '?' || + c == '[' || + c == '\\' || + c == ']' || + c == '^' || + c == '`' || + c == '{' || + c == '|' || + c == '}') { + out += '%'; + out += h[(c >> 4) & 0xf]; + out += h[c & 0xf]; + } else { + out += char(c); + } + } + return out; +} + +static inline int h2d(int c) { + if ('0' <= c && c <= '9') + return c - '0'; + else if ('A' <= c && c <= 'F') + return 10 + c - 'A'; + else if ('a' <= c && c <= 'f') + return 10 + c - 'a'; + else + return -1; +} + +string url_decode(const string &in) +{ + if (in.size() <= 2) + return in; + string out; + out.reserve(in.size()); + const char *cp = in.c_str(); + string::size_type i = 0; + for (; i < in.size() - 2; i++) { + if (cp[i] == '%') { + int d1 = h2d(cp[i+1]); + int d2 = h2d(cp[i+2]); + if (d1 != -1 && d2 != -1) { + out += (d1 << 4) + d2; + } else { + out += '%'; + out += cp[i+1]; + out += cp[i+2]; + } + i += 2; + } else { + out += cp[i]; + } + } + while (i < in.size()) { + out += cp[i++]; + } + return out; +} + +string url_gpath(const string& url) +{ + // Remove the access schema part (or whatever it's called) + string::size_type colon = url.find_first_of(":"); + if (colon == string::npos || colon == url.size() - 1) { + return url; + } + // If there are non-alphanum chars before the ':', then there + // probably is no scheme. Whatever... + for (string::size_type i = 0; i < colon; i++) { + if (!isalnum(url.at(i))) { + return url; + } + } + + // In addition we canonize the path to remove empty host parts + // (for compatibility with older versions of recoll where file:// + // was hardcoded, but the local path was used for doc + // identification. + return path_canon(url.substr(colon + 1)); +} + +string url_parentfolder(const string& url) +{ + // In general, the parent is the directory above the full path + string parenturl = path_getfather(url_gpath(url)); + // But if this is http, make sure to keep the host part. Recoll + // only has file or http urls for now. + bool isfileurl = urlisfileurl(url); + if (!isfileurl && parenturl == "/") { + parenturl = url_gpath(url); + } + return isfileurl ? string("file://") + parenturl : + string("http://") + parenturl; +} + + +// Convert to file path if url is like file: +// Note: this only works with our internal pseudo-urls which are not +// encoded/escaped +string fileurltolocalpath(string url) +{ + if (url.find("file://") == 0) { + url = url.substr(7, string::npos); + } else { + return string(); + } + + // If this looks like a Windows path: absolute file urls are like: file:///c:/mydir/... + // Get rid of the initial '/' + if (url.size() >= 3 && url[0] == '/' && isalpha(url[1]) && url[2] == ':') { + url = url.substr(1); + } + + // Removing the fragment part. This is exclusively used when + // executing a viewer for the recoll manual, and we only strip the + // part after # if it is preceded by .html + string::size_type pos; + if ((pos = url.rfind(".html#")) != string::npos) { + url.erase(pos + 5); + } else if ((pos = url.rfind(".htm#")) != string::npos) { + url.erase(pos + 4); + } + + return url; +} + +string path_pathtofileurl(const string& path) +{ + // We're supposed to receive a canonic absolute path, but on windows we + // may need to add a '/' in front of the drive spec + string url(cstr_fileu); + if (path.empty() || path[0] != '/') { + url.push_back('/'); + } + url += path; + return url; +} + +bool urlisfileurl(const string& url) +{ + return url.find("file://") == 0; +} + // Printable url: this is used to transcode from the system charset // into either utf-8 if transcoding succeeds, or url-encoded bool printableUrl(const string& fcharset, const string& in, string& out) diff --git a/src/utils/rclutil.h b/src/utils/rclutil.h index 38b4d9d2..e92adc21 100644 --- a/src/utils/rclutil.h +++ b/src/utils/rclutil.h @@ -43,6 +43,22 @@ extern const std::string& path_pkgdatadir(); extern std::string path_thisexecpath(); #endif +/// Encode according to rfc 1738 +extern std::string url_encode(const std::string& url, std::string::size_type offs = 0); +extern std::string url_decode(const std::string& encoded); +//// Convert to file path if url is like file://. This modifies the +//// input (and returns a copy for convenience) +extern std::string fileurltolocalpath(std::string url); +/// Test for file:/// url +extern bool urlisfileurl(const std::string& url); +/// +extern std::string url_parentfolder(const std::string& url); +/// Return the host+path part of an url. This is not a general +/// routine, it does the right thing only in the recoll context +extern std::string url_gpath(const std::string& url); +/// Turn absolute path into file:// url +extern std::string path_pathtofileurl(const std::string& path); + /// Transcode to utf-8 if possible or url encoding, for display. extern bool printableUrl(const std::string& fcharset, const std::string& in, std::string& out); diff --git a/src/utils/strmatcher.cpp b/src/utils/strmatcher.cpp index 1da0edf7..8f2eafcc 100644 --- a/src/utils/strmatcher.cpp +++ b/src/utils/strmatcher.cpp @@ -26,7 +26,7 @@ #include "cstr.h" #include "log.h" -#include "pathut.h" +#include "rclutil.h" using namespace std;