index: escape colon characters inside ipaths. Colons inside ipath elements could potentially occur with the zip (e.g. zipped maildir) and chm filters

This commit is contained in:
Jean-Francois Dockes 2011-03-12 12:03:39 +01:00
parent 9b5ff141c0
commit 7eb182f53c
5 changed files with 37 additions and 2 deletions

View File

@ -53,10 +53,31 @@ using namespace std;
#include "pxattr.h"
#endif // RCL_USE_XATTR
static const string stxtplain("text/plain");
// The internal path element separator. This can't be the same as the rcldb
// file to ipath separator: "|".
// If an ipath element coming out of a filter contains the separator
// character (e.g. rclzip or rclchm can do this), we replace it with a
// control char. If you want the SOH control char inside an ipath, you're
// out of luck (and a bit weird).
static const string isep(":");
static const string stxtplain("text/plain");
static const char colon_repl = '\x01';
// Return a copy of 'in' in which every ':' character has been replaced
// by colon_repl. Used on ipath elements before they are joined with the
// ":" separator (isep). All other characters are copied unchanged.
static string colon_hide(const string& in)
{
    string out(in);
    for (string::size_type pos = 0; pos < out.size(); pos++) {
        if (out[pos] == ':')
            out[pos] = colon_repl;
    }
    return out;
}
// Return a copy of 'in' in which every colon_repl character has been
// turned back into ':'. All other characters are copied unchanged.
static string colon_restore(const string& in)
{
    string out(in);
    for (string::size_type pos = 0; pos < out.size(); pos++) {
        if (out[pos] == colon_repl)
            out[pos] = ':';
    }
    return out;
}
set<string> FileInterner::o_missingExternal;
map<string, set<string> > FileInterner::o_typesForMissing;
@ -603,7 +624,7 @@ void FileInterner::collectIpathAndMT(Rcl::Doc& doc, string& ipath) const
getKeyValue(docdata, keymt, doc.mimetype);
getKeyValue(docdata, keyfn, doc.utf8fn);
}
ipath += ipathel + isep;
ipath += colon_hide(ipathel) + isep;
} else {
ipath += isep;
}
@ -750,6 +771,10 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath)
if (!ipath.empty()) {
vector<string> lipath;
stringToTokens(ipath, lipath, isep, true);
for (vector<string>::iterator it = lipath.begin();
it != lipath.end(); it++) {
*it = colon_restore(*it);
}
vipath.insert(vipath.begin(), lipath.begin(), lipath.end());
if (!m_handlers.back()->skip_to_document(vipath[m_handlers.size()-1])){
LOGERR(("FileInterner::internfile: can't skip\n"));

View File

@ -167,6 +167,8 @@ bool MimeHandlerExecMultiple::next_document()
obuf << "Filename: " << 0 << "\n";
}
if (m_ipath.length()) {
LOGDEB(("next_doc: sending len %d val [%s]\n", m_ipath.length(),
m_ipath.c_str()));
obuf << "Ipath: " << m_ipath.length() << "\n" << m_ipath;
}
if (!m_dfltInputCharset.empty()) {

View File

@ -12,6 +12,7 @@ initvariables $0
recollq '"Dear Corporate Administrator"'
recollq TestTbirdWithoutEmptyLine
recollq TestTbirdWithEmptyLine
recollq ZIPPEDMAILDIR_UNIQUEXXX
) 2> $mystderr | egrep -v '^Recoll query: ' > $mystdout
diff -w ${myname}.txt $mystdout > $mydiffs 2>&1

View File

@ -11,3 +11,5 @@ message/rfc822 [file:///home/dockes/projets/fulltext/testrecoll/mail/badMail.edi
message/rfc822 [file:///home/dockes/projets/fulltext/testrecoll/mail/thunderbird/Sent] [Pronote: salut les genies.] 2565 bytes
1 results
message/rfc822 [file:///home/dockes/projets/fulltext/testrecoll/mail/thunderbird/Sent] [De mieux en mieux] 2565 bytes
1 results
message/rfc822 [file:///home/dockes/projets/fulltext/testrecoll/mail/maildir.zip] [[FreeBSD-Announce] ZIPPEDMAILDIR_UNIQUEXXX FreeBSD Security Advisory FreeBSD-SA-04:17.procfs] 59171 bytes

View File

@ -37,6 +37,11 @@
<h2><a name="b_latest">recoll 1.15</a></h2>
<ul>
<li>The rclzip filter can't handle archive members with a colon
(':') in the file name or path. The files are indexed normally and
can be searched for, but they can't be displayed (neither opened
nor previewed).</li>
<li>After an upgrade, the recoll GUI sometimes crashes on
startup. This is fixed by removing (back it up just in case)
~/.config/Recoll.org/recoll.conf, the QSettings storage for