index: escape colon characters inside ipath elements. Colons can appear in elements produced by the zip (e.g. a zipped maildir) and chm filters

This commit is contained in:
Jean-Francois Dockes 2011-03-12 12:03:39 +01:00
parent 9b5ff141c0
commit 7eb182f53c
5 changed files with 37 additions and 2 deletions

View File

@ -53,10 +53,31 @@ using namespace std;
#include "pxattr.h" #include "pxattr.h"
#endif // RCL_USE_XATTR #endif // RCL_USE_XATTR
static const string stxtplain("text/plain");
// The internal path element separator. This can't be the same as the rcldb // The internal path element separator. This can't be the same as the rcldb
// file to ipath separator : "|" // file to ipath separator : "|"
// If the separator character comes out of a filter as part of an ipath
// element (e.g. rclzip or rclchm can do this), we replace it with a
// control char. If you want the SOH control char inside an ipath,
// you're out of luck (and a bit weird).
static const string isep(":"); static const string isep(":");
static const string stxtplain("text/plain"); static const char colon_repl = '\x01';
// Return a copy of 'in' in which every ':' character has been replaced
// by the colon_repl placeholder. All other characters are copied
// unchanged, so the result always has the same length as the input.
static string colon_hide(const string& in)
{
    string hidden(in);
    for (string::size_type pos = 0; pos < hidden.size(); ++pos) {
        if (hidden[pos] == ':') {
            hidden[pos] = colon_repl;
        }
    }
    return hidden;
}
// Inverse of the colon hiding step: return a copy of 'in' in which every
// occurrence of the colon_repl placeholder has been turned back into ':'.
// All other characters are copied unchanged.
static string colon_restore(const string& in)
{
    string restored(in);
    string::size_type pos = 0;
    // Jump from one placeholder to the next instead of testing each char.
    while ((pos = restored.find(colon_repl, pos)) != string::npos) {
        restored[pos] = ':';
        ++pos;
    }
    return restored;
}
set<string> FileInterner::o_missingExternal; set<string> FileInterner::o_missingExternal;
map<string, set<string> > FileInterner::o_typesForMissing; map<string, set<string> > FileInterner::o_typesForMissing;
@ -603,7 +624,7 @@ void FileInterner::collectIpathAndMT(Rcl::Doc& doc, string& ipath) const
getKeyValue(docdata, keymt, doc.mimetype); getKeyValue(docdata, keymt, doc.mimetype);
getKeyValue(docdata, keyfn, doc.utf8fn); getKeyValue(docdata, keyfn, doc.utf8fn);
} }
ipath += ipathel + isep; ipath += colon_hide(ipathel) + isep;
} else { } else {
ipath += isep; ipath += isep;
} }
@ -750,6 +771,10 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath)
if (!ipath.empty()) { if (!ipath.empty()) {
vector<string> lipath; vector<string> lipath;
stringToTokens(ipath, lipath, isep, true); stringToTokens(ipath, lipath, isep, true);
for (vector<string>::iterator it = lipath.begin();
it != lipath.end(); it++) {
*it = colon_restore(*it);
}
vipath.insert(vipath.begin(), lipath.begin(), lipath.end()); vipath.insert(vipath.begin(), lipath.begin(), lipath.end());
if (!m_handlers.back()->skip_to_document(vipath[m_handlers.size()-1])){ if (!m_handlers.back()->skip_to_document(vipath[m_handlers.size()-1])){
LOGERR(("FileInterner::internfile: can't skip\n")); LOGERR(("FileInterner::internfile: can't skip\n"));

View File

@ -167,6 +167,8 @@ bool MimeHandlerExecMultiple::next_document()
obuf << "Filename: " << 0 << "\n"; obuf << "Filename: " << 0 << "\n";
} }
if (m_ipath.length()) { if (m_ipath.length()) {
LOGDEB(("next_doc: sending len %d val [%s]\n", m_ipath.length(),
m_ipath.c_str()));
obuf << "Ipath: " << m_ipath.length() << "\n" << m_ipath; obuf << "Ipath: " << m_ipath.length() << "\n" << m_ipath;
} }
if (!m_dfltInputCharset.empty()) { if (!m_dfltInputCharset.empty()) {

View File

@ -12,6 +12,7 @@ initvariables $0
recollq '"Dear Corporate Administrator"' recollq '"Dear Corporate Administrator"'
recollq TestTbirdWithoutEmptyLine recollq TestTbirdWithoutEmptyLine
recollq TestTbirdWithEmptyLine recollq TestTbirdWithEmptyLine
recollq ZIPPEDMAILDIR_UNIQUEXXX
) 2> $mystderr | egrep -v '^Recoll query: ' > $mystdout ) 2> $mystderr | egrep -v '^Recoll query: ' > $mystdout
diff -w ${myname}.txt $mystdout > $mydiffs 2>&1 diff -w ${myname}.txt $mystdout > $mydiffs 2>&1

View File

@ -11,3 +11,5 @@ message/rfc822 [file:///home/dockes/projets/fulltext/testrecoll/mail/badMail.edi
message/rfc822 [file:///home/dockes/projets/fulltext/testrecoll/mail/thunderbird/Sent] [Pronote: salut les genies.] 2565 bytes message/rfc822 [file:///home/dockes/projets/fulltext/testrecoll/mail/thunderbird/Sent] [Pronote: salut les genies.] 2565 bytes
1 results 1 results
message/rfc822 [file:///home/dockes/projets/fulltext/testrecoll/mail/thunderbird/Sent] [De mieux en mieux] 2565 bytes message/rfc822 [file:///home/dockes/projets/fulltext/testrecoll/mail/thunderbird/Sent] [De mieux en mieux] 2565 bytes
1 results
message/rfc822 [file:///home/dockes/projets/fulltext/testrecoll/mail/maildir.zip] [[FreeBSD-Announce] ZIPPEDMAILDIR_UNIQUEXXX FreeBSD Security Advisory FreeBSD-SA-04:17.procfs] 59171 bytes

View File

@ -37,6 +37,11 @@
<h2><a name="b_latest">recoll 1.15</a></h2> <h2><a name="b_latest">recoll 1.15</a></h2>
<ul> <ul>
<li>The rclzip filter can't handle archive members with a colon
(':') in the file name or path. The files are indexed normally and
can be searched for, but they can't be displayed (neither opened
nor previewed).</li>
<li>After an upgrade, the recoll GUI sometimes crashes on <li>After an upgrade, the recoll GUI sometimes crashes on
startup. This is fixed by removing (back it up just in case) startup. This is fixed by removing (back it up just in case)
~/.config/Recoll.org/recoll.conf, the QSettings storage for ~/.config/Recoll.org/recoll.conf, the QSettings storage for