indexing: an error on an archive member could crash or block the indexing because of the unclean way the ipath was passed in/out of internfile(). Closes issue #55

This commit is contained in:
Jean-Francois Dockes 2011-04-25 16:41:43 +02:00
parent 0039ba9ecf
commit f4c1c3678d
6 changed files with 44 additions and 32 deletions

View File

@ -232,10 +232,9 @@ bool BeagleQueueIndexer::indexFromCache(const string& udi)
FileInterner interner(data, m_config, m_tmpdir,
FileInterner::FIF_doUseInputMimetype,
dotdoc.mimetype);
string ipath;
FileInterner::Status fis;
try {
fis = interner.internfile(doc, ipath);
fis = interner.internfile(doc);
} catch (CancelExcept) {
LOGERR(("BeagleQueueIndexer: interrupted\n"));
return false;
@ -437,10 +436,9 @@ BeagleQueueIndexer::processone(const string &path,
FileInterner interner(path, stp, m_config, m_tmpdir,
FileInterner::FIF_doUseInputMimetype,
&dotdoc.mimetype);
string ipath;
FileInterner::Status fis;
try {
fis = interner.internfile(doc, ipath);
fis = interner.internfile(doc);
} catch (CancelExcept) {
LOGERR(("BeagleQueueIndexer: interrupted\n"));
goto out;

View File

@ -391,9 +391,8 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp,
bool hadNullIpath = false;
while (fis == FileInterner::FIAgain) {
doc.erase();
string ipath;
try {
fis = interner.internfile(doc, ipath);
fis = interner.internfile(doc);
} catch (CancelExcept) {
LOGERR(("fsIndexer::processone: interrupted\n"));
return FsTreeWalker::FtwStop;
@ -404,10 +403,8 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp,
// be retried every time.
// Internal access path for multi-document files
if (ipath.empty())
if (doc.ipath.empty())
hadNullIpath = true;
else
doc.ipath = ipath;
// Set file name, mod time and url if not done by filter
if (doc.fmtime.empty())
@ -441,8 +438,8 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp,
// Add document to database. If there is an ipath, add it as a children
// of the file document.
string udi;
make_udi(fn, ipath, udi);
if (!m_db->addOrUpdate(udi, ipath.empty() ? "" : parent_udi, doc))
make_udi(fn, doc.ipath, udi);
if (!m_db->addOrUpdate(udi, doc.ipath.empty() ? "" : parent_udi, doc))
return FsTreeWalker::FtwError;
// Tell what we are doing and check for interrupt request
@ -451,8 +448,8 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp,
if (m_updater->status.dbtotdocs < m_updater->status.docsdone)
m_updater->status.dbtotdocs = m_updater->status.docsdone;
m_updater->status.fn = fn;
if (!ipath.empty())
m_updater->status.fn += "|" + ipath;
if (!doc.ipath.empty())
m_updater->status.fn += "|" + doc.ipath;
if (!m_updater->update()) {
return FsTreeWalker::FtwStop;
}

View File

@ -116,7 +116,8 @@ bool FileInterner::getEnclosing(const string &url, const string &ipath,
eurl = url;
eipath = ipath;
string::size_type colon;
LOGDEB(("FileInterner::getEnclosing(): [%s]\n", eipath.c_str()));
LOGDEB(("FileInterner::getEnclosing(): url [%s] ipath [%s]\n",
url.c_str(), eipath.c_str()));
if (eipath.empty())
return false;
if ((colon = eipath.find_last_of(isep)) != string::npos) {
@ -599,6 +600,7 @@ bool FileInterner::dijontorcl(Rcl::Doc& doc)
// which has them.
void FileInterner::collectIpathAndMT(Rcl::Doc& doc, string& ipath) const
{
LOGDEB2(("FileInterner::collectIpathAndMT\n"));
bool hasipath = false;
#ifdef RCL_USE_XATTR
@ -752,7 +754,7 @@ void FileInterner::processNextDocError(Rcl::Doc &doc, string& ipath)
ipath.c_str(), doc.mimetype.c_str(), m_reason.c_str()));
}
FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath)
FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, const string& ipath)
{
LOGDEB(("FileInterner::internfile. ipath [%s]\n", ipath.c_str()));
if (m_handlers.size() < 1) {
@ -816,7 +818,8 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath)
// might be ie an error while decoding an attachment, but we
// still want to process the rest of the mbox! For preview: fatal.
if (!m_handlers.back()->next_document()) {
processNextDocError(doc, ipath);
string oipath;
processNextDocError(doc, oipath);
if (m_forPreview) {
m_reason += "Requested document does not exist. ";
m_reason += m_handlers.back()->get_error();
@ -831,16 +834,21 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath)
// handler to stack.
switch (addHandler()) {
case ADD_OK: // Just go through: handler has been stacked, use it
LOGDEB2(("addHandler returned OK\n"));
break;
case ADD_CONTINUE:
// forget this doc and retrieve next from current handler
// (ipath stays same)
LOGDEB2(("addHandler returned CONTINUE\n"));
continue;
case ADD_BREAK:
// Stop looping: doc type ok, need complete its processing
// and return it
LOGDEB2(("addHandler returned BREAK\n"));
goto breakloop; // when you have to you have to
case ADD_ERROR: return FIError;
case ADD_ERROR:
LOGDEB2(("addHandler returned ERROR\n"));
return FIError;
}
if (!ipath.empty() &&
@ -856,16 +864,16 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath)
}
// If indexing compute ipath and significant mimetype. ipath is
// returned through the parameter not doc.ipath We also retrieve
// some metadata fields from the ancesters (like date or
// author). This is useful for email attachments. The values will
// be replaced by those internal to the document (by dijontorcl())
// if any, so the order of calls is important.
if (!m_forPreview)
collectIpathAndMT(doc, ipath);
else
// returned through doc.ipath. We also retrieve some metadata
// fields from the ancesters (like date or author). This is useful
// for email attachments. The values will be replaced by those
// internal to the document (by dijontorcl()) if any, so the order
// of calls is important.
if (!m_forPreview) {
collectIpathAndMT(doc, doc.ipath);
} else {
doc.mimetype = m_reachedMType;
}
// Keep this AFTER collectIpathAndMT
dijontorcl(doc);
@ -916,8 +924,7 @@ bool FileInterner::idocToFile(TempFile& otemp, const string& tofile,
FileInterner interner(idoc, cnf, tmpdir, FIF_forPreview);
interner.setTargetMType(idoc.mimetype);
Rcl::Doc doc;
string mipath = idoc.ipath;
Status ret = interner.internfile(doc, mipath);
Status ret = interner.internfile(doc, idoc.ipath);
if (ret == FileInterner::FIError) {
LOGERR(("FileInterner::idocToFile: internfile() failed\n"));
return false;

View File

@ -119,12 +119,12 @@ class FileInterner {
* @param doc output document
* @param ipath internal path. If set by caller, the specified subdoc will
* be returned. Else the next document according to current state will
* be returned, and ipath will be set on output.
* be returned, and doc.ipath will be set on output.
* @return FIError and FIDone are self-explanatory. If FIAgain is returned,
* this is a multi-document file, with more subdocs, and internfile()
* should be called again to get the following one(s).
*/
Status internfile(Rcl::Doc& doc, string &ipath);
Status internfile(Rcl::Doc& doc, const string &ipath = "");
/** Return the file's (top level object) mimetype (useful for
* container files)

View File

@ -217,7 +217,7 @@ bool MimeHandlerExecMultiple::next_document()
LOGDEB(("MHExecMultiple: got ipath [%s]\n", data.c_str()));
} else if (!stringlowercmp("charset:", name)) {
charset = data;
LOGDEB(("MHExecMultiple: got ipath [%s]\n", data.c_str()));
LOGDEB(("MHExecMultiple: got charset [%s]\n", data.c_str()));
} else if (!stringlowercmp("mimetype:", name)) {
mtype = data;
LOGDEB(("MHExecMultiple: got mimetype [%s]\n", data.c_str()));
@ -253,6 +253,8 @@ bool MimeHandlerExecMultiple::next_document()
if (!ipath.empty()) {
m_metaData["ipath"] = ipath;
if (mtype.empty()) {
LOGDEB0(("MHExecMultiple: no mime type from filter, "
"using ipath for a guess\n"));
mtype = mimetype(ipath, 0, m_config, false);
if (mtype.empty()) {
// mimetype() won't call idFile when there is no file. Do it
@ -298,5 +300,9 @@ bool MimeHandlerExecMultiple::next_document()
if (eofnext_received)
m_havedoc = false;
LOGDEB0(("MHExecMultiple: returning %d bytes of content,"
" mtype [%s] charset [%s]\n",
m_metaData["content"].size(), m_metaData["mimetype"].c_str(),
m_metaData["charset"].c_str()));
return true;
}

View File

@ -100,7 +100,11 @@ bool MimeHandlerText::set_document_string(const string& otext)
bool MimeHandlerText::skip_to_document(const string& ipath)
{
long long t;
sscanf(ipath.c_str(), "%lld", &t);
if (sscanf(ipath.c_str(), "%lld", &t) != 1) {
LOGERR(("MimeHandlerText::skip_to_document: bad ipath offs [%s]\n",
ipath.c_str()));
return false;
}
m_offs = (off_t)t;
readnext();
return true;