indexing: an error on an archive member could crash or block the indexing because of the unclean way the ipath was passed in/out of internfile(). Closes issue #55
This commit is contained in:
parent
0039ba9ecf
commit
f4c1c3678d
@ -232,10 +232,9 @@ bool BeagleQueueIndexer::indexFromCache(const string& udi)
|
||||
FileInterner interner(data, m_config, m_tmpdir,
|
||||
FileInterner::FIF_doUseInputMimetype,
|
||||
dotdoc.mimetype);
|
||||
string ipath;
|
||||
FileInterner::Status fis;
|
||||
try {
|
||||
fis = interner.internfile(doc, ipath);
|
||||
fis = interner.internfile(doc);
|
||||
} catch (CancelExcept) {
|
||||
LOGERR(("BeagleQueueIndexer: interrupted\n"));
|
||||
return false;
|
||||
@ -437,10 +436,9 @@ BeagleQueueIndexer::processone(const string &path,
|
||||
FileInterner interner(path, stp, m_config, m_tmpdir,
|
||||
FileInterner::FIF_doUseInputMimetype,
|
||||
&dotdoc.mimetype);
|
||||
string ipath;
|
||||
FileInterner::Status fis;
|
||||
try {
|
||||
fis = interner.internfile(doc, ipath);
|
||||
fis = interner.internfile(doc);
|
||||
} catch (CancelExcept) {
|
||||
LOGERR(("BeagleQueueIndexer: interrupted\n"));
|
||||
goto out;
|
||||
|
||||
@ -391,9 +391,8 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp,
|
||||
bool hadNullIpath = false;
|
||||
while (fis == FileInterner::FIAgain) {
|
||||
doc.erase();
|
||||
string ipath;
|
||||
try {
|
||||
fis = interner.internfile(doc, ipath);
|
||||
fis = interner.internfile(doc);
|
||||
} catch (CancelExcept) {
|
||||
LOGERR(("fsIndexer::processone: interrupted\n"));
|
||||
return FsTreeWalker::FtwStop;
|
||||
@ -404,10 +403,8 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp,
|
||||
// be retried every time.
|
||||
|
||||
// Internal access path for multi-document files
|
||||
if (ipath.empty())
|
||||
if (doc.ipath.empty())
|
||||
hadNullIpath = true;
|
||||
else
|
||||
doc.ipath = ipath;
|
||||
|
||||
// Set file name, mod time and url if not done by filter
|
||||
if (doc.fmtime.empty())
|
||||
@ -441,8 +438,8 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp,
|
||||
// Add document to database. If there is an ipath, add it as a children
|
||||
// of the file document.
|
||||
string udi;
|
||||
make_udi(fn, ipath, udi);
|
||||
if (!m_db->addOrUpdate(udi, ipath.empty() ? "" : parent_udi, doc))
|
||||
make_udi(fn, doc.ipath, udi);
|
||||
if (!m_db->addOrUpdate(udi, doc.ipath.empty() ? "" : parent_udi, doc))
|
||||
return FsTreeWalker::FtwError;
|
||||
|
||||
// Tell what we are doing and check for interrupt request
|
||||
@ -451,8 +448,8 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp,
|
||||
if (m_updater->status.dbtotdocs < m_updater->status.docsdone)
|
||||
m_updater->status.dbtotdocs = m_updater->status.docsdone;
|
||||
m_updater->status.fn = fn;
|
||||
if (!ipath.empty())
|
||||
m_updater->status.fn += "|" + ipath;
|
||||
if (!doc.ipath.empty())
|
||||
m_updater->status.fn += "|" + doc.ipath;
|
||||
if (!m_updater->update()) {
|
||||
return FsTreeWalker::FtwStop;
|
||||
}
|
||||
|
||||
@ -116,7 +116,8 @@ bool FileInterner::getEnclosing(const string &url, const string &ipath,
|
||||
eurl = url;
|
||||
eipath = ipath;
|
||||
string::size_type colon;
|
||||
LOGDEB(("FileInterner::getEnclosing(): [%s]\n", eipath.c_str()));
|
||||
LOGDEB(("FileInterner::getEnclosing(): url [%s] ipath [%s]\n",
|
||||
url.c_str(), eipath.c_str()));
|
||||
if (eipath.empty())
|
||||
return false;
|
||||
if ((colon = eipath.find_last_of(isep)) != string::npos) {
|
||||
@ -599,6 +600,7 @@ bool FileInterner::dijontorcl(Rcl::Doc& doc)
|
||||
// which has them.
|
||||
void FileInterner::collectIpathAndMT(Rcl::Doc& doc, string& ipath) const
|
||||
{
|
||||
LOGDEB2(("FileInterner::collectIpathAndMT\n"));
|
||||
bool hasipath = false;
|
||||
|
||||
#ifdef RCL_USE_XATTR
|
||||
@ -752,7 +754,7 @@ void FileInterner::processNextDocError(Rcl::Doc &doc, string& ipath)
|
||||
ipath.c_str(), doc.mimetype.c_str(), m_reason.c_str()));
|
||||
}
|
||||
|
||||
FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath)
|
||||
FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, const string& ipath)
|
||||
{
|
||||
LOGDEB(("FileInterner::internfile. ipath [%s]\n", ipath.c_str()));
|
||||
if (m_handlers.size() < 1) {
|
||||
@ -816,7 +818,8 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath)
|
||||
// might be ie an error while decoding an attachment, but we
|
||||
// still want to process the rest of the mbox! For preview: fatal.
|
||||
if (!m_handlers.back()->next_document()) {
|
||||
processNextDocError(doc, ipath);
|
||||
string oipath;
|
||||
processNextDocError(doc, oipath);
|
||||
if (m_forPreview) {
|
||||
m_reason += "Requested document does not exist. ";
|
||||
m_reason += m_handlers.back()->get_error();
|
||||
@ -831,16 +834,21 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath)
|
||||
// handler to stack.
|
||||
switch (addHandler()) {
|
||||
case ADD_OK: // Just go through: handler has been stacked, use it
|
||||
LOGDEB2(("addHandler returned OK\n"));
|
||||
break;
|
||||
case ADD_CONTINUE:
|
||||
// forget this doc and retrieve next from current handler
|
||||
// (ipath stays same)
|
||||
LOGDEB2(("addHandler returned CONTINUE\n"));
|
||||
continue;
|
||||
case ADD_BREAK:
|
||||
// Stop looping: doc type ok, need complete its processing
|
||||
// and return it
|
||||
LOGDEB2(("addHandler returned BREAK\n"));
|
||||
goto breakloop; // when you have to you have to
|
||||
case ADD_ERROR: return FIError;
|
||||
case ADD_ERROR:
|
||||
LOGDEB2(("addHandler returned ERROR\n"));
|
||||
return FIError;
|
||||
}
|
||||
|
||||
if (!ipath.empty() &&
|
||||
@ -856,16 +864,16 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath)
|
||||
}
|
||||
|
||||
// If indexing compute ipath and significant mimetype. ipath is
|
||||
// returned through the parameter not doc.ipath We also retrieve
|
||||
// some metadata fields from the ancesters (like date or
|
||||
// author). This is useful for email attachments. The values will
|
||||
// be replaced by those internal to the document (by dijontorcl())
|
||||
// if any, so the order of calls is important.
|
||||
if (!m_forPreview)
|
||||
collectIpathAndMT(doc, ipath);
|
||||
else
|
||||
// returned through doc.ipath. We also retrieve some metadata
|
||||
// fields from the ancesters (like date or author). This is useful
|
||||
// for email attachments. The values will be replaced by those
|
||||
// internal to the document (by dijontorcl()) if any, so the order
|
||||
// of calls is important.
|
||||
if (!m_forPreview) {
|
||||
collectIpathAndMT(doc, doc.ipath);
|
||||
} else {
|
||||
doc.mimetype = m_reachedMType;
|
||||
|
||||
}
|
||||
// Keep this AFTER collectIpathAndMT
|
||||
dijontorcl(doc);
|
||||
|
||||
@ -916,8 +924,7 @@ bool FileInterner::idocToFile(TempFile& otemp, const string& tofile,
|
||||
FileInterner interner(idoc, cnf, tmpdir, FIF_forPreview);
|
||||
interner.setTargetMType(idoc.mimetype);
|
||||
Rcl::Doc doc;
|
||||
string mipath = idoc.ipath;
|
||||
Status ret = interner.internfile(doc, mipath);
|
||||
Status ret = interner.internfile(doc, idoc.ipath);
|
||||
if (ret == FileInterner::FIError) {
|
||||
LOGERR(("FileInterner::idocToFile: internfile() failed\n"));
|
||||
return false;
|
||||
|
||||
@ -119,12 +119,12 @@ class FileInterner {
|
||||
* @param doc output document
|
||||
* @param ipath internal path. If set by caller, the specified subdoc will
|
||||
* be returned. Else the next document according to current state will
|
||||
* be returned, and ipath will be set on output.
|
||||
* be returned, and doc.ipath will be set on output.
|
||||
* @return FIError and FIDone are self-explanatory. If FIAgain is returned,
|
||||
* this is a multi-document file, with more subdocs, and internfile()
|
||||
* should be called again to get the following one(s).
|
||||
*/
|
||||
Status internfile(Rcl::Doc& doc, string &ipath);
|
||||
Status internfile(Rcl::Doc& doc, const string &ipath = "");
|
||||
|
||||
/** Return the file's (top level object) mimetype (useful for
|
||||
* container files)
|
||||
|
||||
@ -217,7 +217,7 @@ bool MimeHandlerExecMultiple::next_document()
|
||||
LOGDEB(("MHExecMultiple: got ipath [%s]\n", data.c_str()));
|
||||
} else if (!stringlowercmp("charset:", name)) {
|
||||
charset = data;
|
||||
LOGDEB(("MHExecMultiple: got ipath [%s]\n", data.c_str()));
|
||||
LOGDEB(("MHExecMultiple: got charset [%s]\n", data.c_str()));
|
||||
} else if (!stringlowercmp("mimetype:", name)) {
|
||||
mtype = data;
|
||||
LOGDEB(("MHExecMultiple: got mimetype [%s]\n", data.c_str()));
|
||||
@ -253,6 +253,8 @@ bool MimeHandlerExecMultiple::next_document()
|
||||
if (!ipath.empty()) {
|
||||
m_metaData["ipath"] = ipath;
|
||||
if (mtype.empty()) {
|
||||
LOGDEB0(("MHExecMultiple: no mime type from filter, "
|
||||
"using ipath for a guess\n"));
|
||||
mtype = mimetype(ipath, 0, m_config, false);
|
||||
if (mtype.empty()) {
|
||||
// mimetype() won't call idFile when there is no file. Do it
|
||||
@ -298,5 +300,9 @@ bool MimeHandlerExecMultiple::next_document()
|
||||
if (eofnext_received)
|
||||
m_havedoc = false;
|
||||
|
||||
LOGDEB0(("MHExecMultiple: returning %d bytes of content,"
|
||||
" mtype [%s] charset [%s]\n",
|
||||
m_metaData["content"].size(), m_metaData["mimetype"].c_str(),
|
||||
m_metaData["charset"].c_str()));
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -100,7 +100,11 @@ bool MimeHandlerText::set_document_string(const string& otext)
|
||||
bool MimeHandlerText::skip_to_document(const string& ipath)
|
||||
{
|
||||
long long t;
|
||||
sscanf(ipath.c_str(), "%lld", &t);
|
||||
if (sscanf(ipath.c_str(), "%lld", &t) != 1) {
|
||||
LOGERR(("MimeHandlerText::skip_to_document: bad ipath offs [%s]\n",
|
||||
ipath.c_str()));
|
||||
return false;
|
||||
}
|
||||
m_offs = (off_t)t;
|
||||
readnext();
|
||||
return true;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user