diff --git a/src/index/beaglequeue.cpp b/src/index/beaglequeue.cpp index a90153ea..50971b4e 100644 --- a/src/index/beaglequeue.cpp +++ b/src/index/beaglequeue.cpp @@ -232,10 +232,9 @@ bool BeagleQueueIndexer::indexFromCache(const string& udi) FileInterner interner(data, m_config, m_tmpdir, FileInterner::FIF_doUseInputMimetype, dotdoc.mimetype); - string ipath; FileInterner::Status fis; try { - fis = interner.internfile(doc, ipath); + fis = interner.internfile(doc); } catch (CancelExcept) { LOGERR(("BeagleQueueIndexer: interrupted\n")); return false; @@ -437,10 +436,9 @@ BeagleQueueIndexer::processone(const string &path, FileInterner interner(path, stp, m_config, m_tmpdir, FileInterner::FIF_doUseInputMimetype, &dotdoc.mimetype); - string ipath; FileInterner::Status fis; try { - fis = interner.internfile(doc, ipath); + fis = interner.internfile(doc); } catch (CancelExcept) { LOGERR(("BeagleQueueIndexer: interrupted\n")); goto out; diff --git a/src/index/fsindexer.cpp b/src/index/fsindexer.cpp index b764baff..c9af77bc 100644 --- a/src/index/fsindexer.cpp +++ b/src/index/fsindexer.cpp @@ -391,9 +391,8 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp, bool hadNullIpath = false; while (fis == FileInterner::FIAgain) { doc.erase(); - string ipath; try { - fis = interner.internfile(doc, ipath); + fis = interner.internfile(doc); } catch (CancelExcept) { LOGERR(("fsIndexer::processone: interrupted\n")); return FsTreeWalker::FtwStop; @@ -404,10 +403,8 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp, // be retried every time. // Internal access path for multi-document files - if (ipath.empty()) + if (doc.ipath.empty()) hadNullIpath = true; - else - doc.ipath = ipath; // Set file name, mod time and url if not done by filter if (doc.fmtime.empty()) @@ -441,8 +438,8 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp, // Add document to database. If there is an ipath, add it as a children // of the file document. string udi; - make_udi(fn, ipath, udi); - if (!m_db->addOrUpdate(udi, ipath.empty() ? "" : parent_udi, doc)) + make_udi(fn, doc.ipath, udi); + if (!m_db->addOrUpdate(udi, doc.ipath.empty() ? "" : parent_udi, doc)) return FsTreeWalker::FtwError; // Tell what we are doing and check for interrupt request @@ -451,8 +448,8 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp, if (m_updater->status.dbtotdocs < m_updater->status.docsdone) m_updater->status.dbtotdocs = m_updater->status.docsdone; m_updater->status.fn = fn; - if (!ipath.empty()) - m_updater->status.fn += "|" + ipath; + if (!doc.ipath.empty()) + m_updater->status.fn += "|" + doc.ipath; if (!m_updater->update()) { return FsTreeWalker::FtwStop; } diff --git a/src/internfile/internfile.cpp b/src/internfile/internfile.cpp index 5e79ed3d..53af0f7a 100644 --- a/src/internfile/internfile.cpp +++ b/src/internfile/internfile.cpp @@ -116,7 +116,8 @@ bool FileInterner::getEnclosing(const string &url, const string &ipath, eurl = url; eipath = ipath; string::size_type colon; - LOGDEB(("FileInterner::getEnclosing(): [%s]\n", eipath.c_str())); + LOGDEB(("FileInterner::getEnclosing(): url [%s] ipath [%s]\n", + url.c_str(), eipath.c_str())); if (eipath.empty()) return false; if ((colon = eipath.find_last_of(isep)) != string::npos) { @@ -599,6 +600,7 @@ bool FileInterner::dijontorcl(Rcl::Doc& doc) // which has them. void FileInterner::collectIpathAndMT(Rcl::Doc& doc, string& ipath) const { + LOGDEB2(("FileInterner::collectIpathAndMT\n")); bool hasipath = false; #ifdef RCL_USE_XATTR @@ -752,7 +754,7 @@ void FileInterner::processNextDocError(Rcl::Doc &doc, string& ipath) ipath.c_str(), doc.mimetype.c_str(), m_reason.c_str())); } -FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath) +FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, const string& ipath) { LOGDEB(("FileInterner::internfile. ipath [%s]\n", ipath.c_str())); if (m_handlers.size() < 1) { @@ -816,7 +818,8 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath) // might be ie an error while decoding an attachment, but we // still want to process the rest of the mbox! For preview: fatal. if (!m_handlers.back()->next_document()) { - processNextDocError(doc, ipath); + string oipath; + processNextDocError(doc, oipath); if (m_forPreview) { m_reason += "Requested document does not exist. "; m_reason += m_handlers.back()->get_error(); @@ -831,16 +834,21 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath) // handler to stack. switch (addHandler()) { case ADD_OK: // Just go through: handler has been stacked, use it + LOGDEB2(("addHandler returned OK\n")); break; case ADD_CONTINUE: // forget this doc and retrieve next from current handler // (ipath stays same) + LOGDEB2(("addHandler returned CONTINUE\n")); continue; case ADD_BREAK: // Stop looping: doc type ok, need complete its processing // and return it + LOGDEB2(("addHandler returned BREAK\n")); goto breakloop; // when you have to you have to - case ADD_ERROR: return FIError; + case ADD_ERROR: + LOGDEB2(("addHandler returned ERROR\n")); + return FIError; } if (!ipath.empty() && @@ -856,16 +864,16 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath) } // If indexing compute ipath and significant mimetype. ipath is - // returned through the parameter not doc.ipath We also retrieve - // some metadata fields from the ancesters (like date or - // author). This is useful for email attachments. The values will - // be replaced by those internal to the document (by dijontorcl()) - // if any, so the order of calls is important. - if (!m_forPreview) - collectIpathAndMT(doc, ipath); - else + // returned through doc.ipath. We also retrieve some metadata + // fields from the ancesters (like date or author). This is useful + // for email attachments. The values will be replaced by those + // internal to the document (by dijontorcl()) if any, so the order + // of calls is important. + if (!m_forPreview) { + collectIpathAndMT(doc, doc.ipath); + } else { doc.mimetype = m_reachedMType; - + } // Keep this AFTER collectIpathAndMT dijontorcl(doc); @@ -916,8 +924,7 @@ bool FileInterner::idocToFile(TempFile& otemp, const string& tofile, FileInterner interner(idoc, cnf, tmpdir, FIF_forPreview); interner.setTargetMType(idoc.mimetype); Rcl::Doc doc; - string mipath = idoc.ipath; - Status ret = interner.internfile(doc, mipath); + Status ret = interner.internfile(doc, idoc.ipath); if (ret == FileInterner::FIError) { LOGERR(("FileInterner::idocToFile: internfile() failed\n")); return false; diff --git a/src/internfile/internfile.h b/src/internfile/internfile.h index bdb0e73e..53aaee04 100644 --- a/src/internfile/internfile.h +++ b/src/internfile/internfile.h @@ -119,12 +119,12 @@ class FileInterner { * @param doc output document * @param ipath internal path. If set by caller, the specified subdoc will * be returned. Else the next document according to current state will - * be returned, and ipath will be set on output. + * be returned, and doc.ipath will be set on output. * @return FIError and FIDone are self-explanatory. If FIAgain is returned, * this is a multi-document file, with more subdocs, and internfile() * should be called again to get the following one(s). */ - Status internfile(Rcl::Doc& doc, string &ipath); + Status internfile(Rcl::Doc& doc, const string &ipath = ""); /** Return the file's (top level object) mimetype (useful for * container files) diff --git a/src/internfile/mh_execm.cpp b/src/internfile/mh_execm.cpp index 75373b4c..003728d7 100644 --- a/src/internfile/mh_execm.cpp +++ b/src/internfile/mh_execm.cpp @@ -217,7 +217,7 @@ bool MimeHandlerExecMultiple::next_document() LOGDEB(("MHExecMultiple: got ipath [%s]\n", data.c_str())); } else if (!stringlowercmp("charset:", name)) { charset = data; - LOGDEB(("MHExecMultiple: got ipath [%s]\n", data.c_str())); + LOGDEB(("MHExecMultiple: got charset [%s]\n", data.c_str())); } else if (!stringlowercmp("mimetype:", name)) { mtype = data; LOGDEB(("MHExecMultiple: got mimetype [%s]\n", data.c_str())); @@ -253,6 +253,8 @@ bool MimeHandlerExecMultiple::next_document() if (!ipath.empty()) { m_metaData["ipath"] = ipath; if (mtype.empty()) { + LOGDEB0(("MHExecMultiple: no mime type from filter, " + "using ipath for a guess\n")); mtype = mimetype(ipath, 0, m_config, false); if (mtype.empty()) { // mimetype() won't call idFile when there is no file. Do it @@ -298,5 +300,9 @@ bool MimeHandlerExecMultiple::next_document() if (eofnext_received) m_havedoc = false; + LOGDEB0(("MHExecMultiple: returning %d bytes of content," + " mtype [%s] charset [%s]\n", + m_metaData["content"].size(), m_metaData["mimetype"].c_str(), + m_metaData["charset"].c_str())); return true; } diff --git a/src/internfile/mh_text.cpp b/src/internfile/mh_text.cpp index 1d574d40..bf762b6a 100644 --- a/src/internfile/mh_text.cpp +++ b/src/internfile/mh_text.cpp @@ -100,7 +100,11 @@ bool MimeHandlerText::set_document_string(const string& otext) bool MimeHandlerText::skip_to_document(const string& ipath) { long long t; - sscanf(ipath.c_str(), "%lld", &t); + if (sscanf(ipath.c_str(), "%lld", &t) != 1) { + LOGERR(("MimeHandlerText::skip_to_document: bad ipath offs [%s]\n", + ipath.c_str())); + return false; + } m_offs = (off_t)t; readnext(); return true;