From 0460f1016c0c60ace29b540981c5227e129f5a56 Mon Sep 17 00:00:00 2001 From: dockes Date: Tue, 1 Jul 2008 10:29:45 +0000 Subject: [PATCH] mh_mail now uses mimetype() to try and better identify application/octet-stream --- src/index/mimetype.cpp | 33 +++++++++++++++++---------------- src/index/mimetype.h | 14 ++++++++++---- src/internfile/mh_mail.cpp | 18 +++++++++++++++--- 3 files changed, 42 insertions(+), 23 deletions(-) diff --git a/src/index/mimetype.cpp b/src/index/mimetype.cpp index 2be54785..ea1a6e9b 100644 --- a/src/index/mimetype.cpp +++ b/src/index/mimetype.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.20 2006-12-20 09:54:18 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.21 2008-07-01 10:29:45 dockes Exp $ (C) 2004 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -57,13 +57,11 @@ using namespace std; static string mimetypefromdata(const string &fn, bool usfc) { - string mime; - // In any case first try the internal identifier - mime = idFile(fn.c_str()); + string mime = idFile(fn.c_str()); #ifdef USE_SYSTEM_FILE_COMMAND - if (usfc && mime == "") { + if (usfc && mime.empty()) { // Last resort: use "file -i" list<string> args; @@ -75,7 +73,7 @@ static string mimetypefromdata(const string &fn, bool usfc) int status = ex.doexec(cmd, args, 0, &result); if (status) { LOGERR(("mimetypefromdata: doexec: status 0x%x\n", status)); - return ""; + return string(); } // LOGDEB(("mimetypefromdata: %s [%s]\n", result.c_str(), fn.c_str())); @@ -88,7 +86,7 @@ static string mimetypefromdata(const string &fn, bool usfc) list<string> res; stringToStrings(result, res); if (res.size() <= 1) - return ""; + return string(); list<string>::iterator it = res.begin(); mime = *++it; // Remove possible punctuation at the end @@ -96,7 +94,7 @@ static string mimetypefromdata(const string &fn, bool usfc) mime.erase(mime.length() -1); // File -i will sometimes return strange stuff (ie: "very 
small file") if(mime.find("/") == string::npos) - mime = ""; + mime.clear(); } #endif @@ -109,17 +107,18 @@ static string mimetypefromdata(const string &fn, bool usfc) string mimetype(const string &fn, const struct stat *stp, RclConfig *cfg, bool usfc) { - if (S_ISDIR(stp->st_mode)) - return "application/x-fsdirectory"; - if (!S_ISREG(stp->st_mode)) - return "application/x-fsspecial"; - + if (stp) { + if (S_ISDIR(stp->st_mode)) + return "application/x-fsdirectory"; + if (!S_ISREG(stp->st_mode)) + return "application/x-fsspecial"; + } if (cfg == 0) - return ""; + return string(); if (cfg->inStopSuffixes(fn)) { LOGDEB(("mimetype: fn [%s] in stopsuffixes\n", fn.c_str())); - return ""; + return string(); } // First look for suffix in mimetype map @@ -135,7 +134,9 @@ string mimetype(const string &fn, const struct stat *stp, return mtype; } - // Then examine data + // Finally examine data + if (!stp) + return string(); return mimetypefromdata(fn, usfc); } diff --git a/src/index/mimetype.h b/src/index/mimetype.h index 44a7bb4b..35663f89 100644 --- a/src/index/mimetype.h +++ b/src/index/mimetype.h @@ -16,16 +16,22 @@ */ #ifndef _MIMETYPE_H_INCLUDED_ #define _MIMETYPE_H_INCLUDED_ -/* @(#$Id: mimetype.h,v 1.6 2006-12-19 08:40:50 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: mimetype.h,v 1.7 2008-07-01 10:29:45 dockes Exp $ (C) 2004 J.F.Dockes */ #include <string> class RclConfig; struct stat; + /** - * Try to determine a mime type for filename. - * This may imply more than matching the suffix, the name must be usable - * to actually access file data. + * Try to determine a mime type for file. + * + * If stp is not null, this may imply more than matching the suffix, + * the name must be usable to actually access file data. + * @param filename file/path name to use + * @param stp if not null use st_mode bits for directories etc. 
+ * @param cfg recoll config + * @param usfc Use system's 'file' command as last resort (or not) */ string mimetype(const std::string &filename, const struct stat *stp, RclConfig *cfg, bool usfc); diff --git a/src/internfile/mh_mail.cpp b/src/internfile/mh_mail.cpp index 6fa7f6ec..32798b0e 100644 --- a/src/internfile/mh_mail.cpp +++ b/src/internfile/mh_mail.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: mh_mail.cpp,v 1.31 2007-12-13 06:58:21 dockes Exp $ (C) 2005 J.F.Dockes"; +static char rcsid[] = "@(#$Id: mh_mail.cpp,v 1.32 2008-07-01 10:29:45 dockes Exp $ (C) 2005 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -36,6 +36,8 @@ static char rcsid[] = "@(#$Id: mh_mail.cpp,v 1.31 2007-12-13 06:58:21 dockes Exp #include "debuglog.h" #include "smallut.h" #include "mh_html.h" +#include "rclconfig.h" +#include "mimetype.h" // binc imap mime definitions #include "mime.h" @@ -103,7 +105,7 @@ bool MimeHandlerMail::skip_to_document(const string& ipath) LOGDEB(("MimeHandlerMail::skip_to_document(%s)\n", ipath.c_str())); if (m_idx == -1) { // No decoding done yet. 
If ipath is null need do nothing - if (ipath == "" || ipath == "-1") + if (ipath.empty() || ipath == "-1") return true; // ipath points to attachment: need to decode message if (!next_document()) { @@ -189,7 +191,7 @@ bool MimeHandlerMail::processAttach() att->m_charset.c_str(), att->m_filename.c_str())); - m_metaData["content"] = ""; + m_metaData["content"] = string(); string& body = m_metaData["content"]; att->m_part->getBody(body, 0, att->m_part->bodylength); string decoded; @@ -215,6 +217,16 @@ bool MimeHandlerMail::processAttach() } } + // Special case for application/octet-stream: try to better + // identify content, using file name if set + if (m_metaData["mimetype"] == "application/octet-stream" && + !m_metaData["filename"].empty()) { + string mt = mimetype(m_metaData["filename"], 0, + RclConfig::getMainConfig(), false); + if (!mt.empty()) + m_metaData["mimetype"] = mt; + } + // Ipath char nbuf[10]; sprintf(nbuf, "%d", m_idx);