mail attachments sort of ok

This commit is contained in:
dockes 2006-12-16 15:39:54 +00:00
parent 1435101bb6
commit 8f1f2ca66d
10 changed files with 475 additions and 201 deletions

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: internfile.cpp,v 1.20 2006-12-15 16:33:15 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: internfile.cpp,v 1.21 2006-12-16 15:39:54 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -21,6 +21,7 @@ static char rcsid[] = "@(#$Id: internfile.cpp,v 1.20 2006-12-15 16:33:15 dockes
#ifndef TEST_INTERNFILE
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
@ -41,6 +42,10 @@ using namespace std;
#include "wipedir.h"
#include "rclconfig.h"
// The internal path element separator. This can't be the same as the rcldb
// file to ipath separator : "|"
static const string isep(":");
// Execute the command to uncompress a file into a temporary one.
static bool uncompressfile(RclConfig *conf, const string& ifn,
const list<string>& cmdv, const string& tdir,
@ -133,7 +138,7 @@ FileInterner::FileInterner(const std::string &f, RclConfig *cnf,
if (!uncompressfile(m_cfg, m_fn, ucmd, m_tdir, m_tfile)) {
return;
}
LOGDEB(("internfile: after ucomp: m_tdir %s, tfile %s\n",
LOGDEB1(("internfile: after ucomp: m_tdir %s, tfile %s\n",
m_tdir.c_str(), m_tfile.c_str()));
m_fn = m_tfile;
l_mime = mimetype(m_fn, m_cfg, usfci);
@ -167,77 +172,151 @@ FileInterner::FileInterner(const std::string &f, RclConfig *cnf,
LOGERR(("FileInterner:: error parsing %s\n", m_fn.c_str()));
return;
}
m_handlers.reserve(20);
m_handlers.reserve(MAXHANDLERS);
for (unsigned int i = 0; i < MAXHANDLERS; i++)
m_tmpflgs[i] = false;
m_handlers.push_back(df);
LOGDEB(("FileInterner::FileInterner: %s [%s]\n", l_mime.c_str(),
m_fn.c_str()));
m_fn.c_str()));
m_targetMType = "text/plain";
}
FileInterner::~FileInterner()
{
while (!m_handlers.empty()) {
delete m_handlers.back();
m_handlers.pop_back();
}
tmpcleanup();
for (vector<Dijon::Filter*>::iterator it = m_handlers.begin();
it != m_handlers.end(); it++)
delete *it;
// m_tempfiles will take care of itself
}
static const string string_empty;
static const string get_mimetype(Dijon::Filter* df)
bool FileInterner::dataToTempFile(const string& dt, const string& mt,
string& fn)
{
const std::map<std::string, std::string> *docdata = &df->get_meta_data();
map<string,string>::const_iterator it;
it = docdata->find("mimetype");
if (it != docdata->end()) {
return it->second;
// Find appropriate suffix for mime type
TempFile temp(new TempFileInternal(m_cfg->getSuffixFromMimeType(mt)));
if (temp->ok()) {
m_tmpflgs[m_handlers.size()-1] = true;
m_tempfiles.push_back(temp);
} else {
return string_empty;
LOGERR(("FileInterner::dataToTempFile: cant create tempfile\n"));
return false;
}
int fd = open(temp->filename(), O_WRONLY);
if (fd < 0) {
LOGERR(("FileInterner::dataToTempFile: open(%s) failed errno %d\n",
temp->filename(), errno));
return false;
}
if (write(fd, dt.c_str(), dt.length()) != (int)dt.length()) {
close(fd);
LOGERR(("FileInterner::dataToTempFile: write to %s failed errno %d\n",
temp->filename(), errno));
return false;
}
close(fd);
fn = temp->filename();
return true;
}
// Look up 'key' in the 'docdata' metadata map. When the key is present, the
// associated value is copied into 'value' and true is returned. When it is
// absent, false is returned and 'value' is left untouched.
static inline bool getKeyValue(const map<string, string>& docdata,
                               const string& key, string& value)
{
    const map<string, string>::const_iterator pos = docdata.find(key);
    if (pos == docdata.end())
        return false;
    value = pos->second;
    return true;
}
// Names of the keys used with getKeyValue() to look up values in the
// metadata maps returned by the filters' get_meta_data(). Keeping them as
// file-level string objects avoids building a temporary string from a
// literal on every lookup.
static const string keyab("abstract");
static const string keycs("charset");
static const string keyct("content");
static const string keyfn("filename");
static const string keykw("keywords");
static const string keymd("modificationdate");
static const string keymt("mimetype");
static const string keyoc("origcharset");
static const string keysm("sample");
static const string keytt("title");
bool FileInterner::dijontorcl(Rcl::Doc& doc)
{
Dijon::Filter *df = m_handlers.back();
const std::map<std::string, std::string> *docdata = &df->get_meta_data();
map<string,string>::const_iterator it;
const std::map<std::string, std::string>& docdata = df->get_meta_data();
it = docdata->find("origcharset");
if (it != docdata->end())
doc.origcharset = it->second;
it = docdata->find("content");
if (it != docdata->end())
doc.text = it->second;
it = docdata->find("title");
if (it != docdata->end())
doc.title = it->second;
it = docdata->find("keywords");
if (it != docdata->end())
doc.keywords = it->second;
it = docdata->find("modificationdate");
if (it != docdata->end())
doc.dmtime = it->second;
it = docdata->find("abstract");
if (it != docdata->end()) {
doc.abstract = it->second;
} else {
it = docdata->find("sample");
if (it != docdata->end())
doc.abstract = it->second;
}
getKeyValue(docdata, keyoc, doc.origcharset);
getKeyValue(docdata, keyct, doc.text);
getKeyValue(docdata, keytt, doc.title);
getKeyValue(docdata, keykw, doc.keywords);
getKeyValue(docdata, keymd, doc.dmtime);
if (!getKeyValue(docdata, keyab, doc.abstract))
getKeyValue(docdata, keysm, doc.abstract);
LOGDEB1(("FILENAME: %s\n", doc.utf8fn.c_str()));
return true;
}
// Collect the ipath stack.
// Walks the handler stack from bottom to top and appends one element per
// handler to 'ipath', each followed by the 'isep' separator (an empty
// element when the handler's metadata has no "ipath" key).
// While we're at it, we also set the mimetype and filename, which are special
// properties: we want to get them from the topmost doc
// with an ipath, not the last one which is usually text/plain
//
// doc:   receives mimetype (defaults to the file's own m_mimetype) and utf8fn.
// ipath: output. Trailing separators are trimmed; it is erased completely
//        when no handler reported a non-empty ipath element.
// NOTE(review): elements are appended to 'ipath' without clearing it first;
// this assumes the caller passes it in empty -- confirm against callers.
void FileInterner::collectIpathAndMT(Rcl::Doc& doc, string& ipath)
{
bool hasipath = false;
// NOTE(review): this local constant is not referenced anywhere in this
// function, and the class declaration also defines MAXHANDLERS -- confirm
// that this line is meant to be here.
static const unsigned int MAXHANDLERS = 20;
// If there is no ipath stack, the mimetype is the one from the file
doc.mimetype = m_mimetype;
LOGDEB2(("INITIAL mimetype: %s\n", doc.mimetype.c_str()));
// Only read after a successful getKeyValue() call, so a value left over
// from a previous iteration is never used.
string ipathel;
for (vector<Dijon::Filter*>::const_iterator hit = m_handlers.begin();
hit != m_handlers.end(); hit++) {
const map<string, string>& docdata = (*hit)->get_meta_data();
if (getKeyValue(docdata, "ipath", ipathel)) {
if (!ipathel.empty()) {
// We have a non-empty ipath
hasipath = true;
// Handlers are visited bottom to top, so values from a higher handler
// with a non-empty ipath overwrite those set by a lower one.
getKeyValue(docdata, keymt, doc.mimetype);
getKeyValue(docdata, keyfn, doc.utf8fn);
}
ipath += ipathel + isep;
} else {
// No ipath key at this level: contribute an empty element
ipath += isep;
}
}
// Trim empty tail elements in ipath.
if (hasipath) {
LOGDEB2(("IPATH [%s]\n", ipath.c_str()));
// Position of the last character which is not a separator
string::size_type sit = ipath.find_last_not_of(isep);
if (sit == string::npos)
ipath.erase();
else if (sit < ipath.length() -1)
ipath.erase(sit+1);
} else {
// Only empty elements were collected: return an empty ipath
ipath.erase();
}
}
// Remove handler from stack. Clean up temp file if needed.
void FileInterner::popHandler()
{
int i = m_handlers.size()-1;
if (m_tmpflgs[i]) {
m_tempfiles.pop_back();
m_tmpflgs[i] = false;
}
delete m_handlers.back();
m_handlers.pop_back();
}
FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath)
{
if (m_handlers.size() != 1) {
LOGDEB(("FileInterner::internfile. ipath [%s]\n", ipath.c_str()));
if (m_handlers.size() < 1) {
LOGERR(("FileInterner::internfile: bad stack size %d !!\n",
m_handlers.size()));
return FIError;
@ -252,7 +331,7 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath)
int vipathidx = 0;
if (!ipath.empty()) {
list<string> lipath;
stringToTokens(ipath, lipath, "|", true);
stringToTokens(ipath, lipath, isep, true);
vipath.insert(vipath.begin(), lipath.begin(), lipath.end());
if (!m_handlers.back()->skip_to_document(vipath[m_handlers.size()-1])){
LOGERR(("FileInterner::internfile: can't skip\n"));
@ -261,12 +340,17 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath)
}
/* Try to get doc from the topmost filter */
// Security counter: we try not to loop but ...
int loop = 0;
while (!m_handlers.empty()) {
if (loop++ > 30) {
LOGERR(("FileInterner:: looping!\n"));
return FIError;
}
if (!m_handlers.back()->has_documents()) {
// No docs at the current top level. Pop and see if there
// is something at the previous one
delete m_handlers.back();
m_handlers.pop_back();
popHandler();
continue;
}
@ -276,21 +360,16 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath)
}
// Look at what we've got
const std::map<std::string, std::string> *docdata =
&m_handlers.back()->get_meta_data();
map<string,string>::const_iterator it;
string charset;
it = docdata->find("charset");
if (it != docdata->end())
charset = it->second;
string mimetype;
it = docdata->find("mimetype");
if (it != docdata->end())
mimetype = it->second;
const std::map<std::string, std::string>& docdata =
m_handlers.back()->get_meta_data();
string charset, mimetype;
getKeyValue(docdata, keycs, charset);
getKeyValue(docdata, keymt, mimetype);
LOGDEB(("FileInterner::internfile:next_doc is %s\n",mimetype.c_str()));
LOGDEB(("FileInterner::internfile: next_doc is %s\n",
mimetype.c_str()));
// If we find a text/plain doc, we're done
if (!strcmp(mimetype.c_str(), "text/plain"))
if (!stringicmp(mimetype, m_targetMType))
break;
// Got a non text/plain doc. We need to stack another
@ -298,7 +377,7 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath)
if (m_handlers.size() > MAXHANDLERS) {
// Stack too big. Skip this and go on to check if there is
// something else in the current back()
LOGDEB(("FileInterner::internfile: stack too high\n"));
LOGINFO(("FileInterner::internfile: stack too high\n"));
continue;
}
@ -306,7 +385,7 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath)
if (!again) {
// If we can't find a filter, this doc can't be handled
// but there can be other ones so we go on
LOGERR(("FileInterner::internfile: no filter for [%s]\n",
LOGINFO(("FileInterner::internfile: no filter for [%s]\n",
mimetype.c_str()));
continue;
}
@ -316,18 +395,37 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath)
charset);
string ns;
const string *txt = &ns;
it = docdata->find("content");
if (it != docdata->end())
map<string,string>::const_iterator it;
it = docdata.find("content");
if (it != docdata.end())
txt = &it->second;
if (!again->set_document_string(*txt)) {
LOGERR(("FileInterner::internfile: error reparsing for %s\n",
bool setres = false;
if (again->is_data_input_ok(Dijon::Filter::DOCUMENT_STRING)) {
setres = again->set_document_string(*txt);
} else if (again->is_data_input_ok(Dijon::Filter::DOCUMENT_DATA)) {
setres = again->set_document_data(txt->c_str(), txt->length());
}else if(again->is_data_input_ok(Dijon::Filter::DOCUMENT_FILE_NAME)) {
string filename;
if (dataToTempFile(*txt, mimetype, filename)) {
if (!(setres = again->set_document_file(filename))) {
m_tmpflgs[m_handlers.size()-1] = false;
m_tempfiles.pop_back();
}
}
}
if (!setres) {
LOGINFO(("FileInterner::internfile: set_doc failed inside %s\n",
m_fn.c_str()));
delete again;
if (m_forPreview)
return FIError;
continue;
}
// add filter and go on
// add filter and go on, maybe this one will give us text...
m_handlers.push_back(again);
if (!m_handlers.back()->skip_to_document(vipath[m_handlers.size()-1])){
if (!ipath.empty() &&
!m_handlers.back()->skip_to_document(vipath[m_handlers.size()-1])){
LOGERR(("FileInterner::internfile: can't skip\n"));
return FIError;
}
@ -338,64 +436,79 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath)
return FIError;
}
// If indexing, we have to collect the ipath stack.
// While we're at it, we also set the mimetype, which is a special
// property:we want to get it from the topmost doc
// with an ipath, not the last one which is always text/html
// If indexing compute ipath and significant mimetype
// Note that ipath is returned through the parameter not doc.ipath
if (!m_forPreview) {
bool hasipath = false;
doc.mimetype = m_mimetype;
LOGDEB2(("INITIAL mimetype: %s\n", doc.mimetype.c_str()));
map<string,string>::const_iterator titi;
for (vector<Dijon::Filter*>::const_iterator hit = m_handlers.begin();
hit != m_handlers.end(); hit++) {
const map<string, string>& docdata = (*hit)->get_meta_data();
map<string, string>::const_iterator iti = docdata.find("ipath");
if (iti != docdata.end()) {
if (!iti->second.empty()) {
// We have a non-empty ipath
hasipath = true;
titi = docdata.find("mimetype");
if (titi != docdata.end())
doc.mimetype = titi->second;
}
ipath += iti->second + "|";
} else {
ipath += "|";
}
}
// Walk done, transform the list into a string
if (hasipath) {
LOGDEB2(("IPATH [%s]\n", ipath.c_str()));
string::size_type sit = ipath.find_last_not_of("|");
if (sit == string::npos)
ipath.erase();
else if (sit < ipath.length() -1)
ipath.erase(sit+1);
} else {
ipath.erase();
}
}
if (!m_forPreview)
collectIpathAndMT(doc, ipath);
dijontorcl(doc);
// Destack what can be
while (!m_handlers.empty() && !m_handlers.back()->has_documents()) {
delete m_handlers.back();
m_handlers.pop_back();
popHandler();
}
if (m_handlers.empty() || !m_handlers.back()->has_documents())
if (m_handlers.empty())
return FIDone;
else
return FIAgain;
}
class DirWiper {
public:
string dir;
bool do_it;
DirWiper(string d) : dir(d), do_it(true) {}
~DirWiper() {
if (do_it) {
wipedir(dir);
rmdir(dir.c_str());
}
}
};
// Utility function: extract the internal document designated by 'ipath'
// inside file 'fn' and store its data in a new temporary file.
//
// otemp: output, set to the temporary file holding the data on success.
// cnf:   configuration, used to create the interner and to choose the
//        temporary file suffix from the mime type.
// fn:    path of the container file.
// ipath: internal path of the document inside the file.
// mtype: mime type of the document; conversion is stopped at this type
//        (it is set as the interner's target type) instead of text/plain.
// Returns true on success, false (after logging) on any failure.
bool FileInterner::idocTempFile(TempFile& otemp, RclConfig *cnf,
                                const string& fn, const string& ipath,
                                const string& mtype)
{
    string tmpdir, reason;
    if (!maketmpdir(tmpdir, reason)) {
        // 'reason' is presumably filled in by maketmpdir() on failure
        LOGERR(("FileInterner::idocTempFile: maketmpdir failed: %s\n",
                reason.c_str()));
        return false;
    }
    // Declared before the interner so that the interner is destroyed first
    // and the directory is wiped only afterwards.
    DirWiper wiper(tmpdir);

    FileInterner interner(fn, cnf, tmpdir, &mtype);
    interner.setTargetMType(mtype);
    Rcl::Doc doc;
    // internfile() takes a non-const ipath: work on a copy
    string mipath = ipath;
    Status ret = interner.internfile(doc, mipath);
    if (ret == FileInterner::FIError) {
        LOGERR(("FileInterner::idocTempFile: internfile() failed\n"));
        return false;
    }

    TempFile temp(new TempFileInternal(cnf->getSuffixFromMimeType(mtype)));
    if (!temp->ok()) {
        LOGERR(("FileInterner::idocTempFile: cannot create temporary file\n"));
        return false;
    }
    int fd = open(temp->filename(), O_WRONLY);
    if (fd < 0) {
        LOGERR(("FileInterner::idocTempFile: open(%s) failed errno %d\n",
                temp->filename(), errno));
        return false;
    }

    // write() may transfer less than requested or be interrupted by a
    // signal: loop until all the data is written or a real error occurs.
    const string& dt = doc.text;
    const char *cp = dt.data();
    size_t remaining = dt.length();
    while (remaining > 0) {
        ssize_t nw = write(fd, cp, remaining);
        if (nw < 0 && errno == EINTR)
            continue;
        if (nw <= 0) {
            // Save errno before close() gets a chance to change it
            int werrno = nw < 0 ? errno : 0;
            close(fd);
            LOGERR(("FileInterner::idocTempFile: write to %s failed errno %d\n",
                    temp->filename(), werrno));
            return false;
        }
        cp += nw;
        remaining -= (size_t)nw;
    }
    close(fd);
    otemp = temp;
    return true;
}
#else
#include <stdio.h>

View File

@ -16,13 +16,14 @@
*/
#ifndef _INTERNFILE_H_INCLUDED_
#define _INTERNFILE_H_INCLUDED_
/* @(#$Id: internfile.h,v 1.8 2006-12-15 16:33:15 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: internfile.h,v 1.9 2006-12-16 15:39:54 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string>
#include <vector>
using std::string;
using std::vector;
#include "pathut.h"
#include "Filter.h"
class RclConfig;
@ -30,10 +31,13 @@ namespace Rcl {
class Doc;
}
/// Turn external file into internal representation, according to mime
/// type etc
/**
* A class to convert a file into possibly multiple documents in internal
* representation.
*/
class FileInterner {
public:
/**
* Identify and possibly decompress file, create adequate
* handler. The mtype parameter is only set when the object is
@ -70,20 +74,39 @@ class FileInterner {
* should be called again to get the following one(s).
*/
Status internfile(Rcl::Doc& doc, string &ipath);
/** Return the file's mimetype (useful for container files) */
const string& get_mimetype() {return m_mimetype;}
/** We normally always return text/plain data. A caller can request
* that we stop conversion at the native document type (ie: text/html)
*/
void setTargetMType(const string& tp) {m_targetMType = tp;}
/** Utility function: extract internal document and make temporary file */
static bool idocTempFile(TempFile& temp, RclConfig *cnf, const string& fn,
const string& ipath, const string& mtype);
private:
static const unsigned int MAXHANDLERS = 20;
RclConfig *m_cfg;
string m_fn;
string m_mimetype; // Mime type for [uncompressed] file
bool m_forPreview;
string m_targetMType;
// m_tdir and m_tfile are used only for decompressing input file if needed
const string& m_tdir;
string m_tfile;
vector<Dijon::Filter*> m_handlers;
bool m_tmpflgs[MAXHANDLERS];
vector<TempFile> m_tempfiles;
void tmpcleanup();
bool dijontorcl(Rcl::Doc&);
void collectIpathAndMT(Rcl::Doc&, string& ipath);
bool dataToTempFile(const string& data, const string& mt, string& fn);
void popHandler();
};
#endif /* _INTERNFILE_H_INCLUDED_ */

View File

@ -16,7 +16,7 @@
*/
#ifndef _HTML_H_INCLUDED_
#define _HTML_H_INCLUDED_
/* @(#$Id: mh_html.h,v 1.8 2006-12-15 12:40:02 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: mh_html.h,v 1.9 2006-12-16 15:39:54 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string>
@ -31,6 +31,11 @@ class MimeHandlerHtml : public RecollFilter {
virtual ~MimeHandlerHtml() {}
virtual bool set_document_file(const string &file_path);
virtual bool set_document_string(const string &data);
/** This handler accepts DOCUMENT_FILE_NAME and DOCUMENT_STRING input only */
virtual bool is_data_input_ok(DataInput input) const {
    return input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING;
}
virtual bool next_document();
private:
string m_html;

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: mh_mail.cpp,v 1.25 2006-12-15 16:33:15 dockes Exp $ (C) 2005 J.F.Dockes";
static char rcsid[] = "@(#$Id: mh_mail.cpp,v 1.26 2006-12-16 15:39:54 dockes Exp $ (C) 2005 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -49,9 +49,15 @@ MimeHandlerMail::~MimeHandlerMail()
if (m_fd >= 0)
close(m_fd);
delete m_stream;
for (vector<MHMailAttach*>::iterator it = m_attachments.begin();
it != m_attachments.end(); it++) {
delete *it;
}
}
bool MimeHandlerMail::set_document_file(const string &fn)
{
LOGDEB(("MimeHandlerMail::set_document_file(%s)\n", fn.c_str()));
if (m_fd >= 0) {
close(m_fd);
m_fd = -1;
@ -76,6 +82,7 @@ bool MimeHandlerMail::set_document_file(const string &fn)
bool MimeHandlerMail::set_document_string(const string &msgtxt)
{
LOGDEB1(("MimeHandlerMail::set_document_string\n"));
LOGDEB2(("Message text: [%s]\n", msgtxt.c_str()));
delete m_stream;
m_stream = new stringstream(msgtxt);
@ -90,15 +97,36 @@ bool MimeHandlerMail::set_document_string(const string &msgtxt)
return true;
}
bool MimeHandlerMail::skip_to_document(const string& ipath)
{
LOGDEB(("MimeHandlerMail::skip_to_document(%s)\n", ipath.c_str()));
if (m_idx == -1) {
// No decoding done yet. If ipath is null need do nothing
if (ipath == "" || ipath == "-1")
return true;
// ipath points to attachment: need to decode message
if (!next_document()) {
LOGERR(("MimeHandlerMail::skip_to_doc: next_document failed\n"));
return false;
}
}
m_idx = atoi(ipath.c_str());
return true;
}
bool MimeHandlerMail::next_document()
{
LOGDEB(("MimeHandlerMail::next_document m_idx %d m_havedoc %d\n",
m_idx, m_havedoc));
if (!m_havedoc)
return false;
bool res = false;
if (m_idx == -1) {
m_metaData["mimetype"] = "text/plain";
res =processMsg(m_bincdoc, 0);
res = processMsg(m_bincdoc, 0);
LOGDEB1(("MimeHandlerMail::next_document: mimetype %s\n",
m_metaData["mimetype"].c_str()));
} else {
res = processAttach();
}
@ -107,9 +135,61 @@ bool MimeHandlerMail::next_document()
return res;
}
// Decode 'body' according to the content transfer encoding 'cte'.
// On return, *respp points to the string holding the usable data: 'decoded'
// after a successful quoted-printable or base64 decoding, 'body' itself in
// all other cases (other encodings: 7bit, 8bit, raw, or decoding failure).
// Returns false only if a decoding was attempted and failed.
static bool decodeBody(const string& cte, const string& body, string& decoded,
                       const string** respp)
{
    // Default result, also used in case of error
    *respp = &body;

    if (stringlowercmp("quoted-printable", cte) == 0) {
        if (qp_decode(body, decoded)) {
            *respp = &decoded;
            return true;
        }
        LOGERR(("decodeBody: quoted-printable decoding failed !\n"));
        return false;
    }

    if (stringlowercmp("base64", cte) == 0) {
        if (base64_decode(body, decoded)) {
            *respp = &decoded;
            return true;
        }
        LOGERR(("decodeBody: base64 decoding failed !. body [%s]\n",
                body.c_str()));
        return false;
    }

    // No decoding needed
    return true;
}
bool MimeHandlerMail::processAttach()
{
return false;
LOGDEB(("MimeHandlerMail::processAttach() m_idx %d\n", m_idx));
if (!m_havedoc)
return false;
if (m_idx >= (int)m_attachments.size()) {
m_havedoc = false;
return false;
}
MHMailAttach *att = m_attachments[m_idx];
LOGDEB1(("processAttach:content-type: %s\n", att->m_contentType.c_str()));
m_metaData["mimetype"] = att->m_contentType;
m_metaData["charset"] = att->m_charset;
m_metaData["filename"] = att->m_filename;
m_metaData["content"] = "";
string& body = m_metaData["content"];
att->m_part->getBody(body, 0, att->m_part->bodylength);
string decoded;
const string *bdp;
if (!decodeBody(att->m_contentTransferEncoding, body, decoded, &bdp)) {
return false;
}
if (bdp != &body)
body = decoded;
char nbuf[10];
sprintf(nbuf, "%d", m_idx);
m_metaData["ipath"] = nbuf;
return true;
}
// Transform a single message into a document. The subject becomes the
@ -124,7 +204,7 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
LOGDEB2(("MimeHandlerMail::processMsg: depth %d\n", depth));
if (depth++ >= maxdepth) {
// Have to stop somewhere
LOGDEB(("MimeHandlerMail::processMsg: maxdepth %d exceeded\n",
LOGINFO(("MimeHandlerMail::processMsg: maxdepth %d exceeded\n",
maxdepth));
// Return true anyway, better to index partially than not at all
return true;
@ -218,7 +298,7 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
// Get and parse content-type header
Binc::HeaderItem hi;
if (!it->h.getFirstHeader("Content-Type", hi)) {
LOGDEB(("No content-type header for part %d\n", i));
LOGDEB(("walkmime:no ctent-type header for part %d\n", i));
continue;
}
MimeHeaderValue content_type;
@ -297,30 +377,6 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
// "Simple" part.
LOGDEB2(("walkmime: simple part\n"));
// If the Content-Disposition is not inline, we treat it as
// attachment, as per rfc2183. We don't process attachments
// for now, except for indexing/displaying the file name
// If it is inline but not text or html, same thing.
if (stringlowercmp("inline", content_disposition.value) ||
(stringlowercmp("text/plain", content_type.value) &&
stringlowercmp("text/html", content_type.value)) ) {
if (!filename.empty()) {
out += "\n";
if (m_forPreview)
out += "[" + dispindic + " " + content_type.value + ": ";
out += filename;
if (m_forPreview)
out += "]";
out += "\n\n";
}
// m_attachments.push_back(&doc);
// We're done with this part
return;
}
// We are dealing with an inline part of text/plain or text/html type
// Normally the default charset is us-ascii. But it happens that
// 8 bit chars exist in a message that is stated as us-ascii. Ie the
// mailer used by yahoo support ('KANA') does this. We could convert
@ -345,34 +401,52 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
cte = hi.getValue();
}
// If the Content-Disposition is not inline, we treat it as
// attachment, as per rfc2183. The file name, if any, is added to the
// text, and the part is recorded in m_attachments so that it can be
// extracted later as a separate document.
// If it is inline but not text or html, same thing.
if (stringlowercmp("inline", content_disposition.value) ||
(stringlowercmp("text/plain", content_type.value) &&
stringlowercmp("text/html", content_type.value)) ) {
if (!filename.empty()) {
out += "\n";
if (m_forPreview)
out += "[" + dispindic + " " + content_type.value + ": ";
out += filename;
if (m_forPreview)
out += "]";
out += "\n\n";
}
LOGDEB(("walkmime: pushing attchmnt fn [%s]\n", filename.c_str()));
MHMailAttach *att = new MHMailAttach;
if (att == 0) {
LOGERR(("Out of memory\n"));
return;
}
att->m_contentType = content_type.value;
att->m_filename = filename;
att->m_charset = charset;
att->m_contentTransferEncoding = cte;
att->m_part = doc;
m_attachments.push_back(att);
return;
}
// We are dealing with an inline part of text/plain or text/html type
LOGDEB2(("walkmime: final: body start offset %d, length %d\n",
doc->getBodyStartOffset(), doc->getBodyLength()));
string body;
doc->getBody(body, 0, doc->bodylength);
// Decode according to content transfer encoding
if (!stringlowercmp("quoted-printable", cte)) {
string decoded;
if (!qp_decode(body, decoded)) {
LOGERR(("walkmime: quoted-printable decoding failed !\n"));
return;
}
body = decoded;
} else if (!stringlowercmp("base64", cte)) {
string decoded;
if (!base64_decode(body, decoded)) {
LOGERR(("walkmime: base64 decoding failed !\n"));
#if 0
FILE *fp = fopen("/tmp/recoll_decodefail", "w");
if (fp) {
fprintf(fp, "%s", body.c_str());
fclose(fp);
}
#endif
return;
}
body = decoded;
string decoded;
const string *bdp;
if (!decodeBody(cte, body, decoded, &bdp)) {
LOGERR(("MimeHandlerMail::walkmime: failed decoding body\n"));
}
if (bdp != &body)
body = decoded;
// Handle html stripping and transcoding to utf8
string utf8;
@ -390,6 +464,7 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
out += it->second;
} else {
// Transcode to utf-8
LOGDEB1(("walkmime: transcoding from %s to UTF-8\n", charset.c_str()));
if (!transcode(body, utf8, charset, "UTF-8")) {
LOGERR(("walkmime: transcode failed from cs '%s' to UTF-8\n",
charset.c_str()));

View File

@ -16,7 +16,7 @@
*/
#ifndef _MAIL_H_INCLUDED_
#define _MAIL_H_INCLUDED_
/* @(#$Id: mh_mail.h,v 1.10 2006-12-15 16:33:15 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: mh_mail.h,v 1.11 2006-12-16 15:39:54 dockes Exp $ (C) 2004 J.F.Dockes */
#include <sstream>
#include <vector>
@ -29,6 +29,8 @@ namespace Binc {
class MimePart;
}
class MHMailAttach;
/**
* Translate a mail folder file into internal documents (also works
* for maildir files). This has to keep state while parsing a mail folder
@ -40,9 +42,15 @@ class MimeHandlerMail : public RecollFilter {
: RecollFilter(mt), m_bincdoc(0), m_fd(-1), m_stream(0), m_idx(-1)
{}
virtual ~MimeHandlerMail();
virtual bool set_document_file(const string &file_path);
virtual bool set_document_string(const string &data);
virtual bool set_document_file(const string& file_path);
virtual bool set_document_string(const string& data);
/** This handler accepts DOCUMENT_FILE_NAME and DOCUMENT_STRING input only */
virtual bool is_data_input_ok(DataInput input) const {
    return input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING;
}
virtual bool next_document();
virtual bool skip_to_document(const string& ipath);
private:
bool processMsg(Binc::MimePart *doc, int depth);
@ -53,7 +61,16 @@ class MimeHandlerMail : public RecollFilter {
std::stringstream *m_stream;
int m_idx; // starts at -1 for self, then index into
// attachments;
vector<Binc::MimePart *> m_attachments;
vector<MHMailAttach *> m_attachments;
};
class MHMailAttach {
public:
string m_contentType;
string m_filename;
string m_charset;
string m_contentTransferEncoding;
Binc::MimePart *m_part;
};
#endif /* _MAIL_H_INCLUDED_ */

View File

@ -16,7 +16,7 @@
*/
#ifndef _MH_TEXT_H_INCLUDED_
#define _MH_TEXT_H_INCLUDED_
/* @(#$Id: mh_text.h,v 1.3 2006-12-15 12:40:02 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: mh_text.h,v 1.4 2006-12-16 15:39:54 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string>
using std::string;
@ -34,6 +34,11 @@ class MimeHandlerText : public RecollFilter {
virtual ~MimeHandlerText() {}
virtual bool set_document_file(const string &file_path);
virtual bool set_document_string(const string&);
/** This handler accepts DOCUMENT_FILE_NAME and DOCUMENT_STRING input only */
virtual bool is_data_input_ok(DataInput input) const {
    return input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING;
}
virtual bool next_document();
private:
string m_text;

View File

@ -16,7 +16,7 @@
*/
#ifndef _MIMEHANDLER_H_INCLUDED_
#define _MIMEHANDLER_H_INCLUDED_
/* @(#$Id: mimehandler.h,v 1.13 2006-12-15 12:40:02 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: mimehandler.h,v 1.14 2006-12-16 15:39:54 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string>
#include <list>
@ -66,8 +66,12 @@ public:
return false;
}
virtual DataInput get_required_data_input() const
{return DOCUMENT_FILE_NAME;}
/** By default, only DOCUMENT_FILE_NAME input is accepted */
virtual bool is_data_input_ok(DataInput input) const {
    return input == DOCUMENT_FILE_NAME;
}
virtual string get_error() const {
return m_reason;
}

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: main.cpp,v 1.56 2006-12-05 15:23:50 dockes Exp $ (C) 2005 J.F.Dockes";
static char rcsid[] = "@(#$Id: main.cpp,v 1.57 2006-12-16 15:39:54 dockes Exp $ (C) 2005 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -281,6 +281,7 @@ int main(int argc, char **argv)
// Connect exit handlers etc..
app.connect(&app, SIGNAL(lastWindowClosed()), &app, SLOT(quit()));
app.connect(&app, SIGNAL(aboutToQuit()), mainWindow, SLOT(close()));
QTimer *timer = new QTimer(&app);
mainWindow->connect(timer, SIGNAL(timeout()),
mainWindow, SLOT(periodic100()));

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: rclmain_w.cpp,v 1.14 2006-12-14 13:53:43 dockes Exp $ (C) 2005 J.F.Dockes";
static char rcsid[] = "@(#$Id: rclmain_w.cpp,v 1.15 2006-12-16 15:39:54 dockes Exp $ (C) 2005 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -66,6 +66,7 @@ using std::pair;
#include "refcntr.h"
#include "ssearch_w.h"
#include "execmd.h"
#include "internfile.h"
#include "rclmain_w.h"
#include "moc_rclmain_w.cpp"
@ -161,15 +162,6 @@ void RclMain::init()
#endif
}
// We also want to get rid of the advanced search form and previews
// when we exit (not our children so that it's not systematically
// created over the main form).
bool RclMain::close(bool)
{
LOGDEB(("RclMain::close\n"));
fileExit();
return false;
}
//#define SHOWEVENTS
#if defined(SHOWEVENTS)
@ -257,9 +249,20 @@ static const char *eventTypeToStr(int tp)
}
#endif
// We also want to get rid of the advanced search form and previews
// when we exit (not our children so that it's not systematically
// created over the main form).
// This only logs the call and hands over to fileExit(). It always
// returns false.
// NOTE(review): presumably false is returned because the actual shutdown
// is performed by fileExit() rather than by closing the widget here --
// confirm against the Qt close() contract.
bool RclMain::close()
{
LOGDEB(("RclMain::close\n"));
fileExit();
return false;
}
void RclMain::fileExit()
{
LOGDEB1(("RclMain: fileExit\n"));
LOGDEB(("RclMain: fileExit\n"));
m_tempfiles.clear();
prefs.mainwidth = width();
prefs.mainheight = height();
prefs.ssearchTyp = sSearch->searchTypCMB->currentItem();
@ -686,15 +689,38 @@ void RclMain::startNativeViewer(int docnum)
}
}
string fn = urltolocalpath(doc.url);
string url = url_encode(doc.url, 7);
string ipath = doc.ipath;
// Substitute %u (url) and %f (file name) inside prototype command
// For files with an ipath, we do things differently depending if the
// configured command seems to be able to grok it or not.
bool wantsipath = cmd.find("%i") != string::npos;
bool istempfile = false;
string fn, url;
if (doc.ipath.empty() || wantsipath) {
fn = urltolocalpath(doc.url);
url = url_encode(doc.url, 7);
} else {
// There is an ipath and the command does not know about
// them. We need a temp file.
TempFile temp;
if (!FileInterner::idocTempFile(temp, rclconfig,
urltolocalpath(doc.url),
doc.ipath, doc.mimetype)) {
QMessageBox::warning(0, "Recoll",
tr("Cannot extract document or create "
"temporary file"));
return;
}
istempfile = true;
m_tempfiles.push_back(temp);
fn = temp->filename();
url = string("file://") + fn;
}
// Substitute %xx inside prototype command
string ncmd;
map<char, string> subs;
subs['u'] = escapeShell(url);
subs['f'] = escapeShell(fn);
subs['i'] = escapeShell(ipath);
subs['i'] = escapeShell(doc.ipath);
pcSubst(cmd, ncmd, subs);
ncmd += " &";
@ -707,7 +733,10 @@ void RclMain::startNativeViewer(int docnum)
QString::fromUtf8(prcmd.c_str()) + "]";
stb->message(msg, 5000);
}
g_dynconf->enterDoc(fn, doc.ipath);
if (!istempfile)
g_dynconf->enterDoc(fn, doc.ipath);
// We should actually monitor these processes so that we can
// delete the temp files when they exit
system(ncmd.c_str());
}

View File

@ -29,6 +29,7 @@
#include "searchdata.h"
#include "spell_w.h"
#include "refcntr.h"
#include "pathut.h"
#if QT_VERSION < 0x040000
#include "rclmain.h"
@ -64,9 +65,9 @@ public:
}
~RclMain() {}
virtual bool close( bool );
public slots:
virtual bool close();
virtual void fileExit();
virtual void periodic100();
virtual void startIndexing();
@ -103,7 +104,8 @@ private:
RefCntr<Rcl::SearchData> m_searchData;
DocSeqSortSpec m_sortspecs;
RefCntr<DocSequence> m_docSource;
vector<TempFile> m_tempfiles;
// Serial number of current search for this process.
// Used to match to preview windows
int m_searchId;