mail attachments sort of ok
This commit is contained in:
parent
1435101bb6
commit
8f1f2ca66d
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: internfile.cpp,v 1.20 2006-12-15 16:33:15 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: internfile.cpp,v 1.21 2006-12-16 15:39:54 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -21,6 +21,7 @@ static char rcsid[] = "@(#$Id: internfile.cpp,v 1.20 2006-12-15 16:33:15 dockes
|
|||||||
#ifndef TEST_INTERNFILE
|
#ifndef TEST_INTERNFILE
|
||||||
|
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
#include <fcntl.h>
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
@ -41,6 +42,10 @@ using namespace std;
|
|||||||
#include "wipedir.h"
|
#include "wipedir.h"
|
||||||
#include "rclconfig.h"
|
#include "rclconfig.h"
|
||||||
|
|
||||||
|
// The internal path element separator. This can't be the same as the rcldb
|
||||||
|
// file to ipath separator : "|"
|
||||||
|
static const string isep(":");
|
||||||
|
|
||||||
// Execute the command to uncompress a file into a temporary one.
|
// Execute the command to uncompress a file into a temporary one.
|
||||||
static bool uncompressfile(RclConfig *conf, const string& ifn,
|
static bool uncompressfile(RclConfig *conf, const string& ifn,
|
||||||
const list<string>& cmdv, const string& tdir,
|
const list<string>& cmdv, const string& tdir,
|
||||||
@ -133,7 +138,7 @@ FileInterner::FileInterner(const std::string &f, RclConfig *cnf,
|
|||||||
if (!uncompressfile(m_cfg, m_fn, ucmd, m_tdir, m_tfile)) {
|
if (!uncompressfile(m_cfg, m_fn, ucmd, m_tdir, m_tfile)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
LOGDEB(("internfile: after ucomp: m_tdir %s, tfile %s\n",
|
LOGDEB1(("internfile: after ucomp: m_tdir %s, tfile %s\n",
|
||||||
m_tdir.c_str(), m_tfile.c_str()));
|
m_tdir.c_str(), m_tfile.c_str()));
|
||||||
m_fn = m_tfile;
|
m_fn = m_tfile;
|
||||||
l_mime = mimetype(m_fn, m_cfg, usfci);
|
l_mime = mimetype(m_fn, m_cfg, usfci);
|
||||||
@ -167,77 +172,151 @@ FileInterner::FileInterner(const std::string &f, RclConfig *cnf,
|
|||||||
LOGERR(("FileInterner:: error parsing %s\n", m_fn.c_str()));
|
LOGERR(("FileInterner:: error parsing %s\n", m_fn.c_str()));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
m_handlers.reserve(20);
|
m_handlers.reserve(MAXHANDLERS);
|
||||||
|
for (unsigned int i = 0; i < MAXHANDLERS; i++)
|
||||||
|
m_tmpflgs[i] = false;
|
||||||
m_handlers.push_back(df);
|
m_handlers.push_back(df);
|
||||||
LOGDEB(("FileInterner::FileInterner: %s [%s]\n", l_mime.c_str(),
|
LOGDEB(("FileInterner::FileInterner: %s [%s]\n", l_mime.c_str(),
|
||||||
m_fn.c_str()));
|
m_fn.c_str()));
|
||||||
|
m_targetMType = "text/plain";
|
||||||
}
|
}
|
||||||
|
|
||||||
FileInterner::~FileInterner()
|
FileInterner::~FileInterner()
|
||||||
{
|
{
|
||||||
while (!m_handlers.empty()) {
|
|
||||||
delete m_handlers.back();
|
|
||||||
m_handlers.pop_back();
|
|
||||||
}
|
|
||||||
tmpcleanup();
|
tmpcleanup();
|
||||||
|
for (vector<Dijon::Filter*>::iterator it = m_handlers.begin();
|
||||||
|
it != m_handlers.end(); it++)
|
||||||
|
delete *it;
|
||||||
|
// m_tempfiles will take care of itself
|
||||||
}
|
}
|
||||||
|
|
||||||
static const string string_empty;
|
bool FileInterner::dataToTempFile(const string& dt, const string& mt,
|
||||||
static const string get_mimetype(Dijon::Filter* df)
|
string& fn)
|
||||||
{
|
{
|
||||||
const std::map<std::string, std::string> *docdata = &df->get_meta_data();
|
// Find appropriate suffix for mime type
|
||||||
map<string,string>::const_iterator it;
|
TempFile temp(new TempFileInternal(m_cfg->getSuffixFromMimeType(mt)));
|
||||||
it = docdata->find("mimetype");
|
if (temp->ok()) {
|
||||||
if (it != docdata->end()) {
|
m_tmpflgs[m_handlers.size()-1] = true;
|
||||||
return it->second;
|
m_tempfiles.push_back(temp);
|
||||||
} else {
|
} else {
|
||||||
return string_empty;
|
LOGERR(("FileInterner::dataToTempFile: cant create tempfile\n"));
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int fd = open(temp->filename(), O_WRONLY);
|
||||||
|
if (fd < 0) {
|
||||||
|
LOGERR(("FileInterner::dataToTempFile: open(%s) failed errno %d\n",
|
||||||
|
temp->filename(), errno));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (write(fd, dt.c_str(), dt.length()) != (int)dt.length()) {
|
||||||
|
close(fd);
|
||||||
|
LOGERR(("FileInterner::dataToTempFile: write to %s failed errno %d\n",
|
||||||
|
temp->filename(), errno));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
close(fd);
|
||||||
|
fn = temp->filename();
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Look up a metadata entry and copy its value out.
 *
 * @param docdata metadata map to search.
 * @param key     name of the entry to look for.
 * @param value   receives a copy of the entry's value when the key is
 *                present; left untouched when it is not, so a caller can
 *                pre-load a default.
 * @return true if the key was found, false otherwise.
 */
static inline bool getKeyValue(const std::map<std::string, std::string>& docdata,
                               const std::string& key, std::string& value)
{
    const std::map<std::string, std::string>::const_iterator it =
        docdata.find(key);
    if (it == docdata.end())
        return false;
    value = it->second;
    return true;
}
|
||||||
|
|
||||||
|
static const string keyab("abstract");
|
||||||
|
static const string keycs("charset");
|
||||||
|
static const string keyct("content");
|
||||||
|
static const string keyfn("filename");
|
||||||
|
static const string keykw("keywords");
|
||||||
|
static const string keymd("modificationdate");
|
||||||
|
static const string keymt("mimetype");
|
||||||
|
static const string keyoc("origcharset");
|
||||||
|
static const string keysm("sample");
|
||||||
|
static const string keytt("title");
|
||||||
|
|
||||||
bool FileInterner::dijontorcl(Rcl::Doc& doc)
|
bool FileInterner::dijontorcl(Rcl::Doc& doc)
|
||||||
{
|
{
|
||||||
Dijon::Filter *df = m_handlers.back();
|
Dijon::Filter *df = m_handlers.back();
|
||||||
const std::map<std::string, std::string> *docdata = &df->get_meta_data();
|
const std::map<std::string, std::string>& docdata = df->get_meta_data();
|
||||||
map<string,string>::const_iterator it;
|
|
||||||
|
|
||||||
it = docdata->find("origcharset");
|
getKeyValue(docdata, keyoc, doc.origcharset);
|
||||||
if (it != docdata->end())
|
getKeyValue(docdata, keyct, doc.text);
|
||||||
doc.origcharset = it->second;
|
getKeyValue(docdata, keytt, doc.title);
|
||||||
|
getKeyValue(docdata, keykw, doc.keywords);
|
||||||
it = docdata->find("content");
|
getKeyValue(docdata, keymd, doc.dmtime);
|
||||||
if (it != docdata->end())
|
if (!getKeyValue(docdata, keyab, doc.abstract))
|
||||||
doc.text = it->second;
|
getKeyValue(docdata, keysm, doc.abstract);
|
||||||
|
LOGDEB1(("FILENAME: %s\n", doc.utf8fn.c_str()));
|
||||||
it = docdata->find("title");
|
|
||||||
if (it != docdata->end())
|
|
||||||
doc.title = it->second;
|
|
||||||
|
|
||||||
it = docdata->find("keywords");
|
|
||||||
if (it != docdata->end())
|
|
||||||
doc.keywords = it->second;
|
|
||||||
|
|
||||||
it = docdata->find("modificationdate");
|
|
||||||
if (it != docdata->end())
|
|
||||||
doc.dmtime = it->second;
|
|
||||||
|
|
||||||
it = docdata->find("abstract");
|
|
||||||
if (it != docdata->end()) {
|
|
||||||
doc.abstract = it->second;
|
|
||||||
} else {
|
|
||||||
it = docdata->find("sample");
|
|
||||||
if (it != docdata->end())
|
|
||||||
doc.abstract = it->second;
|
|
||||||
}
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Collect the ipath stack.
|
||||||
|
// While we're at it, we also set the mimetype and filename, which are special
|
||||||
|
// properties: we want to get them from the topmost doc
|
||||||
|
// with an ipath, not the last one which is usually text/plain
|
||||||
|
void FileInterner::collectIpathAndMT(Rcl::Doc& doc, string& ipath)
|
||||||
|
{
|
||||||
|
bool hasipath = false;
|
||||||
|
|
||||||
static const unsigned int MAXHANDLERS = 20;
|
// If there is no ipath stack, the mimetype is the one from the file
|
||||||
|
doc.mimetype = m_mimetype;
|
||||||
|
LOGDEB2(("INITIAL mimetype: %s\n", doc.mimetype.c_str()));
|
||||||
|
|
||||||
|
string ipathel;
|
||||||
|
for (vector<Dijon::Filter*>::const_iterator hit = m_handlers.begin();
|
||||||
|
hit != m_handlers.end(); hit++) {
|
||||||
|
const map<string, string>& docdata = (*hit)->get_meta_data();
|
||||||
|
if (getKeyValue(docdata, "ipath", ipathel)) {
|
||||||
|
if (!ipathel.empty()) {
|
||||||
|
// We have a non-empty ipath
|
||||||
|
hasipath = true;
|
||||||
|
getKeyValue(docdata, keymt, doc.mimetype);
|
||||||
|
getKeyValue(docdata, keyfn, doc.utf8fn);
|
||||||
|
}
|
||||||
|
ipath += ipathel + isep;
|
||||||
|
} else {
|
||||||
|
ipath += isep;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Trim empty tail elements in ipath.
|
||||||
|
if (hasipath) {
|
||||||
|
LOGDEB2(("IPATH [%s]\n", ipath.c_str()));
|
||||||
|
string::size_type sit = ipath.find_last_not_of(isep);
|
||||||
|
if (sit == string::npos)
|
||||||
|
ipath.erase();
|
||||||
|
else if (sit < ipath.length() -1)
|
||||||
|
ipath.erase(sit+1);
|
||||||
|
} else {
|
||||||
|
ipath.erase();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove handler from stack. Clean up temp file if needed.
|
||||||
|
void FileInterner::popHandler()
|
||||||
|
{
|
||||||
|
int i = m_handlers.size()-1;
|
||||||
|
if (m_tmpflgs[i]) {
|
||||||
|
m_tempfiles.pop_back();
|
||||||
|
m_tmpflgs[i] = false;
|
||||||
|
}
|
||||||
|
delete m_handlers.back();
|
||||||
|
m_handlers.pop_back();
|
||||||
|
}
|
||||||
|
|
||||||
FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath)
|
FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath)
|
||||||
{
|
{
|
||||||
if (m_handlers.size() != 1) {
|
LOGDEB(("FileInterner::internfile. ipath [%s]\n", ipath.c_str()));
|
||||||
|
if (m_handlers.size() < 1) {
|
||||||
LOGERR(("FileInterner::internfile: bad stack size %d !!\n",
|
LOGERR(("FileInterner::internfile: bad stack size %d !!\n",
|
||||||
m_handlers.size()));
|
m_handlers.size()));
|
||||||
return FIError;
|
return FIError;
|
||||||
@ -252,7 +331,7 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath)
|
|||||||
int vipathidx = 0;
|
int vipathidx = 0;
|
||||||
if (!ipath.empty()) {
|
if (!ipath.empty()) {
|
||||||
list<string> lipath;
|
list<string> lipath;
|
||||||
stringToTokens(ipath, lipath, "|", true);
|
stringToTokens(ipath, lipath, isep, true);
|
||||||
vipath.insert(vipath.begin(), lipath.begin(), lipath.end());
|
vipath.insert(vipath.begin(), lipath.begin(), lipath.end());
|
||||||
if (!m_handlers.back()->skip_to_document(vipath[m_handlers.size()-1])){
|
if (!m_handlers.back()->skip_to_document(vipath[m_handlers.size()-1])){
|
||||||
LOGERR(("FileInterner::internfile: can't skip\n"));
|
LOGERR(("FileInterner::internfile: can't skip\n"));
|
||||||
@ -261,12 +340,17 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Try to get doc from the topmost filter */
|
/* Try to get doc from the topmost filter */
|
||||||
|
// Security counter: we try not to loop but ...
|
||||||
|
int loop = 0;
|
||||||
while (!m_handlers.empty()) {
|
while (!m_handlers.empty()) {
|
||||||
|
if (loop++ > 30) {
|
||||||
|
LOGERR(("FileInterner:: looping!\n"));
|
||||||
|
return FIError;
|
||||||
|
}
|
||||||
if (!m_handlers.back()->has_documents()) {
|
if (!m_handlers.back()->has_documents()) {
|
||||||
// No docs at the current top level. Pop and see if there
|
// No docs at the current top level. Pop and see if there
|
||||||
// is something at the previous one
|
// is something at the previous one
|
||||||
delete m_handlers.back();
|
popHandler();
|
||||||
m_handlers.pop_back();
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -276,21 +360,16 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Look at what we've got
|
// Look at what we've got
|
||||||
const std::map<std::string, std::string> *docdata =
|
const std::map<std::string, std::string>& docdata =
|
||||||
&m_handlers.back()->get_meta_data();
|
m_handlers.back()->get_meta_data();
|
||||||
map<string,string>::const_iterator it;
|
string charset, mimetype;
|
||||||
string charset;
|
getKeyValue(docdata, keycs, charset);
|
||||||
it = docdata->find("charset");
|
getKeyValue(docdata, keymt, mimetype);
|
||||||
if (it != docdata->end())
|
|
||||||
charset = it->second;
|
|
||||||
string mimetype;
|
|
||||||
it = docdata->find("mimetype");
|
|
||||||
if (it != docdata->end())
|
|
||||||
mimetype = it->second;
|
|
||||||
|
|
||||||
LOGDEB(("FileInterner::internfile:next_doc is %s\n",mimetype.c_str()));
|
LOGDEB(("FileInterner::internfile: next_doc is %s\n",
|
||||||
|
mimetype.c_str()));
|
||||||
// If we find a text/plain doc, we're done
|
// If we find a text/plain doc, we're done
|
||||||
if (!strcmp(mimetype.c_str(), "text/plain"))
|
if (!stringicmp(mimetype, m_targetMType))
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// Got a non text/plain doc. We need to stack another
|
// Got a non text/plain doc. We need to stack another
|
||||||
@ -298,7 +377,7 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath)
|
|||||||
if (m_handlers.size() > MAXHANDLERS) {
|
if (m_handlers.size() > MAXHANDLERS) {
|
||||||
// Stack too big. Skip this and go on to check if there is
|
// Stack too big. Skip this and go on to check if there is
|
||||||
// something else in the current back()
|
// something else in the current back()
|
||||||
LOGDEB(("FileInterner::internfile: stack too high\n"));
|
LOGINFO(("FileInterner::internfile: stack too high\n"));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -306,7 +385,7 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath)
|
|||||||
if (!again) {
|
if (!again) {
|
||||||
// If we can't find a filter, this doc can't be handled
|
// If we can't find a filter, this doc can't be handled
|
||||||
// but there can be other ones so we go on
|
// but there can be other ones so we go on
|
||||||
LOGERR(("FileInterner::internfile: no filter for [%s]\n",
|
LOGINFO(("FileInterner::internfile: no filter for [%s]\n",
|
||||||
mimetype.c_str()));
|
mimetype.c_str()));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -316,18 +395,37 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath)
|
|||||||
charset);
|
charset);
|
||||||
string ns;
|
string ns;
|
||||||
const string *txt = &ns;
|
const string *txt = &ns;
|
||||||
it = docdata->find("content");
|
map<string,string>::const_iterator it;
|
||||||
if (it != docdata->end())
|
it = docdata.find("content");
|
||||||
|
if (it != docdata.end())
|
||||||
txt = &it->second;
|
txt = &it->second;
|
||||||
if (!again->set_document_string(*txt)) {
|
|
||||||
LOGERR(("FileInterner::internfile: error reparsing for %s\n",
|
bool setres = false;
|
||||||
|
if (again->is_data_input_ok(Dijon::Filter::DOCUMENT_STRING)) {
|
||||||
|
setres = again->set_document_string(*txt);
|
||||||
|
} else if (again->is_data_input_ok(Dijon::Filter::DOCUMENT_DATA)) {
|
||||||
|
setres = again->set_document_data(txt->c_str(), txt->length());
|
||||||
|
}else if(again->is_data_input_ok(Dijon::Filter::DOCUMENT_FILE_NAME)) {
|
||||||
|
string filename;
|
||||||
|
if (dataToTempFile(*txt, mimetype, filename)) {
|
||||||
|
if (!(setres = again->set_document_file(filename))) {
|
||||||
|
m_tmpflgs[m_handlers.size()-1] = false;
|
||||||
|
m_tempfiles.pop_back();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!setres) {
|
||||||
|
LOGINFO(("FileInterner::internfile: set_doc failed inside %s\n",
|
||||||
m_fn.c_str()));
|
m_fn.c_str()));
|
||||||
delete again;
|
delete again;
|
||||||
|
if (m_forPreview)
|
||||||
|
return FIError;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// add filter and go on
|
// add filter and go on, maybe this one will give us text...
|
||||||
m_handlers.push_back(again);
|
m_handlers.push_back(again);
|
||||||
if (!m_handlers.back()->skip_to_document(vipath[m_handlers.size()-1])){
|
if (!ipath.empty() &&
|
||||||
|
!m_handlers.back()->skip_to_document(vipath[m_handlers.size()-1])){
|
||||||
LOGERR(("FileInterner::internfile: can't skip\n"));
|
LOGERR(("FileInterner::internfile: can't skip\n"));
|
||||||
return FIError;
|
return FIError;
|
||||||
}
|
}
|
||||||
@ -338,64 +436,79 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath)
|
|||||||
return FIError;
|
return FIError;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If indexing, we have to collect the ipath stack.
|
// If indexing compute ipath and significant mimetype
|
||||||
|
|
||||||
// While we're at it, we also set the mimetype, which is a special
|
|
||||||
// property:we want to get it from the topmost doc
|
|
||||||
// with an ipath, not the last one which is always text/html
|
|
||||||
// Note that ipath is returned through the parameter not doc.ipath
|
// Note that ipath is returned through the parameter not doc.ipath
|
||||||
if (!m_forPreview) {
|
if (!m_forPreview)
|
||||||
bool hasipath = false;
|
collectIpathAndMT(doc, ipath);
|
||||||
doc.mimetype = m_mimetype;
|
|
||||||
LOGDEB2(("INITIAL mimetype: %s\n", doc.mimetype.c_str()));
|
|
||||||
map<string,string>::const_iterator titi;
|
|
||||||
|
|
||||||
for (vector<Dijon::Filter*>::const_iterator hit = m_handlers.begin();
|
|
||||||
hit != m_handlers.end(); hit++) {
|
|
||||||
|
|
||||||
const map<string, string>& docdata = (*hit)->get_meta_data();
|
|
||||||
map<string, string>::const_iterator iti = docdata.find("ipath");
|
|
||||||
|
|
||||||
if (iti != docdata.end()) {
|
|
||||||
if (!iti->second.empty()) {
|
|
||||||
// We have a non-empty ipath
|
|
||||||
hasipath = true;
|
|
||||||
titi = docdata.find("mimetype");
|
|
||||||
if (titi != docdata.end())
|
|
||||||
doc.mimetype = titi->second;
|
|
||||||
}
|
|
||||||
ipath += iti->second + "|";
|
|
||||||
} else {
|
|
||||||
ipath += "|";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Walk done, transform the list into a string
|
|
||||||
if (hasipath) {
|
|
||||||
LOGDEB2(("IPATH [%s]\n", ipath.c_str()));
|
|
||||||
string::size_type sit = ipath.find_last_not_of("|");
|
|
||||||
if (sit == string::npos)
|
|
||||||
ipath.erase();
|
|
||||||
else if (sit < ipath.length() -1)
|
|
||||||
ipath.erase(sit+1);
|
|
||||||
} else {
|
|
||||||
ipath.erase();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
dijontorcl(doc);
|
dijontorcl(doc);
|
||||||
|
|
||||||
// Destack what can be
|
// Destack what can be
|
||||||
while (!m_handlers.empty() && !m_handlers.back()->has_documents()) {
|
while (!m_handlers.empty() && !m_handlers.back()->has_documents()) {
|
||||||
delete m_handlers.back();
|
popHandler();
|
||||||
m_handlers.pop_back();
|
|
||||||
}
|
}
|
||||||
if (m_handlers.empty() || !m_handlers.back()->has_documents())
|
if (m_handlers.empty())
|
||||||
return FIDone;
|
return FIDone;
|
||||||
else
|
else
|
||||||
return FIAgain;
|
return FIAgain;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class DirWiper {
|
||||||
|
public:
|
||||||
|
string dir;
|
||||||
|
bool do_it;
|
||||||
|
DirWiper(string d) : dir(d), do_it(true) {}
|
||||||
|
~DirWiper() {
|
||||||
|
if (do_it) {
|
||||||
|
wipedir(dir);
|
||||||
|
rmdir(dir.c_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Extract one internal document from a file and store its data in a
// temporary file.
//
// @param otemp receives the temporary file object on success; it is left
//              untouched on failure.
// @param cnf   configuration, used to build the interner and to choose a
//              file name suffix for mtype.
// @param fn    path of the file holding the document.
// @param ipath internal path of the wanted document inside fn.
// @param mtype mime type at which conversion should stop; it is passed to
//              setTargetMType() so that internfile() returns the data in
//              that type.
// @return true if the data was extracted and completely written.
bool FileInterner::idocTempFile(TempFile& otemp, RclConfig *cnf,
                                const string& fn, const string& ipath,
                                const string& mtype)
{
    string tmpdir, reason;
    if (!maketmpdir(tmpdir, reason)) {
        LOGERR(("FileInterner::idocTempFile: maketmpdir failed: %s\n",
                reason.c_str()));
        return false;
    }
    // Cleans up the working directory on every exit path below.
    DirWiper wiper(tmpdir);

    FileInterner interner(fn, cnf, tmpdir, &mtype);
    interner.setTargetMType(mtype);
    Rcl::Doc doc;
    // internfile() takes the ipath by non-const reference: work on a copy.
    string mipath = ipath;
    Status ret = interner.internfile(doc, mipath);
    if (ret == FileInterner::FIError) {
        LOGERR(("FileInterner::idocTempFile: internfile() failed\n"));
        return false;
    }

    TempFile temp(new TempFileInternal(cnf->getSuffixFromMimeType(mtype)));
    if (!temp->ok()) {
        LOGERR(("FileInterner::idocTempFile: cannot create temporary file\n"));
        return false;
    }
    int fd = open(temp->filename(), O_WRONLY);
    if (fd < 0) {
        LOGERR(("FileInterner::idocTempFile: open(%s) failed errno %d\n",
                temp->filename(), errno));
        return false;
    }

    // write() may transfer less than requested or be interrupted by a
    // signal: keep going until everything is out or a real error occurs.
    const string& dt = doc.text;
    const char *cp = dt.data();
    size_t left = dt.length();
    while (left > 0) {
        ssize_t n = write(fd, cp, left);
        if (n < 0 && errno == EINTR)
            continue;
        if (n <= 0) {
            // Save errno first: close() is allowed to overwrite it.
            int sverrno = errno;
            close(fd);
            LOGERR(("FileInterner::idocTempFile: write to %s failed errno %d\n",
                    temp->filename(), sverrno));
            return false;
        }
        cp += n;
        left -= (size_t)n;
    }
    close(fd);

    otemp = temp;
    return true;
}
|
||||||
|
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|||||||
@ -16,13 +16,14 @@
|
|||||||
*/
|
*/
|
||||||
#ifndef _INTERNFILE_H_INCLUDED_
|
#ifndef _INTERNFILE_H_INCLUDED_
|
||||||
#define _INTERNFILE_H_INCLUDED_
|
#define _INTERNFILE_H_INCLUDED_
|
||||||
/* @(#$Id: internfile.h,v 1.8 2006-12-15 16:33:15 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: internfile.h,v 1.9 2006-12-16 15:39:54 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
using std::string;
|
using std::string;
|
||||||
using std::vector;
|
using std::vector;
|
||||||
|
|
||||||
|
#include "pathut.h"
|
||||||
#include "Filter.h"
|
#include "Filter.h"
|
||||||
|
|
||||||
class RclConfig;
|
class RclConfig;
|
||||||
@ -30,10 +31,13 @@ namespace Rcl {
|
|||||||
class Doc;
|
class Doc;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Turn external file into internal representation, according to mime
|
/**
|
||||||
/// type etc
|
* A class to convert a file into possibly multiple documents in internal
|
||||||
|
* representation.
|
||||||
|
*/
|
||||||
class FileInterner {
|
class FileInterner {
|
||||||
public:
|
public:
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Identify and possibly decompress file, create adequate
|
* Identify and possibly decompress file, create adequate
|
||||||
* handler. The mtype parameter is only set when the object is
|
* handler. The mtype parameter is only set when the object is
|
||||||
@ -70,20 +74,39 @@ class FileInterner {
|
|||||||
* should be called again to get the following one(s).
|
* should be called again to get the following one(s).
|
||||||
*/
|
*/
|
||||||
Status internfile(Rcl::Doc& doc, string &ipath);
|
Status internfile(Rcl::Doc& doc, string &ipath);
|
||||||
|
|
||||||
|
/** Return the file's mimetype (useful for container files) */
|
||||||
const string& get_mimetype() {return m_mimetype;}
|
const string& get_mimetype() {return m_mimetype;}
|
||||||
|
|
||||||
|
/** We normally always return text/plain data. A caller can request
|
||||||
|
* that we stop conversion at the native document type (ie: text/html)
|
||||||
|
*/
|
||||||
|
void setTargetMType(const string& tp) {m_targetMType = tp;}
|
||||||
|
|
||||||
|
/** Utility function: extract internal document and make temporary file */
|
||||||
|
static bool idocTempFile(TempFile& temp, RclConfig *cnf, const string& fn,
|
||||||
|
const string& ipath, const string& mtype);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
static const unsigned int MAXHANDLERS = 20;
|
||||||
RclConfig *m_cfg;
|
RclConfig *m_cfg;
|
||||||
string m_fn;
|
string m_fn;
|
||||||
string m_mimetype; // Mime type for [uncompressed] file
|
string m_mimetype; // Mime type for [uncompressed] file
|
||||||
bool m_forPreview;
|
bool m_forPreview;
|
||||||
|
string m_targetMType;
|
||||||
// m_tdir and m_tfile are used only for decompressing input file if needed
|
// m_tdir and m_tfile are used only for decompressing input file if needed
|
||||||
const string& m_tdir;
|
const string& m_tdir;
|
||||||
string m_tfile;
|
string m_tfile;
|
||||||
vector<Dijon::Filter*> m_handlers;
|
vector<Dijon::Filter*> m_handlers;
|
||||||
|
bool m_tmpflgs[MAXHANDLERS];
|
||||||
|
vector<TempFile> m_tempfiles;
|
||||||
|
|
||||||
void tmpcleanup();
|
void tmpcleanup();
|
||||||
bool dijontorcl(Rcl::Doc&);
|
bool dijontorcl(Rcl::Doc&);
|
||||||
|
void collectIpathAndMT(Rcl::Doc&, string& ipath);
|
||||||
|
bool dataToTempFile(const string& data, const string& mt, string& fn);
|
||||||
|
void popHandler();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
#endif /* _INTERNFILE_H_INCLUDED_ */
|
#endif /* _INTERNFILE_H_INCLUDED_ */
|
||||||
|
|||||||
@ -16,7 +16,7 @@
|
|||||||
*/
|
*/
|
||||||
#ifndef _HTML_H_INCLUDED_
|
#ifndef _HTML_H_INCLUDED_
|
||||||
#define _HTML_H_INCLUDED_
|
#define _HTML_H_INCLUDED_
|
||||||
/* @(#$Id: mh_html.h,v 1.8 2006-12-15 12:40:02 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: mh_html.h,v 1.9 2006-12-16 15:39:54 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
@ -31,6 +31,11 @@ class MimeHandlerHtml : public RecollFilter {
|
|||||||
virtual ~MimeHandlerHtml() {}
|
virtual ~MimeHandlerHtml() {}
|
||||||
virtual bool set_document_file(const string &file_path);
|
virtual bool set_document_file(const string &file_path);
|
||||||
virtual bool set_document_string(const string &data);
|
virtual bool set_document_string(const string &data);
|
||||||
|
/** Tell the caller which input methods this handler accepts: a file
 *  name or an in-memory string; anything else is refused. */
virtual bool is_data_input_ok(DataInput input) const {
    return input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING;
}
|
||||||
virtual bool next_document();
|
virtual bool next_document();
|
||||||
private:
|
private:
|
||||||
string m_html;
|
string m_html;
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: mh_mail.cpp,v 1.25 2006-12-15 16:33:15 dockes Exp $ (C) 2005 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: mh_mail.cpp,v 1.26 2006-12-16 15:39:54 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -49,9 +49,15 @@ MimeHandlerMail::~MimeHandlerMail()
|
|||||||
if (m_fd >= 0)
|
if (m_fd >= 0)
|
||||||
close(m_fd);
|
close(m_fd);
|
||||||
delete m_stream;
|
delete m_stream;
|
||||||
|
for (vector<MHMailAttach*>::iterator it = m_attachments.begin();
|
||||||
|
it != m_attachments.end(); it++) {
|
||||||
|
delete *it;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MimeHandlerMail::set_document_file(const string &fn)
|
bool MimeHandlerMail::set_document_file(const string &fn)
|
||||||
{
|
{
|
||||||
|
LOGDEB(("MimeHandlerMail::set_document_file(%s)\n", fn.c_str()));
|
||||||
if (m_fd >= 0) {
|
if (m_fd >= 0) {
|
||||||
close(m_fd);
|
close(m_fd);
|
||||||
m_fd = -1;
|
m_fd = -1;
|
||||||
@ -76,6 +82,7 @@ bool MimeHandlerMail::set_document_file(const string &fn)
|
|||||||
|
|
||||||
bool MimeHandlerMail::set_document_string(const string &msgtxt)
|
bool MimeHandlerMail::set_document_string(const string &msgtxt)
|
||||||
{
|
{
|
||||||
|
LOGDEB1(("MimeHandlerMail::set_document_string\n"));
|
||||||
LOGDEB2(("Message text: [%s]\n", msgtxt.c_str()));
|
LOGDEB2(("Message text: [%s]\n", msgtxt.c_str()));
|
||||||
delete m_stream;
|
delete m_stream;
|
||||||
m_stream = new stringstream(msgtxt);
|
m_stream = new stringstream(msgtxt);
|
||||||
@ -90,15 +97,36 @@ bool MimeHandlerMail::set_document_string(const string &msgtxt)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool MimeHandlerMail::skip_to_document(const string& ipath)
|
||||||
|
{
|
||||||
|
LOGDEB(("MimeHandlerMail::skip_to_document(%s)\n", ipath.c_str()));
|
||||||
|
if (m_idx == -1) {
|
||||||
|
// No decoding done yet. If ipath is null need do nothing
|
||||||
|
if (ipath == "" || ipath == "-1")
|
||||||
|
return true;
|
||||||
|
// ipath points to attachment: need to decode message
|
||||||
|
if (!next_document()) {
|
||||||
|
LOGERR(("MimeHandlerMail::skip_to_doc: next_document failed\n"));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
m_idx = atoi(ipath.c_str());
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
bool MimeHandlerMail::next_document()
|
bool MimeHandlerMail::next_document()
|
||||||
{
|
{
|
||||||
|
LOGDEB(("MimeHandlerMail::next_document m_idx %d m_havedoc %d\n",
|
||||||
|
m_idx, m_havedoc));
|
||||||
if (!m_havedoc)
|
if (!m_havedoc)
|
||||||
return false;
|
return false;
|
||||||
bool res = false;
|
bool res = false;
|
||||||
|
|
||||||
if (m_idx == -1) {
|
if (m_idx == -1) {
|
||||||
m_metaData["mimetype"] = "text/plain";
|
m_metaData["mimetype"] = "text/plain";
|
||||||
res =processMsg(m_bincdoc, 0);
|
res = processMsg(m_bincdoc, 0);
|
||||||
|
LOGDEB1(("MimeHandlerMail::next_document: mimetype %s\n",
|
||||||
|
m_metaData["mimetype"].c_str()));
|
||||||
} else {
|
} else {
|
||||||
res = processAttach();
|
res = processAttach();
|
||||||
}
|
}
|
||||||
@ -107,9 +135,61 @@ bool MimeHandlerMail::next_document()
|
|||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Decode according to content transfer encoding
|
||||||
|
static bool decodeBody(const string& cte, const string& body, string& decoded,
|
||||||
|
const string** respp)
|
||||||
|
{
|
||||||
|
// By default, there is no encoding (7bit,8bit,raw). Also in case of
|
||||||
|
// decoding error
|
||||||
|
*respp = &body;
|
||||||
|
|
||||||
|
if (!stringlowercmp("quoted-printable", cte)) {
|
||||||
|
if (!qp_decode(body, decoded)) {
|
||||||
|
LOGERR(("decodeBody: quoted-printable decoding failed !\n"));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
*respp = &decoded;
|
||||||
|
} else if (!stringlowercmp("base64", cte)) {
|
||||||
|
if (!base64_decode(body, decoded)) {
|
||||||
|
LOGERR(("decodeBody: base64 decoding failed !. body [%s]\n",
|
||||||
|
body.c_str()));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
*respp = &decoded;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
bool MimeHandlerMail::processAttach()
|
bool MimeHandlerMail::processAttach()
|
||||||
{
|
{
|
||||||
return false;
|
LOGDEB(("MimeHandlerMail::processAttach() m_idx %d\n", m_idx));
|
||||||
|
if (!m_havedoc)
|
||||||
|
return false;
|
||||||
|
if (m_idx >= (int)m_attachments.size()) {
|
||||||
|
m_havedoc = false;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
MHMailAttach *att = m_attachments[m_idx];
|
||||||
|
|
||||||
|
LOGDEB1(("processAttach:content-type: %s\n", att->m_contentType.c_str()));
|
||||||
|
m_metaData["mimetype"] = att->m_contentType;
|
||||||
|
m_metaData["charset"] = att->m_charset;
|
||||||
|
m_metaData["filename"] = att->m_filename;
|
||||||
|
|
||||||
|
m_metaData["content"] = "";
|
||||||
|
string& body = m_metaData["content"];
|
||||||
|
att->m_part->getBody(body, 0, att->m_part->bodylength);
|
||||||
|
string decoded;
|
||||||
|
const string *bdp;
|
||||||
|
if (!decodeBody(att->m_contentTransferEncoding, body, decoded, &bdp)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (bdp != &body)
|
||||||
|
body = decoded;
|
||||||
|
char nbuf[10];
|
||||||
|
sprintf(nbuf, "%d", m_idx);
|
||||||
|
m_metaData["ipath"] = nbuf;
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Transform a single message into a document. The subject becomes the
|
// Transform a single message into a document. The subject becomes the
|
||||||
@ -124,7 +204,7 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
|
|||||||
LOGDEB2(("MimeHandlerMail::processMsg: depth %d\n", depth));
|
LOGDEB2(("MimeHandlerMail::processMsg: depth %d\n", depth));
|
||||||
if (depth++ >= maxdepth) {
|
if (depth++ >= maxdepth) {
|
||||||
// Have to stop somewhere
|
// Have to stop somewhere
|
||||||
LOGDEB(("MimeHandlerMail::processMsg: maxdepth %d exceeded\n",
|
LOGINFO(("MimeHandlerMail::processMsg: maxdepth %d exceeded\n",
|
||||||
maxdepth));
|
maxdepth));
|
||||||
// Return true anyway, better to index partially than not at all
|
// Return true anyway, better to index partially than not at all
|
||||||
return true;
|
return true;
|
||||||
@ -218,7 +298,7 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
|
|||||||
// Get and parse content-type header
|
// Get and parse content-type header
|
||||||
Binc::HeaderItem hi;
|
Binc::HeaderItem hi;
|
||||||
if (!it->h.getFirstHeader("Content-Type", hi)) {
|
if (!it->h.getFirstHeader("Content-Type", hi)) {
|
||||||
LOGDEB(("No content-type header for part %d\n", i));
|
LOGDEB(("walkmime:no ctent-type header for part %d\n", i));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
MimeHeaderValue content_type;
|
MimeHeaderValue content_type;
|
||||||
@ -297,30 +377,6 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
|
|||||||
|
|
||||||
// "Simple" part.
|
// "Simple" part.
|
||||||
LOGDEB2(("walkmime: simple part\n"));
|
LOGDEB2(("walkmime: simple part\n"));
|
||||||
|
|
||||||
// If the Content-Disposition is not inline, we treat it as
|
|
||||||
// attachment, as per rfc2183. We don't process attachments
|
|
||||||
// for now, except for indexing/displaying the file name
|
|
||||||
// If it is inline but not text or html, same thing.
|
|
||||||
if (stringlowercmp("inline", content_disposition.value) ||
|
|
||||||
(stringlowercmp("text/plain", content_type.value) &&
|
|
||||||
stringlowercmp("text/html", content_type.value)) ) {
|
|
||||||
if (!filename.empty()) {
|
|
||||||
out += "\n";
|
|
||||||
if (m_forPreview)
|
|
||||||
out += "[" + dispindic + " " + content_type.value + ": ";
|
|
||||||
out += filename;
|
|
||||||
if (m_forPreview)
|
|
||||||
out += "]";
|
|
||||||
out += "\n\n";
|
|
||||||
}
|
|
||||||
// m_attachments.push_back(&doc);
|
|
||||||
// We're done with this part
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// We are dealing with an inline part of text/plain or text/html type
|
|
||||||
|
|
||||||
// Normally the default charset is us-ascii. But it happens that
|
// Normally the default charset is us-ascii. But it happens that
|
||||||
// 8 bit chars exist in a message that is stated as us-ascii. Ie the
|
// 8 bit chars exist in a message that is stated as us-ascii. Ie the
|
||||||
// mailer used by yahoo support ('KANA') does this. We could convert
|
// mailer used by yahoo support ('KANA') does this. We could convert
|
||||||
@ -345,34 +401,52 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
|
|||||||
cte = hi.getValue();
|
cte = hi.getValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If the Content-Disposition is not inline, we treat it as
|
||||||
|
// attachment, as per rfc2183. We don't process attachments
|
||||||
|
// for now, except for indexing/displaying the file name
|
||||||
|
// If it is inline but not text or html, same thing.
|
||||||
|
if (stringlowercmp("inline", content_disposition.value) ||
|
||||||
|
(stringlowercmp("text/plain", content_type.value) &&
|
||||||
|
stringlowercmp("text/html", content_type.value)) ) {
|
||||||
|
if (!filename.empty()) {
|
||||||
|
out += "\n";
|
||||||
|
if (m_forPreview)
|
||||||
|
out += "[" + dispindic + " " + content_type.value + ": ";
|
||||||
|
out += filename;
|
||||||
|
if (m_forPreview)
|
||||||
|
out += "]";
|
||||||
|
out += "\n\n";
|
||||||
|
}
|
||||||
|
LOGDEB(("walkmime: pushing attchmnt fn [%s]\n", filename.c_str()));
|
||||||
|
MHMailAttach *att = new MHMailAttach;
|
||||||
|
if (att == 0) {
|
||||||
|
LOGERR(("Out of memory\n"));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
att->m_contentType = content_type.value;
|
||||||
|
att->m_filename = filename;
|
||||||
|
att->m_charset = charset;
|
||||||
|
att->m_contentTransferEncoding = cte;
|
||||||
|
att->m_part = doc;
|
||||||
|
m_attachments.push_back(att);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// We are dealing with an inline part of text/plain or text/html type
|
||||||
|
|
||||||
|
|
||||||
LOGDEB2(("walkmime: final: body start offset %d, length %d\n",
|
LOGDEB2(("walkmime: final: body start offset %d, length %d\n",
|
||||||
doc->getBodyStartOffset(), doc->getBodyLength()));
|
doc->getBodyStartOffset(), doc->getBodyLength()));
|
||||||
string body;
|
string body;
|
||||||
doc->getBody(body, 0, doc->bodylength);
|
doc->getBody(body, 0, doc->bodylength);
|
||||||
|
|
||||||
// Decode according to content transfer encoding
|
string decoded;
|
||||||
if (!stringlowercmp("quoted-printable", cte)) {
|
const string *bdp;
|
||||||
string decoded;
|
if (!decodeBody(cte, body, decoded, &bdp)) {
|
||||||
if (!qp_decode(body, decoded)) {
|
LOGERR(("MimeHandlerMail::walkmime: failed decoding body\n"));
|
||||||
LOGERR(("walkmime: quoted-printable decoding failed !\n"));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
body = decoded;
|
|
||||||
} else if (!stringlowercmp("base64", cte)) {
|
|
||||||
string decoded;
|
|
||||||
if (!base64_decode(body, decoded)) {
|
|
||||||
LOGERR(("walkmime: base64 decoding failed !\n"));
|
|
||||||
#if 0
|
|
||||||
FILE *fp = fopen("/tmp/recoll_decodefail", "w");
|
|
||||||
if (fp) {
|
|
||||||
fprintf(fp, "%s", body.c_str());
|
|
||||||
fclose(fp);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
body = decoded;
|
|
||||||
}
|
}
|
||||||
|
if (bdp != &body)
|
||||||
|
body = decoded;
|
||||||
|
|
||||||
// Handle html stripping and transcoding to utf8
|
// Handle html stripping and transcoding to utf8
|
||||||
string utf8;
|
string utf8;
|
||||||
@ -390,6 +464,7 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
|
|||||||
out += it->second;
|
out += it->second;
|
||||||
} else {
|
} else {
|
||||||
// Transcode to utf-8
|
// Transcode to utf-8
|
||||||
|
LOGDEB1(("walkmime: transcoding from %s to UTF-8\n", charset.c_str()));
|
||||||
if (!transcode(body, utf8, charset, "UTF-8")) {
|
if (!transcode(body, utf8, charset, "UTF-8")) {
|
||||||
LOGERR(("walkmime: transcode failed from cs '%s' to UTF-8\n",
|
LOGERR(("walkmime: transcode failed from cs '%s' to UTF-8\n",
|
||||||
charset.c_str()));
|
charset.c_str()));
|
||||||
|
|||||||
@ -16,7 +16,7 @@
|
|||||||
*/
|
*/
|
||||||
#ifndef _MAIL_H_INCLUDED_
|
#ifndef _MAIL_H_INCLUDED_
|
||||||
#define _MAIL_H_INCLUDED_
|
#define _MAIL_H_INCLUDED_
|
||||||
/* @(#$Id: mh_mail.h,v 1.10 2006-12-15 16:33:15 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: mh_mail.h,v 1.11 2006-12-16 15:39:54 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
@ -29,6 +29,8 @@ namespace Binc {
|
|||||||
class MimePart;
|
class MimePart;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class MHMailAttach;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Translate a mail folder file into internal documents (also works
|
* Translate a mail folder file into internal documents (also works
|
||||||
* for maildir files). This has to keep state while parsing a mail folder
|
* for maildir files). This has to keep state while parsing a mail folder
|
||||||
@ -40,9 +42,15 @@ class MimeHandlerMail : public RecollFilter {
|
|||||||
: RecollFilter(mt), m_bincdoc(0), m_fd(-1), m_stream(0), m_idx(-1)
|
: RecollFilter(mt), m_bincdoc(0), m_fd(-1), m_stream(0), m_idx(-1)
|
||||||
{}
|
{}
|
||||||
virtual ~MimeHandlerMail();
|
virtual ~MimeHandlerMail();
|
||||||
virtual bool set_document_file(const string &file_path);
|
virtual bool set_document_file(const string& file_path);
|
||||||
virtual bool set_document_string(const string &data);
|
virtual bool set_document_string(const string& data);
|
||||||
|
virtual bool is_data_input_ok(DataInput input) const {
|
||||||
|
if (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING)
|
||||||
|
return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
virtual bool next_document();
|
virtual bool next_document();
|
||||||
|
virtual bool skip_to_document(const string& ipath);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
bool processMsg(Binc::MimePart *doc, int depth);
|
bool processMsg(Binc::MimePart *doc, int depth);
|
||||||
@ -53,7 +61,16 @@ class MimeHandlerMail : public RecollFilter {
|
|||||||
std::stringstream *m_stream;
|
std::stringstream *m_stream;
|
||||||
int m_idx; // starts at -1 for self, then index into
|
int m_idx; // starts at -1 for self, then index into
|
||||||
// attachments;
|
// attachments;
|
||||||
vector<Binc::MimePart *> m_attachments;
|
vector<MHMailAttach *> m_attachments;
|
||||||
|
};
|
||||||
|
|
||||||
|
class MHMailAttach {
|
||||||
|
public:
|
||||||
|
string m_contentType;
|
||||||
|
string m_filename;
|
||||||
|
string m_charset;
|
||||||
|
string m_contentTransferEncoding;
|
||||||
|
Binc::MimePart *m_part;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* _MAIL_H_INCLUDED_ */
|
#endif /* _MAIL_H_INCLUDED_ */
|
||||||
|
|||||||
@ -16,7 +16,7 @@
|
|||||||
*/
|
*/
|
||||||
#ifndef _MH_TEXT_H_INCLUDED_
|
#ifndef _MH_TEXT_H_INCLUDED_
|
||||||
#define _MH_TEXT_H_INCLUDED_
|
#define _MH_TEXT_H_INCLUDED_
|
||||||
/* @(#$Id: mh_text.h,v 1.3 2006-12-15 12:40:02 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: mh_text.h,v 1.4 2006-12-16 15:39:54 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
using std::string;
|
using std::string;
|
||||||
@ -34,6 +34,11 @@ class MimeHandlerText : public RecollFilter {
|
|||||||
virtual ~MimeHandlerText() {}
|
virtual ~MimeHandlerText() {}
|
||||||
virtual bool set_document_file(const string &file_path);
|
virtual bool set_document_file(const string &file_path);
|
||||||
virtual bool set_document_string(const string&);
|
virtual bool set_document_string(const string&);
|
||||||
|
virtual bool is_data_input_ok(DataInput input) const {
|
||||||
|
if (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING)
|
||||||
|
return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
virtual bool next_document();
|
virtual bool next_document();
|
||||||
private:
|
private:
|
||||||
string m_text;
|
string m_text;
|
||||||
|
|||||||
@ -16,7 +16,7 @@
|
|||||||
*/
|
*/
|
||||||
#ifndef _MIMEHANDLER_H_INCLUDED_
|
#ifndef _MIMEHANDLER_H_INCLUDED_
|
||||||
#define _MIMEHANDLER_H_INCLUDED_
|
#define _MIMEHANDLER_H_INCLUDED_
|
||||||
/* @(#$Id: mimehandler.h,v 1.13 2006-12-15 12:40:02 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: mimehandler.h,v 1.14 2006-12-16 15:39:54 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <list>
|
#include <list>
|
||||||
@ -66,8 +66,12 @@ public:
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual DataInput get_required_data_input() const
|
virtual bool is_data_input_ok(DataInput input) const {
|
||||||
{return DOCUMENT_FILE_NAME;}
|
if (input == DOCUMENT_FILE_NAME)
|
||||||
|
return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
virtual string get_error() const {
|
virtual string get_error() const {
|
||||||
return m_reason;
|
return m_reason;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: main.cpp,v 1.56 2006-12-05 15:23:50 dockes Exp $ (C) 2005 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: main.cpp,v 1.57 2006-12-16 15:39:54 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -281,6 +281,7 @@ int main(int argc, char **argv)
|
|||||||
|
|
||||||
// Connect exit handlers etc..
|
// Connect exit handlers etc..
|
||||||
app.connect(&app, SIGNAL(lastWindowClosed()), &app, SLOT(quit()));
|
app.connect(&app, SIGNAL(lastWindowClosed()), &app, SLOT(quit()));
|
||||||
|
app.connect(&app, SIGNAL(aboutToQuit()), mainWindow, SLOT(close()));
|
||||||
QTimer *timer = new QTimer(&app);
|
QTimer *timer = new QTimer(&app);
|
||||||
mainWindow->connect(timer, SIGNAL(timeout()),
|
mainWindow->connect(timer, SIGNAL(timeout()),
|
||||||
mainWindow, SLOT(periodic100()));
|
mainWindow, SLOT(periodic100()));
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: rclmain_w.cpp,v 1.14 2006-12-14 13:53:43 dockes Exp $ (C) 2005 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: rclmain_w.cpp,v 1.15 2006-12-16 15:39:54 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -66,6 +66,7 @@ using std::pair;
|
|||||||
#include "refcntr.h"
|
#include "refcntr.h"
|
||||||
#include "ssearch_w.h"
|
#include "ssearch_w.h"
|
||||||
#include "execmd.h"
|
#include "execmd.h"
|
||||||
|
#include "internfile.h"
|
||||||
|
|
||||||
#include "rclmain_w.h"
|
#include "rclmain_w.h"
|
||||||
#include "moc_rclmain_w.cpp"
|
#include "moc_rclmain_w.cpp"
|
||||||
@ -161,15 +162,6 @@ void RclMain::init()
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// We also want to get rid of the advanced search form and previews
|
|
||||||
// when we exit (not our children so that it's not systematically
|
|
||||||
// created over the main form).
|
|
||||||
bool RclMain::close(bool)
|
|
||||||
{
|
|
||||||
LOGDEB(("RclMain::close\n"));
|
|
||||||
fileExit();
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
//#define SHOWEVENTS
|
//#define SHOWEVENTS
|
||||||
#if defined(SHOWEVENTS)
|
#if defined(SHOWEVENTS)
|
||||||
@ -257,9 +249,20 @@ static const char *eventTypeToStr(int tp)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// We also want to get rid of the advanced search form and previews
|
||||||
|
// when we exit (not our children so that it's not systematically
|
||||||
|
// created over the main form).
|
||||||
|
bool RclMain::close()
|
||||||
|
{
|
||||||
|
LOGDEB(("RclMain::close\n"));
|
||||||
|
fileExit();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
void RclMain::fileExit()
|
void RclMain::fileExit()
|
||||||
{
|
{
|
||||||
LOGDEB1(("RclMain: fileExit\n"));
|
LOGDEB(("RclMain: fileExit\n"));
|
||||||
|
m_tempfiles.clear();
|
||||||
prefs.mainwidth = width();
|
prefs.mainwidth = width();
|
||||||
prefs.mainheight = height();
|
prefs.mainheight = height();
|
||||||
prefs.ssearchTyp = sSearch->searchTypCMB->currentItem();
|
prefs.ssearchTyp = sSearch->searchTypCMB->currentItem();
|
||||||
@ -686,15 +689,38 @@ void RclMain::startNativeViewer(int docnum)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
string fn = urltolocalpath(doc.url);
|
// For files with an ipath, we do things differently depending if the
|
||||||
string url = url_encode(doc.url, 7);
|
// configured command seems to be able to grok it or not.
|
||||||
string ipath = doc.ipath;
|
bool wantsipath = cmd.find("%i") != string::npos;
|
||||||
// Substitute %u (url) and %f (file name) inside prototype command
|
bool istempfile = false;
|
||||||
|
string fn, url;
|
||||||
|
if (doc.ipath.empty() || wantsipath) {
|
||||||
|
fn = urltolocalpath(doc.url);
|
||||||
|
url = url_encode(doc.url, 7);
|
||||||
|
} else {
|
||||||
|
// There is an ipath and the command does not know about
|
||||||
|
// them. We need a temp file.
|
||||||
|
TempFile temp;
|
||||||
|
if (!FileInterner::idocTempFile(temp, rclconfig,
|
||||||
|
urltolocalpath(doc.url),
|
||||||
|
doc.ipath, doc.mimetype)) {
|
||||||
|
QMessageBox::warning(0, "Recoll",
|
||||||
|
tr("Cannot extract document or create "
|
||||||
|
"temporary file"));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
istempfile = true;
|
||||||
|
m_tempfiles.push_back(temp);
|
||||||
|
fn = temp->filename();
|
||||||
|
url = string("file://") + fn;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Substitute %xx inside prototype command
|
||||||
string ncmd;
|
string ncmd;
|
||||||
map<char, string> subs;
|
map<char, string> subs;
|
||||||
subs['u'] = escapeShell(url);
|
subs['u'] = escapeShell(url);
|
||||||
subs['f'] = escapeShell(fn);
|
subs['f'] = escapeShell(fn);
|
||||||
subs['i'] = escapeShell(ipath);
|
subs['i'] = escapeShell(doc.ipath);
|
||||||
pcSubst(cmd, ncmd, subs);
|
pcSubst(cmd, ncmd, subs);
|
||||||
|
|
||||||
ncmd += " &";
|
ncmd += " &";
|
||||||
@ -707,7 +733,10 @@ void RclMain::startNativeViewer(int docnum)
|
|||||||
QString::fromUtf8(prcmd.c_str()) + "]";
|
QString::fromUtf8(prcmd.c_str()) + "]";
|
||||||
stb->message(msg, 5000);
|
stb->message(msg, 5000);
|
||||||
}
|
}
|
||||||
g_dynconf->enterDoc(fn, doc.ipath);
|
if (!istempfile)
|
||||||
|
g_dynconf->enterDoc(fn, doc.ipath);
|
||||||
|
// We should actually monitor these processes so that we can
|
||||||
|
// delete the temp files when they exit
|
||||||
system(ncmd.c_str());
|
system(ncmd.c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -29,6 +29,7 @@
|
|||||||
#include "searchdata.h"
|
#include "searchdata.h"
|
||||||
#include "spell_w.h"
|
#include "spell_w.h"
|
||||||
#include "refcntr.h"
|
#include "refcntr.h"
|
||||||
|
#include "pathut.h"
|
||||||
|
|
||||||
#if QT_VERSION < 0x040000
|
#if QT_VERSION < 0x040000
|
||||||
#include "rclmain.h"
|
#include "rclmain.h"
|
||||||
@ -64,9 +65,9 @@ public:
|
|||||||
}
|
}
|
||||||
~RclMain() {}
|
~RclMain() {}
|
||||||
|
|
||||||
virtual bool close( bool );
|
|
||||||
|
|
||||||
public slots:
|
public slots:
|
||||||
|
virtual bool close();
|
||||||
virtual void fileExit();
|
virtual void fileExit();
|
||||||
virtual void periodic100();
|
virtual void periodic100();
|
||||||
virtual void startIndexing();
|
virtual void startIndexing();
|
||||||
@ -103,7 +104,8 @@ private:
|
|||||||
RefCntr<Rcl::SearchData> m_searchData;
|
RefCntr<Rcl::SearchData> m_searchData;
|
||||||
DocSeqSortSpec m_sortspecs;
|
DocSeqSortSpec m_sortspecs;
|
||||||
RefCntr<DocSequence> m_docSource;
|
RefCntr<DocSequence> m_docSource;
|
||||||
|
|
||||||
|
vector<TempFile> m_tempfiles;
|
||||||
// Serial number of current search for this process.
|
// Serial number of current search for this process.
|
||||||
// Used to match to preview windows
|
// Used to match to preview windows
|
||||||
int m_searchId;
|
int m_searchId;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user