mail handling 1st working version
This commit is contained in:
parent
d392d317bb
commit
04b279dcd5
@ -33,6 +33,8 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
|
||||||
|
#include <istream>
|
||||||
|
|
||||||
namespace Binc {
|
namespace Binc {
|
||||||
|
|
||||||
class MimeInputSource {
|
class MimeInputSource {
|
||||||
@ -40,9 +42,10 @@ namespace Binc {
|
|||||||
inline MimeInputSource(int fd, unsigned int start = 0);
|
inline MimeInputSource(int fd, unsigned int start = 0);
|
||||||
virtual inline ~MimeInputSource(void);
|
virtual inline ~MimeInputSource(void);
|
||||||
|
|
||||||
virtual inline bool fillInputBuffer(void);
|
virtual inline size_t fillRaw(char *raw, size_t nbytes);
|
||||||
virtual inline void reset(void);
|
virtual inline void reset(void);
|
||||||
|
|
||||||
|
virtual inline bool fillInputBuffer(void);
|
||||||
inline void seek(unsigned int offset);
|
inline void seek(unsigned int offset);
|
||||||
inline bool getChar(char *c);
|
inline bool getChar(char *c);
|
||||||
inline void ungetChar(void);
|
inline void ungetChar(void);
|
||||||
@ -77,10 +80,15 @@ namespace Binc {
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline size_t MimeInputSource::fillRaw(char *raw, size_t nbytes)
|
||||||
|
{
|
||||||
|
return read(fd, raw, nbytes);
|
||||||
|
}
|
||||||
|
|
||||||
inline bool MimeInputSource::fillInputBuffer(void)
|
inline bool MimeInputSource::fillInputBuffer(void)
|
||||||
{
|
{
|
||||||
char raw[4096];
|
char raw[4096];
|
||||||
ssize_t nbytes = read(fd, raw, sizeof(raw));
|
ssize_t nbytes = fillRaw(raw, 4096);
|
||||||
if (nbytes <= 0) {
|
if (nbytes <= 0) {
|
||||||
// FIXME: If ferror(crlffile) we should log this.
|
// FIXME: If ferror(crlffile) we should log this.
|
||||||
return false;
|
return false;
|
||||||
@ -159,8 +167,53 @@ namespace Binc {
|
|||||||
{
|
{
|
||||||
return offset;
|
return offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
///////////////////////////////////
|
||||||
|
// Input source reading the message from a C++ input stream instead of
// a file descriptor. Only the raw refill and the reset operations are
// overridden; everything else comes from MimeInputSource.
//
// The stream is held by reference: it is not owned and must stay alive
// for as long as this object is used.
class MimeInputSourceStream : public MimeInputSource {
public:
  // s: stream to read from. start: passed on to MimeInputSource.
  inline MimeInputSourceStream(std::istream& s, unsigned int start = 0);
  // Fetch up to nb bytes from the stream into raw.
  virtual inline size_t fillRaw(char *raw, size_t nb);
  // Reset the base class state and rewind the stream.
  virtual inline void reset(void);
private:
  // std:: is spelled out so that this header does not depend on a
  // using-directive being in effect where it is included.
  std::istream& s;
};
|
||||||
|
|
||||||
|
// Build a stream-backed input source. The base class is given -1 as
// file descriptor: all data comes from the stream reference kept in s.
// std::istream is qualified so the header compiles without relying on
// a using-directive in the including file.
inline MimeInputSourceStream::MimeInputSourceStream(std::istream& si,
                                                    unsigned int start)
  : MimeInputSource(-1, start), s(si)
{
}
|
||||||
|
|
||||||
|
inline size_t MimeInputSourceStream::fillRaw(char *raw, size_t nb)
|
||||||
|
{
|
||||||
|
// Why can't streams tell how many characters were actually read
|
||||||
|
// when hitting eof ?
|
||||||
|
std::streampos st = s.tellg();
|
||||||
|
s.seekg(0, ios_base::end);
|
||||||
|
std::streampos lst = s.tellg();
|
||||||
|
s.seekg(st);
|
||||||
|
size_t nbytes = lst - st;
|
||||||
|
if (nbytes > nb) {
|
||||||
|
nbytes = nb;
|
||||||
|
}
|
||||||
|
if (nbytes <= 0) {
|
||||||
|
return (size_t)-1;
|
||||||
|
}
|
||||||
|
|
||||||
|
s.read(raw, nbytes);
|
||||||
|
return nbytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void MimeInputSourceStream::reset(void)
|
||||||
|
{
|
||||||
|
MimeInputSource::reset();
|
||||||
|
s.seekg(0);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
extern Binc::MimeInputSource *mimeSource;
|
extern Binc::MimeInputSource *mimeSource;
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -80,6 +80,35 @@ void Binc::MimeDocument::parseFull(int fd) const
|
|||||||
size = mimeSource->getOffset();
|
size = mimeSource->getOffset();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Parse the whole message (headers and body structure) from a stream.
// Does nothing if a full parse was already performed on this document.
// The global mimeSource is replaced by a stream-backed source reading
// from s.
void Binc::MimeDocument::parseFull(istream& s) const
{
  if (allIsParsed) {
    return;
  }
  allIsParsed = true;

  // Install a fresh input source for this stream
  delete mimeSource;
  mimeSource = new MimeInputSourceStream(s);

  // Forget any previously computed offsets, sizes and type flags
  multipart = false;
  messagerfc822 = false;
  size = 0;
  bodylength = 0;
  bodystartoffsetcrlf = 0;
  headerlength = 0;
  headerstartoffsetcrlf = 0;

  // Top level part: parse with an empty boundary
  string boundary;
  int boundarysize = 0;
  MimePart::parseFull(boundary, boundarysize);

  // Consume whatever follows the parsed data, so that the source
  // offset read below accounts for all of the input.
  char ch;
  while (mimeSource->getChar(&ch)) {
  }

  size = mimeSource->getOffset();
}
|
||||||
|
|
||||||
//------------------------------------------------------------------------
|
//------------------------------------------------------------------------
|
||||||
static bool parseOneHeaderLine(Binc::Header *header, unsigned int *nlines)
|
static bool parseOneHeaderLine(Binc::Header *header, unsigned int *nlines)
|
||||||
{
|
{
|
||||||
|
|||||||
@ -73,6 +73,29 @@ void Binc::MimeDocument::parseOnlyHeader(int fd) const
|
|||||||
MimePart::parseOnlyHeader("");
|
MimePart::parseOnlyHeader("");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Parse only the top level headers of a message read from a stream.
// Does nothing if the headers, or the whole message, were already
// parsed. The global mimeSource is replaced by a stream-backed source
// reading from s.
void Binc::MimeDocument::parseOnlyHeader(istream& s) const
{
  if (allIsParsed) {
    return;
  }
  if (headerIsParsed) {
    return;
  }
  headerIsParsed = true;

  // Install a fresh input source for this stream
  delete mimeSource;
  mimeSource = new MimeInputSourceStream(s);

  // Forget any previously computed offsets, counters and type flags
  multipart = false;
  messagerfc822 = false;
  bodylength = 0;
  bodystartoffsetcrlf = 0;
  headerlength = 0;
  headerstartoffsetcrlf = 0;
  nbodylines = 0;
  nlines = 0;

  // Top level part: no enclosing boundary
  MimePart::parseOnlyHeader("");
}
|
||||||
|
|
||||||
//------------------------------------------------------------------------
|
//------------------------------------------------------------------------
|
||||||
int Binc::MimePart::parseOnlyHeader(const string &toboundary) const
|
int Binc::MimePart::parseOnlyHeader(const string &toboundary) const
|
||||||
{
|
{
|
||||||
|
|||||||
@ -77,11 +77,18 @@ void Binc::MimePart::getBody(int fd, string &s,
|
|||||||
unsigned int startoffset,
|
unsigned int startoffset,
|
||||||
unsigned int length) const
|
unsigned int length) const
|
||||||
{
|
{
|
||||||
|
|
||||||
if (!mimeSource || mimeSource->getFileDescriptor() != fd) {
|
if (!mimeSource || mimeSource->getFileDescriptor() != fd) {
|
||||||
delete mimeSource;
|
delete mimeSource;
|
||||||
mimeSource = new MimeInputSource(fd);
|
mimeSource = new MimeInputSource(fd);
|
||||||
}
|
}
|
||||||
|
getBody(s, startoffset, length);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Binc::MimePart::getBody(string &s,
|
||||||
|
unsigned int startoffset,
|
||||||
|
unsigned int length) const
|
||||||
|
{
|
||||||
mimeSource->reset();
|
mimeSource->reset();
|
||||||
mimeSource->seek(bodystartoffsetcrlf + startoffset);
|
mimeSource->seek(bodystartoffsetcrlf + startoffset);
|
||||||
|
|
||||||
|
|||||||
@ -108,6 +108,7 @@ namespace Binc {
|
|||||||
|
|
||||||
void printBody(int fd, Binc::IODevice &output, unsigned int startoffset, unsigned int length) const;
|
void printBody(int fd, Binc::IODevice &output, unsigned int startoffset, unsigned int length) const;
|
||||||
void getBody(int fd, std::string& s, unsigned int startoffset, unsigned int length) const;
|
void getBody(int fd, std::string& s, unsigned int startoffset, unsigned int length) const;
|
||||||
|
void getBody(std::string& s, unsigned int startoffset, unsigned int length) const;
|
||||||
void printHeader(int fd, Binc::IODevice &output, std::vector<std::string> headers, bool includeheaders, unsigned int startoffset, unsigned int length, std::string &storage) const;
|
void printHeader(int fd, Binc::IODevice &output, std::vector<std::string> headers, bool includeheaders, unsigned int startoffset, unsigned int length, std::string &storage) const;
|
||||||
void printDoc(int fd, Binc::IODevice &output, unsigned int startoffset, unsigned int length) const;
|
void printDoc(int fd, Binc::IODevice &output, unsigned int startoffset, unsigned int length) const;
|
||||||
virtual void clear(void) const;
|
virtual void clear(void) const;
|
||||||
@ -129,6 +130,8 @@ namespace Binc {
|
|||||||
public:
|
public:
|
||||||
void parseOnlyHeader(int fd) const;
|
void parseOnlyHeader(int fd) const;
|
||||||
void parseFull(int fd) const;
|
void parseFull(int fd) const;
|
||||||
|
void parseOnlyHeader(std::istream& s) const;
|
||||||
|
void parseFull(std::istream& s) const;
|
||||||
void clear(void) const;
|
void clear(void) const;
|
||||||
|
|
||||||
inline bool isHeaderParsed(void) { return headerIsParsed; }
|
inline bool isHeaderParsed(void) { return headerIsParsed; }
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid [] = "@(#$Id: trbinc.cc,v 1.1 2005-03-25 09:40:27 dockes Exp $ (C) 1994 CDKIT";
|
static char rcsid [] = "@(#$Id: trbinc.cc,v 1.2 2005-03-31 10:04:07 dockes Exp $ (C) 1994 CDKIT";
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
@ -9,6 +9,10 @@ static char rcsid [] = "@(#$Id: trbinc.cc,v 1.1 2005-03-25 09:40:27 dockes Exp $
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
|
|
||||||
|
#include <sstream>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
#include "mime.h"
|
#include "mime.h"
|
||||||
|
|
||||||
static char *thisprog;
|
static char *thisprog;
|
||||||
@ -68,7 +72,27 @@ int main(int argc, char **argv)
|
|||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
Binc::MimeDocument doc;
|
Binc::MimeDocument doc;
|
||||||
|
|
||||||
|
#if 0
|
||||||
doc.parseFull(fd);
|
doc.parseFull(fd);
|
||||||
|
#else
|
||||||
|
char *cp;
|
||||||
|
int size = lseek(fd, 0, SEEK_END);
|
||||||
|
lseek(fd, 0, 0);
|
||||||
|
fprintf(stderr, "Size: %d\n", size);
|
||||||
|
cp = (char *)malloc(size);
|
||||||
|
if (cp==0) {
|
||||||
|
fprintf(stderr, "Malloc %d failed\n", size);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
int n;
|
||||||
|
if ((n=read(fd, cp, size)) != size) {
|
||||||
|
fprintf(stderr, "Read failed: requested %d, got %d\n", size, n);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
std::stringstream s(string(cp, size), ios::in);
|
||||||
|
doc.parseFull(s);
|
||||||
|
#endif
|
||||||
|
|
||||||
if (!doc.isHeaderParsed() && !doc.isAllParsed()) {
|
if (!doc.isHeaderParsed() && !doc.isAllParsed()) {
|
||||||
fprintf(stderr, "Parse error\n");
|
fprintf(stderr, "Parse error\n");
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.6 2005-02-04 09:39:44 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.7 2005-03-31 10:04:07 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
|
||||||
@ -51,20 +51,23 @@ RclConfig::RclConfig()
|
|||||||
cerr << "No mime map file" << endl;
|
cerr << "No mime map file" << endl;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
// mimemap->list();
|
||||||
|
|
||||||
string mimeconffile;
|
string mimeconffile;
|
||||||
if (!conf->get("mimeconffile", mimeconffile, "")) {
|
if (!conf->get("mimeconffile", mimeconffile, "")) {
|
||||||
mimeconffile = "mimeconf";
|
mimeconffile = "mimeconf";
|
||||||
}
|
}
|
||||||
mpath = confdir;
|
mpath = confdir;
|
||||||
|
|
||||||
path_cat(mpath, mimeconffile);
|
path_cat(mpath, mimeconffile);
|
||||||
mimeconf = new ConfTree(mpath.c_str());
|
mimeconf = new ConfTree(mpath.c_str());
|
||||||
if (mimeconf == 0) {
|
if (mimeconf == 0) {
|
||||||
cerr << "No mime conf file" << endl;
|
cerr << "No mime conf file" << endl;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
// mimeconf->list();
|
||||||
|
|
||||||
setKeyDir(string(""));
|
setKeyDir(string(""));
|
||||||
// mimeconf->list();
|
|
||||||
m_ok = true;
|
m_ok = true;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -56,7 +56,18 @@ MimeHandlerHtml::worker1(RclConfig *conf, const string &,
|
|||||||
const string& htext,
|
const string& htext,
|
||||||
const string &mtype, Rcl::Doc &docout)
|
const string &mtype, Rcl::Doc &docout)
|
||||||
{
|
{
|
||||||
// Character set handling:
|
//LOGDEB(("textHtmlToDoc: htext: %s\n", htext.c_str()));
|
||||||
|
// Character set handling: the initial guessed charset depends on
|
||||||
|
// external factors: possible hint (ie mime charset in a mail
|
||||||
|
// message), charset guessing, or default configured charset.
|
||||||
|
string charset;
|
||||||
|
if (!charsethint.empty()) {
|
||||||
|
charset = charsethint;
|
||||||
|
if (conf->getGuessCharset()) {
|
||||||
|
charset = csguess(htext, conf->getDefCharset());
|
||||||
|
} else
|
||||||
|
charset = conf->getDefCharset();
|
||||||
|
}
|
||||||
|
|
||||||
// - We first try to convert from the default configured charset
|
// - We first try to convert from the default configured charset
|
||||||
// (which may depend of the current directory) to utf-8. If this
|
// (which may depend of the current directory) to utf-8. If this
|
||||||
@ -64,12 +75,6 @@ MimeHandlerHtml::worker1(RclConfig *conf, const string &,
|
|||||||
// - During parsing, if we find a charset parameter, and it differs from
|
// - During parsing, if we find a charset parameter, and it differs from
|
||||||
// what we started with, we abort and restart with the parameter value
|
// what we started with, we abort and restart with the parameter value
|
||||||
// instead of the configuration one.
|
// instead of the configuration one.
|
||||||
string charset;
|
|
||||||
if (conf->getGuessCharset()) {
|
|
||||||
charset = csguess(htext, conf->getDefCharset());
|
|
||||||
} else
|
|
||||||
charset = conf->getDefCharset();
|
|
||||||
|
|
||||||
LOGDEB(("textHtmlToDoc: charset before parsing: %s\n", charset.c_str()));
|
LOGDEB(("textHtmlToDoc: charset before parsing: %s\n", charset.c_str()));
|
||||||
|
|
||||||
MyHtmlParser pres;
|
MyHtmlParser pres;
|
||||||
@ -108,7 +113,7 @@ MimeHandlerHtml::worker1(RclConfig *conf, const string &,
|
|||||||
Rcl::Doc out;
|
Rcl::Doc out;
|
||||||
out.origcharset = charset;
|
out.origcharset = charset;
|
||||||
out.text = pres.dump;
|
out.text = pres.dump;
|
||||||
// LOGDEB(("textHtmlToDoc: dump : %s\n", pres.dump.c_str()));
|
// LOGDEB(("textHtmlToDoc: dump : %s\n", pres.dump.c_str()));
|
||||||
out.title = pres.title;
|
out.title = pres.title;
|
||||||
out.keywords = pres.keywords;
|
out.keywords = pres.keywords;
|
||||||
out.abstract = pres.sample;
|
out.abstract = pres.sample;
|
||||||
|
|||||||
@ -1,7 +1,8 @@
|
|||||||
#ifndef _HTML_H_INCLUDED_
|
#ifndef _HTML_H_INCLUDED_
|
||||||
#define _HTML_H_INCLUDED_
|
#define _HTML_H_INCLUDED_
|
||||||
/* @(#$Id: mh_html.h,v 1.3 2005-03-25 09:40:27 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: mh_html.h,v 1.4 2005-03-31 10:04:07 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
#include "mimehandler.h"
|
#include "mimehandler.h"
|
||||||
|
#include <string>
|
||||||
|
|
||||||
// Code to turn an html document into an internal one. There are 2
|
// Code to turn an html document into an internal one. There are 2
|
||||||
// interfaces, depending if we're working on a file, or on a
|
// interfaces, depending if we're working on a file, or on a
|
||||||
@ -11,6 +12,7 @@
|
|||||||
// carry titles, abstracts, whatever)
|
// carry titles, abstracts, whatever)
|
||||||
class MimeHandlerHtml : public MimeHandler {
|
class MimeHandlerHtml : public MimeHandler {
|
||||||
public:
|
public:
|
||||||
|
std::string charsethint;
|
||||||
virtual MimeHandler::Status worker(RclConfig *conf, const string &fn,
|
virtual MimeHandler::Status worker(RclConfig *conf, const string &fn,
|
||||||
const string &mtype, Rcl::Doc &docout, string&);
|
const string &mtype, Rcl::Doc &docout, string&);
|
||||||
virtual MimeHandler::Status worker1(RclConfig *conf, const string &fn,
|
virtual MimeHandler::Status worker1(RclConfig *conf, const string &fn,
|
||||||
|
|||||||
@ -1,11 +1,14 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: mh_mail.cpp,v 1.1 2005-03-25 09:40:27 dockes Exp $ (C) 2005 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: mh_mail.cpp,v 1.2 2005-03-31 10:04:07 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
|
|
||||||
#include <map>
|
#include <map>
|
||||||
|
#include <sstream>
|
||||||
|
using std::stringstream;
|
||||||
using std::map;
|
using std::map;
|
||||||
|
|
||||||
#include "mimehandler.h"
|
#include "mimehandler.h"
|
||||||
@ -19,65 +22,183 @@ using std::map;
|
|||||||
#include "debuglog.h"
|
#include "debuglog.h"
|
||||||
#include "smallut.h"
|
#include "smallut.h"
|
||||||
#include "mimeparse.h"
|
#include "mimeparse.h"
|
||||||
|
#include "html.h"
|
||||||
|
|
||||||
|
// binc imap mime definitions
|
||||||
|
#include "mime.h"
|
||||||
|
|
||||||
|
static void
|
||||||
|
walkmime(RclConfig *cnf, string &out, Binc::MimePart& doc, int depth);
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
|
// Close the mbox file kept open between calls, if there is one. The
// handle is stored as a void* in vfp and is really a FILE*.
MimeHandlerMail::~MimeHandlerMail()
{
    if (!vfp)
        return;
    fclose(static_cast<FILE *>(vfp));
    vfp = 0;
}
|
||||||
|
|
||||||
// We are called for two different file types: mbox-type folders
|
// We are called for two different file types: mbox-type folders
|
||||||
// holding multiple messages, and maildir-type files with one rfc822
|
// holding multiple messages, and maildir-type files with one message
|
||||||
// message
|
|
||||||
MimeHandler::Status
|
MimeHandler::Status
|
||||||
MimeHandlerMail::worker(RclConfig *cnf, const string &fn,
|
MimeHandlerMail::worker(RclConfig *cnf, const string &fn,
|
||||||
const string &mtype, Rcl::Doc &docout, string&)
|
const string &mtype, Rcl::Doc &docout, string& ipath)
|
||||||
{
|
{
|
||||||
LOGDEB(("MimeHandlerMail::worker: %s [%s]\n", mtype.c_str(), fn.c_str()));
|
LOGDEB(("MimeHandlerMail::worker: %s [%s]\n", mtype.c_str(), fn.c_str()));
|
||||||
conf = cnf;
|
conf = cnf;
|
||||||
|
|
||||||
if (!stringlowercmp("message/rfc822", mtype)) {
|
if (!stringlowercmp("message/rfc822", mtype)) {
|
||||||
return processone(fn, docout);
|
ipath = "";
|
||||||
|
int fd;
|
||||||
|
if ((fd = open(fn.c_str(), 0)) < 0) {
|
||||||
|
LOGERR(("MimeHandlerMail::worker: open(%s) errno %d\n",
|
||||||
|
fn.c_str(), errno));
|
||||||
|
return MimeHandler::MHError;
|
||||||
|
}
|
||||||
|
Binc::MimeDocument doc;
|
||||||
|
doc.parseFull(fd);
|
||||||
|
MimeHandler::Status ret = processone(fn, doc, docout);
|
||||||
|
close(fd);
|
||||||
|
return ret;
|
||||||
} else if (!stringlowercmp("text/x-mail", mtype)) {
|
} else if (!stringlowercmp("text/x-mail", mtype)) {
|
||||||
return MimeHandler::MHError;
|
return processmbox(fn, docout, ipath);
|
||||||
} else
|
} else // hu ho
|
||||||
return MimeHandler::MHError;
|
return MimeHandler::MHError;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
MimeHandler::Status
|
||||||
|
MimeHandlerMail::processmbox(const string &fn, Rcl::Doc &docout, string& ipath)
|
||||||
|
{
|
||||||
|
int mtarg = 0;
|
||||||
|
if (ipath != "") {
|
||||||
|
sscanf(ipath.c_str(), "%d", &mtarg);
|
||||||
|
}
|
||||||
|
LOGDEB(("MimeHandlerMail::processmbox: fn %s, mtarg %d\n", fn.c_str(),
|
||||||
|
mtarg));
|
||||||
|
|
||||||
#include "mime.h"
|
FILE *fp;
|
||||||
|
if (vfp) {
|
||||||
|
fp = (FILE *)vfp;
|
||||||
|
} else {
|
||||||
|
fp = fopen(fn.c_str(), "r");
|
||||||
|
if (fp == 0) {
|
||||||
|
LOGERR(("MimeHandlerMail::processmbox: error opening %s\n",
|
||||||
|
fn.c_str()));
|
||||||
|
return MimeHandler::MHError;
|
||||||
|
}
|
||||||
|
vfp = fp;
|
||||||
|
}
|
||||||
|
if (mtarg > 0) {
|
||||||
|
fseek(fp, 0, SEEK_SET);
|
||||||
|
msgnum = 0;
|
||||||
|
}
|
||||||
|
|
||||||
const char *hnames[] = {"Subject", "Content-type"};
|
off_t start, end;
|
||||||
int nh = sizeof(hnames) / sizeof(char *);
|
bool iseof = false;
|
||||||
|
do {
|
||||||
|
// Look for next 'From ' Line, start of message. Set start to
|
||||||
|
// line after this
|
||||||
|
char line[301];
|
||||||
|
for (;;) {
|
||||||
|
if (!fgets(line, 300, fp)) {
|
||||||
|
// Eof hit while looking for 'From ' -> file done. We'd need
|
||||||
|
// another return code here
|
||||||
|
return MimeHandler::MHError;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!strncmp("From ", line, 5)) {
|
||||||
|
start = ftello(fp);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Look for next 'From ' line or eof, end of message (we let a
|
||||||
|
// spurious empty line in)
|
||||||
|
for (;;) {
|
||||||
|
end = ftello(fp);
|
||||||
|
if (!fgets(line, 300, fp) || !strncmp("From ", line, 5)) {
|
||||||
|
if (ferror(fp) || feof(fp))
|
||||||
|
iseof = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
msgnum++;
|
||||||
|
LOGDEB(("MimeHandlerMail::processmbox: got msg %d\n", msgnum));
|
||||||
|
fseek(fp, end, SEEK_SET);
|
||||||
|
} while (mtarg > 0 && msgnum < mtarg);
|
||||||
|
|
||||||
|
|
||||||
|
size_t size = end - start;
|
||||||
|
fseek(fp, start, SEEK_SET);
|
||||||
|
char *cp = (char *)malloc(size);
|
||||||
|
if (cp == 0) {
|
||||||
|
LOGERR(("MimeHandlerMail::processmbox: malloc(%d) failed\n", size));
|
||||||
|
return MimeHandler::MHError;
|
||||||
|
}
|
||||||
|
if (fread(cp, 1, size, fp) != size) {
|
||||||
|
LOGERR(("MimeHandlerMail::processmbox: fread failed (errno %d)\n",
|
||||||
|
errno));
|
||||||
|
free(cp);
|
||||||
|
return MimeHandler::MHError;
|
||||||
|
}
|
||||||
|
string msgbuf(cp, size);
|
||||||
|
free(cp);
|
||||||
|
stringstream s(msgbuf);
|
||||||
|
Binc::MimeDocument doc;
|
||||||
|
doc.parseFull(s);
|
||||||
|
MimeHandler::Status ret = processone(fn, doc, docout);
|
||||||
|
if (ret == MimeHandler::MHError)
|
||||||
|
return ret;
|
||||||
|
char buf[20];
|
||||||
|
sprintf(buf, "%d", msgnum);
|
||||||
|
ipath = buf;
|
||||||
|
return iseof ? MimeHandler::MHDone :
|
||||||
|
(mtarg > 0) ? MimeHandler::MHDone : MimeHandler::MHAgain;
|
||||||
|
}
|
||||||
|
|
||||||
void walkmime(string &out, Binc::MimePart& doc, int fd, int depth);
|
|
||||||
|
|
||||||
// Transform a single message into a document. The subject becomes the
|
// Transform a single message into a document. The subject becomes the
|
||||||
// title, and any simple body part with a content-type of text or html
|
// title, and any simple body part with a content-type of text or html
|
||||||
// and content-disposition inline gets concatenated as text.
|
// and content-disposition inline gets concatenated as text.
|
||||||
MimeHandler::Status
|
MimeHandler::Status
|
||||||
MimeHandlerMail::processone(const string &fn, Rcl::Doc &docout)
|
MimeHandlerMail::processone(const string &fn, Binc::MimeDocument& doc,
|
||||||
|
Rcl::Doc &docout)
|
||||||
{
|
{
|
||||||
int fd;
|
|
||||||
if ((fd = open(fn.c_str(), 0)) < 0) {
|
|
||||||
LOGERR(("MimeHandlerMail::processone: open(%s) errno %d\n",
|
|
||||||
fn.c_str(), errno));
|
|
||||||
return MimeHandler::MHError;
|
|
||||||
}
|
|
||||||
Binc::MimeDocument doc;
|
|
||||||
doc.parseFull(fd);
|
|
||||||
|
|
||||||
if (!doc.isHeaderParsed() && !doc.isAllParsed()) {
|
if (!doc.isHeaderParsed() && !doc.isAllParsed()) {
|
||||||
LOGERR(("MimeHandlerMail::processone: parse error for %s\n",
|
LOGERR(("MimeHandlerMail::processone: mime parse error for %s\n",
|
||||||
fn.c_str()));
|
fn.c_str()));
|
||||||
close(fd);
|
|
||||||
return MimeHandler::MHError;
|
return MimeHandler::MHError;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Handle some headers. We should process rfc2047 encoding here
|
||||||
|
Binc::HeaderItem hi;
|
||||||
|
if (doc.h.getFirstHeader("Subject", hi)) {
|
||||||
|
docout.title = hi.getValue();
|
||||||
|
}
|
||||||
|
if (doc.h.getFirstHeader("From", hi)) {
|
||||||
|
docout.text += string("From: ") + hi.getValue() + string("\n");
|
||||||
|
}
|
||||||
|
if (doc.h.getFirstHeader("To", hi)) {
|
||||||
|
docout.text += string("To: ") + hi.getValue() + string("\n");
|
||||||
|
}
|
||||||
|
if (doc.h.getFirstHeader("Date", hi)) {
|
||||||
|
docout.text += string("Date: ") + hi.getValue() + string("\n");
|
||||||
|
}
|
||||||
|
|
||||||
LOGDEB(("MimeHandlerMail::processone: ismultipart %d mime subtype '%s'\n",
|
LOGDEB(("MimeHandlerMail::processone: ismultipart %d mime subtype '%s'\n",
|
||||||
doc.isMultipart(), doc.getSubType().c_str()));
|
doc.isMultipart(), doc.getSubType().c_str()));
|
||||||
walkmime(docout.text, doc, fd, 0);
|
walkmime(conf, docout.text, doc, 0);
|
||||||
close(fd);
|
|
||||||
LOGDEB(("MimeHandlerMail::processone: text: '%s'\n", docout.text.c_str()));
|
LOGDEB(("MimeHandlerMail::processone: text: '%s'\n", docout.text.c_str()));
|
||||||
return MimeHandler::MHError;
|
return MimeHandler::MHDone;
|
||||||
}
|
}
|
||||||
|
|
||||||
void walkmime(string &out, Binc::MimePart& doc, int fd, int depth)
|
// Recursively walk the message mime parts and concatenate all the
|
||||||
|
// inline html or text that we find anywhere.
|
||||||
|
static void walkmime(RclConfig *cnf, string &out, Binc::MimePart& doc,
|
||||||
|
int depth)
|
||||||
{
|
{
|
||||||
if (depth > 5) {
|
if (depth > 5) {
|
||||||
LOGINFO(("walkmime: max depth exceeded\n"));
|
LOGINFO(("walkmime: max depth exceeded\n"));
|
||||||
@ -88,12 +209,12 @@ void walkmime(string &out, Binc::MimePart& doc, int fd, int depth)
|
|||||||
LOGDEB(("walkmime: ismultipart %d subtype '%s'\n",
|
LOGDEB(("walkmime: ismultipart %d subtype '%s'\n",
|
||||||
doc.isMultipart(), doc.getSubType().c_str()));
|
doc.isMultipart(), doc.getSubType().c_str()));
|
||||||
// We only handle alternative and mixed for now. For
|
// We only handle alternative and mixed for now. For
|
||||||
// alternative, we look for a text/plain part, else html and process it
|
// alternative, we look for a text/plain part, else html and
|
||||||
// For mixed, we process each part.
|
// process it. For mixed, we process each part.
|
||||||
std::vector<Binc::MimePart>::iterator it;
|
std::vector<Binc::MimePart>::iterator it;
|
||||||
if (!stringicmp("mixed", doc.getSubType())) {
|
if (!stringicmp("mixed", doc.getSubType())) {
|
||||||
for (it = doc.members.begin(); it != doc.members.end();it++) {
|
for (it = doc.members.begin(); it != doc.members.end();it++) {
|
||||||
walkmime(out, *it, fd, depth+1);
|
walkmime(cnf, out, *it, depth+1);
|
||||||
}
|
}
|
||||||
} else if (!stringicmp("alternative", doc.getSubType())) {
|
} else if (!stringicmp("alternative", doc.getSubType())) {
|
||||||
std::vector<Binc::MimePart>::iterator ittxt, ithtml;
|
std::vector<Binc::MimePart>::iterator ittxt, ithtml;
|
||||||
@ -103,7 +224,17 @@ void walkmime(string &out, Binc::MimePart& doc, int fd, int depth)
|
|||||||
Binc::HeaderItem hi;
|
Binc::HeaderItem hi;
|
||||||
if (!doc.h.getFirstHeader("Content-Type", hi))
|
if (!doc.h.getFirstHeader("Content-Type", hi))
|
||||||
continue;
|
continue;
|
||||||
LOGDEB(("walkmime:content-type: %s\n", hi.getValue().c_str()));
|
MimeHeaderValue content_type;
|
||||||
|
parseMimeHeaderValue(hi.getValue(), content_type);
|
||||||
|
if (!stringlowercmp("text/plain", content_type.value))
|
||||||
|
ittxt = it;
|
||||||
|
else if (!stringlowercmp("text/html", content_type.value))
|
||||||
|
ithtml = it;
|
||||||
|
}
|
||||||
|
if (ittxt != doc.members.end()) {
|
||||||
|
walkmime(cnf, out, *ittxt, depth+1);
|
||||||
|
} else if (ithtml != doc.members.end()) {
|
||||||
|
walkmime(cnf, out, *ithtml, depth+1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -149,30 +280,36 @@ void walkmime(string &out, Binc::MimePart& doc, int fd, int depth)
|
|||||||
LOGDEB(("walkmime: final: body start offset %d, length %d\n",
|
LOGDEB(("walkmime: final: body start offset %d, length %d\n",
|
||||||
doc.getBodyStartOffset(), doc.getBodyLength()));
|
doc.getBodyStartOffset(), doc.getBodyLength()));
|
||||||
string body;
|
string body;
|
||||||
doc.getBody(fd, body, 0, doc.bodylength);
|
doc.getBody(body, 0, doc.bodylength);
|
||||||
|
|
||||||
// Decode content transfer encoding
|
// Decode content transfer encoding
|
||||||
if (stringlowercmp("quoted-printable", content_disposition.value)) {
|
if (!stringlowercmp("quoted-printable", cte)) {
|
||||||
string decoded;
|
string decoded;
|
||||||
qp_decode(body, decoded);
|
qp_decode(body, decoded);
|
||||||
body = decoded;
|
body = decoded;
|
||||||
} else if (stringlowercmp("base64", content_disposition.value)) {
|
} else if (!stringlowercmp("base64", cte)) {
|
||||||
string decoded;
|
string decoded;
|
||||||
base64_decode(body, decoded);
|
base64_decode(body, decoded);
|
||||||
body = decoded;
|
body = decoded;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Transcode to utf-8
|
|
||||||
string transcoded;
|
string transcoded;
|
||||||
if (!transcode(body, transcoded, charset, "UTF-8")) {
|
if (!stringlowercmp("text/html", content_type.value)) {
|
||||||
LOGERR(("walkmime: transcode failed from cs '%s' to UTF-8\n",
|
MimeHandlerHtml mh;
|
||||||
charset.c_str()));
|
Rcl::Doc hdoc;
|
||||||
transcoded = body;
|
mh.charsethint = charset;
|
||||||
|
mh.worker1(cnf, "", body, content_type.value, hdoc);
|
||||||
|
transcoded = hdoc.text;
|
||||||
|
} else {
|
||||||
|
// Transcode to utf-8
|
||||||
|
if (!transcode(body, transcoded, charset, "UTF-8")) {
|
||||||
|
LOGERR(("walkmime: transcode failed from cs '%s' to UTF-8\n",
|
||||||
|
charset.c_str()));
|
||||||
|
transcoded = body;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
out += string("\r\n") + transcoded;
|
out += string("\r\n") + transcoded;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -1,14 +1,23 @@
|
|||||||
#ifndef _MAIL_H_INCLUDED_
|
#ifndef _MAIL_H_INCLUDED_
|
||||||
#define _MAIL_H_INCLUDED_
|
#define _MAIL_H_INCLUDED_
|
||||||
/* @(#$Id: mh_mail.h,v 1.1 2005-03-25 09:40:27 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: mh_mail.h,v 1.2 2005-03-31 10:04:07 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
#include "mimehandler.h"
|
#include "mimehandler.h"
|
||||||
|
namespace Binc {
|
||||||
|
class MimeDocument;
|
||||||
|
}
|
||||||
|
|
||||||
// Code to turn a mail folder file into internal documents
|
// Code to turn a mail folder file into internal documents
|
||||||
class MimeHandlerMail : public MimeHandler {
|
class MimeHandlerMail : public MimeHandler {
|
||||||
|
void *vfp;
|
||||||
|
int msgnum;
|
||||||
RclConfig *conf;
|
RclConfig *conf;
|
||||||
MimeHandler::Status processone(const string &fn, Rcl::Doc &docout);
|
MimeHandler::Status processone(const string &fn, Binc::MimeDocument& doc,
|
||||||
|
Rcl::Doc &docout);
|
||||||
|
MimeHandler::Status processmbox(const string &fn, Rcl::Doc &docout,
|
||||||
|
string &ipath);
|
||||||
public:
|
public:
|
||||||
MimeHandlerMail() : conf(0) {}
|
MimeHandlerMail() : vfp(0), msgnum(0), conf(0) {}
|
||||||
|
virtual ~MimeHandlerMail();
|
||||||
virtual MimeHandler::Status
|
virtual MimeHandler::Status
|
||||||
worker(RclConfig *conf, const string &fn,
|
worker(RclConfig *conf, const string &fn,
|
||||||
const string &mtype, Rcl::Doc &docout, string& ipath);
|
const string &mtype, Rcl::Doc &docout, string& ipath);
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.24 2005-02-10 15:21:12 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.25 2005-03-31 10:04:07 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
@ -316,7 +316,8 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc)
|
|||||||
splitter.text_to_words(noacc);
|
splitter.text_to_words(noacc);
|
||||||
|
|
||||||
newdocument.add_term("T" + doc.mimetype);
|
newdocument.add_term("T" + doc.mimetype);
|
||||||
string pathterm = "P" + fn;
|
string pathterm = doc.ipath.empty() ?
|
||||||
|
"P" + fn : "P" + fn + "|" + doc.ipath;
|
||||||
newdocument.add_term(pathterm);
|
newdocument.add_term(pathterm);
|
||||||
const char *fnc = fn.c_str();
|
const char *fnc = fn.c_str();
|
||||||
|
|
||||||
@ -332,6 +333,10 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc)
|
|||||||
record += "\ncaption=" + doc.title;
|
record += "\ncaption=" + doc.title;
|
||||||
record += "\nkeywords=" + doc.keywords;
|
record += "\nkeywords=" + doc.keywords;
|
||||||
record += "\nabstract=" + doc.abstract;
|
record += "\nabstract=" + doc.abstract;
|
||||||
|
if (!doc.ipath.empty()) {
|
||||||
|
record += "\nipath=" + doc.ipath;
|
||||||
|
}
|
||||||
|
|
||||||
record += "\n";
|
record += "\n";
|
||||||
LOGDEB1(("Newdocument data: %s\n", record.c_str()));
|
LOGDEB1(("Newdocument data: %s\n", record.c_str()));
|
||||||
newdocument.set_data(record);
|
newdocument.set_data(record);
|
||||||
@ -357,9 +362,11 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc)
|
|||||||
ndb->wdb.replace_document(pathterm, newdocument);
|
ndb->wdb.replace_document(pathterm, newdocument);
|
||||||
if (did < ndb->updated.size()) {
|
if (did < ndb->updated.size()) {
|
||||||
ndb->updated[did] = true;
|
ndb->updated[did] = true;
|
||||||
LOGDEB(("Rcl::Db::add: docid %d updated [%s]\n", did, fnc));
|
LOGDEB(("Rcl::Db::add: docid %d updated [%s , %s]\n", did, fnc,
|
||||||
|
doc.ipath.c_str()));
|
||||||
} else {
|
} else {
|
||||||
LOGDEB(("Rcl::Db::add: docid %d added [%s]\n", did, fnc));
|
LOGDEB(("Rcl::Db::add: docid %d added [%s , %s]\n", did, fnc,
|
||||||
|
doc.ipath.c_str()));
|
||||||
}
|
}
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
// FIXME: is this ever actually needed?
|
// FIXME: is this ever actually needed?
|
||||||
@ -378,8 +385,12 @@ bool Rcl::Db::needUpdate(const string &filename, const struct stat *stp)
|
|||||||
Native *ndb = (Native *)pdata;
|
Native *ndb = (Native *)pdata;
|
||||||
|
|
||||||
string pathterm = "P" + filename;
|
string pathterm = "P" + filename;
|
||||||
if (!ndb->wdb.term_exists(pathterm))
|
if (!ndb->wdb.term_exists(pathterm)) {
|
||||||
return true;
|
pathterm += string("|") + "1";
|
||||||
|
if (!ndb->wdb.term_exists(pathterm)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
Xapian::PostingIterator doc;
|
Xapian::PostingIterator doc;
|
||||||
try {
|
try {
|
||||||
Xapian::PostingIterator did = ndb->wdb.postlist_begin(pathterm);
|
Xapian::PostingIterator did = ndb->wdb.postlist_begin(pathterm);
|
||||||
@ -775,5 +786,6 @@ bool Rcl::Db::getDoc(int i, Doc &doc, int *percent)
|
|||||||
parms.get(string("caption"), doc.title);
|
parms.get(string("caption"), doc.title);
|
||||||
parms.get(string("keywords"), doc.keywords);
|
parms.get(string("keywords"), doc.keywords);
|
||||||
parms.get(string("abstract"), doc.abstract);
|
parms.get(string("abstract"), doc.abstract);
|
||||||
|
parms.get(string("ipath"), doc.ipath);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user