mail ckpt

2005-03-25 09:40:28 +00:00 · 2005-03-25 09:40:28 +00:00 · d392d317bb
commit d392d317bb
parent 63a29c7ced
25 changed files with 872 additions and 145 deletions
--- a/src/bincimapmime/00README.recoll
+++ b/src/bincimapmime/00README.recoll
@ -0,0 +1,3 @@
+Most of the code in this directory was taken from the Binc IMAP project
+(http://www.bincimap.org/), version 1.3.3
+
--- a/src/bincimapmime/mime-printbody.cc
+++ b/src/bincimapmime/mime-printbody.cc
@ -72,3 +72,27 @@ void Binc::MimePart::printBody(int fd, IODevice &output,
    output << (char)c;
  }
 }
+
+// Append part of this MIME part's body to 's'.
+//
+// fd          descriptor of the file holding the message
+// s           output string; the bytes read are appended to it
+// startoffset offset of the first byte wanted, relative to the body start
+// length      maximum number of bytes wanted; clamped so that the read
+//             never extends past the end of the body
+//
+// Nothing is appended when startoffset lies at or beyond the end of the
+// body. Reading stops early if the input source runs out of data.
+void Binc::MimePart::getBody(int fd, string &s,
+			     unsigned int startoffset,
+			     unsigned int length) const
+{
+  // (Re)create the input source if there is none yet or if the current
+  // one is bound to another descriptor.
+  if (!mimeSource || mimeSource->getFileDescriptor() != fd) {
+    delete mimeSource;
+    mimeSource = new MimeInputSource(fd);
+  }
+
+  mimeSource->reset();
+  mimeSource->seek(bodystartoffsetcrlf + startoffset);
+
+  // Clamp the request to the body. The test is written so that no
+  // unsigned operation can wrap: "startoffset + length" could overflow,
+  // and "bodylength - startoffset" would underflow for a start offset
+  // past the end of the body.
+  if (startoffset >= bodylength)
+    length = 0;
+  else if (length > bodylength - startoffset)
+    length = bodylength - startoffset;
+
+  char c = '\0';
+  for (unsigned int i = 0; i < length; ++i) {
+    if (!mimeSource->getChar(&c))
+      break;
+
+    s += c;
+  }
+}
--- a/src/bincimapmime/mime.h
+++ b/src/bincimapmime/mime.h
@ -107,6 +107,7 @@ namespace Binc {
    inline unsigned int getBodyStartOffset(void) const { return bodystartoffsetcrlf; }

    void printBody(int fd, Binc::IODevice &output, unsigned int startoffset, unsigned int length) const;
+      void getBody(int fd, std::string& s, unsigned int startoffset, unsigned int length) const;
    void printHeader(int fd, Binc::IODevice &output, std::vector<std::string> headers, bool includeheaders, unsigned int startoffset, unsigned int length, std::string &storage) const;
    void printDoc(int fd, Binc::IODevice &output, unsigned int startoffset, unsigned int length) const;
    virtual void clear(void) const;
--- a/src/bincimapmime/trbinc.cc
+++ b/src/bincimapmime/trbinc.cc
@ -0,0 +1,87 @@
+#ifndef 	lint
+static char rcsid [] = "@(#$Id: trbinc.cc,v 1.1 2005-03-25 09:40:27 dockes Exp $  (C) 1994 CDKIT";
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+
+#include "mime.h"
+
+static char *thisprog;
+
+static char usage [] =
+    "trbinc <mboxfile> \n\n"
+    ;
+// Print the usage message on stderr, prefixed with the program name, then
+// terminate the process with exit status 1. Never returns.
+static void
+Usage(void)
+{
+    const char *progname = thisprog;
+    fprintf(stderr, "%s: usage:\n%s", progname, usage);
+    exit(1);
+}
+
+static int     op_flags;
+#define OPT_MOINS 0x1
+#define OPT_s	  0x2 
+#define OPT_b	  0x4 
+
+#define DEFCOUNT 10
+
+const char *hnames[] = {"Subject", "Content-type"};
+int nh = sizeof(hnames) / sizeof(char *);
+
+// Test driver for the Binc MIME parser: parses the file named on the
+// command line and prints the value of each header listed in hnames.
+// Exits with status 1 on a usage error, an open failure, a parse failure
+// or a missing header, and with status 0 otherwise.
+int main(int argc, char **argv)
+{
+    // Set by the -b option; not otherwise used in this function.
+    int count = DEFCOUNT;
+    
+    thisprog = argv[0];
+    argc--; argv++;
+
+    // Option parsing: each argument starting with '-' is a cluster of
+    // single-letter flags.
+    while (argc > 0 && **argv == '-') {
+	(*argv)++;
+	if (!(**argv))
+	    /* Case of a lone "-" argument, as in "adb - core" */
+	    Usage();
+	while (**argv)
+	    switch (*(*argv)++) {
+	    case 's':	op_flags |= OPT_s; break;
+	    // -b takes a decimal value from the next argument. That
+	    // argument is consumed here, then control jumps to b1 which
+	    // steps over it like over any other argument.
+	    case 'b':	op_flags |= OPT_b; if (argc < 2)  Usage();
+		if ((sscanf(*(++argv), "%d", &count)) != 1) 
+		    Usage(); 
+		argc--; 
+		goto b1;
+	    default: Usage();	break;
+	    }
+    b1: argc--; argv++;
+    }
+
+    // Exactly one non-option argument must remain: the file to parse.
+    if (argc != 1)
+	Usage();
+
+    char *mfile = *argv++;argc--;
+    int fd;
+    // NOTE(review): the flags value 0 presumably stands for O_RDONLY.
+    if ((fd = open(mfile, 0)) < 0) {
+	perror("Opening");
+	exit(1);
+    }
+    Binc::MimeDocument doc;
+    doc.parseFull(fd);
+
+    // Give up only if neither the header nor the whole document was parsed.
+    if (!doc.isHeaderParsed() && !doc.isAllParsed()) {
+	fprintf(stderr, "Parse error\n");
+	exit(1);
+    }
+    close(fd);
+    // Print the first occurrence of each header of interest; a missing
+    // header is treated as a fatal error.
+    Binc::HeaderItem hi;
+    for (int i = 0; i < nh ; i++) {
+	if (!doc.h.getFirstHeader(hnames[i], hi)) {
+	    fprintf(stderr, "No %s\n", hnames[i]);
+	    exit(1);
+	}
+	printf("%s: %s\n", hnames[i], hi.getValue().c_str());
+    }
+    exit(0);
+}
--- a/src/common/Makefile
+++ b/src/common/Makefile
@ -1,7 +1,6 @@
 include ../mk/sysconf
 BIGLIB = ../lib/librcl.a

-
 PROGS = unacpp textsplit
 all: $(PROGS)

--- a/src/index/Makefile
+++ b/src/index/Makefile
@ -1,11 +1,12 @@
 include ../mk/sysconf

 BIGLIB = ../lib/librcl.a
+MIMELIB = ../bincimapmime/libmime.a

 PROGS = recollindex csguess
 all: $(PROGS)

-RECOLLINDEX_OBJS= recollindex.o $(BIGLIB)
+RECOLLINDEX_OBJS= recollindex.o $(BIGLIB) $(MIMELIB)
 recollindex : $(RECOLLINDEX_OBJS)
 	$(CXX) $(CXXFLAGS) -o recollindex $(RECOLLINDEX_OBJS) \
 		$(LIBXAPIAN) $(LIBICONV) $(LIBSYS)
--- a/src/index/indexer.cpp
+++ b/src/index/indexer.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: indexer.cpp,v 1.8 2005-03-17 15:35:49 dockes Exp $ (C) 2004 J.F.Dockes";
+static char rcsid[] = "@(#$Id: indexer.cpp,v 1.9 2005-03-25 09:40:27 dockes Exp $ (C) 2004 J.F.Dockes";
 #endif
 #include <stdio.h>
 #include <sys/stat.h>
@ -146,18 +146,25 @@ DbIndexer::processone(const std::string &fn, const struct stat *stp,
 	return FsTreeWalker::FtwOk;
    }

-    Rcl::Doc doc;
-    if (!internfile(fn, config, doc, tmpdir))
-	return FsTreeWalker::FtwOk;
+    FileInterner interner(fn, config, tmpdir);
+    FileInterner::Status fis = FileInterner::FIAgain;
+    while (fis == FileInterner::FIAgain) {
+	Rcl::Doc doc;
+	string ipath;
+	fis = interner.internfile(doc, ipath);
+	if (fis == FileInterner::FIError)
+	    break;

-    // Set up common fields:
-    char ascdate[20];
-    sprintf(ascdate, "%ld", long(stp->st_ctime));
-    doc.mtime = ascdate;
+	// Set up common fields:
+	char ascdate[20];
+	sprintf(ascdate, "%ld", long(stp->st_ctime));
+	doc.mtime = ascdate;
+	doc.ipath = ipath;

-    // Do database-specific work to update document data
-    if (!db.add(fn, doc)) 
-	return FsTreeWalker::FtwError;
+	// Do database-specific work to update document data
+	if (!db.add(fn, doc)) 
+	    return FsTreeWalker::FtwError;
+    }

    return FsTreeWalker::FtwOk;
 }
--- a/src/index/mimetype.cpp
+++ b/src/index/mimetype.cpp
@ -1,14 +1,47 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.5 2005-02-09 13:34:08 dockes Exp $ (C) 2004 J.F.Dockes";
+static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.6 2005-03-25 09:40:27 dockes Exp $ (C) 2004 J.F.Dockes";
 #endif

 #include <ctype.h>

 #include <string>
 using std::string;
+#include <list>
+using std::list;

 #include "mimetype.h"
 #include "debuglog.h"
+#include "execmd.h"
+#include "conftree.h"
+
+// Guess the MIME type of a file from its contents by running
+// "file -i <fn>".
+//
+// @param fn path of the file to examine
+// @return the MIME type (for example "text/plain"), or an empty string
+//         if the command fails or its output can't be interpreted.
+static string mimetypefromdata(const string &fn)
+{
+    list<string> args;
+    args.push_back("-i");
+    args.push_back(fn);
+
+    ExecCmd ex;
+    string result;
+    string cmd = "file";
+    int status = ex.doexec(cmd, args, 0, &result);
+    if (status) {
+	LOGERR(("mimetypefromdata: doexec: status 0x%x\n", status));
+	return "";
+    }
+    // LOGDEB(("mimetypefromdata: %s [%s]\n", result.c_str(), fn.c_str()));
+
+    // The output is expected to look like "<fn>: <type>[; charset=...]".
+    // When it begins with the exact file name followed by ':', remove
+    // that prefix before splitting into words, so that a file name
+    // containing white space can't shift the position of the type.
+    bool stripped = false;
+    if (result.length() > fn.length() &&
+	result.compare(0, fn.length(), fn) == 0 &&
+	result[fn.length()] == ':') {
+	result.erase(0, fn.length() + 1);
+	stripped = true;
+    }
+
+    list<string> res;
+    ConfTree::stringToStrings(result, res);
+    // The type is the first word once the prefix is gone, else the second
+    // word of the raw output (the first one being the file name).
+    list<string>::size_type idx = stripped ? 0 : 1;
+    if (res.size() <= idx) 
+	return "";
+    list<string>::iterator it = res.begin();
+    if (idx)
+	++it;
+    string mime = *it;
+
+    // Remove one trailing separator (typically the ';' that precedes the
+    // charset). Characters that can legitimately end a type name, such as
+    // the digit in "application/x-bzip2", are kept. The cast avoids
+    // passing a negative value to isalnum().
+    if (!mime.empty()) {
+	unsigned char last = (unsigned char)mime[mime.length() - 1];
+	if (!isalnum(last) && last != '+' && last != '-' && 
+	    last != '.' && last != '_')
+	    mime.erase(mime.length() - 1);
+    }
+
+    return mime;
+}

 string mimetype(const string &fn, ConfTree *mtypes)
 {
@ -38,8 +71,9 @@ string mimetype(const string &fn, ConfTree *mtypes)

    // If the file name has a suffix and we find it in the map, we're done
    string::size_type dot = fn.find_last_of(".");
+    string suff;
    if (dot != string::npos) {
-	string suff = fn.substr(dot);
+        suff = fn.substr(dot);
 	for (unsigned int i = 0; i < suff.length(); i++)
 	    suff[i] = tolower(suff[i]);

@ -48,7 +82,9 @@ string mimetype(const string &fn, ConfTree *mtypes)
 	    return mtype;
    }

-    // Look at file data ? One day maybe
+    // Look at file data ? Only when no suffix
+    if (suff.empty())
+	return mimetypefromdata(fn);
    return "";
 }

--- a/src/internfile/internfile.cpp
+++ b/src/internfile/internfile.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: internfile.cpp,v 1.3 2005-03-17 15:35:49 dockes Exp $ (C) 2004 J.F.Dockes";
+static char rcsid[] = "@(#$Id: internfile.cpp,v 1.4 2005-03-25 09:40:27 dockes Exp $ (C) 2004 J.F.Dockes";
 #endif
 #include <unistd.h>
 #include <sys/types.h>
@ -70,30 +70,27 @@ static bool uncompressfile(RclConfig *conf, const string& ifn,
    return true;
 }

-static void tmpcleanup(const string& tdir, const string& tfile)
+void FileInterner::tmpcleanup()
 {
    if (tdir.empty() || tfile.empty())
 	return;
    if (unlink(tfile.c_str()) < 0) {
-	LOGERR(("tmpcleanup: unlink(%s) errno %d\n", tfile.c_str(), 
-		errno));
+	LOGERR(("FileInterner::tmpcleanup: unlink(%s) errno %d\n", 
+		tfile.c_str(), errno));
 	return;
    }
 }

-bool internfile(const std::string &ifn, RclConfig *config, Rcl::Doc& doc,
-		const string& tdir)
+// Handler==0 on return says we're in error
+FileInterner::FileInterner(const std::string &f, RclConfig *cnf, 
+			   const string& td)
+    : fn(f), config(cnf), tdir(td), handler(0) 
 {
-    string fn = ifn;
-    string tfile;
-    MimeHandler *handler = 0;
-    bool ret = false;
-
-    string mime = mimetype(fn, config->getMimeMap());
+    mime = mimetype(fn, config->getMimeMap());
    if (mime.empty()) {
 	// No mime type: not listed in our map.
-	LOGDEB(("internfile: (no mime) [%s]\n", fn.c_str()));
-	return false;
+	LOGDEB(("FileInterner::FileInterner: (no mime) [%s]\n", fn.c_str()));
+	return;
    }

    // First check for a compressed file. If so, create a temporary
@ -101,8 +98,9 @@ bool internfile(const std::string &ifn, RclConfig *config, Rcl::Doc& doc,
    // rest with the temp file.
    list<string>ucmd;
    if (getUncompressor(mime, config->getMimeConf(), ucmd)) {
-	if (!uncompressfile(config, fn, ucmd, tdir, tfile)) 
-	    return false;
+	if (!uncompressfile(config, fn, ucmd, tdir, tfile)) {
+	    return;
+	}
 	LOGDEB(("internfile: after ucomp: tdir %s, tfile %s\n", 
 		tdir.c_str(), tfile.c_str()));
 	fn = tfile;
@ -110,33 +108,43 @@ bool internfile(const std::string &ifn, RclConfig *config, Rcl::Doc& doc,
 	if (mime.empty()) {
 	    // No mime type ?? pass on.
 	    LOGDEB(("internfile: (no mime) [%s]\n", fn.c_str()));
-	    goto out;
+	    return;
 	}
-
    }
-    

    // Look for appropriate handler
    handler = getMimeHandler(mime, config->getMimeConf());
    if (!handler) {
 	// No handler for this type, for now :(
-	LOGDEB(("internfile: %s : no handler\n", mime.c_str()));
-	goto out;
+	LOGDEB(("FileInterner::FileInterner: %s: no handler\n", mime.c_str()));
+	return;
    }

-    LOGDEB(("internfile: %s [%s]\n", mime.c_str(), fn.c_str()));
+    LOGDEB(("FileInterner::FileInterner: %s [%s]\n",mime.c_str(), fn.c_str()));
+}
+
+FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath)
+{
+    if (!handler)
+	return FIError;

    // Turn file into a document. The document has fields for title, body 
    // etc.,  all text converted to utf8
-    if (!handler->worker(config, fn,  mime, doc)) {
-	goto out;
+    MimeHandler::Status mhs = handler->worker(config, fn,  mime, doc, ipath);
+    FileInterner::Status ret = FIError;
+    switch (mhs) {
+    case MimeHandler::MHError: break;
+    case MimeHandler::MHDone: ret = FIDone;break;
+    case MimeHandler::MHAgain: ret = FIAgain;break;
    }
-    doc.mimetype = mime;

-    // Clean up. We delete the temp file and its father directory
-    ret = true;
- out:
-    delete handler;
-    tmpcleanup(tdir, tfile);
+    doc.mimetype = mime;
    return ret;
 }
+
+// Release the mime handler, if any, and remove the temporary file which
+// may have been created for uncompressed data.
+FileInterner::~FileInterner()
+{
+    MimeHandler *doomed = handler;
+    handler = 0;
+    delete doomed;
+
+    tmpcleanup();
+}
--- a/src/internfile/internfile.h
+++ b/src/internfile/internfile.h
@ -1,14 +1,31 @@
 #ifndef _INTERNFILE_H_INCLUDED_
 #define _INTERNFILE_H_INCLUDED_
-/* @(#$Id: internfile.h,v 1.2 2005-02-09 12:07:29 dockes Exp $  (C) 2004 J.F.Dockes */
+/* @(#$Id: internfile.h,v 1.3 2005-03-25 09:40:27 dockes Exp $  (C) 2004 J.F.Dockes */

 #include <string>

 #include "rclconfig.h"
 #include "rcldb.h"

+class MimeHandler;
+
 /// Turn external file into internal representation, according to mime type etc
-extern bool internfile(const std::string &fn, RclConfig *config, 
-		       Rcl::Doc& doc, const string& tdir);
+class FileInterner {
+    // File to process. The constructor replaces it with the temporary
+    // file name when the input had to be uncompressed first.
+    string fn;
+    RclConfig *config;
+    // Temporary directory name. Held by value so that the object stays
+    // valid even if the caller's string goes away (a reference member
+    // would dangle if a temporary were passed to the constructor).
+    string tdir;
+    // Handler for the file's mime type, owned by this object. Stays 0
+    // when construction failed.
+    MimeHandler *handler;
+    // Temporary uncompressed copy of the file, empty if none was needed
+    string tfile;
+    string mime;
+
+    void tmpcleanup();
+
+    // Not copyable: the destructor deletes 'handler' and removes 'tfile',
+    // so a copy would release them twice. Declared, never defined.
+    FileInterner(const FileInterner&);
+    FileInterner& operator=(const FileInterner&);
+
+ public:
+    /// Determine the mime type of f, uncompress it into td if needed,
+    /// and select the handler. Errors are reported by the first call to
+    /// internfile(), which then returns FIError.
+    FileInterner(const std::string &f, RclConfig *cnf, const string& td);
+    ~FileInterner();
+
+    enum Status {FIError, FIDone, FIAgain};
+    /// Extract the next document from the file. FIAgain means that the
+    /// call produced a document and that more may follow.
+    Status internfile(Rcl::Doc& doc, string &ipath);
+};

 #endif /* _INTERNFILE_H_INCLUDED_ */
--- a/src/internfile/mh_html.cpp
+++ b/src/internfile/mh_html.cpp
@ -38,21 +38,23 @@
 using namespace std;


-bool MimeHandlerHtml::worker(RclConfig *conf, const string &fn, 
-			     const string &mtype, Rcl::Doc &docout)
+MimeHandler::Status 
+MimeHandlerHtml::worker(RclConfig *conf, const string &fn, 
+			const string &mtype, Rcl::Doc &docout, string&)
 {
    LOGDEB(("textHtmlToDoc: %s\n", fn.c_str()));
    string otext;
    if (!file_to_string(fn, otext)) {
 	LOGINFO(("textHtmlToDoc: cant read: %s\n", fn.c_str()));
-	return false;
+	return MimeHandler::MHError;
    }
    return worker1(conf, fn, otext, mtype, docout);
 }

-bool MimeHandlerHtml::worker1(RclConfig *conf, const string &, 
-			     const string& htext,
-			     const string &mtype, Rcl::Doc &docout)
+MimeHandler::Status 
+MimeHandlerHtml::worker1(RclConfig *conf, const string &, 
+			 const string& htext,
+			 const string &mtype, Rcl::Doc &docout)
 {
    // Character set handling:

@ -111,5 +113,5 @@ bool MimeHandlerHtml::worker1(RclConfig *conf, const string &,
    out.keywords = pres.keywords;
    out.abstract = pres.sample;
    docout = out;
-    return true;
+    return MimeHandler::MHDone;
 }
--- a/src/internfile/mh_html.h
+++ b/src/internfile/mh_html.h
@ -1,6 +1,6 @@
 #ifndef _HTML_H_INCLUDED_
 #define _HTML_H_INCLUDED_
-/* @(#$Id: mh_html.h,v 1.2 2005-03-17 15:35:49 dockes Exp $  (C) 2004 J.F.Dockes */
+/* @(#$Id: mh_html.h,v 1.3 2005-03-25 09:40:27 dockes Exp $  (C) 2004 J.F.Dockes */
 #include "mimehandler.h"

 // Code to turn an html document into an internal one. There are 2
@ -11,9 +11,9 @@
 // carry titles, abstracts, whatever)
 class MimeHandlerHtml : public MimeHandler {
 public:
-    virtual bool worker(RclConfig *conf, const string &fn, 
-			const string &mtype, Rcl::Doc &docout);
-    virtual bool worker1(RclConfig *conf, const string &fn, 
+    virtual MimeHandler::Status worker(RclConfig *conf, const string &fn, 
+			const string &mtype, Rcl::Doc &docout, string&);
+    virtual MimeHandler::Status worker1(RclConfig *conf, const string &fn, 
 			 const string& htext,
 			 const string &mtype, Rcl::Doc &docout);
 };
--- a/src/internfile/mh_mail.cpp
+++ b/src/internfile/mh_mail.cpp
@ -0,0 +1,178 @@
+#ifndef lint
+static char rcsid[] = "@(#$Id: mh_mail.cpp,v 1.1 2005-03-25 09:40:27 dockes Exp $ (C) 2005 J.F.Dockes";
+#endif
+
+#include <fcntl.h>
+#include <errno.h>
+
+#include <map>
+using std::map;
+
+#include "mimehandler.h"
+#include "debuglog.h"
+#include "csguess.h"
+#include "readfile.h"
+#include "transcode.h"
+#include "mimeparse.h"
+#include "indextext.h"
+#include "mail.h"
+#include "debuglog.h"
+#include "smallut.h"
+#include "mimeparse.h"
+
+using namespace std;
+
+// We are called for two different file types: mbox-type folders
+// holding multiple messages, and maildir-type files with one rfc822
+// message
+MimeHandler::Status 
+MimeHandlerMail::worker(RclConfig *cnf, const string &fn, 
+			const string &mtype, Rcl::Doc &docout, string&)
+{
+    LOGDEB(("MimeHandlerMail::worker: %s [%s]\n", mtype.c_str(), fn.c_str()));
+    // Remember the configuration for the rest of the processing
+    conf = cnf;
+
+    // A file holding a single rfc822 message is processed directly
+    const bool singlemessage = !stringlowercmp("message/rfc822", mtype);
+    if (singlemessage)
+	return processone(fn, docout);
+
+    // mbox-type folders are recognized, but reported as errors for now
+    if (!stringlowercmp("text/x-mail", mtype)) {
+	return MimeHandler::MHError;
+    }
+
+    // Any other type is not ours to handle
+    return MimeHandler::MHError;
+}
+
+
+#include "mime.h"
+
+const char *hnames[] = {"Subject", "Content-type"};
+int nh = sizeof(hnames) / sizeof(char *);
+
+void walkmime(string &out, Binc::MimePart& doc, int fd, int depth);
+
+// Transform a single message into a document. The subject becomes the
+// title, and any simple body part with a content-type of text or html
+// and content-disposition inline gets concatenated as text.
+// Returns MHDone when the message could be opened and parsed, in which
+// case the text extracted by walkmime() has been appended to docout.text,
+// and MHError if the file can't be opened or parsed.
+// NOTE(review): no title is set from the Subject header here yet.
+MimeHandler::Status 
+MimeHandlerMail::processone(const string &fn, Rcl::Doc &docout)
+{
+    int fd;
+    if ((fd = open(fn.c_str(), O_RDONLY)) < 0) {
+	LOGERR(("MimeHandlerMail::processone: open(%s) errno %d\n",
+		fn.c_str(), errno));
+	return MimeHandler::MHError;
+    }
+    Binc::MimeDocument doc;
+    doc.parseFull(fd);
+
+    // Give up only if neither the header nor the whole document was parsed
+    if (!doc.isHeaderParsed() && !doc.isAllParsed()) {
+	LOGERR(("MimeHandlerMail::processone: parse error for %s\n", 
+		fn.c_str()));
+	close(fd);
+	return MimeHandler::MHError;
+    }
+    LOGDEB(("MimeHandlerMail::processone: ismultipart %d mime subtype '%s'\n", 
+	    doc.isMultipart(), doc.getSubType().c_str()));
+    // The descriptor must stay open during the walk: the bodies are read
+    // back from the file.
+    walkmime(docout.text, doc, fd, 0);
+    close(fd);
+    LOGDEB(("MimeHandlerMail::processone: text: '%s'\n",  docout.text.c_str()));
+    // The message was processed: report success so that the caller keeps
+    // the document.
+    return MimeHandler::MHDone;
+}
+
+// Recursively walk a MIME part and append its indexable text to 'out'.
+//
+// out    accumulated text; each retained body is appended after a "\r\n"
+// doc    the part to process (whole message or sub-part)
+// fd     open descriptor on the message file, used to read the bodies
+// depth  current nesting level; parts nested deeper than 5 are ignored
+//
+// Multipart containers: for "mixed", every member is walked; for
+// "alternative", the first text/plain member is walked, or else the first
+// text/html one. Other multipart subtypes are ignored.
+// Simple parts are retained if their type is text/plain or text/html and
+// their disposition is inline. The body is then decoded according to its
+// Content-Transfer-Encoding and transcoded to UTF-8.
+void walkmime(string &out, Binc::MimePart& doc, int fd, int depth)
+{
+    if (depth > 5) {
+	LOGINFO(("walkmime: max depth exceeded\n"));
+	return;
+    }
+
+    if (doc.isMultipart()) {
+	LOGDEB(("walkmime: ismultipart %d subtype '%s'\n", 
+		doc.isMultipart(), doc.getSubType().c_str()));
+	// We only handle alternative and mixed for now. For
+	// alternative, we look for a text/plain part, else html and process it
+	// For mixed, we process each part.
+	std::vector<Binc::MimePart>::iterator it;
+	if (!stringicmp("mixed", doc.getSubType())) {
+	    for (it = doc.members.begin(); it != doc.members.end(); ++it) {
+		walkmime(out, *it, fd, depth+1);
+	    }
+	} else if (!stringicmp("alternative", doc.getSubType())) {
+	    std::vector<Binc::MimePart>::iterator ittxt, ithtml;
+	    ittxt = ithtml = doc.members.end();
+	    for (it = doc.members.begin(); it != doc.members.end(); ++it) {
+		// Get and parse the member's own content-type header (not
+		// the one of the enclosing multipart)
+		Binc::HeaderItem hi;
+		if (!it->h.getFirstHeader("Content-Type", hi)) 
+		    continue;
+		LOGDEB(("walkmime:content-type: %s\n", hi.getValue().c_str()));
+		string mctt = hi.getValue();
+		MimeHeaderValue member_type;
+		parseMimeHeaderValue(mctt, member_type);
+		// Remember the first member of each interesting type
+		if (ittxt == doc.members.end() && 
+		    !stringlowercmp("text/plain", member_type.value)) {
+		    ittxt = it;
+		} else if (ithtml == doc.members.end() && 
+			   !stringlowercmp("text/html", member_type.value)) {
+		    ithtml = it;
+		}
+	    }
+	    // Plain text is preferred, html is the fallback
+	    if (ittxt != doc.members.end()) {
+		walkmime(out, *ittxt, fd, depth+1);
+	    } else if (ithtml != doc.members.end()) {
+		walkmime(out, *ithtml, fd, depth+1);
+	    }
+	}
+    } else {
+	// If content-type is text or html and content-disposition is inline, 
+	// decode and add to text.
+
+	// Get and parse content-type header. A part without one is
+	// handled as text/plain.
+	Binc::HeaderItem hi;
+	string ctt = "text/plain";
+	if (doc.h.getFirstHeader("Content-Type", hi)) {
+	    ctt = hi.getValue();
+	}
+	LOGDEB(("walkmime:content-type: %s\n", ctt.c_str()));
+	MimeHeaderValue content_type;
+	parseMimeHeaderValue(ctt, content_type);
+	if (stringlowercmp("text/plain", content_type.value) && 
+	    stringlowercmp("text/html", content_type.value)) {
+	    return;
+	}
+	string charset = "us-ascii";
+	map<string,string>::const_iterator it;
+	it = content_type.params.find(string("charset"));
+	if (it != content_type.params.end())
+	    charset = it->second;
+
+	// Content disposition: anything but inline is skipped
+	string ctd = "inline";
+	if (doc.h.getFirstHeader("Content-Disposition", hi)) {
+	    ctd = hi.getValue();
+	}
+	MimeHeaderValue content_disposition;
+	parseMimeHeaderValue(ctd, content_disposition);
+	if (stringlowercmp("inline", content_disposition.value)) {
+	    return;
+	}
+
+	// Content transfer encoding. Parsed like the other header values
+	// so that the comparisons below are made on the bare token.
+	string cte = "7bit";
+	if (doc.h.getFirstHeader("Content-Transfer-Encoding", hi)) {
+	    cte = hi.getValue();
+	} 
+	MimeHeaderValue transfer_encoding;
+	parseMimeHeaderValue(cte, transfer_encoding);
+
+	LOGDEB(("walkmime: final: body start offset %d, length %d\n", 
+		doc.getBodyStartOffset(), doc.getBodyLength()));
+	string body;
+	doc.getBody(fd, body, 0, doc.bodylength);
+
+	// Decode content transfer encoding. stringlowercmp() returns 0 on
+	// a match, and the value tested must be the transfer encoding,
+	// not the disposition.
+	if (!stringlowercmp("quoted-printable", transfer_encoding.value)) {
+	    string decoded;
+	    qp_decode(body, decoded);
+	    body = decoded;
+	} else if (!stringlowercmp("base64", transfer_encoding.value)) {
+	    string decoded;
+	    base64_decode(body, decoded);
+	    body = decoded;
+	}
+
+
+        // Transcode to utf-8. On failure the undecoded bytes are kept
+        // rather than losing the text.
+	string transcoded;
+	if (!transcode(body, transcoded, charset, "UTF-8")) {
+	    LOGERR(("walkmime: transcode failed from cs '%s' to UTF-8\n",
+		    charset.c_str()));
+	    transcoded = body;
+	}
+
+	out += string("\r\n") + transcoded;
+    }
+}
+
+
--- a/src/internfile/mh_mail.h
+++ b/src/internfile/mh_mail.h
@ -0,0 +1,16 @@
+#ifndef _MAIL_H_INCLUDED_
+#define _MAIL_H_INCLUDED_
+/* @(#$Id: mh_mail.h,v 1.1 2005-03-25 09:40:27 dockes Exp $  (C) 2004 J.F.Dockes */
+#include "mimehandler.h"
+
+// Code to turn a mail folder file into internal documents
+class MimeHandlerMail : public MimeHandler {
+ public:
+    MimeHandlerMail() : conf(0) {}
+
+    // Entry point called by the interner, see MimeHandler::worker()
+    virtual MimeHandler::Status 
+	worker(RclConfig *conf, const string &fn, 
+	       const string &mtype, Rcl::Doc &docout, string& ipath);
+
+ private:
+    // Turn the single message held in file fn into a document
+    MimeHandler::Status processone(const string &fn, Rcl::Doc &docout);
+
+    // Configuration received through the last worker() call
+    RclConfig *conf;
+};
+#endif /* _MAIL_H_INCLUDED_ */
--- a/src/internfile/mimehandler.cpp
+++ b/src/internfile/mimehandler.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: mimehandler.cpp,v 1.8 2005-02-04 14:21:17 dockes Exp $ (C) 2004 J.F.Dockes";
+static char rcsid[] = "@(#$Id: mimehandler.cpp,v 1.9 2005-03-25 09:40:27 dockes Exp $ (C) 2004 J.F.Dockes";
 #endif

 #include <iostream>
@ -13,23 +13,24 @@ using namespace std;
 #include "debuglog.h"
 #include "smallut.h"
 #include "html.h"
+#include "mail.h"
 #include "execmd.h"
 #include "pathut.h"

 class MimeHandlerText : public MimeHandler {
 public:
-    bool worker(RclConfig *conf, const string &fn, 
-		const string &mtype, Rcl::Doc &docout);
+    MimeHandler::Status worker(RclConfig *conf, const string &fn, 
+		const string &mtype, Rcl::Doc &docout, string&);
    
 };

 // Process a plain text file
-bool MimeHandlerText::worker(RclConfig *conf, const string &fn, 
-			     const string &mtype, Rcl::Doc &docout)
+MimeHandler::Status MimeHandlerText::worker(RclConfig *conf, const string &fn, 
+			     const string &mtype, Rcl::Doc &docout, string&)
 {
    string otext;
    if (!file_to_string(fn, otext))
-	return false;
+	return MimeHandler::MHError;
 	
    // Try to guess charset, then convert to utf-8, and fill document
    // fields The charset guesser really doesnt work well in general
@ -46,36 +47,38 @@ bool MimeHandlerText::worker(RclConfig *conf, const string &fn,
 	cerr << "textPlainToDoc: transcode failed: charset '" << charset
 	     << "' to UTF-8: "<< utf8 << endl;
 	otext.erase();
-	return 0;
+	return MimeHandler::MHError;
    }

    Rcl::Doc out;
    out.origcharset = charset;
    out.text = utf8;
    docout = out;
-    return true;
+    return MimeHandler::MHDone;
 }

 class MimeHandlerExec : public MimeHandler {
 public:
    list<string> params;
    virtual ~MimeHandlerExec() {}
-    virtual bool worker(RclConfig *conf, const string &fn, 
-			const string &mtype, Rcl::Doc &docout);
+    virtual MimeHandler::Status worker(RclConfig *conf, const string &fn, 
+				       const string &mtype, Rcl::Doc &docout, 
+				       string&);

 };

    
 // Execute an external program to translate a file from its native format
 // to html. Then call the html parser to do the actual indexing
-bool MimeHandlerExec::worker(RclConfig *conf, const string &fn, 
-			     const string &mtype, Rcl::Doc &docout)
+MimeHandler::Status 
+MimeHandlerExec::worker(RclConfig *conf, const string &fn, 
+			const string &mtype, Rcl::Doc &docout, string&)
 {
    if (params.empty()) {
 	// Hu ho
 	LOGERR(("MimeHandlerExec::worker: empty params for mime %s\n",
 		mtype.c_str()));
-	return false;
+	return MimeHandler::MHError;
    }
    // Command name
    string cmd = find_filter(conf, params.front());
@ -92,7 +95,7 @@ bool MimeHandlerExec::worker(RclConfig *conf, const string &fn,
    if (status) {
 	LOGERR(("MimeHandlerExec: command status 0x%x: %s\n", 
 		status, cmd.c_str()));
-	return false;
+	return MimeHandler::MHError;
    }

    // Process/index  the html
@ -106,6 +109,10 @@ static MimeHandler *mhfact(const string &mime)
 	return new MimeHandlerText;
    else if (!stringlowercmp("text/html", mime))
 	return new MimeHandlerHtml;
+    else if (!stringlowercmp("text/x-mail", mime))
+	return new MimeHandlerMail;
+    else if (!stringlowercmp("message/rfc822", mime))
+	return new MimeHandlerMail;
    return 0;
 }

@ -117,7 +124,7 @@ MimeHandler *getMimeHandler(const std::string &mtype, ConfTree *mhandlers)
    // Return handler definition for mime type
    string hs;
    if (!mhandlers->get(mtype, hs, "index")) {
-	LOGDEB(("getMimeHandler: no handler for %s\n", mtype.c_str()));
+	LOGDEB(("getMimeHandler: no handler for '%s'\n", mtype.c_str()));
 	return 0;
    }

--- a/src/internfile/mimehandler.h
+++ b/src/internfile/mimehandler.h
@ -1,6 +1,6 @@
 #ifndef _MIMEHANDLER_H_INCLUDED_
 #define _MIMEHANDLER_H_INCLUDED_
-/* @(#$Id: mimehandler.h,v 1.5 2005-02-04 09:39:44 dockes Exp $  (C) 2004 J.F.Dockes */
+/* @(#$Id: mimehandler.h,v 1.6 2005-03-25 09:40:27 dockes Exp $  (C) 2004 J.F.Dockes */

 #include <string>
 #include <list>
@ -10,13 +10,34 @@


 /**
- * Document interner class. We sometimes have data to pass to an interner
+ * Document interner class. 
 */
 class MimeHandler {
 public:
    virtual ~MimeHandler() {}
-    virtual bool worker(RclConfig *, const std::string &filename, 
-			const std::string &mimetype, Rcl::Doc& outdoc) = 0;
+
+    /**
+     * Transform external data into internal utf8 document
+     *
+     * @param conf the global configuration
+     * @param filename File from which the data comes
+     * @param mimetype its mime type (from the mimemap configuration file)
+     * @param outdoc   The output document
+     * @param ipath the access path for the document inside the file. 
+     *              For mono-document file types, this will always be empty. 
+     *              It is used, for example for mbox files which may contain
+     *              multiple emails. If this is not empty in input, then the
+     *              caller is requesting a single document (ie: for display).
+     *              If this is empty (during indexation), it will be filled-up
+     *              by the function, and all the file's documents will be 
+     *              returned by successive calls.
+     * @return the return value indicates if there are more documents to be 
+     *         fetched from the same file.
+     */
+    enum Status {MHError, MHDone, MHAgain};
+    virtual Status worker(RclConfig * conf, const std::string &filename, 
+			  const std::string &mimetype, Rcl::Doc& outdoc,
+			  string& ipath) = 0;
 };

 /**
@ -30,6 +51,11 @@ extern MimeHandler *getMimeHandler(const std::string &mtyp, ConfTree *mhdlers);
 */
 extern std::string getMimeViewer(const std::string &mtyp, ConfTree *mhandlers);

+/** 
+ * Return command to uncompress the given type. The returned command has
+ * substitutable places for input file name and temp dir name, and will
+ * return output name
+ */
 bool getUncompressor(const std::string &mtype, ConfTree *mhandlers,
 		     std::list<std::string>& cmd);

--- a/src/internfile/myhtmlparse.cpp
+++ b/src/internfile/myhtmlparse.cpp
@ -150,7 +150,8 @@ MyHtmlParser::opening_tag(const string &tag, const map<string,string> &p)
 			lowercase_term(hequiv);
 			if (hequiv == "content-type") {
 			    string value = i->second;
-			    MimeHeaderValue p = parseMimeHeaderValue(value);
+			    MimeHeaderValue p;
+			    parseMimeHeaderValue(value, p);
 			    map<string, string>::const_iterator k;
 			    if ((k = p.params.find("charset")) != 
 				p.params.end()) {
--- a/src/lib/Makefile
+++ b/src/lib/Makefile
@ -8,14 +8,15 @@ all: $(LIBS)

 OBJS = conftree.o csguess.o debuglog.o \
     execmd.o wipedir.o \
-     fstreewalk.o html.o htmlparse.o indexer.o internfile.o \
+     fstreewalk.o html.o mail.o htmlparse.o indexer.o internfile.o \
     mimehandler.o mimeparse.o mimetype.o myhtmlparse.o pathut.o \
     rclconfig.o rcldb.o readfile.o smallut.o \
     textsplit.o transcode.o \
     unacpp.o unac.o
 SRCS = ../utils/conftree.cpp ../index/csguess.cpp ../utils/debuglog.cpp \
     ../utils/execmd.cpp ../utils/wipedir.cpp \
-     ../utils/fstreewalk.cpp ../common/html.cpp ../common/htmlparse.cpp \
+     ../utils/fstreewalk.cpp ../common/html.cpp ../common/mail.cpp \
+     ../common/htmlparse.cpp \
     ../index/indexer.cpp ../common/internfile.cpp \
     ../common/mimehandler.cpp ../utils/mimeparse.cpp ../index/mimetype.cpp \
     ../common/myhtmlparse.cpp ../utils/pathut.cpp \
@ -51,6 +52,8 @@ indexer.o : ../index/indexer.cpp
 	$(CXX) $(CXXFLAGS) -c $<
 internfile.o : ../common/internfile.cpp 
 	$(CXX) $(CXXFLAGS) -c $<
+mail.o : ../common/mail.cpp 
+	$(CXX) $(CXXFLAGS) -c $<
 mimehandler.o : ../common/mimehandler.cpp 
 	$(CXX) $(CXXFLAGS) -c $<
 mimeparse.o : ../utils/mimeparse.cpp 
--- a/src/mk/FreeBSD
+++ b/src/mk/FreeBSD
@ -1,5 +1,5 @@
 CXXFLAGS = -pthread -Wall -g -I. -I../index -I../utils -I../common \
-	 -I../unac -I/usr/local/include
+	 -I../unac -I../bincimapmime -I/usr/local/include 

 LIBXAPIAN = -L/usr/local/lib -lxapian
 LIBICONV =  -L/usr/local/lib -liconv
--- a/src/qtgui/recoll.pro
+++ b/src/qtgui/recoll.pro
@ -23,7 +23,7 @@ unix {
  UI_DIR = .ui
  MOC_DIR = .moc
  OBJECTS_DIR = .obj
-  LIBS += ../lib/librcl.a -L/usr/local/lib -lxapian -liconv
+  LIBS += ../lib/librcl.a ../bincimapmime/libmime.a -L/usr/local/lib -lxapian -liconv
  INCLUDEPATH += ../common ../index ../query ../unac ../utils 
 }

--- a/src/qtgui/recollmain.ui.h
+++ b/src/qtgui/recollmain.ui.h
@ -197,7 +197,8 @@ void RecollMain::reslistTE_clicked(int par, int car)
    // for preview:
    string fn = urltolocalpath(doc.url);
    Rcl::Doc fdoc;
-    if (!internfile(fn, rclconfig, fdoc, tmpdir)) {
+    FileInterner interner(fn, rclconfig, tmpdir);
+    if (interner.internfile(fdoc, doc.ipath) != FileInterner::FIDone) {
 	QMessageBox::warning(0, "Recoll",
 			     QString("Can't turn doc into internal rep ") +
 			     doc.mimetype.c_str());
--- a/src/rcldb/rcldb.h
+++ b/src/rcldb/rcldb.h
@ -1,6 +1,6 @@
 #ifndef _DB_H_INCLUDED_
 #define _DB_H_INCLUDED_
-/* @(#$Id: rcldb.h,v 1.12 2005-02-10 15:21:12 dockes Exp $  (C) 2004 J.F.Dockes */
+/* @(#$Id: rcldb.h,v 1.13 2005-03-25 09:40:27 dockes Exp $  (C) 2004 J.F.Dockes */

 #include <string>
 #include <list>
@ -31,8 +31,9 @@ namespace Rcl {
 */
 class Doc {
 public:
-    // This fields potentially go into the document data record
+    // These fields potentially go into the document data record
    string url;
+    string ipath;
    string mimetype;
    string mtime;       // Modification time as decimal ascii
    string origcharset;
@ -41,8 +42,10 @@ class Doc {
    string abstract;

    string text;
+
    void erase() {
 	url.erase();
+	ipath.erase();
 	mimetype.erase();
 	mtime.erase();
 	origcharset.erase();
--- a/src/sampleconf/mimeconf
+++ b/src/sampleconf/mimeconf
@ -1,4 +1,4 @@
-# @(#$Id: mimeconf,v 1.4 2005-03-17 14:02:05 dockes Exp $  (C) 2004 J.F.Dockes
+# @(#$Id: mimeconf,v 1.5 2005-03-25 09:40:28 dockes Exp $  (C) 2004 J.F.Dockes

 # Recoll : associations of mime types to processing filters.
 # There are different sections for decompression, 'interning' for indexing
@ -27,6 +27,9 @@ application/x-bzip2 =  uncompress rcluncomp bunzip2 %f %t
 [index]
 text/plain = internal 
 text/html  = internal 
+text/x-mail = internal
+message/rfc822 = internal
+
 application/pdf = exec rclpdf
 application/postscript = exec rclps
 application/msword = exec rcldoc
@ -46,7 +49,7 @@ application/vnd.sun.xml.writer.template = exec rclsoff
 # External viewers, launched when you double-click a result entry
 [view]
 text/plain = xemacs %f
-text/html = firefox -a firefox -remote "openFile(%u)"
+text/html = firefox -remote "openFile(%u)"
 application/pdf  = xpdf %f
 application/postscript = gv %f
 application/msword = openoffice-1.1.3-swriter %f
--- a/src/utils/mimeparse.cpp
+++ b/src/utils/mimeparse.cpp
@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: mimeparse.cpp,v 1.2 2005-03-17 14:02:06 dockes Exp $ (C) 2004 J.F.Dockes";
+static char rcsid[] = "@(#$Id: mimeparse.cpp,v 1.3 2005-03-25 09:40:28 dockes Exp $ (C) 2004 J.F.Dockes";
 #endif

 #ifndef TEST_MIMEPARSE
@ -7,71 +7,345 @@ static char rcsid[] = "@(#$Id: mimeparse.cpp,v 1.2 2005-03-17 14:02:06 dockes Ex
 #include <string>
 #include <ctype.h>
 #include <stdio.h>
+#include <ctype.h>

 #include "mimeparse.h"

 using namespace std;
-#define WHITE " \t\n"

-static void stripw_lc(string &in)
+// Parsing a header value. Only content-type has parameters, but the
+// other headers are compatible with the content-type syntax: they just
+// do not use the parameters. So we can parse them all like content-type:
+//    headertype: value [; paramname=paramvalue] ...
+// The value and the parameter values can be quoted strings, and
+// comments (inside parentheses) can appear between the tokens.
+
+
+
+// Description of what find_next_token() found: the kind of lexical
+// element, its text, the opening quote character when the token was
+// quoted, and the error messages appended while scanning.
+class Lexical {
+ public:
+    enum kind {none, token, separator};
+
+    kind   what;   // Kind of element, 'none' as long as nothing was found
+    string value;  // Text of the token or separator
+    string error;  // Error descriptions appended by the scanning functions
+    char   quote;  // Opening quote character for a quoted token, else 0
+
+    Lexical()
+	: what(none), quote(0)
+    {
+    }
+
+    // Put the object back into its freshly constructed state
+    void reset()
+    {
+	quote = 0;
+	error.erase();
+	value.erase();
+	what = none;
+    }
+};
+
+// Skip a mime comment. This must be called with in[start] == '('.
+// Comments can be nested, and a backslash escapes the next character.
+// Returns the position just after the ')' which closes the comment.
+// On error (backslash at end of string, or unclosed comment), a message
+// is appended to lex.error and string::npos is returned. The return type
+// being int, this shows up as a negative value (-1) on usual systems.
+int skip_comment(const string &in, unsigned int start, Lexical &lex)
 {
-    // fprintf(stderr, "In: '%s'\n", in.c_str());
-    string::size_type pos, pos1;
-    pos = in.find_first_not_of(WHITE);
-    if (pos == string::npos) {
-	// All white
-	in = "";
-	return;
+    int commentlevel = 0;
+    for (; start < in.size(); start++) {
+	if (in[start] == '\\') {
+	    // Skip escaped char.
+	    if (start+1 < in.size()) {
+		start++;
+		continue;
+	    } else {
+		lex.error.append("\\ at end of string ");
+		return string::npos;
+	    }
+	}
+	if (in[start] == '(')
+	    commentlevel++;
+	if (in[start] == ')') {
+	    // Back to level 0: this closes the outermost comment
+	    if (--commentlevel == 0)
+		break;
+	}
    }
-    in.replace(0, pos, "");
-    pos1 = in.find_last_not_of(WHITE); 
-    if (pos1 != in.length() -1)
-	in  = in.replace(pos1+1, string::npos, "");
-    string::iterator i;
-    for (i = in.begin(); i != in.end(); i++)
-	*i = tolower(*i);
+    if (start == in.size()) {
+	lex.error.append("Unclosed comment ");
+	return string::npos;
+    }
+    // start is on the closing ')'. Resume after it, else the caller would
+    // take the parenthesis for the beginning of a token.
+    return start + 1;
 }

-MimeHeaderValue parseMimeHeaderValue(const string &ein)
+// Skip initial whitespace and (possibly nested) comments.
+// Returns the position of the first character which is neither, or
+// in.size() if the end of the string is reached first. If a comment is
+// malformed, the negative error value from skip_comment() is returned
+// (lex.error then describes the problem).
+int skip_whitespace_and_comment(const string &in, unsigned int start, 
+				Lexical &lex)
 {
-    string in = ein;
-    MimeHeaderValue out;
-    string::size_type pos;
+    for (;;) {
+	// Keep the search result in a size_type: an unsigned int can not
+	// hold string::npos when size_type is wider (64 bits systems), and
+	// the end of string test would then never succeed.
+	string::size_type first = in.find_first_not_of(" \t\r\n", start);
+	if (first == string::npos)
+	    return in.size();
+	if (in[first] != '(')
+	    return first;
+	// skip_comment() signals errors with a negative value
+	int next = skip_comment(in, first, lex);
+	if (next < 0)
+	    return next;
+	start = next;
+    }
+}

-    pos = in.find_first_not_of(WHITE);
-    if (pos == string::npos)
-	return out;
-    in = in.substr(pos, string::npos);
-    if ((pos = in.find_first_of(";")) == string::npos) {
-	out.value = in;
-	return out;
-    } 
-    out.value = in.substr(0, pos);
-    stripw_lc(out.value);
-    in = in.substr(pos+1, string::npos);
-    for (;;) {
-	// Skip whitespace
-	if ((pos = in.find_first_not_of(WHITE)) == string::npos)
-	    return out;
-	in = in.substr(pos, string::npos);
+/// Find next token in mime header value string. 
+/// @return the next starting position in string, in.size() if the end of
+///   the string was reached without finding anything (lex is then left
+///   untouched), or string::npos for error (ie unbalanced quoting).
+///   The return type being int, string::npos shows up as a negative value.
+/// @param in the input string
+/// @param start the starting position
+/// @param lex  the returned token and its description. For a quoted
+///   token, lex.value is the text between the quotes (backslashes are
+///   kept) and lex.quote is the opening quote character.
+/// @param delims separators we should look for
+int find_next_token(const string &in, unsigned int start, 
+		    Lexical &lex, string delims = ";=")
+{
+    char oquot, cquot;

-	if ((pos = in.find_first_of("=")) == string::npos)
-	    return out;
-	string pname = in.substr(0, pos);
-	stripw_lc(pname);
-	in = in.substr(pos+1, string::npos);
+    // Errors come back as a negative int: test for that instead of
+    // comparing an unsigned int with string::npos, which never matches
+    // when size_type is wider than unsigned int.
+    int skipped = skip_whitespace_and_comment(in, start, lex);
+    if (skipped < 0 || string::size_type(skipped) == in.size())
+	return skipped;
+    start = skipped;

-	pos = in.find_first_of(";");
-	string pvalue = in.substr(0, pos);
-	stripw_lc(pvalue);
-	out.params[pname] = pvalue;
-	if (pos == string::npos)
-	    return out;
-	in = in.substr(pos+1, string::npos);
+    // Begins with separator ? return it.
+    // (size_type, so that the comparison with npos is meaningful)
+    string::size_type delimi = delims.find_first_of(in[start]);
+    if (delimi != string::npos) {
+	lex.what = Lexical::separator;
+	lex.value = delims[delimi];
+	return start+1;
    }

-    return out;
+    // Check for start of quoted string
+    oquot = in[start];
+    switch (oquot) {
+    case '<': cquot = '>';break;
+    case '"': cquot = '"';break;
+    default: cquot = 0; break;
+    }

+    if (cquot != 0) {
+	// Quoted string parsing
+	unsigned int end;
+	start++; // Skip quote character
+	for (end = start;end < in.size() && in[end] != cquot; end++) {
+	    if (in[end] == '\\') {
+		// Skip escaped char.
+		if (end+1 < in.size()) {
+		    end++;
+		} else {
+		    // backslash at end of string: error
+		    lex.error.append("\\ at end of string ");
+		    return string::npos;
+		}
+	    }
+	}
+	if (end == in.size()) {
+	    // Found end of string before closing quote character: error
+	    lex.error.append("Unclosed quoted string ");
+	    return string::npos;
+	}
+	lex.what = Lexical::token;
+	lex.value = in.substr(start, end-start);
+	lex.quote = oquot;
+	return ++end;
+    } else {
+	// Unquoted token: ends at the next separator, blank or comment.
+	// end must be a size_type: squeezed into an unsigned int, npos
+	// would not be recognized and would be returned as an error.
+	string::size_type end = in.find_first_of(delims + " \t(", start);
+	lex.what = Lexical::token;
+	lex.quote = 0;
+	if (end == string::npos) {
+	    end = in.size();
+	    lex.value = in.substr(start);
+	} else {
+	    lex.value = in.substr(start, end-start);
+	}
+	return end;
+    }
+}
+
+// Append a lowercased copy of 'in' to 'out'. Note that 'out' is not
+// cleared first.
+void stringtolower(string &out, const string& in)
+{
+    for (string::size_type i = 0; i < in.size(); i++) {
+	// tolower() needs a value representable as unsigned char (or EOF):
+	// passing a plain char is undefined for bytes with the high bit
+	// set when char is signed.
+	out.append(1, char(tolower((unsigned char)in[i])));
+    }
+}
+
+// Parse a header value of the form: value [; paramname=paramvalue] ...
+// On success, parsed.value holds the main value and parsed.params maps
+// the lowercased parameter names to their values (without the quotes
+// for quoted values).
+// @return false if the value could not be parsed. parsed may then hold
+//   the elements which were decoded before the error.
+bool parseMimeHeaderValue(const string& value, MimeHeaderValue& parsed)
+{
+    parsed.value.erase();
+    parsed.params.clear();
+
+    Lexical lex;
+    // find_next_token() returns an int and signals errors with
+    // string::npos, which comes out as a negative value. Keep the result
+    // in an int: an unsigned int copy never compares equal to
+    // string::npos when size_type is wider (64 bits systems).
+    int pos = find_next_token(value, 0, lex);
+    if (pos < 0 || lex.what != Lexical::token) 
+	return false;
+
+    parsed.value = lex.value;
+
+    for (;;) {
+	string paramname, paramvalue;
+	lex.reset();
+	pos = find_next_token(value, pos, lex);
+	if (pos < 0)
+	    return false;
+	// End of input: done
+	if (string::size_type(pos) == value.size())
+	    return true;
+	// The ';' between parameters carries no information
+	if (lex.what == Lexical::separator && lex.value[0] == ';')
+	    continue;
+	if (lex.what != Lexical::token) 
+	    return false;
+	stringtolower(paramname, lex.value);
+
+	// The name must be followed by '='
+	pos = find_next_token(value, pos, lex);
+	if (pos < 0 || lex.what != Lexical::separator || 
+	    lex.value[0] != '=') 
+	    return false;
+
+	// Then by the parameter value
+	pos = find_next_token(value, pos, lex);
+	if (pos < 0 || lex.what != Lexical::token)
+	    return false;
+	paramvalue = lex.value;
+	parsed.params[paramname] = paramvalue;
+    }
+}
+
+// Value of an hexadecimal digit character, or -1 if c is not one.
+static int qp_hexdigit_value(char c)
+{
+    if (c >= '0' && c <= '9')
+	return c - '0';
+    if (c >= 'A' && c <= 'F')
+	return c - 'A' + 10;
+    if (c >= 'a' && c <= 'f')
+	return c - 'a' + 10;
+    return -1;
+}
+
+// Decode a string encoded with quoted-printable encoding. 
+// The decoded data is appended to out.
+// @return false if an '=' is followed by something which is neither a
+//   line break nor 2 hexadecimal digits. out then holds what was decoded
+//   before the error. An '=' with less than 2 characters after it ends
+//   the decoding without an error.
+bool qp_decode(const string& in, string &out) 
+{
+    out.reserve(in.length());
+    unsigned int pos = 0;
+    while (pos < in.length()) {
+	const char cur = in[pos++];
+	if (cur != '=') {
+	    // Ordinary character: copy
+	    out += cur;
+	    continue;
+	}
+
+	// pos is now just after the '='
+	if (pos >= in.length() - 1) // Need at least 2 more chars
+	    break;
+
+	if (in[pos] == '\r' && in[pos+1] == '\n') {
+	    // Soft line break (CRLF): skip both characters
+	    pos += 2;
+	    continue;
+	}
+	if (in[pos] == '\n' || in[pos] == '\r') {
+	    // Soft line break with a lone LF or CR: skip it
+	    pos++;
+	    continue;
+	}
+
+	// 2 hexadecimal digits, either case is accepted
+	const int high = qp_hexdigit_value(in[pos]);
+	if (high < 0)
+	    return false;
+	const int low = qp_hexdigit_value(in[pos+1]);
+	if (low < 0)
+	    return false;
+	out += char(high * 16 + low);
+	pos += 2;
+    }
+    return true;
+}
+
+
+// This is adapted from FreeBSD's code.
+static const char Base64[] =
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+static const char Pad64 = '=';
+
+// Return the 6 bits value for a base64 alphabet character, -1 if ch is
+// not part of the alphabet. Only the 64 digits are searched, so that a
+// null byte in the input can not match the table's terminator.
+static int base64_digit_value(int ch)
+{
+    for (int i = 0; i < 64; i++) {
+	if (Base64[i] == ch)
+	    return i;
+    }
+    return -1;
+}
+
+// Decode base64 data. Whitespace is accepted anywhere in the input.
+// @param in the encoded text
+// @param out receives the decoded bytes. Its previous content is
+//   discarded.
+// @return false if the input has a character from outside the base64
+//   alphabet, incorrect padding, data after the padding, or if it stops
+//   in the middle of a group. out then holds what was decoded before the
+//   error was detected.
+bool base64_decode(const string& in, string& out)
+{
+    int state = 0;    // Position inside the current group of 4 digits
+    int partial = 0;  // Bits already decoded for the next output byte
+    bool padseen = false;
+    string::size_type ii;
+
+    // The decoded bytes are appended one by one: writing through out[]
+    // after a mere reserve() would be out of bounds and would leave the
+    // string length at 0.
+    out.erase();
+    out.reserve(in.length());
+
+    for (ii = 0; ii < in.length(); ii++) {
+	int ch = (unsigned char)in[ii];
+	if (isspace(ch))        /* Skip whitespace anywhere. */
+	    continue;
+
+	if (ch == Pad64) {
+	    padseen = true;
+	    break;
+	}
+
+	int val = base64_digit_value(ch);
+	if (val < 0) 		/* A non-base64 character. */
+	    return false;
+
+	switch (state) {
+	case 0:
+	    partial = val << 2;
+	    state = 1;
+	    break;
+	case 1:
+	    out += char(partial | (val >> 4));
+	    partial = (val & 0x0f) << 4;
+	    state = 2;
+	    break;
+	case 2:
+	    out += char(partial | (val >> 2));
+	    partial = (val & 0x03) << 6;
+	    state = 3;
+	    break;
+	case 3:
+	    out += char(partial | val);
+	    partial = 0;
+	    state = 0;
+	    break;
+	default:
+	    return false;
+	}
+    }
+
+    /*
+     * We are done decoding Base-64 chars.  Let's see if we ended
+     * on a byte boundary, and/or with erroneous trailing characters.
+     */
+
+    if (padseen) {		/* We got a pad char. */
+	ii++;			/* in[ii] is the pad char: skip it */
+	switch (state) {
+	case 0:		/* Invalid = in first position */
+	case 1:		/* Invalid = in second position */
+	    return false;
+
+	case 2:		/* Valid, means one byte of info */
+			/* Skip any number of spaces. */
+	    while (ii < in.length() && isspace((unsigned char)in[ii]))
+		ii++;
+	    /* Make sure there is another trailing = sign. */
+	    if (ii == in.length() || in[ii] != Pad64)
+		return false;
+	    ii++;		/* Skip the = */
+	    /* Fall through to "single trailing =" case. */
+	    /* FALLTHROUGH */
+
+	case 3:		/* Valid, means two bytes of info */
+			/*
+			 * We are past the last =.  Is there anything but
+			 * whitespace after it?
+			 */
+	    while (ii < in.length() && isspace((unsigned char)in[ii]))
+		ii++;
+	    if (ii != in.length())
+		return false;
+
+	    /*
+	     * Now make sure for cases 2 and 3 that the "extra"
+	     * bits that slopped past the last full byte were
+	     * zeros.  If we don't check them, they become a
+	     * subliminal channel.
+	     */
+	    if (partial != 0)
+		return false;
+	}
+    } else {
+	/*
+	 * We ended by seeing the end of the string.  Make sure we
+	 * have no partial bytes lying around.
+	 */
+	if (state != 0)
+	    return false;
+    }
+
+    return true;
 }

 #else 
@ -82,19 +356,47 @@ using namespace std;
+// Test driver, compiled only when TEST_MIMEPARSE is defined. It holds 3
+// tests selected by editing the #if/#elif conditions: header value
+// parsing, quoted-printable decoding, and base64 decoding. As written,
+// only the last one (the #else branch) is compiled.
 int
 main(int argc, const char **argv)
 {
+    // Test 1: parse a content-type like value and print the result
 +#if 0
 +    //    const char *tr = "text/html; charset=utf-8; otherparam=garb";
 +    const char *tr = "text/html;charset = UTF-8 ; otherparam=garb; \n"
 +	"QUOTEDPARAM=\"quoted value\"";

    MimeHeaderValue parsed;

-    //    const char *tr = "text/html; charset=utf-8; otherparam=garb";
-    const char *tr = "text/html;charset = UTF-8 ; otherparam=garb;";
-
-    parsed = parseMimeHeaderValue(tr);
 +    if (!parseMimeHeaderValue(tr, parsed)) {
 +	fprintf(stderr, "PARSE ERROR\n");
 +    }
    
    printf("'%s' \n", parsed.value.c_str());
    map<string, string>::iterator it;
    for (it = parsed.params.begin();it != parsed.params.end();it++) {
 	printf("  '%s' = '%s'\n", it->first.c_str(), it->second.c_str());
    }
+    // Test 2: quoted-printable decoding. The sample has soft line breaks
+    // and ends with an invalid sequence (=xx) to exercise the error return
 +#elif 0
 +    const char *qp = "=41=68 =e0 boire=\r\n continue 1ere\ndeuxieme\n\r3eme "
 +	"agrave is: '=E0' probable skipped decode error: =\n"
 +	"Actual decode error =xx this wont show";
 +
 +    string out;
 +    if (!qp_decode(string(qp), out)) {
 +	fprintf(stderr, "qp_decode returned error\n");
 +    }
 +    printf("Decoded: '%s'\n", out.c_str());
+    // Test 3: base64 decoding. The comment lines below show the text which
+    // the sample is expected to decode to.
 +#else
 +    //'C'est à boire qu'il nous faut éviter l'excès.'
 +    //'Deuxième ligne'
 +    //'Troisième ligne'
 +    //'Et la fin (pas de nl). '
 +    const char *b64 = 
 + "Qydlc3Qg4CBib2lyZSBxdSdpbCBub3VzIGZhdXQg6XZpdGVyIGwnZXhj6HMuCkRldXhp6G1l\r\n"
 +	"IGxpZ25lClRyb2lzaehtZSBsaWduZQpFdCBsYSBmaW4gKHBhcyBkZSBubCkuIA==\r\n";
 +
 +    string out;
 +    if (!base64_decode(string(b64), out)) {
 +	fprintf(stderr, "base64_decode returned error\n");
 +    }
 +    printf("Decoded: '%s'\n", out.c_str());
 +#endif
 }

 #endif // TEST_MIMEPARSE
--- a/src/utils/mimeparse.h
+++ b/src/utils/mimeparse.h
@ -1,6 +1,6 @@
 #ifndef _MIME_H_INCLUDED_
 #define _MIME_H_INCLUDED_
-/* @(#$Id: mimeparse.h,v 1.1 2005-01-26 11:45:55 dockes Exp $  (C) 2004 J.F.Dockes */
+/* @(#$Id: mimeparse.h,v 1.2 2005-03-25 09:40:28 dockes Exp $  (C) 2004 J.F.Dockes */

 #include <string>
 #include <map>
@ -11,7 +11,9 @@ class MimeHeaderValue {
    std::string value;
    std::map<std::string, std::string> params;
 };
-extern MimeHeaderValue parseMimeHeaderValue(const std::string &in);
+extern bool parseMimeHeaderValue(const std::string& in, MimeHeaderValue& psd);

+bool qp_decode(const std::string& in, std::string &out);
+bool base64_decode(const std::string& in, std::string &out);

 #endif /* _MIME_H_INCLUDED_ */