recoll/src/utils/mimeparse.cpp

/* Copyright (C) 2004 J.F.Dockes
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program; if not, write to the
 *   Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

#ifndef TEST_MIMEPARSE
#include "autoconfig.h"

#include <string>
#include <vector>

#include <ctype.h>
#include <stdio.h>
#include <ctype.h>
#include <time.h>
#include <cstdlib>
#include <cstring>

#include "mimeparse.h"
#include "base64.h"
#include "transcode.h"
#include "smallut.h"

using namespace std;

//#define DEBUG_MIMEPARSE
#ifdef DEBUG_MIMEPARSE
#define DPRINT(X) fprintf X
#else
#define DPRINT(X)
#endif

// Parsing a header value. Only content-type and content-disposition
// have parameters, but others are compatible with content-type
// syntax, only, parameters are not used. So we can parse all like:
//
//    headertype: value [; paramname=paramvalue] ...
//
// Value and paramvalues can be quoted strings, and there can be
// comments too. Note that RFC2047 is explicitly forbidden for
// parameter values (RFC2231 must be used), but I have seen it used
// anyway (ie: thunderbird 1.0)
//
// Ref: RFC2045/6/7 (MIME) RFC2183/2231 (content-disposition and encodings)


/** Decode a MIME parameter value encoded according to rfc2231
 *
 * Example input withs input charset == "":
 *     [iso-8859-1'french'RE%A0%3A_Smoke_Tests%20bla]
 * Or (if charset is set) : RE%A0%3A_Smoke_Tests%20bla
 *
 * @param in input string, ascii with rfc2231 markup
 * @param out output string
 * @param charset if empty: decode string like 'charset'lang'more%20stuff,
 *      else just do the %XX part
 * @return out output string encoded in utf-8
 */
bool rfc2231_decode(const string &in, string &out, string &charset)
{
    string::size_type pos1, pos2=0;

    if (charset.empty()) {
        if ((pos1 = in.find("'")) == string::npos)
            return false;
        charset = in.substr(0, pos1);
        // fprintf(stderr, "Charset: [%s]\n", charset.c_str());
        pos1++;

        if ((pos2 = in.find("'", pos1)) == string::npos)
            return false;
        // We have no use for lang for now
        // string lang = in.substr(pos1, pos2-pos1);
        // fprintf(stderr, "Lang: [%s]\n", lang.c_str());
        pos2++;
    }

    string raw;
    qp_decode(in.substr(pos2), raw, '%');
    // fprintf(stderr, "raw [%s]\n", raw.c_str());
    if (!transcode(raw, out, charset, "UTF-8"))
        return false;
    return true;
}


/////////////////////////////////////////
/// Decoding of MIME fields values and parameters

// The lexical token returned by find_next_token
class Lexical {
public:
    enum kind {none, token, separator};
    kind   what;
    string value;
    string error;
    char quote;
    Lexical() : what(none), quote(0) {}
    void reset() {what = none; value.erase(); error.erase();quote = 0;}
};

// Skip mime comment. This must be called with in[start] == '('
static string::size_type
skip_comment(const string &in, string::size_type start, Lexical &lex)
{
    int commentlevel = 0;
    for (; start < in.size(); start++) {
        if (in[start] == '\\') {
            // Skip escaped char.
            if (start+1 < in.size()) {
                start++;
                continue;
            } else {
                lex.error.append("\\ at end of string ");
                return in.size();
            }
        }
        if (in[start] == '(')
            commentlevel++;
        if (in[start] == ')') {
            if (--commentlevel == 0)
                break;
        }
    }
    if (start == in.size() && commentlevel != 0) {
        lex.error.append("Unclosed comment ");
        return in.size();
    }
    return start;
}

// Skip initial whitespace and (possibly nested) comments.
static string::size_type
skip_whitespace_and_comment(const string &in, string::size_type start,
                            Lexical &lex)
{
    while (1) {
        if ((start = in.find_first_not_of(" \t\r\n", start)) == string::npos)
            return in.size();
        if (in[start] == '(') {
            if ((start = skip_comment(in, start, lex)) == string::npos)
                return string::npos;
        } else {
            break;
        }
    }
    return start;
}

/// Find next token in mime header value string.
/// @return the next starting position in string, string::npos for error
/// @param in the input string
/// @param start the starting position
/// @param lex  the returned token and its description
/// @param delims separators we should look for
static string::size_type
find_next_token(const string &in, string::size_type start,
                Lexical &lex, string delims = ";=")
{
    char oquot, cquot;

    start = skip_whitespace_and_comment(in, start, lex);
    if (start == string::npos || start == in.size())
        return in.size();

    // Begins with separator ? return it.
    string::size_type delimi = delims.find_first_of(in[start]);
    if (delimi != string::npos) {
        lex.what = Lexical::separator;
        lex.value = delims[delimi];
        return start+1;
    }

    // Check for start of quoted string
    oquot = in[start];
    switch (oquot) {
    case '<': cquot = '>';break;
    case '"': cquot = '"';break;
    default: cquot = 0; break;
    }

    if (cquot != 0) {
        // Quoted string parsing
        string::size_type end;
        start++; // Skip quote character
        for (end = start;end < in.size() && in[end] != cquot; end++) {
            if (in[end] == '\\') {
                // Skip escaped char.
                if (end+1 < in.size()) {
                    end++;
                } else {
                    // backslash at end of string: error
                    lex.error.append("\\ at end of string ");
                    return string::npos;
                }
            }
        }
        if (end == in.size()) {
            // Found end of string before closing quote character: error
            lex.error.append("Unclosed quoted string ");
            return string::npos;
        }
        lex.what = Lexical::token;
        lex.value = in.substr(start, end-start);
        lex.quote = oquot;
        return ++end;
    } else {
        string::size_type end = in.find_first_of(delims + "\r\n \t(", start);
        lex.what = Lexical::token;
        lex.quote = 0;
        if (end == string::npos) {
            end = in.size();
            lex.value = in.substr(start);
        } else {
            lex.value = in.substr(start, end-start);
        }
        return end;
    }
}

// Classes for handling rfc2231 value continuations
class Chunk {
public:
    Chunk() : decode(false) {}
    bool decode;
    string value;
};
class Chunks {
public:
    vector<Chunk> chunks;
};

void stringtolower(string &out, const string& in)
{
    for (string::size_type i = 0; i < in.size(); i++)
        out.append(1, char(tolower(in[i])));
}

// Parse MIME field value. Should look like:
//  somevalue ; param1=val1;param2=val2
bool parseMimeHeaderValue(const string& value, MimeHeaderValue& parsed)
{
    parsed.value.erase();
    parsed.params.clear();

    Lexical lex;
    string::size_type start = 0;

    // Get the field value
    start = find_next_token(value, start, lex);
    if (start == string::npos || lex.what != Lexical::token)
        return false;
    parsed.value = lex.value;

    map<string, string> rawparams;
    // Look for parameters
    for (;;) {
        string paramname, paramvalue;
        lex.reset();
        start = find_next_token(value, start, lex);
        if (start == value.size())
            break;
        if (start == string::npos) {
            //fprintf(stderr, "Find_next_token error(1)\n");
            return false;
        }
        if (lex.what == Lexical::separator && lex.value[0] == ';')
            continue;
        if (lex.what != Lexical::token)
            return false;
        stringtolower(paramname, lex.value);

        start = find_next_token(value, start, lex);
        if (start == string::npos || lex.what != Lexical::separator ||
            lex.value[0] != '=') {
            //fprintf(stderr, "Find_next_token error (2)\n");
            return false;
        }

        start = find_next_token(value, start, lex);
        if (start == string::npos || lex.what != Lexical::token) {
            //fprintf(stderr, "Parameter has no value!");
            return false;
        }
        paramvalue = lex.value;
        rawparams[paramname] = paramvalue;
        //fprintf(stderr, "RAW: name [%s], value [%s]\n", paramname.c_str(),
        //              paramvalue.c_str());
    }
    //    fprintf(stderr, "Number of raw params %d\n", rawparams.size());

    // RFC2231 handling:
    // - if a parameter name ends in * it must be decoded
    // - If a parameter name looks line name*ii[*] it is a
    //   partial value, and must be concatenated with other such.

    map<string, Chunks> chunks;
    for (map<string, string>::const_iterator it = rawparams.begin();
         it != rawparams.end(); it++) {
        string nm = it->first;
        //      fprintf(stderr, "NM: [%s]\n", nm.c_str());
        if (nm.empty()) // ??
            continue;

        Chunk chunk;
        if (nm[nm.length()-1] == '*') {
            nm.erase(nm.length() - 1);
            chunk.decode = true;
        } else
            chunk.decode = false;
        //      fprintf(stderr, "NM1: [%s]\n", nm.c_str());

        chunk.value = it->second;

        // Look for another asterisk in nm. If none, assign index 0
        string::size_type aster;
        int idx = 0;
        if ((aster = nm.rfind("*")) != string::npos) {
            string num = nm.substr(aster+1);
            //fprintf(stderr, "NUM: [%s]\n", num.c_str());
            nm.erase(aster);
            idx = atoi(num.c_str());
        }
        Chunks empty;
        if (chunks.find(nm) == chunks.end())
            chunks[nm] = empty;
        chunks[nm].chunks.resize(idx+1);
        chunks[nm].chunks[idx] = chunk;
        //fprintf(stderr, "CHNKS: nm [%s], idx %d, decode %d, value [%s]\n",
        // nm.c_str(), idx, int(chunk.decode), chunk.value.c_str());
    }

    // For each parameter name, concatenate its chunks and possibly
    // decode Note that we pass the whole concatenated string to
    // decoding if the first chunk indicates that decoding is needed,
    // which is not right because there might be uncoded chunks
    // according to the rfc.
    for (map<string, Chunks>::const_iterator it = chunks.begin();
         it != chunks.end(); it++) {
        if (it->second.chunks.empty())
            continue;
        string nm = it->first;
        // Create the name entry
        if (parsed.params.find(nm) == parsed.params.end())
            parsed.params[nm].clear();
        // Concatenate all chunks and decode the whole if the first one needs
        // to. Yes, this is not quite right.
        string value;
        for (vector<Chunk>::const_iterator vi = it->second.chunks.begin();
             vi != it->second.chunks.end(); vi++) {
            value += vi->value;
        }
        if (it->second.chunks[0].decode) {
            string charset;
            rfc2231_decode(value, parsed.params[nm], charset);
        } else {
            // rfc2047 MUST NOT but IS used by some agents
            rfc2047_decode(value, parsed.params[nm]);
        }
        //fprintf(stderr, "FINAL: nm [%s], value [%s]\n",
        //nm.c_str(), parsed.params[nm].c_str());
    }

    return true;
}

// Decode a string encoded with quoted-printable encoding.
// we reuse the code for rfc2231 % encoding, even if the eol
// processing is not useful in this case
bool qp_decode(const string& in, string &out, char esc)
{
    out.reserve(in.length());
    string::size_type ii;
    for (ii = 0; ii < in.length(); ii++) {
        if (in[ii] == esc) {
            ii++; // Skip '=' or '%'
            if(ii >= in.length() - 1) { // Need at least 2 more chars
                break;
            } else if (in[ii] == '\r' && in[ii+1] == '\n') { // Soft nl, skip
                ii++;
            } else if (in[ii] != '\n' && in[ii] != '\r') { // decode
                char c = in[ii];
                char co;
                if(c >= 'A' && c <= 'F') {
                    co = char((c - 'A' + 10) * 16);
                } else if (c >= 'a' && c <= 'f') {
                    co = char((c - 'a' + 10) * 16);
                } else if (c >= '0' && c <= '9') {
                    co = char((c - '0') * 16);
                } else {
                    return false;
                }
                if(++ii >= in.length())
                    break;
                c = in[ii];
                if (c >= 'A' && c <= 'F') {
                    co += char(c - 'A' + 10);
                } else if (c >= 'a' && c <= 'f') {
                    co += char(c - 'a' + 10);
                } else if (c >= '0' && c <= '9') {
                    co += char(c - '0');
                } else {
                    return false;
                }
                out += co;
            }
        } else {
            out += in[ii];
        }
    }
    return true;
}

// Decode an word encoded as quoted printable or base 64
static bool rfc2047_decodeParsed(const std::string& charset,
                                 const std::string& encoding,
                                 const std::string& value,
                                 std::string &utf8)
{
    DPRINT((stderr, "DecodeParsed: charset [%s] enc [%s] val [%s]\n",
            charset.c_str(), encoding.c_str(), value.c_str()));
    utf8.clear();

    string decoded;
    if (!stringlowercmp("b", encoding)) {
        if (!base64_decode(value, decoded))
            return false;
        DPRINT((stderr, "FromB64: [%s]\n", decoded.c_str()));
    } else if (!stringlowercmp("q", encoding)) {
        if (!qp_decode(value, decoded))
            return false;
        // Need to translate _ to ' ' here
        string temp;
        for (string::size_type pos = 0; pos < decoded.length(); pos++)
            if (decoded[pos] == '_')
                temp += ' ';
            else
                temp += decoded[pos];
        decoded = temp;
        DPRINT((stderr, "FromQP: [%s]\n", decoded.c_str()));
    } else {
        DPRINT((stderr, "Bad encoding [%s]\n", encoding.c_str()));
        return false;
    }

    if (!transcode(decoded, utf8, charset, "UTF-8")) {
        DPRINT((stderr, "Transcode failed\n"));
        return false;
    }
    return true;
}

// Parse a mail header value encoded according to RFC2047.
// This is not supposed to be used for MIME parameter values, but it
// happens.
// Bugs:
//    - We should turn off decoding while inside quoted strings
//
typedef enum  {rfc2047ready, rfc2047open_eq,
               rfc2047charset, rfc2047encoding,
               rfc2047value, rfc2047close_q} Rfc2047States;

bool rfc2047_decode(const std::string& in, std::string &out)
{
    DPRINT((stderr, "rfc2047_decode: [%s]\n", in.c_str()));

    Rfc2047States state = rfc2047ready;
    string encoding, charset, value, utf8;

    out.clear();

    for (string::size_type ii = 0; ii < in.length(); ii++) {
        char ch = in[ii];
        switch (state) {
        case rfc2047ready:
        {
            DPRINT((stderr, "STATE: ready, ch %c\n", ch));
            switch (ch) {
                // Whitespace: stay ready
            case ' ': case '\t': value += ch;break;
                // '=' -> forward to next state
            case '=': state = rfc2047open_eq; break;
                DPRINT((stderr, "STATE: open_eq\n"));
                // Other: go back to sleep
            default: value += ch; state = rfc2047ready;
            }
        }
        break;
        case rfc2047open_eq:
        {
            DPRINT((stderr, "STATE: open_eq, ch %c\n", ch));
            switch (ch) {
            case '?':
            {
                // Transcode current (unencoded part) value:
                // we sometimes find 8-bit chars in
                // there. Interpret as Iso8859.
                if (value.length() > 0) {
                    transcode(value, utf8, "ISO-8859-1", "UTF-8");
                    out += utf8;
                    value.clear();
                }
                state = rfc2047charset;
            }
            break;
            default: state = rfc2047ready; value += '='; value += ch;break;
            }
        }
        break;
        case rfc2047charset:
        {
            DPRINT((stderr, "STATE: charset, ch %c\n", ch));
            switch (ch) {
            case '?': state = rfc2047encoding; break;
            default: charset += ch; break;
            }
        }
        break;
        case rfc2047encoding:
        {
            DPRINT((stderr, "STATE: encoding, ch %c\n", ch));
            switch (ch) {
            case '?': state = rfc2047value; break;
            default: encoding += ch; break;
            }
        }
        break;
        case rfc2047value:
        {
            DPRINT((stderr, "STATE: value, ch %c\n", ch));
            switch (ch) {
            case '?': state = rfc2047close_q; break;
            default: value += ch;break;
            }
        }
        break;
        case rfc2047close_q:
        {
            DPRINT((stderr, "STATE: close_q, ch %c\n", ch));
            switch (ch) {
            case '=':
            {
                DPRINT((stderr, "End of encoded area. Charset %s, Encoding %s\n", charset.c_str(), encoding.c_str()));
                string utf8;
                state = rfc2047ready;
                if (!rfc2047_decodeParsed(charset, encoding, value,
                                          utf8)) {
                    return false;
                }
                out += utf8;
                charset.clear();
                encoding.clear();
                value.clear();
            }
            break;
            default: state = rfc2047value; value += '?';value += ch;break;
            }
        }
        break;
        default: // ??
            DPRINT((stderr, "STATE: default ?? ch %c\n", ch));
            return false;
        }
    }

    if (value.length() > 0) {
        transcode(value, utf8, "CP1252", "UTF-8");
        out += utf8;
        value.clear();
    }
    if (state != rfc2047ready)
        return false;
    return true;
}

#define DEBUGDATE 0
#if DEBUGDATE
#define DATEDEB(X) fprintf X
#else
#define DATEDEB(X)
#endif

// Convert rfc822 date to unix time. A date string normally looks like:
//  Mon, 3 Jul 2006 09:51:58 +0200
// But there are many close common variations
// And also hopeless things like: Fri Nov  3 13:13:33 2006
time_t rfc2822DateToUxTime(const string& dt)
{
    // Strip everything up to first comma if any, we don't need weekday,
    // then break into tokens
    vector<string> toks;
    string::size_type idx;
    if ((idx = dt.find_first_of(",")) != string::npos) {
        if (idx == dt.length() - 1) {
            DATEDEB((stderr, "Bad rfc822 date format (short1): [%s]\n",
                     dt.c_str()));
            return (time_t)-1;
        }
        string date = dt.substr(idx+1);
        stringToTokens(date, toks, " \t:");
    } else {
        // No comma. Enter strangeland
        stringToTokens(dt, toks, " \t:");
        // Test for date like: Sun Nov 19 06:18:41 2006
        //                      0   1  2   3 4  5  6
        // and change to:      19 Nov 2006 06:18:41
        if (toks.size() == 7) {
            if (toks[0].length() == 3 &&
                toks[0].find_first_of("0123456789") == string::npos) {
                swap(toks[0], toks[2]);
                swap(toks[6], toks[2]);
                toks.pop_back();
            }
        }
    }

#if DEBUGDATE
    for (list<string>::iterator it = toks.begin(); it != toks.end(); it++) {
        DATEDEB((stderr, "[%s] ", it->c_str()));
    }
    DATEDEB((stderr, "\n"));
#endif

    if (toks.size() < 6) {
        DATEDEB((stderr, "Bad rfc822 date format (toks cnt): [%s]\n",
                 dt.c_str()));
        return (time_t)-1;
    }

    if (toks.size() == 6) {
        // Probably no timezone, sometimes happens
        toks.push_back("+0000");
    }

    struct tm tm;
    memset(&tm, 0, sizeof(tm));

    // Load struct tm with appropriate tokens, possibly converting
    // when needed

    vector<string>::iterator it = toks.begin();

    // Day of month: no conversion needed
    tm.tm_mday = atoi(it->c_str());
    it++;

    // Month. Only Jan-Dec are legal. January, February do happen
    // though. Convert to 0-11
    if (*it == "Jan" || *it == "January") tm.tm_mon = 0; else if
        (*it == "Feb" || *it == "February") tm.tm_mon = 1; else if
        (*it == "Mar" || *it == "March") tm.tm_mon = 2; else if
        (*it == "Apr" || *it == "April") tm.tm_mon = 3; else if
        (*it == "May") tm.tm_mon = 4; else if
        (*it == "Jun" || *it == "June") tm.tm_mon = 5; else if
        (*it == "Jul" || *it == "July") tm.tm_mon = 6; else if
        (*it == "Aug" || *it == "August") tm.tm_mon = 7; else if
        (*it == "Sep" || *it == "September") tm.tm_mon = 8; else if
        (*it == "Oct" || *it == "October") tm.tm_mon = 9; else if
        (*it == "Nov" || *it == "November") tm.tm_mon = 10; else if
        (*it == "Dec" || *it == "December") tm.tm_mon = 11; else {
        DATEDEB((stderr, "Bad rfc822 date format (month): [%s]\n",
                 dt.c_str()));
        return (time_t)-1;
    }
    it++;

    // Year. Struct tm counts from 1900. 2 char years are quite rare
    // but do happen. I've seen 00 happen so count small values from 2000
    tm.tm_year = atoi(it->c_str());
    if (it->length() == 2) {
        if (tm.tm_year < 10)
            tm.tm_year += 2000;
        else
            tm.tm_year += 1900;
    }
    if (tm.tm_year > 1900)
        tm.tm_year -= 1900;
    it++;

    // Hour minute second need no adjustments
    tm.tm_hour = atoi(it->c_str()); it++;
    tm.tm_min  = atoi(it->c_str()); it++;
    tm.tm_sec  = atoi(it->c_str()); it++;


    // Timezone is supposed to be either +-XYZT or a zone name
    int zonesecs = 0;
    if (it->length() < 1) {
        DATEDEB((stderr, "Bad rfc822 date format (zlen): [%s]\n", dt.c_str()));
        return (time_t)-1;
    }
    if (it->at(0) == '-' || it->at(0) == '+') {
        // Note that +xy:zt (instead of +xyzt) sometimes happen, we
        // may want to process it one day
        if (it->length() < 5) {
            DATEDEB((stderr, "Bad rfc822 date format (zlen1): [%s]\n",
                     dt.c_str()));
            goto nozone;
        }
        zonesecs = 3600*((it->at(1)-'0') * 10 + it->at(2)-'0')+
            (it->at(3)-'0')*10 + it->at(4)-'0';
        zonesecs = it->at(0) == '+' ? -1 * zonesecs : zonesecs;
    } else {
        int hours;
        if (*it == "A") hours= 1; else if (*it == "B") hours= 2;
        else if (*it == "C") hours= 3; else if (*it == "D") hours= 4;
        else if (*it == "E") hours= 5; else if (*it == "F") hours= 6;
        else if (*it == "G") hours= 7; else if (*it == "H") hours= 8;
        else if (*it == "I") hours= 9; else if (*it == "K") hours= 10;
        else if (*it == "L") hours= 11; else if (*it == "M") hours= 12;
        else if (*it == "N") hours= -1; else if (*it == "O") hours= -2;
        else if (*it == "P") hours= -3; else if (*it == "Q") hours= -4;
        else if (*it == "R") hours= -5; else if (*it == "S") hours= -6;
        else if (*it == "T") hours= -7; else if (*it == "U") hours= -8;
        else if (*it == "V") hours= -9; else if (*it == "W") hours= -10;
        else if (*it == "X") hours= -11; else if (*it == "Y") hours= -12;
        else if (*it == "Z") hours=  0; else if  (*it == "UT") hours= 0;
        else if (*it == "GMT") hours= 0; else if (*it == "EST") hours= 5;
        else if (*it == "EDT") hours= 4; else if (*it == "CST") hours= 6;
        else if (*it == "CDT") hours= 5; else if (*it == "MST") hours= 7;
        else if (*it == "MDT") hours= 6; else if (*it == "PST") hours= 8;
        else if (*it == "PDT") hours= 7;
        // Non standard names
        // Standard Time (or Irish Summer Time?) is actually +5.5
        else if (*it == "CET") hours= -1; else if (*it == "JST") hours= -9;
        else if (*it == "IST") hours= -5; else if (*it == "WET") hours= 0;
        else if (*it == "MET") hours= -1;
        else {
            DATEDEB((stderr, "Bad rfc822 date format (zname): [%s]\n",
                     dt.c_str()));
            // Forget tz
            goto nozone;
        }
        zonesecs = 3600 * hours;
    }
    DATEDEB((stderr, "Tz: [%s] -> %d\n", it->c_str(), zonesecs));
nozone:

    // Compute the UTC Unix time value
#ifndef sun
    time_t tim = timegm(&tm);
#else
    // No timegm on Sun. Use mktime, then correct for local timezone
    time_t tim = mktime(&tm);
    // altzone and timezone hold the difference in seconds between UTC
    // and local. They are negative for places east of greenwich
    //
    // mktime takes our buffer to be local time, so it adds timezone
    // to the conversion result (if timezone is < 0 it's currently
    // earlier in greenwhich).
    //
    // We have to substract it back (hey! hopefully! maybe we have to
    // add it). Who can really know?
    tim -= timezone;
#endif

    // And add in the correction from the email's Tz
    tim += zonesecs;

    DATEDEB((stderr, "Date: %s  uxtime %ld \n", ctime(&tim), tim));
    return tim;
}

#else

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <string>
#include "mimeparse.h"
#include "readfile.h"


using namespace std;
extern bool rfc2231_decode(const string& in, string& out, string& charset);
extern time_t rfc2822DateToUxTime(const string& date);
static const char *thisprog;

static char usage [] =
    "-p: header value and parameter test\n"
    "-q: qp decoding\n"
    "-b: base64\n"
    "-7: rfc2047\n"
    "-1: rfc2331\n"
    "-t: date time\n"
    "  \n\n"
    ;
static void
Usage(void)
{
    fprintf(stderr, "%s: usage:\n%s", thisprog, usage);
    exit(1);
}

static int     op_flags;
#define OPT_MOINS 0x1
#define OPT_p     0x2
#define OPT_q     0x4
#define OPT_b     0x8
#define OPT_7     0x10
#define OPT_1     0x20
#define OPT_t     0x40
int
main(int argc, const char **argv)
{
    int count = 10;

    thisprog = argv[0];
    argc--; argv++;

    while (argc > 0 && **argv == '-') {
        (*argv)++;
        if (!(**argv))
            /* Cas du "adb - core" */
            Usage();
        while (**argv)
            switch (*(*argv)++) {
            case 'p':   op_flags |= OPT_p; break;
            case 'q':   op_flags |= OPT_q; break;
            case 'b':   op_flags |= OPT_b; break;
            case '1':   op_flags |= OPT_1; break;
            case '7':   op_flags |= OPT_7; break;
            case 't':   op_flags |= OPT_t; break;
            default: Usage();   break;
            }
    b1: argc--; argv++;
    }

    if (argc != 0)
        Usage();

    if (op_flags & OPT_p) {
        // Mime header value and parameters extraction
        const char *tr[] = {
            "text/html;charset = UTF-8 ; otherparam=garb; \n"
            "QUOTEDPARAM=\"quoted value\"",

            "text/plain; charset=ASCII\r\n name=\"809D3016_5691DPS_5.2.LIC\"",

            "application/x-stuff;"
            "title*0*=us-ascii'en'This%20is%20even%20more%20;"
            "title*1*=%2A%2A%2Afun%2A%2A%2A%20;"
            "title*2=\"isn't it!\"",

            // The following are all invalid, trying to crash the parser...
            "",
            // This does not parse because of whitespace in the value.
            " complete garbage;",
            // This parses, but only the first word gets into the value
            " some value",
            " word ;",  ";",  "=",  "; = ",  "a;=\"toto tutu\"=", ";;;;a=b",
        };

        for (unsigned int i = 0; i < sizeof(tr) / sizeof(char *); i++) {
            MimeHeaderValue parsed;
            if (!parseMimeHeaderValue(tr[i], parsed)) {
                fprintf(stderr, "PARSE ERROR for [%s]\n", tr[i]);
                continue;
            }
            printf("Field value: [%s]\n", parsed.value.c_str());
            map<string, string>::iterator it;
            for (it = parsed.params.begin();it != parsed.params.end();it++) {
                if (it == parsed.params.begin())
                    printf("Parameters:\n");
                printf("  [%s] = [%s]\n", it->first.c_str(), it->second.c_str());
            }
        }

    } else if (op_flags & OPT_q) {
        // Quoted printable stuff
        const char *qp =
            "=41=68 =e0 boire=\r\n continue 1ere\ndeuxieme\n\r3eme "
            "agrave is: '=E0' probable skipped decode error: =\n"
            "Actual decode error =xx this wont show";

        string out;
        if (!qp_decode(string(qp), out)) {
            fprintf(stderr, "qp_decode returned error\n");
        }
        printf("Decoded: '%s'\n", out.c_str());
    } else if (op_flags & OPT_b) {
        // Base64
        //'C'est <20> boire qu'il nous faut <20>viter l'exc<78>s.'
        //'Deuxi<78>me ligne'
        //'Troisi<73>me ligne'
        //'Et la fin (pas de nl). '
        const char *b64 =
            "Qydlc3Qg4CBib2lyZSBxdSdpbCBub3VzIGZhdXQg6XZpdGVyIGwnZXhj6HMuCkRldXhp6G1l\r\n"
            "IGxpZ25lClRyb2lzaehtZSBsaWduZQpFdCBsYSBmaW4gKHBhcyBkZSBubCkuIA==\r\n";

        string out;
        if (!base64_decode(string(b64), out)) {
            fprintf(stderr, "base64_decode returned error\n");
            exit(1);
        }
        printf("Decoded: [%s]\n", out.c_str());
#if 0
        string coded, decoded;
        const char *fname = "/tmp/recoll_decodefail";
        if (!file_to_string(fname, coded)) {
            fprintf(stderr, "Cant read %s\n", fname);
            exit(1);
        }

        if (!base64_decode(coded, decoded)) {
            fprintf(stderr, "base64_decode returned error\n");
            exit(1);
        }
        printf("Decoded: [%s]\n", decoded.c_str());
#endif

    } else if (op_flags & (OPT_7|OPT_1)) {
        // rfc2047
        char line [1024];
        string out;
        bool res;
        while (fgets(line, 1023, stdin)) {
            int l = strlen(line);
            if (l == 0)
                continue;
            line[l-1] = 0;
            fprintf(stderr, "Line: [%s]\n", line);
            string charset;
            if (op_flags & OPT_7) {
                res = rfc2047_decode(line, out);
            } else {
                res = rfc2231_decode(line, out, charset);
            }
            if (res)
                fprintf(stderr, "Out:  [%s] cs %s\n", out.c_str(), charset.c_str());
            else
                fprintf(stderr, "Decoding failed\n");
        }
    } else if (op_flags & OPT_t) {
        time_t t;

        const char *dates[] = {
            " Wed, 13 Sep 2006 11:40:26 -0700 (PDT)",
            " Mon, 3 Jul 2006 09:51:58 +0200",
            " Wed, 13 Sep 2006 08:19:48 GMT-07:00",
            " Wed, 13 Sep 2006 11:40:26 -0700 (PDT)",
            " Sat, 23 Dec 89 19:27:12 EST",
            "   13 Jan 90 08:23:29 GMT"};

        for (unsigned int i = 0; i <sizeof(dates) / sizeof(char *); i++) {
            t = rfc2822DateToUxTime(dates[i]);
            struct tm *tm = localtime(&t);
            char datebuf[100];
            strftime(datebuf, 99, "&nbsp;%Y-%m-%d&nbsp;%H:%M:%S %z", tm);
            printf("[%s] -> [%s]\n", dates[i], datebuf);
        }
        printf("Enter date:\n");
        char line [1024];
        while (fgets(line, 1023, stdin)) {
            int l = strlen(line);
            if (l == 0) continue;
            line[l-1] = 0;
            t = rfc2822DateToUxTime(line);
            struct tm *tm = localtime(&t);
            char datebuf[100];
            strftime(datebuf, 99, "&nbsp;%Y-%m-%d&nbsp;%H:%M:%S %z", tm);
            printf("[%s] -> [%s]\n", line, datebuf);
        }


    }
    exit(0);
}

#endif // TEST_MIMEPARSE