implement rfc2231 decoding for mime parameter values
This commit is contained in:
parent
804b79ee56
commit
2e079f7ba8
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: mimeparse.cpp,v 1.11 2006-09-05 08:04:36 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: mimeparse.cpp,v 1.12 2006-09-06 09:14:43 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -21,12 +21,17 @@ static char rcsid[] = "@(#$Id: mimeparse.cpp,v 1.11 2006-09-05 08:04:36 dockes E
|
|||||||
#ifndef TEST_MIMEPARSE
|
#ifndef TEST_MIMEPARSE
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
|
|
||||||
#include "mimeparse.h"
|
#include "mimeparse.h"
|
||||||
#include "base64.h"
|
#include "base64.h"
|
||||||
|
#include "transcode.h"
|
||||||
|
#include "smallut.h"
|
||||||
|
|
||||||
|
|
||||||
#ifndef NO_NAMESPACES
|
#ifndef NO_NAMESPACES
|
||||||
using namespace std;
|
using namespace std;
|
||||||
@ -34,12 +39,61 @@ using namespace std;
|
|||||||
|
|
||||||
// Parsing a header value. Only content-type and content-disposition
|
// Parsing a header value. Only content-type and content-disposition
|
||||||
// have parameters, but others are compatible with content-type
|
// have parameters, but others are compatible with content-type
|
||||||
// syntax, only, parameters are not used. So we can parse all like
|
// syntax, only, parameters are not used. So we can parse all like:
|
||||||
// content-type:
|
//
|
||||||
// headertype: value [; paramname=paramvalue] ...
|
// headertype: value [; paramname=paramvalue] ...
|
||||||
|
//
|
||||||
// Value and paramvalues can be quoted strings, and there can be
|
// Value and paramvalues can be quoted strings, and there can be
|
||||||
// comments too
|
// comments too. Note that RFC2047 is explicitely forbidden for
|
||||||
// Ref: RFC2045/6/7 (MIME) RFC1806 (content-disposition)
|
// parameter values (RFC2231 must be used), but I have seen it used
|
||||||
|
// anyway (ie: thunderbird 1.0)
|
||||||
|
//
|
||||||
|
// Ref: RFC2045/6/7 (MIME) RFC2183/2231 (content-disposition and encodings)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/** Decode a MIME parameter value encoded according to rfc2231
|
||||||
|
*
|
||||||
|
* Example input withs input charset == "":
|
||||||
|
* [iso-8859-1'french'RE%A0%3A_Smoke_Tests%20bla]
|
||||||
|
* Or (if charset is set) : RE%A0%3A_Smoke_Tests%20bla
|
||||||
|
*
|
||||||
|
* @param in input string, ascii with rfc2231 markup
|
||||||
|
* @param out output string
|
||||||
|
* @param charset if empty: decode string like 'charset'lang'more%20stuff,
|
||||||
|
* else just do the %XX part
|
||||||
|
* @return out output string encoded in utf-8
|
||||||
|
*/
|
||||||
|
bool rfc2231_decode(const string &in, string &out, string &charset)
|
||||||
|
{
|
||||||
|
string::size_type pos1, pos2=0;
|
||||||
|
|
||||||
|
if (charset.empty()) {
|
||||||
|
if ((pos1 = in.find("'")) == string::npos)
|
||||||
|
return false;
|
||||||
|
charset = in.substr(0, pos1);
|
||||||
|
// fprintf(stderr, "Charset: [%s]\n", charset.c_str());
|
||||||
|
pos1++;
|
||||||
|
|
||||||
|
if ((pos2 = in.find("'", pos1)) == string::npos)
|
||||||
|
return false;
|
||||||
|
// We have no use for lang for now
|
||||||
|
// string lang = in.substr(pos1, pos2-pos1);
|
||||||
|
// fprintf(stderr, "Lang: [%s]\n", lang.c_str());
|
||||||
|
pos2++;
|
||||||
|
}
|
||||||
|
|
||||||
|
string raw;
|
||||||
|
qp_decode(in.substr(pos2), raw, '%');
|
||||||
|
// fprintf(stderr, "raw [%s]\n", raw.c_str());
|
||||||
|
if (!transcode(raw, out, charset, "UTF-8"))
|
||||||
|
return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/////////////////////////////////////////
|
||||||
|
/// Decoding of MIME fields values and parameters
|
||||||
|
|
||||||
// The lexical token returned by find_next_token
|
// The lexical token returned by find_next_token
|
||||||
class Lexical {
|
class Lexical {
|
||||||
@ -54,8 +108,8 @@ class Lexical {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// Skip mime comment. This must be called with in[start] == '('
|
// Skip mime comment. This must be called with in[start] == '('
|
||||||
string::size_type skip_comment(const string &in, string::size_type start,
|
static string::size_type
|
||||||
Lexical &lex)
|
skip_comment(const string &in, string::size_type start, Lexical &lex)
|
||||||
{
|
{
|
||||||
int commentlevel = 0;
|
int commentlevel = 0;
|
||||||
for (; start < in.size(); start++) {
|
for (; start < in.size(); start++) {
|
||||||
@ -66,7 +120,7 @@ string::size_type skip_comment(const string &in, string::size_type start,
|
|||||||
continue;
|
continue;
|
||||||
} else {
|
} else {
|
||||||
lex.error.append("\\ at end of string ");
|
lex.error.append("\\ at end of string ");
|
||||||
return string::npos;
|
return in.size();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (in[start] == '(')
|
if (in[start] == '(')
|
||||||
@ -76,16 +130,16 @@ string::size_type skip_comment(const string &in, string::size_type start,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (start == in.size()) {
|
if (start == in.size() && commentlevel != 0) {
|
||||||
lex.error.append("Unclosed comment ");
|
lex.error.append("Unclosed comment ");
|
||||||
return string::npos;
|
return in.size();
|
||||||
}
|
}
|
||||||
return start;
|
return start;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skip initial whitespace and (possibly nested) comments.
|
// Skip initial whitespace and (possibly nested) comments.
|
||||||
string::size_type skip_whitespace_and_comment(const string &in,
|
static string::size_type
|
||||||
string::size_type start,
|
skip_whitespace_and_comment(const string &in, string::size_type start,
|
||||||
Lexical &lex)
|
Lexical &lex)
|
||||||
{
|
{
|
||||||
while (1) {
|
while (1) {
|
||||||
@ -103,19 +157,19 @@ string::size_type skip_whitespace_and_comment(const string &in,
|
|||||||
|
|
||||||
/// Find next token in mime header value string.
|
/// Find next token in mime header value string.
|
||||||
/// @return the next starting position in string, string::npos for error
|
/// @return the next starting position in string, string::npos for error
|
||||||
/// (ie unbalanced quoting)
|
|
||||||
/// @param in the input string
|
/// @param in the input string
|
||||||
/// @param start the starting position
|
/// @param start the starting position
|
||||||
/// @param lex the returned token and its description
|
/// @param lex the returned token and its description
|
||||||
/// @param delims separators we should look for
|
/// @param delims separators we should look for
|
||||||
string::size_type find_next_token(const string &in, string::size_type start,
|
static string::size_type
|
||||||
|
find_next_token(const string &in, string::size_type start,
|
||||||
Lexical &lex, string delims = ";=")
|
Lexical &lex, string delims = ";=")
|
||||||
{
|
{
|
||||||
char oquot, cquot;
|
char oquot, cquot;
|
||||||
|
|
||||||
start = skip_whitespace_and_comment(in, start, lex);
|
start = skip_whitespace_and_comment(in, start, lex);
|
||||||
if (start == string::npos || start == in.size())
|
if (start == string::npos || start == in.size())
|
||||||
return start;
|
return in.size();
|
||||||
|
|
||||||
// Begins with separator ? return it.
|
// Begins with separator ? return it.
|
||||||
string::size_type delimi = delims.find_first_of(in[start]);
|
string::size_type delimi = delims.find_first_of(in[start]);
|
||||||
@ -172,12 +226,26 @@ string::size_type find_next_token(const string &in, string::size_type start,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Classes for handling rfc2231 value continuations
|
||||||
|
class Chunk {
|
||||||
|
public:
|
||||||
|
Chunk() : decode(false) {}
|
||||||
|
bool decode;
|
||||||
|
string value;
|
||||||
|
};
|
||||||
|
class Chunks {
|
||||||
|
public:
|
||||||
|
vector<Chunk> chunks;
|
||||||
|
};
|
||||||
|
|
||||||
void stringtolower(string &out, const string& in)
|
void stringtolower(string &out, const string& in)
|
||||||
{
|
{
|
||||||
for (string::size_type i = 0; i < in.size(); i++)
|
for (string::size_type i = 0; i < in.size(); i++)
|
||||||
out.append(1, char(tolower(in[i])));
|
out.append(1, char(tolower(in[i])));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Parse MIME field value. Should look like:
|
||||||
|
// somevalue ; param1=val1;param2=val2
|
||||||
bool parseMimeHeaderValue(const string& value, MimeHeaderValue& parsed)
|
bool parseMimeHeaderValue(const string& value, MimeHeaderValue& parsed)
|
||||||
{
|
{
|
||||||
parsed.value.erase();
|
parsed.value.erase();
|
||||||
@ -185,20 +253,25 @@ bool parseMimeHeaderValue(const string& value, MimeHeaderValue& parsed)
|
|||||||
|
|
||||||
Lexical lex;
|
Lexical lex;
|
||||||
string::size_type start = 0;
|
string::size_type start = 0;
|
||||||
|
|
||||||
|
// Get the field value
|
||||||
start = find_next_token(value, start, lex);
|
start = find_next_token(value, start, lex);
|
||||||
if (start == string::npos || lex.what != Lexical::token)
|
if (start == string::npos || lex.what != Lexical::token)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
parsed.value = lex.value;
|
parsed.value = lex.value;
|
||||||
|
|
||||||
|
map<string, string> rawparams;
|
||||||
|
// Look for parameters
|
||||||
for (;;) {
|
for (;;) {
|
||||||
string paramname, paramvalue;
|
string paramname, paramvalue;
|
||||||
lex.reset();
|
lex.reset();
|
||||||
start = find_next_token(value, start, lex);
|
start = find_next_token(value, start, lex);
|
||||||
if (start == value.size())
|
if (start == value.size())
|
||||||
return true;
|
break;
|
||||||
if (start == string::npos)
|
if (start == string::npos) {
|
||||||
|
//fprintf(stderr, "Find_next_token error(1)\n");
|
||||||
return false;
|
return false;
|
||||||
|
}
|
||||||
if (lex.what == Lexical::separator && lex.value[0] == ';')
|
if (lex.what == Lexical::separator && lex.value[0] == ';')
|
||||||
continue;
|
continue;
|
||||||
if (lex.what != Lexical::token)
|
if (lex.what != Lexical::token)
|
||||||
@ -207,26 +280,108 @@ bool parseMimeHeaderValue(const string& value, MimeHeaderValue& parsed)
|
|||||||
|
|
||||||
start = find_next_token(value, start, lex);
|
start = find_next_token(value, start, lex);
|
||||||
if (start == string::npos || lex.what != Lexical::separator ||
|
if (start == string::npos || lex.what != Lexical::separator ||
|
||||||
lex.value[0] != '=')
|
lex.value[0] != '=') {
|
||||||
|
//fprintf(stderr, "Find_next_token error (2)\n");
|
||||||
return false;
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
start = find_next_token(value, start, lex);
|
start = find_next_token(value, start, lex);
|
||||||
if (start == string::npos || lex.what != Lexical::token)
|
if (start == string::npos || lex.what != Lexical::token) {
|
||||||
|
//fprintf(stderr, "Parameter has no value!");
|
||||||
return false;
|
return false;
|
||||||
paramvalue = lex.value;
|
|
||||||
parsed.params[paramname] = paramvalue;
|
|
||||||
}
|
}
|
||||||
|
paramvalue = lex.value;
|
||||||
|
rawparams[paramname] = paramvalue;
|
||||||
|
//fprintf(stderr, "RAW: name [%s], value [%s]\n", paramname.c_str(),
|
||||||
|
// paramvalue.c_str());
|
||||||
|
}
|
||||||
|
// fprintf(stderr, "Number of raw params %d\n", rawparams.size());
|
||||||
|
|
||||||
|
// RFC2231 handling:
|
||||||
|
// - if a parameter name ends in * it must be decoded
|
||||||
|
// - If a parameter name looks line name*ii[*] it is a
|
||||||
|
// partial value, and must be concatenated with other such.
|
||||||
|
|
||||||
|
map<string, Chunks> chunks;
|
||||||
|
for (map<string, string>::const_iterator it = rawparams.begin();
|
||||||
|
it != rawparams.end(); it++) {
|
||||||
|
string nm = it->first;
|
||||||
|
// fprintf(stderr, "NM: [%s]\n", nm.c_str());
|
||||||
|
if (nm.empty()) // ??
|
||||||
|
continue;
|
||||||
|
|
||||||
|
Chunk chunk;
|
||||||
|
if (nm[nm.length()-1] == '*') {
|
||||||
|
nm.erase(nm.length() - 1);
|
||||||
|
chunk.decode = true;
|
||||||
|
} else
|
||||||
|
chunk.decode = false;
|
||||||
|
// fprintf(stderr, "NM1: [%s]\n", nm.c_str());
|
||||||
|
|
||||||
|
chunk.value = it->second;
|
||||||
|
|
||||||
|
// Look for another asterisk in nm. If none, assign index 0
|
||||||
|
string::size_type aster;
|
||||||
|
int idx = 0;
|
||||||
|
if ((aster = nm.rfind("*")) != string::npos) {
|
||||||
|
string num = nm.substr(aster+1);
|
||||||
|
//fprintf(stderr, "NUM: [%s]\n", num.c_str());
|
||||||
|
nm.erase(aster);
|
||||||
|
idx = atoi(num.c_str());
|
||||||
|
}
|
||||||
|
Chunks empty;
|
||||||
|
if (chunks.find(nm) == chunks.end())
|
||||||
|
chunks[nm] = empty;
|
||||||
|
chunks[nm].chunks.resize(idx+1);
|
||||||
|
chunks[nm].chunks[idx] = chunk;
|
||||||
|
//fprintf(stderr, "CHNKS: nm [%s], idx %d, decode %d, value [%s]\n",
|
||||||
|
// nm.c_str(), idx, int(chunk.decode), chunk.value.c_str());
|
||||||
|
}
|
||||||
|
|
||||||
|
// For each parameter name, concatenate its chunks and possibly
|
||||||
|
// decode Note that we pass the whole concatenated string to
|
||||||
|
// decoding if the first chunk indicates that decoding is needed,
|
||||||
|
// which is not right because there might be uncoded chunks
|
||||||
|
// according to the rfc.
|
||||||
|
for (map<string, Chunks>::const_iterator it = chunks.begin();
|
||||||
|
it != chunks.end(); it++) {
|
||||||
|
if (it->second.chunks.empty())
|
||||||
|
continue;
|
||||||
|
string nm = it->first;
|
||||||
|
// Create the name entry
|
||||||
|
if (parsed.params.find(nm) == parsed.params.end())
|
||||||
|
parsed.params[nm] = "";
|
||||||
|
// Concatenate all chunks and decode the whole if the first one needs
|
||||||
|
// to. Yes, this is not quite right.
|
||||||
|
string value;
|
||||||
|
for (vector<Chunk>::const_iterator vi = it->second.chunks.begin();
|
||||||
|
vi != it->second.chunks.end(); vi++) {
|
||||||
|
value += vi->value;
|
||||||
|
}
|
||||||
|
if (it->second.chunks[0].decode) {
|
||||||
|
string charset;
|
||||||
|
rfc2231_decode(value, parsed.params[nm], charset);
|
||||||
|
} else {
|
||||||
|
// rfc2047 MUST NOT but IS used by some agents
|
||||||
|
rfc2047_decode(value, parsed.params[nm]);
|
||||||
|
}
|
||||||
|
//fprintf(stderr, "FINAL: nm [%s], value [%s]\n",
|
||||||
|
//nm.c_str(), parsed.params[nm].c_str());
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Decode a string encoded with quoted-printable encoding.
|
// Decode a string encoded with quoted-printable encoding.
|
||||||
bool qp_decode(const string& in, string &out)
|
// we reuse the code for rfc2231 % encoding, even if the eol
|
||||||
|
// processing is not useful in this case
|
||||||
|
bool qp_decode(const string& in, string &out, char esc)
|
||||||
{
|
{
|
||||||
out.reserve(in.length());
|
out.reserve(in.length());
|
||||||
string::size_type ii;
|
string::size_type ii;
|
||||||
for (ii = 0; ii < in.length(); ii++) {
|
for (ii = 0; ii < in.length(); ii++) {
|
||||||
if (in[ii] == '=') {
|
if (in[ii] == esc) {
|
||||||
ii++; // Skip '='
|
ii++; // Skip '=' or '%'
|
||||||
if(ii >= in.length() - 1) { // Need at least 2 more chars
|
if(ii >= in.length() - 1) { // Need at least 2 more chars
|
||||||
break;
|
break;
|
||||||
} else if (in[ii] == '\r' && in[ii+1] == '\n') { // Soft nl, skip
|
} else if (in[ii] == '\r' && in[ii+1] == '\n') { // Soft nl, skip
|
||||||
@ -264,11 +419,7 @@ bool qp_decode(const string& in, string &out)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Decode an word encoded as quoted printable or base 64
|
||||||
#include "transcode.h"
|
|
||||||
#include "smallut.h"
|
|
||||||
|
|
||||||
// Decode a parsed encoded word
|
|
||||||
static bool rfc2047_decodeParsed(const std::string& charset,
|
static bool rfc2047_decodeParsed(const std::string& charset,
|
||||||
const std::string& encoding,
|
const std::string& encoding,
|
||||||
const std::string& value,
|
const std::string& value,
|
||||||
@ -307,13 +458,19 @@ static bool rfc2047_decodeParsed(const std::string& charset,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse a mail header encoded value
|
// Parse a mail header value encoded according to RFC2047.
|
||||||
typedef enum {rfc2047base, rfc2047open_eq, rfc2047charset, rfc2047encoding,
|
// This is not supposed to be used for MIME parameter values, but it
|
||||||
|
// happens.
|
||||||
|
// Bugs:
|
||||||
|
// - We should turn off decoding while inside quoted strings
|
||||||
|
//
|
||||||
|
typedef enum {rfc2047base, rfc2047ready, rfc2047open_eq,
|
||||||
|
rfc2047charset, rfc2047encoding,
|
||||||
rfc2047value, rfc2047close_q} Rfc2047States;
|
rfc2047value, rfc2047close_q} Rfc2047States;
|
||||||
|
|
||||||
bool rfc2047_decode(const std::string& in, std::string &out)
|
bool rfc2047_decode(const std::string& in, std::string &out)
|
||||||
{
|
{
|
||||||
Rfc2047States state = rfc2047base;
|
Rfc2047States state = rfc2047ready;
|
||||||
string encoding, charset, value, utf8;
|
string encoding, charset, value, utf8;
|
||||||
|
|
||||||
out = "";
|
out = "";
|
||||||
@ -323,9 +480,23 @@ bool rfc2047_decode(const std::string& in, std::string &out)
|
|||||||
switch (state) {
|
switch (state) {
|
||||||
case rfc2047base:
|
case rfc2047base:
|
||||||
{
|
{
|
||||||
|
value += ch;
|
||||||
switch (ch) {
|
switch (ch) {
|
||||||
|
// Linear whitespace
|
||||||
|
case ' ': case ' ': state = rfc2047ready; break;
|
||||||
|
default: break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case rfc2047ready:
|
||||||
|
{
|
||||||
|
switch (ch) {
|
||||||
|
// Whitespace: stay ready
|
||||||
|
case ' ': case ' ': value += ch;break;
|
||||||
|
// '=' -> forward to next state
|
||||||
case '=': state = rfc2047open_eq; break;
|
case '=': state = rfc2047open_eq; break;
|
||||||
default: value += ch;
|
// Other: go back to sleep
|
||||||
|
default: value += ch; state = rfc2047base;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@ -409,30 +580,41 @@ bool rfc2047_decode(const std::string& in, std::string &out)
|
|||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include "mimeparse.h"
|
#include "mimeparse.h"
|
||||||
#include "readfile.h"
|
#include "readfile.h"
|
||||||
|
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
extern bool rfc2231_decode(const string& in, string& out, string& charset);
|
||||||
|
|
||||||
int
|
int
|
||||||
main(int argc, const char **argv)
|
main(int argc, const char **argv)
|
||||||
{
|
{
|
||||||
#if 0
|
#if 0
|
||||||
//const char *tr = "text/html; charset=utf-8; otherparam=garb";
|
//const char *tr = "text/html; charset=utf-8; otherparam=garb";
|
||||||
|
|
||||||
const char *tr = "text/html;charset = UTF-8 ; otherparam=garb; \n"
|
const char *tr = "text/html;charset = UTF-8 ; otherparam=garb; \n"
|
||||||
"QUOTEDPARAM=\"quoted value\"";
|
"QUOTEDPARAM=\"quoted value\"";
|
||||||
|
|
||||||
|
// const char *tr = "application/x-stuff;"
|
||||||
|
// "title*0*=us-ascii'en'This%20is%20even%20more%20;"
|
||||||
|
//"title*1*=%2A%2A%2Afun%2A%2A%2A%20;"
|
||||||
|
//"title*2=\"isn't it!\"";
|
||||||
|
|
||||||
MimeHeaderValue parsed;
|
MimeHeaderValue parsed;
|
||||||
|
|
||||||
if (!parseMimeHeaderValue(tr, parsed)) {
|
if (!parseMimeHeaderValue(tr, parsed)) {
|
||||||
fprintf(stderr, "PARSE ERROR\n");
|
fprintf(stderr, "PARSE ERROR\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("'%s' \n", parsed.value.c_str());
|
printf("Field value: [%s]\n", parsed.value.c_str());
|
||||||
map<string, string>::iterator it;
|
map<string, string>::iterator it;
|
||||||
for (it = parsed.params.begin();it != parsed.params.end();it++) {
|
for (it = parsed.params.begin();it != parsed.params.end();it++) {
|
||||||
printf(" '%s' = '%s'\n", it->first.c_str(), it->second.c_str());
|
if (it == parsed.params.begin())
|
||||||
|
printf("Parameters:\n");
|
||||||
|
printf(" [%s] = [%s]\n", it->first.c_str(), it->second.c_str());
|
||||||
}
|
}
|
||||||
#elif 0
|
#elif 0
|
||||||
const char *qp = "=41=68 =e0 boire=\r\n continue 1ere\ndeuxieme\n\r3eme "
|
const char *qp = "=41=68 =e0 boire=\r\n continue 1ere\ndeuxieme\n\r3eme "
|
||||||
@ -459,19 +641,28 @@ main(int argc, const char **argv)
|
|||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
printf("Decoded: '%s'\n", out.c_str());
|
printf("Decoded: '%s'\n", out.c_str());
|
||||||
#elif 0
|
#elif 1
|
||||||
char line [1024];
|
char line [1024];
|
||||||
string out;
|
string out;
|
||||||
|
bool res;
|
||||||
while (fgets(line, 1023, stdin)) {
|
while (fgets(line, 1023, stdin)) {
|
||||||
int l = strlen(line);
|
int l = strlen(line);
|
||||||
if (l == 0)
|
if (l == 0)
|
||||||
continue;
|
continue;
|
||||||
line[l-1] = 0;
|
line[l-1] = 0;
|
||||||
fprintf(stderr, "Line: [%s]\n", line);
|
fprintf(stderr, "Line: [%s]\n", line);
|
||||||
rfc2047_decode(line, out);
|
#if 0
|
||||||
|
res = rfc2047_decode(line, out);
|
||||||
|
#else
|
||||||
|
string charset;
|
||||||
|
res = rfc2231_decode(line, out, charset);
|
||||||
|
#endif
|
||||||
|
if (res)
|
||||||
fprintf(stderr, "Out: [%s]\n", out.c_str());
|
fprintf(stderr, "Out: [%s]\n", out.c_str());
|
||||||
|
else
|
||||||
|
fprintf(stderr, "Decoding failed\n");
|
||||||
}
|
}
|
||||||
#elif 1
|
#elif 0
|
||||||
string coded, decoded;
|
string coded, decoded;
|
||||||
const char *fname = "/tmp/recoll_decodefail";
|
const char *fname = "/tmp/recoll_decodefail";
|
||||||
if (!file_to_string(fname, coded)) {
|
if (!file_to_string(fname, coded)) {
|
||||||
|
|||||||
@ -16,7 +16,7 @@
|
|||||||
*/
|
*/
|
||||||
#ifndef _MIME_H_INCLUDED_
|
#ifndef _MIME_H_INCLUDED_
|
||||||
#define _MIME_H_INCLUDED_
|
#define _MIME_H_INCLUDED_
|
||||||
/* @(#$Id: mimeparse.h,v 1.6 2006-09-05 08:04:36 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: mimeparse.h,v 1.7 2006-09-06 09:14:43 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <map>
|
#include <map>
|
||||||
@ -38,14 +38,18 @@ class MimeHeaderValue {
|
|||||||
*/
|
*/
|
||||||
extern bool parseMimeHeaderValue(const std::string& in, MimeHeaderValue& psd);
|
extern bool parseMimeHeaderValue(const std::string& in, MimeHeaderValue& psd);
|
||||||
|
|
||||||
/** Quoted printable decoding */
|
/** Quoted printable decoding. Doubles up as rfc2231 decoder, hence the esc */
|
||||||
extern bool qp_decode(const std::string& in, std::string &out);
|
extern bool qp_decode(const std::string& in, std::string &out,
|
||||||
|
char esc = '=');
|
||||||
|
|
||||||
/** Decode an Internet mail header value encoded according to rfc2047
|
/** Decode an Internet mail field value encoded according to rfc2047
|
||||||
|
*
|
||||||
|
* Example input: Some words =?iso-8859-1?Q?RE=A0=3A_Smoke_Tests?= more input
|
||||||
|
*
|
||||||
|
* Note that MIME parameter values are explicitely NOT to be encoded with
|
||||||
|
* this encoding which is only for headers like Subject:, To:. But it
|
||||||
|
* is sometimes used anyway...
|
||||||
*
|
*
|
||||||
* Example input: =?iso-8859-1?Q?RE=A0=3A_Smoke_Tests?=
|
|
||||||
* The input normally comes from parseMimeHeaderValue() output
|
|
||||||
* and no comments or quoting are expected.
|
|
||||||
* @param in input string, ascii with rfc2047 markup
|
* @param in input string, ascii with rfc2047 markup
|
||||||
* @return out output string encoded in utf-8
|
* @return out output string encoded in utf-8
|
||||||
*/
|
*/
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user