mimeparse: use cp1252 instead of iso-8859 on values with residual 8bit chars.

Also: comments and missing std:: qualifiers
This commit is contained in:
Jean-Francois Dockes 2018-11-22 17:41:43 +01:00
parent 01cea06201
commit 0cdcaea437
3 changed files with 32 additions and 23 deletions

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2011 J.F.Dockes
/* Copyright (C) 2011-2018 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
@ -27,7 +27,6 @@
// when they are used in a fast loop or are shared.
#include <string>
using std::string;
// The following slightly hacky preprocessing directives and the
// companion code in the cpp file look complicated, but it just
@ -35,9 +34,9 @@ using std::string;
// extern declaration and the definition.
#ifdef RCLIN_CSTR_CPPFILE
#undef DEF_CSTR
#define DEF_CSTR(NM, STR) const string cstr_##NM(STR)
#define DEF_CSTR(NM, STR) const std::string cstr_##NM(STR)
#else
#define DEF_CSTR(NM, STR) extern const string cstr_##NM
#define DEF_CSTR(NM, STR) extern const std::string cstr_##NM
#endif
DEF_CSTR(caption, "caption");
@ -63,24 +62,35 @@ DEF_CSTR(fldhtm, "\007");
DEF_CSTR(wildSpecStChars, "*?[");
DEF_CSTR(regSpecStChars, "(.[{");
// Values used as keys inside Dijon::Filter::metaData[]. This structure is
// used to store all data generated by format-translating filters. It is
// different from Rcl::Doc for mostly historical reasons. The translation
// from Filter to Doc occurs inside internfile.cpp
DEF_CSTR(dj_keyds, "description");
DEF_CSTR(dj_keyfn, "filename");
DEF_CSTR(dj_keymd, "modificationdate");
// Values used as keys inside Dijon::Filter::metaData[].
// The document data.
DEF_CSTR(dj_keycontent, "content");
// These fields go from the topmost handler (text/plain) into the
// Rcl::Doc::meta, possibly with some massaging.
DEF_CSTR(dj_keyanc, "rclanc");
DEF_CSTR(dj_keyorigcharset, "origcharset");
DEF_CSTR(dj_keyds, "description");
DEF_CSTR(dj_keyabstract, "abstract");
// Built or inherited along the handler stack, then copied to doc
DEF_CSTR(dj_keyipath, "ipath");
DEF_CSTR(dj_keyfn, "filename");
DEF_CSTR(dj_keyauthor, "author");
DEF_CSTR(dj_keymd, "modificationdate");
// charset and mimetype are explicitly blocked from going into the doc meta
DEF_CSTR(dj_keycharset, "charset");
DEF_CSTR(dj_keymt, "mimetype");
// All other meta fields are directly copied from
// Dijon::Filter::metaData to Rcl::Doc::meta. The definitions which
// follow are just for well-known names, with no particular processing
// in internfile.
DEF_CSTR(dj_keytitle, "title");
DEF_CSTR(dj_keyrecipient, "recipient");
DEF_CSTR(dj_keymsgid, "msgid");
DEF_CSTR(dj_keyabstract, "abstract");
DEF_CSTR(dj_keyauthor, "author");
DEF_CSTR(dj_keycharset, "charset");
DEF_CSTR(dj_keycontent, "content");
DEF_CSTR(dj_keyipath, "ipath");
DEF_CSTR(dj_keymd5, "md5");
DEF_CSTR(dj_keymt, "mimetype");
DEF_CSTR(dj_keyanc, "rclanc");
#endif /* _CSTR_H_INCLUDED_ */

View File

@ -174,10 +174,9 @@ protected:
/// The MIME type handled by the filter.
std::string m_mimeType;
/// Current Metadata dictionary. For multi-document files,
/// this may be rebuilt for each sub-document. See
/// common/cstr.h for the common key definitions. The document
/// text is "content"
/// Current Metadata dictionary. For multi-document files, this
/// may be rebuilt for each sub-document. See common/cstr.h for
/// the common key definitions.
std::map<std::string, std::string> m_metaData;
};

View File

@ -577,7 +577,7 @@ bool rfc2047_decode(const std::string& in, std::string &out)
}
if (value.length() > 0) {
transcode(value, utf8, "ISO-8859-1", "UTF-8");
transcode(value, utf8, "CP1252", "UTF-8");
out += utf8;
value.clear();
}