mimeparse: use cp1252 instead of iso-8859 on values with residual 8bit chars.

Also: comments and missing std:: qualifiers
This commit is contained in:
Jean-Francois Dockes 2018-11-22 17:41:43 +01:00
parent 01cea06201
commit 0cdcaea437
3 changed files with 32 additions and 23 deletions

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2011 J.F.Dockes /* Copyright (C) 2011-2018 J.F.Dockes
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or * the Free Software Foundation; either version 2 of the License, or
@ -27,7 +27,6 @@
// when they are used in a fast loop or are shared. // when they are used in a fast loop or are shared.
#include <string> #include <string>
using std::string;
// The following slightly hacky preprocessing directives and the // The following slightly hacky preprocessing directives and the
// companion code in the cpp file looks complicated, but it just // companion code in the cpp file looks complicated, but it just
@ -35,9 +34,9 @@ using std::string;
// extern declaration and the definition. // extern declaration and the definition.
#ifdef RCLIN_CSTR_CPPFILE #ifdef RCLIN_CSTR_CPPFILE
#undef DEF_CSTR #undef DEF_CSTR
#define DEF_CSTR(NM, STR) const string cstr_##NM(STR) #define DEF_CSTR(NM, STR) const std::string cstr_##NM(STR)
#else #else
#define DEF_CSTR(NM, STR) extern const string cstr_##NM #define DEF_CSTR(NM, STR) extern const std::string cstr_##NM
#endif #endif
DEF_CSTR(caption, "caption"); DEF_CSTR(caption, "caption");
@ -63,24 +62,35 @@ DEF_CSTR(fldhtm, "\007");
DEF_CSTR(wildSpecStChars, "*?["); DEF_CSTR(wildSpecStChars, "*?[");
DEF_CSTR(regSpecStChars, "(.[{"); DEF_CSTR(regSpecStChars, "(.[{");
// Values used as keys inside Dijon::Filter::metaData[]. This structure is // Values used as keys inside Dijon::Filter::metaData[].
// used to store all data generated by format-translating filters. It is
// different from Rcl::Doc for mostly historical reasons. The translation // The document data.
// from Filter to Doc occurs inside internfile.cpp DEF_CSTR(dj_keycontent, "content");
DEF_CSTR(dj_keyds, "description");
DEF_CSTR(dj_keyfn, "filename"); // These fields go from the topmost handler (text/plain) into the
DEF_CSTR(dj_keymd, "modificationdate"); // Rcl::Doc::meta, possibly with some massaging.
DEF_CSTR(dj_keyanc, "rclanc");
DEF_CSTR(dj_keyorigcharset, "origcharset"); DEF_CSTR(dj_keyorigcharset, "origcharset");
DEF_CSTR(dj_keyds, "description");
DEF_CSTR(dj_keyabstract, "abstract");
// Built or inherited along the handler stack, then copied to doc
DEF_CSTR(dj_keyipath, "ipath");
DEF_CSTR(dj_keyfn, "filename");
DEF_CSTR(dj_keyauthor, "author");
DEF_CSTR(dj_keymd, "modificationdate");
// charset and mimetype are explicitly blocked from going into the doc meta
DEF_CSTR(dj_keycharset, "charset");
DEF_CSTR(dj_keymt, "mimetype");
// All other meta fields are directly copied from
// Dijon::Filter::metaData to Rcl::Doc::meta. The definitions which
// follow are just for well-known names, with no particular processing
// in internfile.
DEF_CSTR(dj_keytitle, "title"); DEF_CSTR(dj_keytitle, "title");
DEF_CSTR(dj_keyrecipient, "recipient"); DEF_CSTR(dj_keyrecipient, "recipient");
DEF_CSTR(dj_keymsgid, "msgid"); DEF_CSTR(dj_keymsgid, "msgid");
DEF_CSTR(dj_keyabstract, "abstract");
DEF_CSTR(dj_keyauthor, "author");
DEF_CSTR(dj_keycharset, "charset");
DEF_CSTR(dj_keycontent, "content");
DEF_CSTR(dj_keyipath, "ipath");
DEF_CSTR(dj_keymd5, "md5"); DEF_CSTR(dj_keymd5, "md5");
DEF_CSTR(dj_keymt, "mimetype");
DEF_CSTR(dj_keyanc, "rclanc");
#endif /* _CSTR_H_INCLUDED_ */ #endif /* _CSTR_H_INCLUDED_ */

View File

@ -174,10 +174,9 @@ protected:
/// The MIME type handled by the filter. /// The MIME type handled by the filter.
std::string m_mimeType; std::string m_mimeType;
/// Current Metadata dictionary. For multi-document files, /// Current Metadata dictionary. For multi-document files, this
/// this may be rebuilt for each sub-document. See /// may be rebuilt for each sub-document. See common/cstr.h for
/// common/cstr.h for the common key definitions. The document /// the common key definitions.
/// text is "content"
std::map<std::string, std::string> m_metaData; std::map<std::string, std::string> m_metaData;
}; };

View File

@ -577,7 +577,7 @@ bool rfc2047_decode(const std::string& in, std::string &out)
} }
if (value.length() > 0) { if (value.length() > 0) {
transcode(value, utf8, "ISO-8859-1", "UTF-8"); transcode(value, utf8, "CP1252", "UTF-8");
out += utf8; out += utf8;
value.clear(); value.clear();
} }