From 0cdcaea437622a3394ce11266ec9be1ddeb18d0c Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Thu, 22 Nov 2018 17:41:43 +0100 Subject: [PATCH] mimeparse: use cp1252 instead of iso-8859 on values with residual 8bit chars. Also: comments and missing std:: qualifiers --- src/common/cstr.h | 46 +++++++++++++++++++++++++---------------- src/internfile/Filter.h | 7 +++---- src/utils/mimeparse.cpp | 2 +- 3 files changed, 32 insertions(+), 23 deletions(-) diff --git a/src/common/cstr.h b/src/common/cstr.h index 17dead32..0b6660d7 100644 --- a/src/common/cstr.h +++ b/src/common/cstr.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2011 J.F.Dockes +/* Copyright (C) 2011-2018 J.F.Dockes * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -27,7 +27,6 @@ // when they are used in a fast loop or are shared. #include <string> -using std::string; // The following slightly hacky preprocessing directives and the // companion code in the cpp file looks complicated, but it just @@ -35,9 +34,9 @@ using std::string; // extern declaration and the definition. #ifdef RCLIN_CSTR_CPPFILE #undef DEF_CSTR -#define DEF_CSTR(NM, STR) const string cstr_##NM(STR) +#define DEF_CSTR(NM, STR) const std::string cstr_##NM(STR) #else -#define DEF_CSTR(NM, STR) extern const string cstr_##NM +#define DEF_CSTR(NM, STR) extern const std::string cstr_##NM #endif DEF_CSTR(caption, "caption"); @@ -63,24 +62,35 @@ DEF_CSTR(fldhtm, "\007"); DEF_CSTR(wildSpecStChars, "*?["); DEF_CSTR(regSpecStChars, "(.[{"); -// Values used as keys inside Dijon::Filter::metaData[]. This structure is -// used to store all data generated by format-translating filters. It is -// different from Rcl::Doc for mostly historical reasons. 
The translation -// from Filter to Doc occurs inside internfile.cpp -DEF_CSTR(dj_keyds, "description"); -DEF_CSTR(dj_keyfn, "filename"); -DEF_CSTR(dj_keymd, "modificationdate"); +// Values used as keys inside Dijon::Filter::metaData[]. + +// The document data. +DEF_CSTR(dj_keycontent, "content"); + +// These fields go from the topmost handler (text/plain) into the +// Rcl::Doc::meta, possibly with some massaging. +DEF_CSTR(dj_keyanc, "rclanc"); DEF_CSTR(dj_keyorigcharset, "origcharset"); +DEF_CSTR(dj_keyds, "description"); +DEF_CSTR(dj_keyabstract, "abstract"); + +// Built or inherited along the handler stack, then copied to doc +DEF_CSTR(dj_keyipath, "ipath"); +DEF_CSTR(dj_keyfn, "filename"); +DEF_CSTR(dj_keyauthor, "author"); +DEF_CSTR(dj_keymd, "modificationdate"); +// charset and mimetype are explicitly blocked from going into the doc meta +DEF_CSTR(dj_keycharset, "charset"); +DEF_CSTR(dj_keymt, "mimetype"); + +// All other meta fields are directly copied from +// Dijon::Filter::metaData to Rcl::Doc::meta. The definitions which +// follow are just for well-known names, with no particular processing +// in internfile. DEF_CSTR(dj_keytitle, "title"); DEF_CSTR(dj_keyrecipient, "recipient"); DEF_CSTR(dj_keymsgid, "msgid"); -DEF_CSTR(dj_keyabstract, "abstract"); -DEF_CSTR(dj_keyauthor, "author"); -DEF_CSTR(dj_keycharset, "charset"); -DEF_CSTR(dj_keycontent, "content"); -DEF_CSTR(dj_keyipath, "ipath"); DEF_CSTR(dj_keymd5, "md5"); -DEF_CSTR(dj_keymt, "mimetype"); -DEF_CSTR(dj_keyanc, "rclanc"); + #endif /* _CSTR_H_INCLUDED_ */ diff --git a/src/internfile/Filter.h b/src/internfile/Filter.h index 35c5e584..c125efa7 100644 --- a/src/internfile/Filter.h +++ b/src/internfile/Filter.h @@ -174,10 +174,9 @@ protected: /// The MIME type handled by the filter. std::string m_mimeType; - /// Current Metadata dictionary. For multi-document files, - /// this may be rebuilt for each sub-document. See - /// common/cstr.h for the common key definitions. 
The document - /// text is "content" + /// Current Metadata dictionary. For multi-document files, this + /// may be rebuilt for each sub-document. See common/cstr.h for + /// the common key definitions. std::map<std::string, std::string> m_metaData; }; diff --git a/src/utils/mimeparse.cpp b/src/utils/mimeparse.cpp index 876b29d6..8417d400 100644 --- a/src/utils/mimeparse.cpp +++ b/src/utils/mimeparse.cpp @@ -577,7 +577,7 @@ bool rfc2047_decode(const std::string& in, std::string &out) } if (value.length() > 0) { - transcode(value, utf8, "ISO-8859-1", "UTF-8"); + transcode(value, utf8, "CP1252", "UTF-8"); out += utf8; value.clear(); }