diff --git a/src/internfile/mh_html.cpp b/src/internfile/mh_html.cpp index 0ca7ac5d..ff007fad 100644 --- a/src/internfile/mh_html.cpp +++ b/src/internfile/mh_html.cpp @@ -33,6 +33,7 @@ #include "myhtmlparse.h" #include "indextext.h" #include "mh_html.h" +#include "smallut.h" #include using namespace std; @@ -101,7 +102,7 @@ MimeHandlerHtml::mkDoc(RclConfig *conf, const string &, } catch (bool) { pres = p; if (!pres.doccharset.empty() && - pres.doccharset != pres.ocharset) { + !samecharset(pres.doccharset, pres.ocharset)) { LOGDEB(("textHtmlToDoc: charset '%s' doc charset '%s'," "reparse\n", charset.c_str(),pres.doccharset.c_str())); charset = pres.doccharset; diff --git a/src/utils/smallut.cpp b/src/utils/smallut.cpp index 83aab07d..bbc40d89 100644 --- a/src/utils/smallut.cpp +++ b/src/utils/smallut.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: smallut.cpp,v 1.6 2005-10-21 08:14:42 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: smallut.cpp,v 1.7 2005-11-23 10:16:28 dockes Exp $ (C) 2004 J.F.Dockes"; #endif #ifndef TEST_SMALLUT #include @@ -156,6 +156,24 @@ int stringuppercmp(const string & s1, const string& s2) } } +// Compare charset names, removing the more common spelling variations +bool samecharset(const string &cs1, const string &cs2) +{ + string mcs1, mcs2; + // Remove all - and _, turn to lowercase + for (int i = 0; i < cs1.length();i++) { + if (cs1[i] != '_' && cs1[i] != '-') { + mcs1 += ::tolower(cs1[i]); + } + } + for (int i = 0; i < cs2.length();i++) { + if (cs2[i] != '_' && cs2[i] != '-') { + mcs2 += ::tolower(cs2[i]); + } + } + return mcs1 == mcs2; +} + #else #include diff --git a/src/utils/smallut.h b/src/utils/smallut.h index 44f0a8aa..e5c0c847 100644 --- a/src/utils/smallut.h +++ b/src/utils/smallut.h @@ -1,6 +1,6 @@ #ifndef _SMALLUT_H_INCLUDED_ #define _SMALLUT_H_INCLUDED_ -/* @(#$Id: smallut.h,v 1.6 2005-04-06 10:20:11 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: smallut.h,v 1.7 2005-11-23 10:16:28 dockes 
Exp $ (C) 2004 J.F.Dockes */ #include #include #include @@ -11,6 +11,9 @@ extern int stringicmp(const string& s1, const string& s2); extern int stringlowercmp(const string& alreadylower, const string& s2); extern int stringuppercmp(const string& alreadyupper, const string& s2); +// Compare charset names, removing the more common spelling variations +extern bool samecharset(const string &cs1, const string &cs2); + extern bool maketmpdir(string& tdir); extern string stringlistdisp(const list& strs);