Fix char signedness and size_t truncation problems.

* textsplit.cpp: name the size of the charclasses table, and index it
  through unsigned char so that a byte with the high bit set cannot turn
  into an out-of-range index on platforms where plain char is signed.
* textsplit.cpp (hasVisibleWhite): keep the value returned by the iterator
  as is. Casting it to unsigned char would narrow it to 0..255, which makes
  the "c == (unsigned int)-1" error test unreachable and alters every value
  above 0xFF before it is examined.
* mh_html.cpp, plaintorich.cpp: store the result of string::find() in
  string::size_type, so that the comparison with string::npos still works
  where size_type is wider than unsigned int.
* base64.cpp: go through unsigned char before widening the character for
  the debug message, so that the printed value is not sign-extended.

diff --git a/src/common/textsplit.cpp b/src/common/textsplit.cpp
index fa14b49b..e3d6a69a 100644
--- a/src/common/textsplit.cpp
+++ b/src/common/textsplit.cpp
@@ -50,9 +50,10 @@ using namespace std;
 // The array is actually a remnant of the original version which did no utf8.
 // Only the lower 127 slots are now used, but keep it at 256
 // because it makes some tests in the code simpler.
+const unsigned int charclasses_size = 256;
 enum CharClass {LETTER=256, SPACE=257, DIGIT=258, WILD=259,
                 A_ULETTER=260, A_LLETTER=261};
-static int charclasses[256];
+static int charclasses[charclasses_size];
 
 // Real UTF-8 characters are handled with sets holding all characters
 // with interesting properties. This is far from full-blown management
@@ -454,7 +455,7 @@ bool TextSplit::text_to_words(const string &in)
                 // This emits "camel" when hitting the 'C' of camelCase
             case A_ULETTER:
                 if (m_span.length() &&
-                    charclasses[(unsigned int)m_span[m_span.length() - 1]] ==
+                    charclasses[(unsigned char)m_span[m_span.length() - 1]] ==
                     A_LLETTER) {
                     if (m_wordLen) {
                         if (!doemit(false, it.getBpos()))
@@ -471,7 +472,7 @@ bool TextSplit::text_to_words(const string &in)
                 // Emit the uppercase word before proceeding
             case A_LLETTER:
                 if (m_span.length() &&
-                    charclasses[(unsigned int)m_span[m_span.length() - 1]] ==
+                    charclasses[(unsigned char)m_span[m_span.length() - 1]] ==
                     A_ULETTER && m_wordLen > 1) {
                     // Multiple upper-case letters. Single letter word
                     // or acronym which we want to emit now
@@ -611,7 +612,9 @@ bool TextSplit::hasVisibleWhite(const string &in)
     setcharclasses();
     Utf8Iter it(in);
     for (; !it.eof(); it++) {
+        // Keep the full value of *it: narrowing it to unsigned char would
+        // make the (unsigned int)-1 error test below unreachable.
         unsigned int c = *it;
         LOGDEB3(("TextSplit::hasVisibleWhite: testing 0x%04x\n", c));
         if (c == (unsigned int)-1) {
             LOGERR(("hasVisibleWhite: error while scanning UTF-8 string\n"));
diff --git a/src/internfile/mh_html.cpp b/src/internfile/mh_html.cpp
index 6d9bbe96..9799a18e 100644
--- a/src/internfile/mh_html.cpp
+++ b/src/internfile/mh_html.cpp
@@ -131,7 +131,7 @@ bool MimeHandlerHtml::next_document()
             // because the file was transcoded. It seems that just
             // inserting one is enough (only the 1st one seems to
             // be used by browsers/qtextedit).
-            unsigned int idx = m_html.find("");
+            string::size_type idx = m_html.find("");
             if (idx == string::npos)
                 idx = m_html.find("");
             if (idx != string::npos)
diff --git a/src/query/plaintorich.cpp b/src/query/plaintorich.cpp
index d56351f9..7c718cff 100644
--- a/src/query/plaintorich.cpp
+++ b/src/query/plaintorich.cpp
@@ -378,7 +378,7 @@ bool PlainToRich::plaintorich(const string& in,
     // My tag state
     int inrcltag = 0;
 
-    unsigned int headend = 0;
+    string::size_type headend = 0;
     if (m_inputhtml) {
         headend = in.find("");
         if (headend == string::npos)
diff --git a/src/utils/base64.cpp b/src/utils/base64.cpp
index 73e0abb4..ef2dec58 100644
--- a/src/utils/base64.cpp
+++ b/src/utils/base64.cpp
@@ -125,7 +125,7 @@ bool base64_decode(const string& in, string& out)
             for (; ii < in.length(); ch = in[ii++])
                 if (!isspace((unsigned char)ch)) {
                     DPRINT((stderr, "base64_dec: non-white at eod: 0x%x\n",
-                            (unsigned int)ch));
+                            (unsigned int)((unsigned char)ch)));
                     // Well, there are bad encoders out there. Let it pass
                     //return false;
                 }