small amd64 fixes: 64-bit size_type, signed chars

2009-12-13 16:13:59 +00:00 · 2009-12-13 16:13:59 +00:00 · bf3ac8e053
commit bf3ac8e053
parent 658a21d5ad
4 changed files with 8 additions and 7 deletions
--- a/src/common/textsplit.cpp
+++ b/src/common/textsplit.cpp
@ -50,9 +50,10 @@ using namespace std;
 // The array is actually a remnant of the original version which did no utf8.
 // Only the lower 127 slots are  now used, but keep it at 256
 // because it makes some tests in the code simpler.
+const unsigned int charclasses_size = 256;
 enum CharClass {LETTER=256, SPACE=257, DIGIT=258, WILD=259, 
                A_ULETTER=260, A_LLETTER=261};
-static int charclasses[256];
+static int charclasses[charclasses_size];

 // Real UTF-8 characters are handled with sets holding all characters
 // with interesting properties. This is far from full-blown management
@ -454,7 +455,7 @@ bool TextSplit::text_to_words(const string &in)
            // This emits "camel" when hitting the 'C' of camelCase
 	case A_ULETTER:
 	    if (m_span.length() && 
-                charclasses[(unsigned int)m_span[m_span.length() - 1]] == 
+                charclasses[(unsigned char)m_span[m_span.length() - 1]] == 
                A_LLETTER) {
                if (m_wordLen) {
                    if (!doemit(false, it.getBpos()))
@ -471,7 +472,7 @@ bool TextSplit::text_to_words(const string &in)
            // Emit the uppercase word before proceeding
        case A_LLETTER:
 	    if (m_span.length() && 
-                charclasses[(unsigned int)m_span[m_span.length() - 1]] == 
+                charclasses[(unsigned char)m_span[m_span.length() - 1]] == 
                A_ULETTER && m_wordLen > 1) {
                // Multiple upper-case letters. Single letter word
                // or acronym which we want to emit now
@ -611,7 +612,7 @@ bool TextSplit::hasVisibleWhite(const string &in)
    setcharclasses();
    Utf8Iter it(in);
    for (; !it.eof(); it++) {
-	unsigned int c = *it;
+	unsigned int c = (unsigned char)*it;
 	LOGDEB3(("TextSplit::hasVisibleWhite: testing 0x%04x\n", c));
 	if (c == (unsigned int)-1) {
 	    LOGERR(("hasVisibleWhite: error while scanning UTF-8 string\n"));
--- a/src/internfile/mh_html.cpp
+++ b/src/internfile/mh_html.cpp
@ -131,7 +131,7 @@ bool MimeHandlerHtml::next_document()
 		    // because the file was transcoded. It seems that just
 		    // inserting one is enough (only the 1st one seems to
 		    // be used by browsers/qtextedit).
-		    unsigned int idx = m_html.find("<head>");
+                    string::size_type idx = m_html.find("<head>");
 		    if (idx == string::npos)
 			idx = m_html.find("<HEAD>");
 		    if (idx != string::npos)
--- a/src/query/plaintorich.cpp
+++ b/src/query/plaintorich.cpp
@ -378,7 +378,7 @@ bool PlainToRich::plaintorich(const string& in,
    // My tag state
    int inrcltag = 0;

-    unsigned int headend = 0;
+    string::size_type headend = 0;
    if (m_inputhtml) {
 	headend = in.find("</head>");
 	if (headend == string::npos)
--- a/src/utils/base64.cpp
+++ b/src/utils/base64.cpp
@ -125,7 +125,7 @@ bool base64_decode(const string& in, string& out)
 	    for (; ii < in.length(); ch = in[ii++])
 		if (!isspace((unsigned char)ch)) {
 		    DPRINT((stderr, "base64_dec: non-white at eod: 0x%x\n", 
-			    (unsigned int)ch));
+			    (unsigned int)((unsigned char)ch)));
 		    // Well, there are bad encoders out there. Let it pass
 		    //return false;
 		}