From c1c73573d894d950b775cea8f04d0f3d4cb22cbe Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Wed, 2 Sep 2015 07:34:59 +0200 Subject: [PATCH] more int fixups --HG-- branch : WINDOWSPORT --- src/bincimapmime/convert.h | 1 + src/bincimapmime/mime-parsefull.cc | 4 +-- src/common/textsplit.cpp | 4 +-- src/common/textsplit.h | 2 +- src/query/plaintorich.cpp | 4 +-- src/rcldb/searchdata.h | 2 +- src/unac/unac.c | 8 +++--- src/utils/base64.cpp | 2 +- src/utils/smallut.cpp | 33 ++++++++++++++++++++-- unac/unac.c | 45 +++++++++++++++--------------- 10 files changed, 68 insertions(+), 37 deletions(-) diff --git a/src/bincimapmime/convert.h b/src/bincimapmime/convert.h index a5304f02..2ed9b3a7 100644 --- a/src/bincimapmime/convert.h +++ b/src/bincimapmime/convert.h @@ -25,6 +25,7 @@ */ #ifndef convert_h_included #define convert_h_included +#include #include #include #include diff --git a/src/bincimapmime/mime-parsefull.cc b/src/bincimapmime/mime-parsefull.cc index de7c670a..e4a3e3a0 100644 --- a/src/bincimapmime/mime-parsefull.cc +++ b/src/bincimapmime/mime-parsefull.cc @@ -308,7 +308,7 @@ bool Binc::MimePart::skipUntilBoundary(const string &delimiter, { string::size_type endpos = delimiter.length(); char *delimiterqueue = 0; - int delimiterpos = 0; + string::size_type delimiterpos = 0; const char *delimiterStr = delimiter.c_str(); if (delimiter != "") { delimiterqueue = new char[endpos]; @@ -540,7 +540,7 @@ void Binc::MimePart::parseSinglePart(const string &toboundary, string line; bool toboundaryIsEmpty = (toboundary == ""); char c; - int boundarypos = 0; + string::size_type boundarypos = 0; while (mimeSource->getChar(&c)) { if (c == '\n') { ++*nbodylines; ++*nlines; } diff --git a/src/common/textsplit.cpp b/src/common/textsplit.cpp index 2adc380b..df9b151f 100644 --- a/src/common/textsplit.cpp +++ b/src/common/textsplit.cpp @@ -223,7 +223,7 @@ inline bool TextSplit::emitterm(bool isspan, string &w, int pos, { LOGDEB2(("TextSplit::emitterm: [%s] pos %d\n", w.c_str(), pos)); - size_t l = w.length(); + int l = int(w.length()); #ifdef TEXTSPLIT_STATS // Update word length statistics. Do this before we filter out @@ -232,7 +232,7 @@ inline bool TextSplit::emitterm(bool isspan, string &w, int pos, m_stats.newsamp(m_wordChars); #endif - if (l > 0 && l < (unsigned)m_maxWordLength) { + if (l > 0 && l < m_maxWordLength) { // 1 byte word: we index single ascii letters and digits, but // nothing else. We might want to turn this into a test for a // single utf8 character instead ? diff --git a/src/common/textsplit.h b/src/common/textsplit.h index 66a49675..d408bb2e 100644 --- a/src/common/textsplit.h +++ b/src/common/textsplit.h @@ -184,7 +184,7 @@ private: // Current span. Might be jf.dockes@wanadoo.f std::string m_span; - std::vector > m_words_in_span; + std::vector > m_words_in_span; // Current word: no punctuation at all in there. Byte offset // relative to the current span and byte length diff --git a/src/query/plaintorich.cpp b/src/query/plaintorich.cpp index 50b6d1ba..2ad45fed 100644 --- a/src/query/plaintorich.cpp +++ b/src/query/plaintorich.cpp @@ -15,7 +15,7 @@ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - +#include #include #include #include @@ -417,7 +417,7 @@ bool PlainToRich::plaintorich(const string& in, // If we still have terms positions, check (byte) position. If // we are at or after a term match, mark. if (tPosIt != tPosEnd) { - size_t ibyteidx = chariter.getBpos(); + int ibyteidx = int(chariter.getBpos()); if (ibyteidx == tPosIt->offs.first) { if (!intag && ibyteidx >= (int)headend) { *olit += startMatch((unsigned int)(tPosIt->grpidx)); diff --git a/src/rcldb/searchdata.h b/src/rcldb/searchdata.h index 62d8c64e..a83399ec 100644 --- a/src/rcldb/searchdata.h +++ b/src/rcldb/searchdata.h @@ -270,7 +270,7 @@ public: { return m_parentSearch ? m_parentSearch->getMaxExp() : 10000; } - int getMaxCl() + size_t getMaxCl() { return m_parentSearch ? m_parentSearch->getMaxCl() : 100000; } diff --git a/src/unac/unac.c b/src/unac/unac.c index 358c88f3..1c310831 100644 --- a/src/unac/unac.c +++ b/src/unac/unac.c @@ -14171,8 +14171,8 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length, { char* out; size_t out_size; - int out_length; - unsigned int i; + size_t out_length; + size_t i; out_size = in_length > 0 ? in_length : 1024; @@ -14191,7 +14191,7 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length, unsigned short c; unsigned short* p; size_t l; - int k; + size_t k; c = (in[i] << 8) | (in[i + 1] & 0xff); /* * Lookup the tables for decomposition information @@ -14236,7 +14236,7 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length, if(l == 0) { DEBUG_APPEND("untouched\n"); } else { - int i; + size_t i; for(i = 0; i < l; i++) DEBUG_APPEND("0x%04x ", p[i]); DEBUG_APPEND("\n"); diff --git a/src/utils/base64.cpp b/src/utils/base64.cpp index fb5e2f09..82651b15 100644 --- a/src/utils/base64.cpp +++ b/src/utils/base64.cpp @@ -244,7 +244,7 @@ void base64_encode(const string &in, string &out) if (0 != srclength) { /* Get what's left. */ input[0] = input[1] = input[2] = '\0'; - for (int i = 0; i < srclength; i++) + for (string::size_type i = 0; i < srclength; i++) input[i] = in[sidx++]; output[0] = input[0] >> 2; diff --git a/src/utils/smallut.cpp b/src/utils/smallut.cpp index cfb8716b..f8ab9431 100644 --- a/src/utils/smallut.cpp +++ b/src/utils/smallut.cpp @@ -1341,7 +1341,7 @@ int main(int argc, char **argv) { thisprog = *argv++;argc--; -#if 1 +#if 0 if (argc <=0 ) { cerr << "Usage: smallut " << endl; exit(1); @@ -1446,8 +1446,37 @@ int main(int argc, char **argv) string out; stringsToCSV(tokens, out); cout << "CSV line: [" << out << "]" << endl; -#endif +#elif 1 + string sshort("ABC"); + string slong("ABCD"); + string sshortsmaller("ABB"); + + vector > cmps; + cmps.push_back(pair(sshort,sshort)); + cmps.push_back(pair(sshort,slong)); + cmps.push_back(pair(slong,sshort)); + cmps.push_back(pair(sshortsmaller,sshort)); + cmps.push_back(pair(sshort, sshortsmaller)); + for (vector >::const_iterator it = cmps.begin(); + it != cmps.end(); it++) { + cout << it->first << " " << it->second << " " << + stringicmp(it->first, it->second) << endl; + } + cout << endl; + for (vector >::const_iterator it = cmps.begin(); + it != cmps.end(); it++) { + cout << it->first << " " << it->second << " " << + stringlowercmp(stringtolower(it->first), it->second) << endl; + } + cout << endl; + for (vector >::const_iterator it = cmps.begin(); + it != cmps.end(); it++) { + cout << it->first << " " << it->second << " " << + stringuppercmp(it->first, it->second) << endl; + } + +#endif } #endif diff --git a/unac/unac.c b/unac/unac.c index 8356e6ae..1c310831 100644 --- a/unac/unac.c +++ b/unac/unac.c @@ -16,22 +16,22 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#ifdef HAVE_CONFIG_H -#ifdef RECOLL_DATADIR +#ifdef BUILDING_RECOLL #include "autoconfig.h" #else #include "config.h" #endif /* RECOLL */ -#endif /* HAVE_CONFIG_H */ -#ifdef RECOLL_DATADIR +#ifdef BUILDING_RECOLL /* Yes, recoll unac is actually c++, lets face modernity, I will not be caught writing another binary search */ #include #include #include #include -#include "unordered_defs.h" +#include +#include UNORDERED_MAP_INCLUDE + using std::string; #include "smallut.h" @@ -52,7 +52,7 @@ static inline bool is_except_char(unsigned short c, string& trans) trans = it->second; return true; } -#endif /* RECOLL_DATADIR */ +#endif /* BUILDING_RECOLL*/ /* * If configure.in has not defined this symbol, assume const. It @@ -14170,9 +14170,9 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length, char** outp, size_t* out_lengthp, int what) { char* out; - int out_size; - int out_length; - unsigned int i; + size_t out_size; + size_t out_length; + size_t i; out_size = in_length > 0 ? in_length : 1024; @@ -14190,13 +14190,13 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length, for(i = 0; i < in_length; i += 2) { unsigned short c; unsigned short* p; - int l; - int k; + size_t l; + size_t k; c = (in[i] << 8) | (in[i + 1] & 0xff); /* * Lookup the tables for decomposition information */ -#ifdef RECOLL_DATADIR +#ifdef BUILDING_RECOLL // Exception unac/fold values set by user. There should be 3 arrays for // unac/fold/unac+fold. For now there is only one array, which used to // be set for unac+fold, and is mostly or only used to prevent diacritics @@ -14219,11 +14219,11 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length, l = trans.size() / 2; } } else { -#endif /* RECOLL_DATADIR */ +#endif /* BUILDING_RECOLL */ unac_uf_char_utf16_(c, p, l, what) -#ifdef RECOLL_DATADIR +#ifdef BUILDING_RECOLL } -#endif /* RECOLL_DATADIR */ +#endif /* BUILDING_RECOLL */ /* * Explain what's done in great detail @@ -14236,7 +14236,7 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length, if(l == 0) { DEBUG_APPEND("untouched\n"); } else { - int i; + size_t i; for(i = 0; i < l; i++) DEBUG_APPEND("0x%04x ", p[i]); DEBUG_APPEND("\n"); @@ -14436,10 +14436,11 @@ static int convert(const char* from, const char* to, const char* tmp = space; size_t tmp_length = 2; if(iconv(cd, (ICONV_CONST char **) &tmp, &tmp_length, &out, &out_remain) == (size_t)-1) { - if(errno == E2BIG) + if(errno == E2BIG) { /* fall thru to the E2BIG case below */; - else - goto out; + } else { + goto out; + } } else { /* The offending character was replaced by a SPACE, skip it. */ in += 2; @@ -14455,7 +14456,7 @@ static int convert(const char* from, const char* to, /* * The output does not fit in the current out buffer, enlarge it. */ - int length = out - out_base; + size_t length = out - out_base; out_size *= 2; { char *saved = out_base; @@ -14561,7 +14562,7 @@ const char* unac_version(void) return UNAC_VERSION; } -#ifdef RECOLL_DATADIR +#ifdef BUILDING_RECOLL void unac_set_except_translations(const char *spectrans) { except_trans.clear(); @@ -14614,4 +14615,4 @@ void unac_set_except_translations(const char *spectrans) free(out); } } -#endif /* RECOLL_DATADIR */ +#endif /* BUILDING_RECOLL */