more int fixups
--HG-- branch : WINDOWSPORT
This commit is contained in:
parent
1cbf02f713
commit
c1c73573d8
@ -25,6 +25,7 @@
|
|||||||
*/
|
*/
|
||||||
#ifndef convert_h_included
|
#ifndef convert_h_included
|
||||||
#define convert_h_included
|
#define convert_h_included
|
||||||
|
#include <stddef.h>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
|
|||||||
@ -308,7 +308,7 @@ bool Binc::MimePart::skipUntilBoundary(const string &delimiter,
|
|||||||
{
|
{
|
||||||
string::size_type endpos = delimiter.length();
|
string::size_type endpos = delimiter.length();
|
||||||
char *delimiterqueue = 0;
|
char *delimiterqueue = 0;
|
||||||
int delimiterpos = 0;
|
string::size_type delimiterpos = 0;
|
||||||
const char *delimiterStr = delimiter.c_str();
|
const char *delimiterStr = delimiter.c_str();
|
||||||
if (delimiter != "") {
|
if (delimiter != "") {
|
||||||
delimiterqueue = new char[endpos];
|
delimiterqueue = new char[endpos];
|
||||||
@ -540,7 +540,7 @@ void Binc::MimePart::parseSinglePart(const string &toboundary,
|
|||||||
string line;
|
string line;
|
||||||
bool toboundaryIsEmpty = (toboundary == "");
|
bool toboundaryIsEmpty = (toboundary == "");
|
||||||
char c;
|
char c;
|
||||||
int boundarypos = 0;
|
string::size_type boundarypos = 0;
|
||||||
while (mimeSource->getChar(&c)) {
|
while (mimeSource->getChar(&c)) {
|
||||||
if (c == '\n') { ++*nbodylines; ++*nlines; }
|
if (c == '\n') { ++*nbodylines; ++*nlines; }
|
||||||
|
|
||||||
|
|||||||
@ -223,7 +223,7 @@ inline bool TextSplit::emitterm(bool isspan, string &w, int pos,
|
|||||||
{
|
{
|
||||||
LOGDEB2(("TextSplit::emitterm: [%s] pos %d\n", w.c_str(), pos));
|
LOGDEB2(("TextSplit::emitterm: [%s] pos %d\n", w.c_str(), pos));
|
||||||
|
|
||||||
size_t l = w.length();
|
int l = int(w.length());
|
||||||
|
|
||||||
#ifdef TEXTSPLIT_STATS
|
#ifdef TEXTSPLIT_STATS
|
||||||
// Update word length statistics. Do this before we filter out
|
// Update word length statistics. Do this before we filter out
|
||||||
@ -232,7 +232,7 @@ inline bool TextSplit::emitterm(bool isspan, string &w, int pos,
|
|||||||
m_stats.newsamp(m_wordChars);
|
m_stats.newsamp(m_wordChars);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (l > 0 && l < (unsigned)m_maxWordLength) {
|
if (l > 0 && l < m_maxWordLength) {
|
||||||
// 1 byte word: we index single ascii letters and digits, but
|
// 1 byte word: we index single ascii letters and digits, but
|
||||||
// nothing else. We might want to turn this into a test for a
|
// nothing else. We might want to turn this into a test for a
|
||||||
// single utf8 character instead ?
|
// single utf8 character instead ?
|
||||||
|
|||||||
@ -184,7 +184,7 @@ private:
|
|||||||
// Current span. Might be jf.dockes@wanadoo.f
|
// Current span. Might be jf.dockes@wanadoo.f
|
||||||
std::string m_span;
|
std::string m_span;
|
||||||
|
|
||||||
std::vector <std::pair<unsigned int, unsigned int> > m_words_in_span;
|
std::vector <std::pair<int, int> > m_words_in_span;
|
||||||
|
|
||||||
// Current word: no punctuation at all in there. Byte offset
|
// Current word: no punctuation at all in there. Byte offset
|
||||||
// relative to the current span and byte length
|
// relative to the current span and byte length
|
||||||
|
|||||||
@ -15,7 +15,7 @@
|
|||||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include <limits.h>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <list>
|
#include <list>
|
||||||
@ -417,7 +417,7 @@ bool PlainToRich::plaintorich(const string& in,
|
|||||||
// If we still have terms positions, check (byte) position. If
|
// If we still have terms positions, check (byte) position. If
|
||||||
// we are at or after a term match, mark.
|
// we are at or after a term match, mark.
|
||||||
if (tPosIt != tPosEnd) {
|
if (tPosIt != tPosEnd) {
|
||||||
size_t ibyteidx = chariter.getBpos();
|
int ibyteidx = int(chariter.getBpos());
|
||||||
if (ibyteidx == tPosIt->offs.first) {
|
if (ibyteidx == tPosIt->offs.first) {
|
||||||
if (!intag && ibyteidx >= (int)headend) {
|
if (!intag && ibyteidx >= (int)headend) {
|
||||||
*olit += startMatch((unsigned int)(tPosIt->grpidx));
|
*olit += startMatch((unsigned int)(tPosIt->grpidx));
|
||||||
|
|||||||
@ -270,7 +270,7 @@ public:
|
|||||||
{
|
{
|
||||||
return m_parentSearch ? m_parentSearch->getMaxExp() : 10000;
|
return m_parentSearch ? m_parentSearch->getMaxExp() : 10000;
|
||||||
}
|
}
|
||||||
int getMaxCl()
|
size_t getMaxCl()
|
||||||
{
|
{
|
||||||
return m_parentSearch ? m_parentSearch->getMaxCl() : 100000;
|
return m_parentSearch ? m_parentSearch->getMaxCl() : 100000;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -14171,8 +14171,8 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length,
|
|||||||
{
|
{
|
||||||
char* out;
|
char* out;
|
||||||
size_t out_size;
|
size_t out_size;
|
||||||
int out_length;
|
size_t out_length;
|
||||||
unsigned int i;
|
size_t i;
|
||||||
|
|
||||||
out_size = in_length > 0 ? in_length : 1024;
|
out_size = in_length > 0 ? in_length : 1024;
|
||||||
|
|
||||||
@ -14191,7 +14191,7 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length,
|
|||||||
unsigned short c;
|
unsigned short c;
|
||||||
unsigned short* p;
|
unsigned short* p;
|
||||||
size_t l;
|
size_t l;
|
||||||
int k;
|
size_t k;
|
||||||
c = (in[i] << 8) | (in[i + 1] & 0xff);
|
c = (in[i] << 8) | (in[i + 1] & 0xff);
|
||||||
/*
|
/*
|
||||||
* Lookup the tables for decomposition information
|
* Lookup the tables for decomposition information
|
||||||
@ -14236,7 +14236,7 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length,
|
|||||||
if(l == 0) {
|
if(l == 0) {
|
||||||
DEBUG_APPEND("untouched\n");
|
DEBUG_APPEND("untouched\n");
|
||||||
} else {
|
} else {
|
||||||
int i;
|
size_t i;
|
||||||
for(i = 0; i < l; i++)
|
for(i = 0; i < l; i++)
|
||||||
DEBUG_APPEND("0x%04x ", p[i]);
|
DEBUG_APPEND("0x%04x ", p[i]);
|
||||||
DEBUG_APPEND("\n");
|
DEBUG_APPEND("\n");
|
||||||
|
|||||||
@ -244,7 +244,7 @@ void base64_encode(const string &in, string &out)
|
|||||||
if (0 != srclength) {
|
if (0 != srclength) {
|
||||||
/* Get what's left. */
|
/* Get what's left. */
|
||||||
input[0] = input[1] = input[2] = '\0';
|
input[0] = input[1] = input[2] = '\0';
|
||||||
for (int i = 0; i < srclength; i++)
|
for (string::size_type i = 0; i < srclength; i++)
|
||||||
input[i] = in[sidx++];
|
input[i] = in[sidx++];
|
||||||
|
|
||||||
output[0] = input[0] >> 2;
|
output[0] = input[0] >> 2;
|
||||||
|
|||||||
@ -1341,7 +1341,7 @@ int main(int argc, char **argv)
|
|||||||
{
|
{
|
||||||
thisprog = *argv++;argc--;
|
thisprog = *argv++;argc--;
|
||||||
|
|
||||||
#if 1
|
#if 0
|
||||||
if (argc <=0 ) {
|
if (argc <=0 ) {
|
||||||
cerr << "Usage: smallut <stringtosplit>" << endl;
|
cerr << "Usage: smallut <stringtosplit>" << endl;
|
||||||
exit(1);
|
exit(1);
|
||||||
@ -1446,8 +1446,37 @@ int main(int argc, char **argv)
|
|||||||
string out;
|
string out;
|
||||||
stringsToCSV(tokens, out);
|
stringsToCSV(tokens, out);
|
||||||
cout << "CSV line: [" << out << "]" << endl;
|
cout << "CSV line: [" << out << "]" << endl;
|
||||||
#endif
|
#elif 1
|
||||||
|
string sshort("ABC");
|
||||||
|
string slong("ABCD");
|
||||||
|
string sshortsmaller("ABB");
|
||||||
|
|
||||||
|
vector<pair<string,string> > cmps;
|
||||||
|
cmps.push_back(pair<string,string>(sshort,sshort));
|
||||||
|
cmps.push_back(pair<string,string>(sshort,slong));
|
||||||
|
cmps.push_back(pair<string,string>(slong,sshort));
|
||||||
|
cmps.push_back(pair<string,string>(sshortsmaller,sshort));
|
||||||
|
cmps.push_back(pair<string,string>(sshort, sshortsmaller));
|
||||||
|
|
||||||
|
for (vector<pair<string,string> >::const_iterator it = cmps.begin();
|
||||||
|
it != cmps.end(); it++) {
|
||||||
|
cout << it->first << " " << it->second << " " <<
|
||||||
|
stringicmp(it->first, it->second) << endl;
|
||||||
|
}
|
||||||
|
cout << endl;
|
||||||
|
for (vector<pair<string,string> >::const_iterator it = cmps.begin();
|
||||||
|
it != cmps.end(); it++) {
|
||||||
|
cout << it->first << " " << it->second << " " <<
|
||||||
|
stringlowercmp(stringtolower(it->first), it->second) << endl;
|
||||||
|
}
|
||||||
|
cout << endl;
|
||||||
|
for (vector<pair<string,string> >::const_iterator it = cmps.begin();
|
||||||
|
it != cmps.end(); it++) {
|
||||||
|
cout << it->first << " " << it->second << " " <<
|
||||||
|
stringuppercmp(it->first, it->second) << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
45
unac/unac.c
45
unac/unac.c
@ -16,22 +16,22 @@
|
|||||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifdef HAVE_CONFIG_H
|
#ifdef BUILDING_RECOLL
|
||||||
#ifdef RECOLL_DATADIR
|
|
||||||
#include "autoconfig.h"
|
#include "autoconfig.h"
|
||||||
#else
|
#else
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
#endif /* RECOLL */
|
#endif /* RECOLL */
|
||||||
#endif /* HAVE_CONFIG_H */
|
|
||||||
|
|
||||||
#ifdef RECOLL_DATADIR
|
#ifdef BUILDING_RECOLL
|
||||||
/* Yes, recoll unac is actually c++, lets face modernity, I will not be
|
/* Yes, recoll unac is actually c++, lets face modernity, I will not be
|
||||||
caught writing another binary search */
|
caught writing another binary search */
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include "unordered_defs.h"
|
#include <iostream>
|
||||||
|
#include UNORDERED_MAP_INCLUDE
|
||||||
|
|
||||||
using std::string;
|
using std::string;
|
||||||
|
|
||||||
#include "smallut.h"
|
#include "smallut.h"
|
||||||
@ -52,7 +52,7 @@ static inline bool is_except_char(unsigned short c, string& trans)
|
|||||||
trans = it->second;
|
trans = it->second;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
#endif /* RECOLL_DATADIR */
|
#endif /* BUILDING_RECOLL*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If configure.in has not defined this symbol, assume const. It
|
* If configure.in has not defined this symbol, assume const. It
|
||||||
@ -14170,9 +14170,9 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length,
|
|||||||
char** outp, size_t* out_lengthp, int what)
|
char** outp, size_t* out_lengthp, int what)
|
||||||
{
|
{
|
||||||
char* out;
|
char* out;
|
||||||
int out_size;
|
size_t out_size;
|
||||||
int out_length;
|
size_t out_length;
|
||||||
unsigned int i;
|
size_t i;
|
||||||
|
|
||||||
out_size = in_length > 0 ? in_length : 1024;
|
out_size = in_length > 0 ? in_length : 1024;
|
||||||
|
|
||||||
@ -14190,13 +14190,13 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length,
|
|||||||
for(i = 0; i < in_length; i += 2) {
|
for(i = 0; i < in_length; i += 2) {
|
||||||
unsigned short c;
|
unsigned short c;
|
||||||
unsigned short* p;
|
unsigned short* p;
|
||||||
int l;
|
size_t l;
|
||||||
int k;
|
size_t k;
|
||||||
c = (in[i] << 8) | (in[i + 1] & 0xff);
|
c = (in[i] << 8) | (in[i + 1] & 0xff);
|
||||||
/*
|
/*
|
||||||
* Lookup the tables for decomposition information
|
* Lookup the tables for decomposition information
|
||||||
*/
|
*/
|
||||||
#ifdef RECOLL_DATADIR
|
#ifdef BUILDING_RECOLL
|
||||||
// Exception unac/fold values set by user. There should be 3 arrays for
|
// Exception unac/fold values set by user. There should be 3 arrays for
|
||||||
// unac/fold/unac+fold. For now there is only one array, which used to
|
// unac/fold/unac+fold. For now there is only one array, which used to
|
||||||
// be set for unac+fold, and is mostly or only used to prevent diacritics
|
// be set for unac+fold, and is mostly or only used to prevent diacritics
|
||||||
@ -14219,11 +14219,11 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length,
|
|||||||
l = trans.size() / 2;
|
l = trans.size() / 2;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
#endif /* RECOLL_DATADIR */
|
#endif /* BUILDING_RECOLL */
|
||||||
unac_uf_char_utf16_(c, p, l, what)
|
unac_uf_char_utf16_(c, p, l, what)
|
||||||
#ifdef RECOLL_DATADIR
|
#ifdef BUILDING_RECOLL
|
||||||
}
|
}
|
||||||
#endif /* RECOLL_DATADIR */
|
#endif /* BUILDING_RECOLL */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Explain what's done in great detail
|
* Explain what's done in great detail
|
||||||
@ -14236,7 +14236,7 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length,
|
|||||||
if(l == 0) {
|
if(l == 0) {
|
||||||
DEBUG_APPEND("untouched\n");
|
DEBUG_APPEND("untouched\n");
|
||||||
} else {
|
} else {
|
||||||
int i;
|
size_t i;
|
||||||
for(i = 0; i < l; i++)
|
for(i = 0; i < l; i++)
|
||||||
DEBUG_APPEND("0x%04x ", p[i]);
|
DEBUG_APPEND("0x%04x ", p[i]);
|
||||||
DEBUG_APPEND("\n");
|
DEBUG_APPEND("\n");
|
||||||
@ -14436,10 +14436,11 @@ static int convert(const char* from, const char* to,
|
|||||||
const char* tmp = space;
|
const char* tmp = space;
|
||||||
size_t tmp_length = 2;
|
size_t tmp_length = 2;
|
||||||
if(iconv(cd, (ICONV_CONST char **) &tmp, &tmp_length, &out, &out_remain) == (size_t)-1) {
|
if(iconv(cd, (ICONV_CONST char **) &tmp, &tmp_length, &out, &out_remain) == (size_t)-1) {
|
||||||
if(errno == E2BIG)
|
if(errno == E2BIG) {
|
||||||
/* fall thru to the E2BIG case below */;
|
/* fall thru to the E2BIG case below */;
|
||||||
else
|
} else {
|
||||||
goto out;
|
goto out;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
/* The offending character was replaced by a SPACE, skip it. */
|
/* The offending character was replaced by a SPACE, skip it. */
|
||||||
in += 2;
|
in += 2;
|
||||||
@ -14455,7 +14456,7 @@ static int convert(const char* from, const char* to,
|
|||||||
/*
|
/*
|
||||||
* The output does not fit in the current out buffer, enlarge it.
|
* The output does not fit in the current out buffer, enlarge it.
|
||||||
*/
|
*/
|
||||||
int length = out - out_base;
|
size_t length = out - out_base;
|
||||||
out_size *= 2;
|
out_size *= 2;
|
||||||
{
|
{
|
||||||
char *saved = out_base;
|
char *saved = out_base;
|
||||||
@ -14561,7 +14562,7 @@ const char* unac_version(void)
|
|||||||
return UNAC_VERSION;
|
return UNAC_VERSION;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef RECOLL_DATADIR
|
#ifdef BUILDING_RECOLL
|
||||||
void unac_set_except_translations(const char *spectrans)
|
void unac_set_except_translations(const char *spectrans)
|
||||||
{
|
{
|
||||||
except_trans.clear();
|
except_trans.clear();
|
||||||
@ -14614,4 +14615,4 @@ void unac_set_except_translations(const char *spectrans)
|
|||||||
free(out);
|
free(out);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif /* RECOLL_DATADIR */
|
#endif /* BUILDING_RECOLL */
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user