utf8iter: store a pointer to the source string instead of a reference, which makes Utf8Iter copyable
This commit is contained in:
parent
8cf5f8963a
commit
71b4be883c
@ -32,12 +32,12 @@
|
|||||||
class Utf8Iter {
|
class Utf8Iter {
|
||||||
public:
|
public:
|
||||||
Utf8Iter(const std::string &in)
|
Utf8Iter(const std::string &in)
|
||||||
: m_s(in), m_cl(0), m_pos(0), m_charpos(0)
|
: m_sp(&in), m_cl(0), m_pos(0), m_charpos(0)
|
||||||
{
|
{
|
||||||
update_cl();
|
update_cl();
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::string& buffer() const {return m_s;}
|
const std::string& buffer() const {return (*m_sp);}
|
||||||
|
|
||||||
void rewind()
|
void rewind()
|
||||||
{
|
{
|
||||||
@ -59,14 +59,14 @@ public:
|
|||||||
mycp = m_charpos;
|
mycp = m_charpos;
|
||||||
}
|
}
|
||||||
int l;
|
int l;
|
||||||
while (mypos < m_s.length() && mycp != charpos) {
|
while (mypos < m_sp->length() && mycp != charpos) {
|
||||||
l = get_cl(mypos);
|
l = get_cl(mypos);
|
||||||
if (l <= 0 || !poslok(mypos, l) || !checkvalidat(mypos, l))
|
if (l <= 0 || !poslok(mypos, l) || !checkvalidat(mypos, l))
|
||||||
return (unsigned int)-1;
|
return (unsigned int)-1;
|
||||||
mypos += l;
|
mypos += l;
|
||||||
++mycp;
|
++mycp;
|
||||||
}
|
}
|
||||||
if (mypos < m_s.length() && mycp == charpos) {
|
if (mypos < m_sp->length() && mycp == charpos) {
|
||||||
l = get_cl(mypos);
|
l = get_cl(mypos);
|
||||||
if (poslok(mypos, l) && checkvalidat(mypos, l))
|
if (poslok(mypos, l) && checkvalidat(mypos, l))
|
||||||
return getvalueat(mypos, l);
|
return getvalueat(mypos, l);
|
||||||
@ -106,7 +106,7 @@ public:
|
|||||||
#ifdef UTF8ITER_CHECK
|
#ifdef UTF8ITER_CHECK
|
||||||
assert(m_cl != 0);
|
assert(m_cl != 0);
|
||||||
#endif
|
#endif
|
||||||
out.append(&m_s[m_pos], m_cl);
|
out.append(&(*m_sp)[m_pos], m_cl);
|
||||||
return m_cl;
|
return m_cl;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -115,11 +115,11 @@ public:
|
|||||||
#ifdef UTF8ITER_CHECK
|
#ifdef UTF8ITER_CHECK
|
||||||
assert(m_cl != 0);
|
assert(m_cl != 0);
|
||||||
#endif
|
#endif
|
||||||
return m_cl > 0 ? m_s.substr(m_pos, m_cl) : std::string();
|
return m_cl > 0 ? m_sp->substr(m_pos, m_cl) : std::string();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool eof() const {
|
bool eof() const {
|
||||||
return m_pos == m_s.length();
|
return m_pos == m_sp->length();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool error() const {
|
bool error() const {
|
||||||
@ -143,7 +143,7 @@ public:
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
// String we're working with
|
// String we're working with
|
||||||
const std::string& m_s;
|
const std::string* m_sp;
|
||||||
// Character length at current position. A value of zero indicates
|
// Character length at current position. A value of zero indicates
|
||||||
// an error.
|
// an error.
|
||||||
unsigned int m_cl;
|
unsigned int m_cl;
|
||||||
@ -155,9 +155,9 @@ private:
|
|||||||
// Check position and cl against string length
|
// Check position and cl against string length
|
||||||
bool poslok(std::string::size_type p, int l) const {
|
bool poslok(std::string::size_type p, int l) const {
|
||||||
#ifdef UTF8ITER_CHECK
|
#ifdef UTF8ITER_CHECK
|
||||||
assert(p != std::string::npos && l > 0 && p + l <= m_s.length());
|
assert(p != std::string::npos && l > 0 && p + l <= m_sp->length());
|
||||||
#endif
|
#endif
|
||||||
return p != std::string::npos && l > 0 && p + l <= m_s.length();
|
return p != std::string::npos && l > 0 && p + l <= m_sp->length();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update current char length in object state, check
|
// Update current char length in object state, check
|
||||||
@ -165,13 +165,13 @@ private:
|
|||||||
inline void update_cl()
|
inline void update_cl()
|
||||||
{
|
{
|
||||||
m_cl = 0;
|
m_cl = 0;
|
||||||
if (m_pos >= m_s.length())
|
if (m_pos >= m_sp->length())
|
||||||
return;
|
return;
|
||||||
m_cl = get_cl(m_pos);
|
m_cl = get_cl(m_pos);
|
||||||
if (!poslok(m_pos, m_cl)) {
|
if (!poslok(m_pos, m_cl)) {
|
||||||
// Used to set eof here for safety, but this is bad because it
|
// Used to set eof here for safety, but this is bad because it
|
||||||
// basically prevents the caller from distinguishing error from eof.
|
// basically prevents the caller from distinguishing error from eof.
|
||||||
// m_pos = m_s.length();
|
// m_pos = m_sp->length();
|
||||||
m_cl = 0;
|
m_cl = 0;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -184,20 +184,20 @@ private:
|
|||||||
{
|
{
|
||||||
switch (l) {
|
switch (l) {
|
||||||
case 1:
|
case 1:
|
||||||
return (unsigned char)m_s[p] < 128;
|
return (unsigned char)(*m_sp)[p] < 128;
|
||||||
case 2:
|
case 2:
|
||||||
return (((unsigned char)m_s[p]) & 224) == 192
|
return (((unsigned char)(*m_sp)[p]) & 224) == 192
|
||||||
&& (((unsigned char)m_s[p+1]) & 192) == 128;
|
&& (((unsigned char)(*m_sp)[p+1]) & 192) == 128;
|
||||||
case 3:
|
case 3:
|
||||||
return (((unsigned char)m_s[p]) & 240) == 224
|
return (((unsigned char)(*m_sp)[p]) & 240) == 224
|
||||||
&& (((unsigned char)m_s[p+1]) & 192) == 128
|
&& (((unsigned char)(*m_sp)[p+1]) & 192) == 128
|
||||||
&& (((unsigned char)m_s[p+2]) & 192) == 128
|
&& (((unsigned char)(*m_sp)[p+2]) & 192) == 128
|
||||||
;
|
;
|
||||||
case 4:
|
case 4:
|
||||||
return (((unsigned char)m_s[p]) & 248) == 240
|
return (((unsigned char)(*m_sp)[p]) & 248) == 240
|
||||||
&& (((unsigned char)m_s[p+1]) & 192) == 128
|
&& (((unsigned char)(*m_sp)[p+1]) & 192) == 128
|
||||||
&& (((unsigned char)m_s[p+2]) & 192) == 128
|
&& (((unsigned char)(*m_sp)[p+2]) & 192) == 128
|
||||||
&& (((unsigned char)m_s[p+3]) & 192) == 128
|
&& (((unsigned char)(*m_sp)[p+3]) & 192) == 128
|
||||||
;
|
;
|
||||||
default:
|
default:
|
||||||
return false;
|
return false;
|
||||||
@ -207,7 +207,7 @@ private:
|
|||||||
// Get character byte length at specified position. Returns 0 for error.
|
// Get character byte length at specified position. Returns 0 for error.
|
||||||
inline int get_cl(std::string::size_type p) const
|
inline int get_cl(std::string::size_type p) const
|
||||||
{
|
{
|
||||||
unsigned int z = (unsigned char)m_s[p];
|
unsigned int z = (unsigned char)(*m_sp)[p];
|
||||||
if (z <= 127) {
|
if (z <= 127) {
|
||||||
return 1;
|
return 1;
|
||||||
} else if ((z & 224) == 192) {
|
} else if ((z & 224) == 192) {
|
||||||
@ -230,44 +230,44 @@ private:
|
|||||||
switch (l) {
|
switch (l) {
|
||||||
case 1:
|
case 1:
|
||||||
#ifdef UTF8ITER_CHECK
|
#ifdef UTF8ITER_CHECK
|
||||||
assert((unsigned char)m_s[p] < 128);
|
assert((unsigned char)(*m_sp)[p] < 128);
|
||||||
#endif
|
#endif
|
||||||
return (unsigned char)m_s[p];
|
return (unsigned char)(*m_sp)[p];
|
||||||
case 2:
|
case 2:
|
||||||
#ifdef UTF8ITER_CHECK
|
#ifdef UTF8ITER_CHECK
|
||||||
assert(
|
assert(
|
||||||
((unsigned char)m_s[p] & 224) == 192
|
((unsigned char)(*m_sp)[p] & 224) == 192
|
||||||
&& ((unsigned char)m_s[p+1] & 192) == 128
|
&& ((unsigned char)(*m_sp)[p+1] & 192) == 128
|
||||||
);
|
);
|
||||||
#endif
|
#endif
|
||||||
return ((unsigned char)m_s[p] - 192) * 64 +
|
return ((unsigned char)(*m_sp)[p] - 192) * 64 +
|
||||||
(unsigned char)m_s[p+1] - 128 ;
|
(unsigned char)(*m_sp)[p+1] - 128 ;
|
||||||
case 3:
|
case 3:
|
||||||
#ifdef UTF8ITER_CHECK
|
#ifdef UTF8ITER_CHECK
|
||||||
assert(
|
assert(
|
||||||
(((unsigned char)m_s[p]) & 240) == 224
|
(((unsigned char)(*m_sp)[p]) & 240) == 224
|
||||||
&& (((unsigned char)m_s[p+1]) & 192) == 128
|
&& (((unsigned char)(*m_sp)[p+1]) & 192) == 128
|
||||||
&& (((unsigned char)m_s[p+2]) & 192) == 128
|
&& (((unsigned char)(*m_sp)[p+2]) & 192) == 128
|
||||||
);
|
);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return ((unsigned char)m_s[p] - 224) * 4096 +
|
return ((unsigned char)(*m_sp)[p] - 224) * 4096 +
|
||||||
((unsigned char)m_s[p+1] - 128) * 64 +
|
((unsigned char)(*m_sp)[p+1] - 128) * 64 +
|
||||||
(unsigned char)m_s[p+2] - 128;
|
(unsigned char)(*m_sp)[p+2] - 128;
|
||||||
case 4:
|
case 4:
|
||||||
#ifdef UTF8ITER_CHECK
|
#ifdef UTF8ITER_CHECK
|
||||||
assert(
|
assert(
|
||||||
(((unsigned char)m_s[p]) & 248) == 240
|
(((unsigned char)(*m_sp)[p]) & 248) == 240
|
||||||
&& (((unsigned char)m_s[p+1]) & 192) == 128
|
&& (((unsigned char)(*m_sp)[p+1]) & 192) == 128
|
||||||
&& (((unsigned char)m_s[p+2]) & 192) == 128
|
&& (((unsigned char)(*m_sp)[p+2]) & 192) == 128
|
||||||
&& (((unsigned char)m_s[p+3]) & 192) == 128
|
&& (((unsigned char)(*m_sp)[p+3]) & 192) == 128
|
||||||
);
|
);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return ((unsigned char)m_s[p]-240)*262144 +
|
return ((unsigned char)(*m_sp)[p]-240)*262144 +
|
||||||
((unsigned char)m_s[p+1]-128)*4096 +
|
((unsigned char)(*m_sp)[p+1]-128)*4096 +
|
||||||
((unsigned char)m_s[p+2]-128)*64 +
|
((unsigned char)(*m_sp)[p+2]-128)*64 +
|
||||||
(unsigned char)m_s[p+3]-128;
|
(unsigned char)(*m_sp)[p+3]-128;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
#ifdef UTF8ITER_CHECK
|
#ifdef UTF8ITER_CHECK
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user