Remove use of '-' as a span-building character.
This commit is contained in:
parent
b0e296235f
commit
dcf937d650
@ -355,6 +355,7 @@ bool TextSplit::text_to_words(const string &in)
|
|||||||
m_wordStart = m_wordLen = m_prevpos = m_prevlen = m_wordpos = m_spanpos = 0;
|
m_wordStart = m_wordLen = m_prevpos = m_prevlen = m_wordpos = m_spanpos = 0;
|
||||||
int curspanglue = 0;
|
int curspanglue = 0;
|
||||||
bool pagepending = false;
|
bool pagepending = false;
|
||||||
|
bool softhyphenpending = false;
|
||||||
|
|
||||||
// Running count of non-alphanum chars. Reset when we see one;
|
// Running count of non-alphanum chars. Reset when we see one;
|
||||||
int nonalnumcnt = 0;
|
int nonalnumcnt = 0;
|
||||||
@ -393,6 +394,16 @@ bool TextSplit::text_to_words(const string &in)
|
|||||||
int cc = whatcc(c);
|
int cc = whatcc(c);
|
||||||
switch (cc) {
|
switch (cc) {
|
||||||
case SKIP:
|
case SKIP:
|
||||||
|
// Special-case soft-hyphen. To work, this depends on the
|
||||||
|
// fact that only SKIP calls "continue" inside the
|
||||||
|
// switch. All the others will do the softhyphenpending
|
||||||
|
// reset after the switch
|
||||||
|
if (c == 0xad) {
|
||||||
|
softhyphenpending = true;
|
||||||
|
} else {
|
||||||
|
softhyphenpending = false;
|
||||||
|
}
|
||||||
|
// Skips the softhyphenpending reset
|
||||||
continue;
|
continue;
|
||||||
case DIGIT:
|
case DIGIT:
|
||||||
if (m_wordLen == 0)
|
if (m_wordLen == 0)
|
||||||
@ -444,10 +455,7 @@ bool TextSplit::text_to_words(const string &in)
|
|||||||
goto SPACE;
|
goto SPACE;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (!doemit(false, it.getBpos()))
|
goto SPACE;
|
||||||
return false;
|
|
||||||
m_inNumber = false;
|
|
||||||
m_wordStart += it.appendchartostring(m_span);
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -555,14 +563,15 @@ bool TextSplit::text_to_words(const string &in)
|
|||||||
break;
|
break;
|
||||||
case '\n':
|
case '\n':
|
||||||
case '\r':
|
case '\r':
|
||||||
if (m_span.length() && m_span[m_span.length() - 1] == '-') {
|
if ((m_span.length() && m_span[m_span.length() - 1] == '-') ||
|
||||||
|
softhyphenpending) {
|
||||||
// if '-' is the last char before end of line, just
|
// if '-' is the last char before end of line, just
|
||||||
// ignore the line change. This is the right thing to
|
// ignore the line change. This is the right thing to
|
||||||
// do almost always. We'd then need a way to check if
|
// do almost always. We'd then need a way to check if
|
||||||
// the - was added as part of the word hyphenation, or was
|
// the - was added as part of the word hyphenation, or was
|
||||||
// there in the first place, but this would need a dictionary.
|
// there in the first place, but this would need a dictionary.
|
||||||
// Also we'd need to check for a soft-hyphen and remove it,
|
// Don't reset soft-hyphen
|
||||||
// but this would require more utf-8 magic
|
continue;
|
||||||
} else {
|
} else {
|
||||||
// Handle like a normal separator
|
// Handle like a normal separator
|
||||||
goto SPACE;
|
goto SPACE;
|
||||||
@ -622,6 +631,7 @@ bool TextSplit::text_to_words(const string &in)
|
|||||||
nonalnumcnt = 0;
|
nonalnumcnt = 0;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
softhyphenpending = false;
|
||||||
}
|
}
|
||||||
if (m_wordLen || m_span.length()) {
|
if (m_wordLen || m_span.length()) {
|
||||||
if (!doemit(true, it.getBpos()))
|
if (!doemit(true, it.getBpos()))
|
||||||
@ -891,6 +901,10 @@ static string teststring =
|
|||||||
" ,able,test-domain "
|
" ,able,test-domain "
|
||||||
" -wl,--export-dynamic "
|
" -wl,--export-dynamic "
|
||||||
" ~/.xsession-errors "
|
" ~/.xsession-errors "
|
||||||
|
"soft\xc2\xadhyphen "
|
||||||
|
"soft\xc2\xad\nhyphen "
|
||||||
|
"soft\xc2\xad\n\rhyphen "
|
||||||
|
"hard-\nhyphen "
|
||||||
;
|
;
|
||||||
|
|
||||||
static string teststring1 = " nouvel-an ";
|
static string teststring1 = " nouvel-an ";
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user