Remove use of '-' as a span-building character.

This commit is contained in:
Jean-Francois Dockes 2013-03-04 12:16:11 +01:00
parent b0e296235f
commit dcf937d650

View File

@ -355,6 +355,7 @@ bool TextSplit::text_to_words(const string &in)
m_wordStart = m_wordLen = m_prevpos = m_prevlen = m_wordpos = m_spanpos = 0; m_wordStart = m_wordLen = m_prevpos = m_prevlen = m_wordpos = m_spanpos = 0;
int curspanglue = 0; int curspanglue = 0;
bool pagepending = false; bool pagepending = false;
bool softhyphenpending = false;
// Running count of non-alphanum chars. Reset when we see one; // Running count of non-alphanum chars. Reset when we see one;
int nonalnumcnt = 0; int nonalnumcnt = 0;
@ -393,6 +394,16 @@ bool TextSplit::text_to_words(const string &in)
int cc = whatcc(c); int cc = whatcc(c);
switch (cc) { switch (cc) {
case SKIP: case SKIP:
// Special-case soft-hyphen. To work, this depends on the
// fact that only SKIP calls "continue" inside the
// switch. All the others will do the softhyphenpending
// reset after the switch
if (c == 0xad) {
softhyphenpending = true;
} else {
softhyphenpending = false;
}
// Skips the softhyphenpending reset
continue; continue;
case DIGIT: case DIGIT:
if (m_wordLen == 0) if (m_wordLen == 0)
@ -444,10 +455,7 @@ bool TextSplit::text_to_words(const string &in)
goto SPACE; goto SPACE;
} }
} else { } else {
if (!doemit(false, it.getBpos())) goto SPACE;
return false;
m_inNumber = false;
m_wordStart += it.appendchartostring(m_span);
} }
break; break;
@ -555,14 +563,15 @@ bool TextSplit::text_to_words(const string &in)
break; break;
case '\n': case '\n':
case '\r': case '\r':
if (m_span.length() && m_span[m_span.length() - 1] == '-') { if ((m_span.length() && m_span[m_span.length() - 1] == '-') ||
softhyphenpending) {
// if '-' is the last char before end of line, just // if '-' is the last char before end of line, just
// ignore the line change. This is the right thing to // ignore the line change. This is the right thing to
// do almost always. We'd then need a way to check if // do almost always. We'd then need a way to check if
// the - was added as part of the word hyphenation, or was // the - was added as part of the word hyphenation, or was
// there in the first place, but this would need a dictionary. // there in the first place, but this would need a dictionary.
// Also we'd need to check for a soft-hyphen and remove it, // Don't reset soft-hyphen
// but this would require more utf-8 magic continue;
} else { } else {
// Handle like a normal separator // Handle like a normal separator
goto SPACE; goto SPACE;
@ -622,6 +631,7 @@ bool TextSplit::text_to_words(const string &in)
nonalnumcnt = 0; nonalnumcnt = 0;
break; break;
} }
softhyphenpending = false;
} }
if (m_wordLen || m_span.length()) { if (m_wordLen || m_span.length()) {
if (!doemit(true, it.getBpos())) if (!doemit(true, it.getBpos()))
@ -891,6 +901,10 @@ static string teststring =
" ,able,test-domain " " ,able,test-domain "
" -wl,--export-dynamic " " -wl,--export-dynamic "
" ~/.xsession-errors " " ~/.xsession-errors "
"soft\xc2\xadhyphen "
"soft\xc2\xad\nhyphen "
"soft\xc2\xad\n\rhyphen "
"hard-\nhyphen "
; ;
static string teststring1 = " nouvel-an "; static string teststring1 = " nouvel-an ";