Small fix: remove diaeresis from the separator list; update comments

This commit is contained in:
dockes 2009-01-13 16:02:18 +00:00
parent 5d144d6145
commit 3991b11d2b

View File

@ -19,17 +19,15 @@
/* @(#$Id: uproplist.h,v 1.3 2008-12-05 11:09:31 dockes Exp $ (C) 2004 J.F.Dockes */ /* @(#$Id: uproplist.h,v 1.3 2008-12-05 11:09:31 dockes Exp $ (C) 2004 J.F.Dockes */
/* /**
* A subset of Unicode chars that we consider whitespace when we split text in * A subset of Unicode chars that we consider word breaks when we
* words. * split text in words.
*
* This is used as a quick fix to the ascii-based code, and is not correct. * This is used as a quick fix to the ascii-based code, and is not correct.
* the correct way would be to do what http://www.unicode.org/reports/tr29/ * the correct way would be to do what http://www.unicode.org/reports/tr29/
* says. We should then convert first to ucs-4, and then strictly use * says.
* character properties, which might actually be simpler than the current
* solution...
* *
* From: * Data from:
# PropList-4.0.1.txt # PropList-4.0.1.txt
# Date: 2004-03-02, 02:42:40 GMT [MD] # Date: 2004-03-02, 02:42:40 GMT [MD]
# #
@ -48,9 +46,7 @@ static const unsigned int uniign[] = {
0x00A5, /* YEN SIGN;Sc; */ 0x00A5, /* YEN SIGN;Sc; */
0x00A6, /* BROKEN BAR;So */ 0x00A6, /* BROKEN BAR;So */
0x00A7, /* SECTION SIGN;So; */ 0x00A7, /* SECTION SIGN;So; */
0x00A8, /* DIAERESIS;Sk; */
0x00A9, /* COPYRIGHT SIGN;So */ 0x00A9, /* COPYRIGHT SIGN;So */
0x00AA, /* FEMININE ORDINAL INDICATOR;Ll */
0x00AB, /* LEFT-POINTING DOUBLE ANGLE QUOTATION MARK;Pi */ 0x00AB, /* LEFT-POINTING DOUBLE ANGLE QUOTATION MARK;Pi */
0x00AC, /* NOT SIGN;Sm */ 0x00AC, /* NOT SIGN;Sm */
0x00AE, /* registered sign */ 0x00AE, /* registered sign */
@ -160,6 +156,9 @@ static const unsigned int uniign[] = {
0xFF1F, /* ; Terminal_Punctuation # Po FULLWIDTH QUESTION MARK*/ 0xFF1F, /* ; Terminal_Punctuation # Po FULLWIDTH QUESTION MARK*/
0xFF61, /* ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC FULL STOP*/ 0xFF61, /* ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC FULL STOP*/
0xFF64, /* ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC COMMA*/ 0xFF64, /* ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC COMMA*/
/* STerm means Sentence Terminal. Some of these are also listed under
Terminal_Punctuation, but not all of them (?) */
0x0021, /* ; STerm # Po EXCLAMATION MARK*/ 0x0021, /* ; STerm # Po EXCLAMATION MARK*/
0x002E, /* ; STerm # Po FULL STOP*/ 0x002E, /* ; STerm # Po FULL STOP*/
0x003F, /* ; STerm # Po QUESTION MARK*/ 0x003F, /* ; STerm # Po QUESTION MARK*/