no going out of the basic plane!

This commit is contained in:
dockes 2008-12-18 11:58:13 +00:00
parent caf54d1d7f
commit 36919ab728
2 changed files with 12 additions and 3116 deletions

View File

@ -61,6 +61,10 @@ sub main {
$uppercase_mapping,
$lowercase_mapping,
$titlecase_mapping) = split(/;/, $_);
# Basic Multilingual Plane only: stop at the first code value above 0xFFFF.
last if (hex $code_value > 0xffff);
#
# Generate ranges of values that are not explicitly listed.
# CJK ideographs for instance.
@ -85,6 +89,8 @@ sub main {
# For Japanese kana characters, we don't strip accents. Note: we only
# need to test for the main kana block (hiragana + katakana, 3040-30ff);
# characters such as halfwidth variants will first be decomposed into it.
#
# We also forbid any excursion out of the Basic Multilingual Plane (code
# values above 0xFFFF). Sorry, Dave.
my($from, $to);
while(($from, $to) = each(%decomposition)) {
my(@code_values) = split(' ', $to);
@ -92,6 +98,10 @@ sub main {
my(@decomposition);
while(@code_values) {
my($code_value) = shift(@code_values);
if (hex $code_value > 0xffff) {
undef @decomposition;
last;
}
if(exists($decomposition{$code_value})) {
push(@code_values, split(' ', $decomposition{$code_value}));
} elsif (!exists($mark{$code_value}) ||

File diff suppressed because it is too large Load Diff