Restrict character processing to the Unicode Basic Multilingual Plane (code points up to U+FFFF)

This commit is contained in:
dockes 2008-12-18 11:58:13 +00:00
parent caf54d1d7f
commit 36919ab728
2 changed files with 12 additions and 3116 deletions

View File

@ -61,6 +61,10 @@ sub main {
$uppercase_mapping, $uppercase_mapping,
$lowercase_mapping, $lowercase_mapping,
$titlecase_mapping) = split(/;/, $_); $titlecase_mapping) = split(/;/, $_);
# Basic plane only
last if (hex $code_value > 0xffff);
# #
# Generate ranges of values that are not explicitly listed. # Generate ranges of values that are not explicitly listed.
# CJK ideographs for instance. # CJK ideographs for instance.
@ -85,6 +89,8 @@ sub main {
# For kana japanese characters, we don't strip accents. Note: we just # For kana japanese characters, we don't strip accents. Note: we just
# need to test for the main kana (hiragana + katakana 3040-30ff) block, # need to test for the main kana (hiragana + katakana 3040-30ff) block,
# characters such as halfwidth variations will be first decomposed into it # characters such as halfwidth variations will be first decomposed into it
#
# We also forbid any excursion out of the basic plane. Sorry, Dave.
my($from, $to); my($from, $to);
while(($from, $to) = each(%decomposition)) { while(($from, $to) = each(%decomposition)) {
my(@code_values) = split(' ', $to); my(@code_values) = split(' ', $to);
@ -92,6 +98,10 @@ sub main {
my(@decomposition); my(@decomposition);
while(@code_values) { while(@code_values) {
my($code_value) = shift(@code_values); my($code_value) = shift(@code_values);
if (hex $code_value > 0xffff) {
undef @decomposition;
last;
}
if(exists($decomposition{$code_value})) { if(exists($decomposition{$code_value})) {
push(@code_values, split(' ', $decomposition{$code_value})); push(@code_values, split(' ', $decomposition{$code_value}));
} elsif (!exists($mark{$code_value}) || } elsif (!exists($mark{$code_value}) ||

File diff suppressed because it is too large Load Diff