no going out of the basic plane!
This commit is contained in:
parent
caf54d1d7f
commit
36919ab728
@ -61,6 +61,10 @@ sub main {
|
|||||||
$uppercase_mapping,
|
$uppercase_mapping,
|
||||||
$lowercase_mapping,
|
$lowercase_mapping,
|
||||||
$titlecase_mapping) = split(/;/, $_);
|
$titlecase_mapping) = split(/;/, $_);
|
||||||
|
|
||||||
|
# Basic Multilingual Plane only (code points up to 0xffff)
|
||||||
|
last if (hex $code_value > 0xffff);
|
||||||
|
|
||||||
#
|
#
|
||||||
# Generate ranges of values that are not explicitly listed.
|
# Generate ranges of values that are not explicitly listed.
|
||||||
# CJK ideographs for instance.
|
# CJK ideographs for instance.
|
||||||
@ -85,6 +89,8 @@ sub main {
|
|||||||
# For Japanese kana characters, we don't strip accents. Note: we just
|
# For Japanese kana characters, we don't strip accents. Note: we just
|
||||||
# need to test for the main kana (hiragana + katakana 3040-30ff) block;
|
# need to test for the main kana (hiragana + katakana 3040-30ff) block;
|
||||||
# characters such as halfwidth variations will first be decomposed into it.
|
# characters such as halfwidth variations will first be decomposed into it.
|
||||||
|
#
|
||||||
|
# We also forbid any excursion out of the Basic Multilingual Plane (code points above 0xffff). Sorry, Dave.
|
||||||
my($from, $to);
|
my($from, $to);
|
||||||
while(($from, $to) = each(%decomposition)) {
|
while(($from, $to) = each(%decomposition)) {
|
||||||
my(@code_values) = split(' ', $to);
|
my(@code_values) = split(' ', $to);
|
||||||
@ -92,6 +98,10 @@ sub main {
|
|||||||
my(@decomposition);
|
my(@decomposition);
|
||||||
while(@code_values) {
|
while(@code_values) {
|
||||||
my($code_value) = shift(@code_values);
|
my($code_value) = shift(@code_values);
|
||||||
|
if (hex $code_value > 0xffff) {
|
||||||
|
undef @decomposition;
|
||||||
|
last;
|
||||||
|
}
|
||||||
if(exists($decomposition{$code_value})) {
|
if(exists($decomposition{$code_value})) {
|
||||||
push(@code_values, split(' ', $decomposition{$code_value}));
|
push(@code_values, split(' ', $decomposition{$code_value}));
|
||||||
} elsif (!exists($mark{$code_value}) ||
|
} elsif (!exists($mark{$code_value}) ||
|
||||||
|
|||||||
3118
unac/unac.c
3118
unac/unac.c
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user