Make unac suppress combining accents found in input. Input in decomposed form was previously not unaccented
This commit is contained in:
parent
ea61e85b8f
commit
0d24b5620b
3164
src/unac/unac.c
3164
src/unac/unac.c
File diff suppressed because it is too large
Load Diff
@ -35,7 +35,7 @@ extern "C" {
|
|||||||
#define UNAC_BLOCK_SHIFT 4
|
#define UNAC_BLOCK_SHIFT 4
|
||||||
#define UNAC_BLOCK_MASK ((1 << UNAC_BLOCK_SHIFT) - 1)
|
#define UNAC_BLOCK_MASK ((1 << UNAC_BLOCK_SHIFT) - 1)
|
||||||
#define UNAC_BLOCK_SIZE (1 << UNAC_BLOCK_SHIFT)
|
#define UNAC_BLOCK_SIZE (1 << UNAC_BLOCK_SHIFT)
|
||||||
#define UNAC_BLOCK_COUNT 355
|
#define UNAC_BLOCK_COUNT 418
|
||||||
#define UNAC_INDEXES_SIZE (0x10000 >> UNAC_BLOCK_SHIFT)
|
#define UNAC_INDEXES_SIZE (0x10000 >> UNAC_BLOCK_SHIFT)
|
||||||
/* Generated by builder. Do not modify. End defines */
|
/* Generated by builder. Do not modify. End defines */
|
||||||
|
|
||||||
@ -521,6 +521,69 @@ extern unsigned short unac_data351[];
|
|||||||
extern unsigned short unac_data352[];
|
extern unsigned short unac_data352[];
|
||||||
extern unsigned short unac_data353[];
|
extern unsigned short unac_data353[];
|
||||||
extern unsigned short unac_data354[];
|
extern unsigned short unac_data354[];
|
||||||
|
extern unsigned short unac_data355[];
|
||||||
|
extern unsigned short unac_data356[];
|
||||||
|
extern unsigned short unac_data357[];
|
||||||
|
extern unsigned short unac_data358[];
|
||||||
|
extern unsigned short unac_data359[];
|
||||||
|
extern unsigned short unac_data360[];
|
||||||
|
extern unsigned short unac_data361[];
|
||||||
|
extern unsigned short unac_data362[];
|
||||||
|
extern unsigned short unac_data363[];
|
||||||
|
extern unsigned short unac_data364[];
|
||||||
|
extern unsigned short unac_data365[];
|
||||||
|
extern unsigned short unac_data366[];
|
||||||
|
extern unsigned short unac_data367[];
|
||||||
|
extern unsigned short unac_data368[];
|
||||||
|
extern unsigned short unac_data369[];
|
||||||
|
extern unsigned short unac_data370[];
|
||||||
|
extern unsigned short unac_data371[];
|
||||||
|
extern unsigned short unac_data372[];
|
||||||
|
extern unsigned short unac_data373[];
|
||||||
|
extern unsigned short unac_data374[];
|
||||||
|
extern unsigned short unac_data375[];
|
||||||
|
extern unsigned short unac_data376[];
|
||||||
|
extern unsigned short unac_data377[];
|
||||||
|
extern unsigned short unac_data378[];
|
||||||
|
extern unsigned short unac_data379[];
|
||||||
|
extern unsigned short unac_data380[];
|
||||||
|
extern unsigned short unac_data381[];
|
||||||
|
extern unsigned short unac_data382[];
|
||||||
|
extern unsigned short unac_data383[];
|
||||||
|
extern unsigned short unac_data384[];
|
||||||
|
extern unsigned short unac_data385[];
|
||||||
|
extern unsigned short unac_data386[];
|
||||||
|
extern unsigned short unac_data387[];
|
||||||
|
extern unsigned short unac_data388[];
|
||||||
|
extern unsigned short unac_data389[];
|
||||||
|
extern unsigned short unac_data390[];
|
||||||
|
extern unsigned short unac_data391[];
|
||||||
|
extern unsigned short unac_data392[];
|
||||||
|
extern unsigned short unac_data393[];
|
||||||
|
extern unsigned short unac_data394[];
|
||||||
|
extern unsigned short unac_data395[];
|
||||||
|
extern unsigned short unac_data396[];
|
||||||
|
extern unsigned short unac_data397[];
|
||||||
|
extern unsigned short unac_data398[];
|
||||||
|
extern unsigned short unac_data399[];
|
||||||
|
extern unsigned short unac_data400[];
|
||||||
|
extern unsigned short unac_data401[];
|
||||||
|
extern unsigned short unac_data402[];
|
||||||
|
extern unsigned short unac_data403[];
|
||||||
|
extern unsigned short unac_data404[];
|
||||||
|
extern unsigned short unac_data405[];
|
||||||
|
extern unsigned short unac_data406[];
|
||||||
|
extern unsigned short unac_data407[];
|
||||||
|
extern unsigned short unac_data408[];
|
||||||
|
extern unsigned short unac_data409[];
|
||||||
|
extern unsigned short unac_data410[];
|
||||||
|
extern unsigned short unac_data411[];
|
||||||
|
extern unsigned short unac_data412[];
|
||||||
|
extern unsigned short unac_data413[];
|
||||||
|
extern unsigned short unac_data414[];
|
||||||
|
extern unsigned short unac_data415[];
|
||||||
|
extern unsigned short unac_data416[];
|
||||||
|
extern unsigned short unac_data417[];
|
||||||
/* Generated by builder. Do not modify. End declarations */
|
/* Generated by builder. Do not modify. End declarations */
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|||||||
@ -93,6 +93,14 @@ sub main {
|
|||||||
}
|
}
|
||||||
if($general_category =~ /^M/) {
|
if($general_category =~ /^M/) {
|
||||||
$mark{$code_value} = 1;
|
$mark{$code_value} = 1;
|
||||||
|
# For mark characters, we generate a 0 entry in the
|
||||||
|
# decomposition table. This signals to the c code that no
|
||||||
|
# output should be generated. Slightly hacky but ok. The
|
||||||
|
# original code let mark characters go through (generating
|
||||||
|
# still accented output if the input was in decomposed
|
||||||
|
# form). Decomposed text is rare, but, for example, macosx file
|
||||||
|
# names have separate combining accent characters.
|
||||||
|
$decomposition{$code_value} = "0000";
|
||||||
}
|
}
|
||||||
$name{$code_value} = $character_name;
|
$name{$code_value} = $character_name;
|
||||||
}
|
}
|
||||||
@ -114,11 +122,16 @@ sub main {
|
|||||||
undef @decomposition;
|
undef @decomposition;
|
||||||
last;
|
last;
|
||||||
}
|
}
|
||||||
if(exists($decomposition{$code_value})) {
|
# marks also have entries in the decomposition table (so that
|
||||||
push(@code_values, split(' ', $decomposition{$code_value}));
|
# they can be suppressed when found in input), but no output
|
||||||
} elsif (!exists($mark{$code_value})) {
|
# component should be generated for them.
|
||||||
push(@decomposition, $code_value);
|
if (!exists($mark{$code_value})) {
|
||||||
}
|
if(exists($decomposition{$code_value})) {
|
||||||
|
push(@code_values, split(' ', $decomposition{$code_value}));
|
||||||
|
} else {
|
||||||
|
push(@decomposition, $code_value);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if(@decomposition) {
|
if(@decomposition) {
|
||||||
$decomposition{$from} = "@decomposition";
|
$decomposition{$from} = "@decomposition";
|
||||||
|
|||||||
3164
unac/unac.c
3164
unac/unac.c
File diff suppressed because it is too large
Load Diff
65
unac/unac.h
65
unac/unac.h
@ -35,7 +35,7 @@ extern "C" {
|
|||||||
#define UNAC_BLOCK_SHIFT 4
|
#define UNAC_BLOCK_SHIFT 4
|
||||||
#define UNAC_BLOCK_MASK ((1 << UNAC_BLOCK_SHIFT) - 1)
|
#define UNAC_BLOCK_MASK ((1 << UNAC_BLOCK_SHIFT) - 1)
|
||||||
#define UNAC_BLOCK_SIZE (1 << UNAC_BLOCK_SHIFT)
|
#define UNAC_BLOCK_SIZE (1 << UNAC_BLOCK_SHIFT)
|
||||||
#define UNAC_BLOCK_COUNT 355
|
#define UNAC_BLOCK_COUNT 418
|
||||||
#define UNAC_INDEXES_SIZE (0x10000 >> UNAC_BLOCK_SHIFT)
|
#define UNAC_INDEXES_SIZE (0x10000 >> UNAC_BLOCK_SHIFT)
|
||||||
/* Generated by builder. Do not modify. End defines */
|
/* Generated by builder. Do not modify. End defines */
|
||||||
|
|
||||||
@ -521,6 +521,69 @@ extern unsigned short unac_data351[];
|
|||||||
extern unsigned short unac_data352[];
|
extern unsigned short unac_data352[];
|
||||||
extern unsigned short unac_data353[];
|
extern unsigned short unac_data353[];
|
||||||
extern unsigned short unac_data354[];
|
extern unsigned short unac_data354[];
|
||||||
|
extern unsigned short unac_data355[];
|
||||||
|
extern unsigned short unac_data356[];
|
||||||
|
extern unsigned short unac_data357[];
|
||||||
|
extern unsigned short unac_data358[];
|
||||||
|
extern unsigned short unac_data359[];
|
||||||
|
extern unsigned short unac_data360[];
|
||||||
|
extern unsigned short unac_data361[];
|
||||||
|
extern unsigned short unac_data362[];
|
||||||
|
extern unsigned short unac_data363[];
|
||||||
|
extern unsigned short unac_data364[];
|
||||||
|
extern unsigned short unac_data365[];
|
||||||
|
extern unsigned short unac_data366[];
|
||||||
|
extern unsigned short unac_data367[];
|
||||||
|
extern unsigned short unac_data368[];
|
||||||
|
extern unsigned short unac_data369[];
|
||||||
|
extern unsigned short unac_data370[];
|
||||||
|
extern unsigned short unac_data371[];
|
||||||
|
extern unsigned short unac_data372[];
|
||||||
|
extern unsigned short unac_data373[];
|
||||||
|
extern unsigned short unac_data374[];
|
||||||
|
extern unsigned short unac_data375[];
|
||||||
|
extern unsigned short unac_data376[];
|
||||||
|
extern unsigned short unac_data377[];
|
||||||
|
extern unsigned short unac_data378[];
|
||||||
|
extern unsigned short unac_data379[];
|
||||||
|
extern unsigned short unac_data380[];
|
||||||
|
extern unsigned short unac_data381[];
|
||||||
|
extern unsigned short unac_data382[];
|
||||||
|
extern unsigned short unac_data383[];
|
||||||
|
extern unsigned short unac_data384[];
|
||||||
|
extern unsigned short unac_data385[];
|
||||||
|
extern unsigned short unac_data386[];
|
||||||
|
extern unsigned short unac_data387[];
|
||||||
|
extern unsigned short unac_data388[];
|
||||||
|
extern unsigned short unac_data389[];
|
||||||
|
extern unsigned short unac_data390[];
|
||||||
|
extern unsigned short unac_data391[];
|
||||||
|
extern unsigned short unac_data392[];
|
||||||
|
extern unsigned short unac_data393[];
|
||||||
|
extern unsigned short unac_data394[];
|
||||||
|
extern unsigned short unac_data395[];
|
||||||
|
extern unsigned short unac_data396[];
|
||||||
|
extern unsigned short unac_data397[];
|
||||||
|
extern unsigned short unac_data398[];
|
||||||
|
extern unsigned short unac_data399[];
|
||||||
|
extern unsigned short unac_data400[];
|
||||||
|
extern unsigned short unac_data401[];
|
||||||
|
extern unsigned short unac_data402[];
|
||||||
|
extern unsigned short unac_data403[];
|
||||||
|
extern unsigned short unac_data404[];
|
||||||
|
extern unsigned short unac_data405[];
|
||||||
|
extern unsigned short unac_data406[];
|
||||||
|
extern unsigned short unac_data407[];
|
||||||
|
extern unsigned short unac_data408[];
|
||||||
|
extern unsigned short unac_data409[];
|
||||||
|
extern unsigned short unac_data410[];
|
||||||
|
extern unsigned short unac_data411[];
|
||||||
|
extern unsigned short unac_data412[];
|
||||||
|
extern unsigned short unac_data413[];
|
||||||
|
extern unsigned short unac_data414[];
|
||||||
|
extern unsigned short unac_data415[];
|
||||||
|
extern unsigned short unac_data416[];
|
||||||
|
extern unsigned short unac_data417[];
|
||||||
/* Generated by builder. Do not modify. End declarations */
|
/* Generated by builder. Do not modify. End declarations */
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user