implemented additional case-folding
This commit is contained in:
parent
b396d2c39f
commit
00b954c4ef
912
unac/CaseFolding-3.2.0.txt
Normal file
912
unac/CaseFolding-3.2.0.txt
Normal file
@ -0,0 +1,912 @@
|
||||
# CaseFolding-3.2.0.txt
|
||||
# Date: 2002-03-22,20:54:33 GMT [MD]
|
||||
#
|
||||
# Case Folding Properties
|
||||
#
|
||||
# This file is a supplement to the UnicodeData file.
|
||||
# It provides a case folding mapping generated from the Unicode Character Database.
|
||||
# If all characters are mapped according to the full mapping below, then
|
||||
# case differences (according to UnicodeData.txt and SpecialCasing.txt)
|
||||
# are eliminated.
|
||||
#
|
||||
# The data supports both implementations that require simple case foldings
|
||||
# (where string lengths don't change), and implementations that allow full case folding
|
||||
# (where string lengths may grow). Note that where they can be supported, the
|
||||
# full case foldings are superior: for example, they allow "MASSE" and "Maße" to match.
|
||||
#
|
||||
# NOTE: case folding does not preserve normalization formats!
|
||||
#
|
||||
# For information on case folding, see
|
||||
# UTR #21 Case Mappings, at http://www.unicode.org/unicode/reports/tr21/
|
||||
#
|
||||
# ================================================================================
|
||||
# Format
|
||||
# ================================================================================
|
||||
# The entries in this file are in the following machine-readable format:
|
||||
#
|
||||
# <code>; <status>; <mapping>; # <name>
|
||||
#
|
||||
# The status field is:
|
||||
# C: common case folding, common mappings shared by both simple and full mappings.
|
||||
# F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces.
|
||||
# S: simple case folding, mappings to single characters where different from F.
|
||||
# T: special case for uppercase I and dotted uppercase I
|
||||
# - For non-Turkic languages, this mapping is normally not used.
|
||||
# - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters.
|
||||
#
|
||||
# Usage:
|
||||
# A. To do a simple case folding, use the mappings with status C + S.
|
||||
# B. To do a full case folding, use the mappings with status C + F.
|
||||
#
|
||||
# The mappings with status T can be used or omitted depending on the desired case-folding
|
||||
# behavior. (The default option is to exclude them.)
|
||||
#
|
||||
# =================================================================
|
||||
|
||||
0041; C; 0061; # LATIN CAPITAL LETTER A
|
||||
0042; C; 0062; # LATIN CAPITAL LETTER B
|
||||
0043; C; 0063; # LATIN CAPITAL LETTER C
|
||||
0044; C; 0064; # LATIN CAPITAL LETTER D
|
||||
0045; C; 0065; # LATIN CAPITAL LETTER E
|
||||
0046; C; 0066; # LATIN CAPITAL LETTER F
|
||||
0047; C; 0067; # LATIN CAPITAL LETTER G
|
||||
0048; C; 0068; # LATIN CAPITAL LETTER H
|
||||
0049; C; 0069; # LATIN CAPITAL LETTER I
|
||||
0049; T; 0131; # LATIN CAPITAL LETTER I
|
||||
004A; C; 006A; # LATIN CAPITAL LETTER J
|
||||
004B; C; 006B; # LATIN CAPITAL LETTER K
|
||||
004C; C; 006C; # LATIN CAPITAL LETTER L
|
||||
004D; C; 006D; # LATIN CAPITAL LETTER M
|
||||
004E; C; 006E; # LATIN CAPITAL LETTER N
|
||||
004F; C; 006F; # LATIN CAPITAL LETTER O
|
||||
0050; C; 0070; # LATIN CAPITAL LETTER P
|
||||
0051; C; 0071; # LATIN CAPITAL LETTER Q
|
||||
0052; C; 0072; # LATIN CAPITAL LETTER R
|
||||
0053; C; 0073; # LATIN CAPITAL LETTER S
|
||||
0054; C; 0074; # LATIN CAPITAL LETTER T
|
||||
0055; C; 0075; # LATIN CAPITAL LETTER U
|
||||
0056; C; 0076; # LATIN CAPITAL LETTER V
|
||||
0057; C; 0077; # LATIN CAPITAL LETTER W
|
||||
0058; C; 0078; # LATIN CAPITAL LETTER X
|
||||
0059; C; 0079; # LATIN CAPITAL LETTER Y
|
||||
005A; C; 007A; # LATIN CAPITAL LETTER Z
|
||||
00B5; C; 03BC; # MICRO SIGN
|
||||
00C0; C; 00E0; # LATIN CAPITAL LETTER A WITH GRAVE
|
||||
00C1; C; 00E1; # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
00C2; C; 00E2; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
00C3; C; 00E3; # LATIN CAPITAL LETTER A WITH TILDE
|
||||
00C4; C; 00E4; # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
00C5; C; 00E5; # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
00C6; C; 00E6; # LATIN CAPITAL LETTER AE
|
||||
00C7; C; 00E7; # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
00C8; C; 00E8; # LATIN CAPITAL LETTER E WITH GRAVE
|
||||
00C9; C; 00E9; # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
00CA; C; 00EA; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
00CB; C; 00EB; # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
00CC; C; 00EC; # LATIN CAPITAL LETTER I WITH GRAVE
|
||||
00CD; C; 00ED; # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
00CE; C; 00EE; # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
00CF; C; 00EF; # LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
00D0; C; 00F0; # LATIN CAPITAL LETTER ETH
|
||||
00D1; C; 00F1; # LATIN CAPITAL LETTER N WITH TILDE
|
||||
00D2; C; 00F2; # LATIN CAPITAL LETTER O WITH GRAVE
|
||||
00D3; C; 00F3; # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
00D4; C; 00F4; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
00D5; C; 00F5; # LATIN CAPITAL LETTER O WITH TILDE
|
||||
00D6; C; 00F6; # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
00D8; C; 00F8; # LATIN CAPITAL LETTER O WITH STROKE
|
||||
00D9; C; 00F9; # LATIN CAPITAL LETTER U WITH GRAVE
|
||||
00DA; C; 00FA; # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
00DB; C; 00FB; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
00DC; C; 00FC; # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
00DD; C; 00FD; # LATIN CAPITAL LETTER Y WITH ACUTE
|
||||
00DE; C; 00FE; # LATIN CAPITAL LETTER THORN
|
||||
00DF; F; 0073 0073; # LATIN SMALL LETTER SHARP S
|
||||
0100; C; 0101; # LATIN CAPITAL LETTER A WITH MACRON
|
||||
0102; C; 0103; # LATIN CAPITAL LETTER A WITH BREVE
|
||||
0104; C; 0105; # LATIN CAPITAL LETTER A WITH OGONEK
|
||||
0106; C; 0107; # LATIN CAPITAL LETTER C WITH ACUTE
|
||||
0108; C; 0109; # LATIN CAPITAL LETTER C WITH CIRCUMFLEX
|
||||
010A; C; 010B; # LATIN CAPITAL LETTER C WITH DOT ABOVE
|
||||
010C; C; 010D; # LATIN CAPITAL LETTER C WITH CARON
|
||||
010E; C; 010F; # LATIN CAPITAL LETTER D WITH CARON
|
||||
0110; C; 0111; # LATIN CAPITAL LETTER D WITH STROKE
|
||||
0112; C; 0113; # LATIN CAPITAL LETTER E WITH MACRON
|
||||
0114; C; 0115; # LATIN CAPITAL LETTER E WITH BREVE
|
||||
0116; C; 0117; # LATIN CAPITAL LETTER E WITH DOT ABOVE
|
||||
0118; C; 0119; # LATIN CAPITAL LETTER E WITH OGONEK
|
||||
011A; C; 011B; # LATIN CAPITAL LETTER E WITH CARON
|
||||
011C; C; 011D; # LATIN CAPITAL LETTER G WITH CIRCUMFLEX
|
||||
011E; C; 011F; # LATIN CAPITAL LETTER G WITH BREVE
|
||||
0120; C; 0121; # LATIN CAPITAL LETTER G WITH DOT ABOVE
|
||||
0122; C; 0123; # LATIN CAPITAL LETTER G WITH CEDILLA
|
||||
0124; C; 0125; # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
|
||||
0126; C; 0127; # LATIN CAPITAL LETTER H WITH STROKE
|
||||
0128; C; 0129; # LATIN CAPITAL LETTER I WITH TILDE
|
||||
012A; C; 012B; # LATIN CAPITAL LETTER I WITH MACRON
|
||||
012C; C; 012D; # LATIN CAPITAL LETTER I WITH BREVE
|
||||
012E; C; 012F; # LATIN CAPITAL LETTER I WITH OGONEK
|
||||
0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||
0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||
0132; C; 0133; # LATIN CAPITAL LIGATURE IJ
|
||||
0134; C; 0135; # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
|
||||
0136; C; 0137; # LATIN CAPITAL LETTER K WITH CEDILLA
|
||||
0139; C; 013A; # LATIN CAPITAL LETTER L WITH ACUTE
|
||||
013B; C; 013C; # LATIN CAPITAL LETTER L WITH CEDILLA
|
||||
013D; C; 013E; # LATIN CAPITAL LETTER L WITH CARON
|
||||
013F; C; 0140; # LATIN CAPITAL LETTER L WITH MIDDLE DOT
|
||||
0141; C; 0142; # LATIN CAPITAL LETTER L WITH STROKE
|
||||
0143; C; 0144; # LATIN CAPITAL LETTER N WITH ACUTE
|
||||
0145; C; 0146; # LATIN CAPITAL LETTER N WITH CEDILLA
|
||||
0147; C; 0148; # LATIN CAPITAL LETTER N WITH CARON
|
||||
0149; F; 02BC 006E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
|
||||
014A; C; 014B; # LATIN CAPITAL LETTER ENG
|
||||
014C; C; 014D; # LATIN CAPITAL LETTER O WITH MACRON
|
||||
014E; C; 014F; # LATIN CAPITAL LETTER O WITH BREVE
|
||||
0150; C; 0151; # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
|
||||
0152; C; 0153; # LATIN CAPITAL LIGATURE OE
|
||||
0154; C; 0155; # LATIN CAPITAL LETTER R WITH ACUTE
|
||||
0156; C; 0157; # LATIN CAPITAL LETTER R WITH CEDILLA
|
||||
0158; C; 0159; # LATIN CAPITAL LETTER R WITH CARON
|
||||
015A; C; 015B; # LATIN CAPITAL LETTER S WITH ACUTE
|
||||
015C; C; 015D; # LATIN CAPITAL LETTER S WITH CIRCUMFLEX
|
||||
015E; C; 015F; # LATIN CAPITAL LETTER S WITH CEDILLA
|
||||
0160; C; 0161; # LATIN CAPITAL LETTER S WITH CARON
|
||||
0162; C; 0163; # LATIN CAPITAL LETTER T WITH CEDILLA
|
||||
0164; C; 0165; # LATIN CAPITAL LETTER T WITH CARON
|
||||
0166; C; 0167; # LATIN CAPITAL LETTER T WITH STROKE
|
||||
0168; C; 0169; # LATIN CAPITAL LETTER U WITH TILDE
|
||||
016A; C; 016B; # LATIN CAPITAL LETTER U WITH MACRON
|
||||
016C; C; 016D; # LATIN CAPITAL LETTER U WITH BREVE
|
||||
016E; C; 016F; # LATIN CAPITAL LETTER U WITH RING ABOVE
|
||||
0170; C; 0171; # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
|
||||
0172; C; 0173; # LATIN CAPITAL LETTER U WITH OGONEK
|
||||
0174; C; 0175; # LATIN CAPITAL LETTER W WITH CIRCUMFLEX
|
||||
0176; C; 0177; # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
|
||||
0178; C; 00FF; # LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||
0179; C; 017A; # LATIN CAPITAL LETTER Z WITH ACUTE
|
||||
017B; C; 017C; # LATIN CAPITAL LETTER Z WITH DOT ABOVE
|
||||
017D; C; 017E; # LATIN CAPITAL LETTER Z WITH CARON
|
||||
017F; C; 0073; # LATIN SMALL LETTER LONG S
|
||||
0181; C; 0253; # LATIN CAPITAL LETTER B WITH HOOK
|
||||
0182; C; 0183; # LATIN CAPITAL LETTER B WITH TOPBAR
|
||||
0184; C; 0185; # LATIN CAPITAL LETTER TONE SIX
|
||||
0186; C; 0254; # LATIN CAPITAL LETTER OPEN O
|
||||
0187; C; 0188; # LATIN CAPITAL LETTER C WITH HOOK
|
||||
0189; C; 0256; # LATIN CAPITAL LETTER AFRICAN D
|
||||
018A; C; 0257; # LATIN CAPITAL LETTER D WITH HOOK
|
||||
018B; C; 018C; # LATIN CAPITAL LETTER D WITH TOPBAR
|
||||
018E; C; 01DD; # LATIN CAPITAL LETTER REVERSED E
|
||||
018F; C; 0259; # LATIN CAPITAL LETTER SCHWA
|
||||
0190; C; 025B; # LATIN CAPITAL LETTER OPEN E
|
||||
0191; C; 0192; # LATIN CAPITAL LETTER F WITH HOOK
|
||||
0193; C; 0260; # LATIN CAPITAL LETTER G WITH HOOK
|
||||
0194; C; 0263; # LATIN CAPITAL LETTER GAMMA
|
||||
0196; C; 0269; # LATIN CAPITAL LETTER IOTA
|
||||
0197; C; 0268; # LATIN CAPITAL LETTER I WITH STROKE
|
||||
0198; C; 0199; # LATIN CAPITAL LETTER K WITH HOOK
|
||||
019C; C; 026F; # LATIN CAPITAL LETTER TURNED M
|
||||
019D; C; 0272; # LATIN CAPITAL LETTER N WITH LEFT HOOK
|
||||
019F; C; 0275; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE
|
||||
01A0; C; 01A1; # LATIN CAPITAL LETTER O WITH HORN
|
||||
01A2; C; 01A3; # LATIN CAPITAL LETTER OI
|
||||
01A4; C; 01A5; # LATIN CAPITAL LETTER P WITH HOOK
|
||||
01A6; C; 0280; # LATIN LETTER YR
|
||||
01A7; C; 01A8; # LATIN CAPITAL LETTER TONE TWO
|
||||
01A9; C; 0283; # LATIN CAPITAL LETTER ESH
|
||||
01AC; C; 01AD; # LATIN CAPITAL LETTER T WITH HOOK
|
||||
01AE; C; 0288; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
|
||||
01AF; C; 01B0; # LATIN CAPITAL LETTER U WITH HORN
|
||||
01B1; C; 028A; # LATIN CAPITAL LETTER UPSILON
|
||||
01B2; C; 028B; # LATIN CAPITAL LETTER V WITH HOOK
|
||||
01B3; C; 01B4; # LATIN CAPITAL LETTER Y WITH HOOK
|
||||
01B5; C; 01B6; # LATIN CAPITAL LETTER Z WITH STROKE
|
||||
01B7; C; 0292; # LATIN CAPITAL LETTER EZH
|
||||
01B8; C; 01B9; # LATIN CAPITAL LETTER EZH REVERSED
|
||||
01BC; C; 01BD; # LATIN CAPITAL LETTER TONE FIVE
|
||||
01C4; C; 01C6; # LATIN CAPITAL LETTER DZ WITH CARON
|
||||
01C5; C; 01C6; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
|
||||
01C7; C; 01C9; # LATIN CAPITAL LETTER LJ
|
||||
01C8; C; 01C9; # LATIN CAPITAL LETTER L WITH SMALL LETTER J
|
||||
01CA; C; 01CC; # LATIN CAPITAL LETTER NJ
|
||||
01CB; C; 01CC; # LATIN CAPITAL LETTER N WITH SMALL LETTER J
|
||||
01CD; C; 01CE; # LATIN CAPITAL LETTER A WITH CARON
|
||||
01CF; C; 01D0; # LATIN CAPITAL LETTER I WITH CARON
|
||||
01D1; C; 01D2; # LATIN CAPITAL LETTER O WITH CARON
|
||||
01D3; C; 01D4; # LATIN CAPITAL LETTER U WITH CARON
|
||||
01D5; C; 01D6; # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
||||
01D7; C; 01D8; # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
|
||||
01D9; C; 01DA; # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
|
||||
01DB; C; 01DC; # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
|
||||
01DE; C; 01DF; # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
|
||||
01E0; C; 01E1; # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
|
||||
01E2; C; 01E3; # LATIN CAPITAL LETTER AE WITH MACRON
|
||||
01E4; C; 01E5; # LATIN CAPITAL LETTER G WITH STROKE
|
||||
01E6; C; 01E7; # LATIN CAPITAL LETTER G WITH CARON
|
||||
01E8; C; 01E9; # LATIN CAPITAL LETTER K WITH CARON
|
||||
01EA; C; 01EB; # LATIN CAPITAL LETTER O WITH OGONEK
|
||||
01EC; C; 01ED; # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
|
||||
01EE; C; 01EF; # LATIN CAPITAL LETTER EZH WITH CARON
|
||||
01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON
|
||||
01F1; C; 01F3; # LATIN CAPITAL LETTER DZ
|
||||
01F2; C; 01F3; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z
|
||||
01F4; C; 01F5; # LATIN CAPITAL LETTER G WITH ACUTE
|
||||
01F6; C; 0195; # LATIN CAPITAL LETTER HWAIR
|
||||
01F7; C; 01BF; # LATIN CAPITAL LETTER WYNN
|
||||
01F8; C; 01F9; # LATIN CAPITAL LETTER N WITH GRAVE
|
||||
01FA; C; 01FB; # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
|
||||
01FC; C; 01FD; # LATIN CAPITAL LETTER AE WITH ACUTE
|
||||
01FE; C; 01FF; # LATIN CAPITAL LETTER O WITH STROKE AND ACUTE
|
||||
0200; C; 0201; # LATIN CAPITAL LETTER A WITH DOUBLE GRAVE
|
||||
0202; C; 0203; # LATIN CAPITAL LETTER A WITH INVERTED BREVE
|
||||
0204; C; 0205; # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE
|
||||
0206; C; 0207; # LATIN CAPITAL LETTER E WITH INVERTED BREVE
|
||||
0208; C; 0209; # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE
|
||||
020A; C; 020B; # LATIN CAPITAL LETTER I WITH INVERTED BREVE
|
||||
020C; C; 020D; # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE
|
||||
020E; C; 020F; # LATIN CAPITAL LETTER O WITH INVERTED BREVE
|
||||
0210; C; 0211; # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE
|
||||
0212; C; 0213; # LATIN CAPITAL LETTER R WITH INVERTED BREVE
|
||||
0214; C; 0215; # LATIN CAPITAL LETTER U WITH DOUBLE GRAVE
|
||||
0216; C; 0217; # LATIN CAPITAL LETTER U WITH INVERTED BREVE
|
||||
0218; C; 0219; # LATIN CAPITAL LETTER S WITH COMMA BELOW
|
||||
021A; C; 021B; # LATIN CAPITAL LETTER T WITH COMMA BELOW
|
||||
021C; C; 021D; # LATIN CAPITAL LETTER YOGH
|
||||
021E; C; 021F; # LATIN CAPITAL LETTER H WITH CARON
|
||||
0220; C; 019E; # LATIN CAPITAL LETTER N WITH LONG RIGHT LEG
|
||||
0222; C; 0223; # LATIN CAPITAL LETTER OU
|
||||
0224; C; 0225; # LATIN CAPITAL LETTER Z WITH HOOK
|
||||
0226; C; 0227; # LATIN CAPITAL LETTER A WITH DOT ABOVE
|
||||
0228; C; 0229; # LATIN CAPITAL LETTER E WITH CEDILLA
|
||||
022A; C; 022B; # LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON
|
||||
022C; C; 022D; # LATIN CAPITAL LETTER O WITH TILDE AND MACRON
|
||||
022E; C; 022F; # LATIN CAPITAL LETTER O WITH DOT ABOVE
|
||||
0230; C; 0231; # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON
|
||||
0232; C; 0233; # LATIN CAPITAL LETTER Y WITH MACRON
|
||||
0345; C; 03B9; # COMBINING GREEK YPOGEGRAMMENI
|
||||
0386; C; 03AC; # GREEK CAPITAL LETTER ALPHA WITH TONOS
|
||||
0388; C; 03AD; # GREEK CAPITAL LETTER EPSILON WITH TONOS
|
||||
0389; C; 03AE; # GREEK CAPITAL LETTER ETA WITH TONOS
|
||||
038A; C; 03AF; # GREEK CAPITAL LETTER IOTA WITH TONOS
|
||||
038C; C; 03CC; # GREEK CAPITAL LETTER OMICRON WITH TONOS
|
||||
038E; C; 03CD; # GREEK CAPITAL LETTER UPSILON WITH TONOS
|
||||
038F; C; 03CE; # GREEK CAPITAL LETTER OMEGA WITH TONOS
|
||||
0390; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
|
||||
0391; C; 03B1; # GREEK CAPITAL LETTER ALPHA
|
||||
0392; C; 03B2; # GREEK CAPITAL LETTER BETA
|
||||
0393; C; 03B3; # GREEK CAPITAL LETTER GAMMA
|
||||
0394; C; 03B4; # GREEK CAPITAL LETTER DELTA
|
||||
0395; C; 03B5; # GREEK CAPITAL LETTER EPSILON
|
||||
0396; C; 03B6; # GREEK CAPITAL LETTER ZETA
|
||||
0397; C; 03B7; # GREEK CAPITAL LETTER ETA
|
||||
0398; C; 03B8; # GREEK CAPITAL LETTER THETA
|
||||
0399; C; 03B9; # GREEK CAPITAL LETTER IOTA
|
||||
039A; C; 03BA; # GREEK CAPITAL LETTER KAPPA
|
||||
039B; C; 03BB; # GREEK CAPITAL LETTER LAMDA
|
||||
039C; C; 03BC; # GREEK CAPITAL LETTER MU
|
||||
039D; C; 03BD; # GREEK CAPITAL LETTER NU
|
||||
039E; C; 03BE; # GREEK CAPITAL LETTER XI
|
||||
039F; C; 03BF; # GREEK CAPITAL LETTER OMICRON
|
||||
03A0; C; 03C0; # GREEK CAPITAL LETTER PI
|
||||
03A1; C; 03C1; # GREEK CAPITAL LETTER RHO
|
||||
03A3; C; 03C3; # GREEK CAPITAL LETTER SIGMA
|
||||
03A4; C; 03C4; # GREEK CAPITAL LETTER TAU
|
||||
03A5; C; 03C5; # GREEK CAPITAL LETTER UPSILON
|
||||
03A6; C; 03C6; # GREEK CAPITAL LETTER PHI
|
||||
03A7; C; 03C7; # GREEK CAPITAL LETTER CHI
|
||||
03A8; C; 03C8; # GREEK CAPITAL LETTER PSI
|
||||
03A9; C; 03C9; # GREEK CAPITAL LETTER OMEGA
|
||||
03AA; C; 03CA; # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
|
||||
03AB; C; 03CB; # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
|
||||
03B0; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
|
||||
03C2; C; 03C3; # GREEK SMALL LETTER FINAL SIGMA
|
||||
03D0; C; 03B2; # GREEK BETA SYMBOL
|
||||
03D1; C; 03B8; # GREEK THETA SYMBOL
|
||||
03D5; C; 03C6; # GREEK PHI SYMBOL
|
||||
03D6; C; 03C0; # GREEK PI SYMBOL
|
||||
03D8; C; 03D9; # GREEK LETTER ARCHAIC KOPPA
|
||||
03DA; C; 03DB; # GREEK LETTER STIGMA
|
||||
03DC; C; 03DD; # GREEK LETTER DIGAMMA
|
||||
03DE; C; 03DF; # GREEK LETTER KOPPA
|
||||
03E0; C; 03E1; # GREEK LETTER SAMPI
|
||||
03E2; C; 03E3; # COPTIC CAPITAL LETTER SHEI
|
||||
03E4; C; 03E5; # COPTIC CAPITAL LETTER FEI
|
||||
03E6; C; 03E7; # COPTIC CAPITAL LETTER KHEI
|
||||
03E8; C; 03E9; # COPTIC CAPITAL LETTER HORI
|
||||
03EA; C; 03EB; # COPTIC CAPITAL LETTER GANGIA
|
||||
03EC; C; 03ED; # COPTIC CAPITAL LETTER SHIMA
|
||||
03EE; C; 03EF; # COPTIC CAPITAL LETTER DEI
|
||||
03F0; C; 03BA; # GREEK KAPPA SYMBOL
|
||||
03F1; C; 03C1; # GREEK RHO SYMBOL
|
||||
03F2; C; 03C3; # GREEK LUNATE SIGMA SYMBOL
|
||||
03F4; C; 03B8; # GREEK CAPITAL THETA SYMBOL
|
||||
03F5; C; 03B5; # GREEK LUNATE EPSILON SYMBOL
|
||||
0400; C; 0450; # CYRILLIC CAPITAL LETTER IE WITH GRAVE
|
||||
0401; C; 0451; # CYRILLIC CAPITAL LETTER IO
|
||||
0402; C; 0452; # CYRILLIC CAPITAL LETTER DJE
|
||||
0403; C; 0453; # CYRILLIC CAPITAL LETTER GJE
|
||||
0404; C; 0454; # CYRILLIC CAPITAL LETTER UKRAINIAN IE
|
||||
0405; C; 0455; # CYRILLIC CAPITAL LETTER DZE
|
||||
0406; C; 0456; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
0407; C; 0457; # CYRILLIC CAPITAL LETTER YI
|
||||
0408; C; 0458; # CYRILLIC CAPITAL LETTER JE
|
||||
0409; C; 0459; # CYRILLIC CAPITAL LETTER LJE
|
||||
040A; C; 045A; # CYRILLIC CAPITAL LETTER NJE
|
||||
040B; C; 045B; # CYRILLIC CAPITAL LETTER TSHE
|
||||
040C; C; 045C; # CYRILLIC CAPITAL LETTER KJE
|
||||
040D; C; 045D; # CYRILLIC CAPITAL LETTER I WITH GRAVE
|
||||
040E; C; 045E; # CYRILLIC CAPITAL LETTER SHORT U
|
||||
040F; C; 045F; # CYRILLIC CAPITAL LETTER DZHE
|
||||
0410; C; 0430; # CYRILLIC CAPITAL LETTER A
|
||||
0411; C; 0431; # CYRILLIC CAPITAL LETTER BE
|
||||
0412; C; 0432; # CYRILLIC CAPITAL LETTER VE
|
||||
0413; C; 0433; # CYRILLIC CAPITAL LETTER GHE
|
||||
0414; C; 0434; # CYRILLIC CAPITAL LETTER DE
|
||||
0415; C; 0435; # CYRILLIC CAPITAL LETTER IE
|
||||
0416; C; 0436; # CYRILLIC CAPITAL LETTER ZHE
|
||||
0417; C; 0437; # CYRILLIC CAPITAL LETTER ZE
|
||||
0418; C; 0438; # CYRILLIC CAPITAL LETTER I
|
||||
0419; C; 0439; # CYRILLIC CAPITAL LETTER SHORT I
|
||||
041A; C; 043A; # CYRILLIC CAPITAL LETTER KA
|
||||
041B; C; 043B; # CYRILLIC CAPITAL LETTER EL
|
||||
041C; C; 043C; # CYRILLIC CAPITAL LETTER EM
|
||||
041D; C; 043D; # CYRILLIC CAPITAL LETTER EN
|
||||
041E; C; 043E; # CYRILLIC CAPITAL LETTER O
|
||||
041F; C; 043F; # CYRILLIC CAPITAL LETTER PE
|
||||
0420; C; 0440; # CYRILLIC CAPITAL LETTER ER
|
||||
0421; C; 0441; # CYRILLIC CAPITAL LETTER ES
|
||||
0422; C; 0442; # CYRILLIC CAPITAL LETTER TE
|
||||
0423; C; 0443; # CYRILLIC CAPITAL LETTER U
|
||||
0424; C; 0444; # CYRILLIC CAPITAL LETTER EF
|
||||
0425; C; 0445; # CYRILLIC CAPITAL LETTER HA
|
||||
0426; C; 0446; # CYRILLIC CAPITAL LETTER TSE
|
||||
0427; C; 0447; # CYRILLIC CAPITAL LETTER CHE
|
||||
0428; C; 0448; # CYRILLIC CAPITAL LETTER SHA
|
||||
0429; C; 0449; # CYRILLIC CAPITAL LETTER SHCHA
|
||||
042A; C; 044A; # CYRILLIC CAPITAL LETTER HARD SIGN
|
||||
042B; C; 044B; # CYRILLIC CAPITAL LETTER YERU
|
||||
042C; C; 044C; # CYRILLIC CAPITAL LETTER SOFT SIGN
|
||||
042D; C; 044D; # CYRILLIC CAPITAL LETTER E
|
||||
042E; C; 044E; # CYRILLIC CAPITAL LETTER YU
|
||||
042F; C; 044F; # CYRILLIC CAPITAL LETTER YA
|
||||
0460; C; 0461; # CYRILLIC CAPITAL LETTER OMEGA
|
||||
0462; C; 0463; # CYRILLIC CAPITAL LETTER YAT
|
||||
0464; C; 0465; # CYRILLIC CAPITAL LETTER IOTIFIED E
|
||||
0466; C; 0467; # CYRILLIC CAPITAL LETTER LITTLE YUS
|
||||
0468; C; 0469; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS
|
||||
046A; C; 046B; # CYRILLIC CAPITAL LETTER BIG YUS
|
||||
046C; C; 046D; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS
|
||||
046E; C; 046F; # CYRILLIC CAPITAL LETTER KSI
|
||||
0470; C; 0471; # CYRILLIC CAPITAL LETTER PSI
|
||||
0472; C; 0473; # CYRILLIC CAPITAL LETTER FITA
|
||||
0474; C; 0475; # CYRILLIC CAPITAL LETTER IZHITSA
|
||||
0476; C; 0477; # CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT
|
||||
0478; C; 0479; # CYRILLIC CAPITAL LETTER UK
|
||||
047A; C; 047B; # CYRILLIC CAPITAL LETTER ROUND OMEGA
|
||||
047C; C; 047D; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
|
||||
047E; C; 047F; # CYRILLIC CAPITAL LETTER OT
|
||||
0480; C; 0481; # CYRILLIC CAPITAL LETTER KOPPA
|
||||
048A; C; 048B; # CYRILLIC CAPITAL LETTER SHORT I WITH TAIL
|
||||
048C; C; 048D; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN
|
||||
048E; C; 048F; # CYRILLIC CAPITAL LETTER ER WITH TICK
|
||||
0490; C; 0491; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN
|
||||
0492; C; 0493; # CYRILLIC CAPITAL LETTER GHE WITH STROKE
|
||||
0494; C; 0495; # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK
|
||||
0496; C; 0497; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
|
||||
0498; C; 0499; # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER
|
||||
049A; C; 049B; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER
|
||||
049C; C; 049D; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
|
||||
049E; C; 049F; # CYRILLIC CAPITAL LETTER KA WITH STROKE
|
||||
04A0; C; 04A1; # CYRILLIC CAPITAL LETTER BASHKIR KA
|
||||
04A2; C; 04A3; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER
|
||||
04A4; C; 04A5; # CYRILLIC CAPITAL LIGATURE EN GHE
|
||||
04A6; C; 04A7; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
|
||||
04A8; C; 04A9; # CYRILLIC CAPITAL LETTER ABKHASIAN HA
|
||||
04AA; C; 04AB; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER
|
||||
04AC; C; 04AD; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER
|
||||
04AE; C; 04AF; # CYRILLIC CAPITAL LETTER STRAIGHT U
|
||||
04B0; C; 04B1; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
|
||||
04B2; C; 04B3; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER
|
||||
04B4; C; 04B5; # CYRILLIC CAPITAL LIGATURE TE TSE
|
||||
04B6; C; 04B7; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
|
||||
04B8; C; 04B9; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
|
||||
04BA; C; 04BB; # CYRILLIC CAPITAL LETTER SHHA
|
||||
04BC; C; 04BD; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE
|
||||
04BE; C; 04BF; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
|
||||
04C1; C; 04C2; # CYRILLIC CAPITAL LETTER ZHE WITH BREVE
|
||||
04C3; C; 04C4; # CYRILLIC CAPITAL LETTER KA WITH HOOK
|
||||
04C5; C; 04C6; # CYRILLIC CAPITAL LETTER EL WITH TAIL
|
||||
04C7; C; 04C8; # CYRILLIC CAPITAL LETTER EN WITH HOOK
|
||||
04C9; C; 04CA; # CYRILLIC CAPITAL LETTER EN WITH TAIL
|
||||
04CB; C; 04CC; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
|
||||
04CD; C; 04CE; # CYRILLIC CAPITAL LETTER EM WITH TAIL
|
||||
04D0; C; 04D1; # CYRILLIC CAPITAL LETTER A WITH BREVE
|
||||
04D2; C; 04D3; # CYRILLIC CAPITAL LETTER A WITH DIAERESIS
|
||||
04D4; C; 04D5; # CYRILLIC CAPITAL LIGATURE A IE
|
||||
04D6; C; 04D7; # CYRILLIC CAPITAL LETTER IE WITH BREVE
|
||||
04D8; C; 04D9; # CYRILLIC CAPITAL LETTER SCHWA
|
||||
04DA; C; 04DB; # CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS
|
||||
04DC; C; 04DD; # CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS
|
||||
04DE; C; 04DF; # CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS
|
||||
04E0; C; 04E1; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE
|
||||
04E2; C; 04E3; # CYRILLIC CAPITAL LETTER I WITH MACRON
|
||||
04E4; C; 04E5; # CYRILLIC CAPITAL LETTER I WITH DIAERESIS
|
||||
04E6; C; 04E7; # CYRILLIC CAPITAL LETTER O WITH DIAERESIS
|
||||
04E8; C; 04E9; # CYRILLIC CAPITAL LETTER BARRED O
|
||||
04EA; C; 04EB; # CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS
|
||||
04EC; C; 04ED; # CYRILLIC CAPITAL LETTER E WITH DIAERESIS
|
||||
04EE; C; 04EF; # CYRILLIC CAPITAL LETTER U WITH MACRON
|
||||
04F0; C; 04F1; # CYRILLIC CAPITAL LETTER U WITH DIAERESIS
|
||||
04F2; C; 04F3; # CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE
|
||||
04F4; C; 04F5; # CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS
|
||||
04F8; C; 04F9; # CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS
|
||||
0500; C; 0501; # CYRILLIC CAPITAL LETTER KOMI DE
|
||||
0502; C; 0503; # CYRILLIC CAPITAL LETTER KOMI DJE
|
||||
0504; C; 0505; # CYRILLIC CAPITAL LETTER KOMI ZJE
|
||||
0506; C; 0507; # CYRILLIC CAPITAL LETTER KOMI DZJE
|
||||
0508; C; 0509; # CYRILLIC CAPITAL LETTER KOMI LJE
|
||||
050A; C; 050B; # CYRILLIC CAPITAL LETTER KOMI NJE
|
||||
050C; C; 050D; # CYRILLIC CAPITAL LETTER KOMI SJE
|
||||
050E; C; 050F; # CYRILLIC CAPITAL LETTER KOMI TJE
|
||||
0531; C; 0561; # ARMENIAN CAPITAL LETTER AYB
|
||||
0532; C; 0562; # ARMENIAN CAPITAL LETTER BEN
|
||||
0533; C; 0563; # ARMENIAN CAPITAL LETTER GIM
|
||||
0534; C; 0564; # ARMENIAN CAPITAL LETTER DA
|
||||
0535; C; 0565; # ARMENIAN CAPITAL LETTER ECH
|
||||
0536; C; 0566; # ARMENIAN CAPITAL LETTER ZA
|
||||
0537; C; 0567; # ARMENIAN CAPITAL LETTER EH
|
||||
0538; C; 0568; # ARMENIAN CAPITAL LETTER ET
|
||||
0539; C; 0569; # ARMENIAN CAPITAL LETTER TO
|
||||
053A; C; 056A; # ARMENIAN CAPITAL LETTER ZHE
|
||||
053B; C; 056B; # ARMENIAN CAPITAL LETTER INI
|
||||
053C; C; 056C; # ARMENIAN CAPITAL LETTER LIWN
|
||||
053D; C; 056D; # ARMENIAN CAPITAL LETTER XEH
|
||||
053E; C; 056E; # ARMENIAN CAPITAL LETTER CA
|
||||
053F; C; 056F; # ARMENIAN CAPITAL LETTER KEN
|
||||
0540; C; 0570; # ARMENIAN CAPITAL LETTER HO
|
||||
0541; C; 0571; # ARMENIAN CAPITAL LETTER JA
|
||||
0542; C; 0572; # ARMENIAN CAPITAL LETTER GHAD
|
||||
0543; C; 0573; # ARMENIAN CAPITAL LETTER CHEH
|
||||
0544; C; 0574; # ARMENIAN CAPITAL LETTER MEN
|
||||
0545; C; 0575; # ARMENIAN CAPITAL LETTER YI
|
||||
0546; C; 0576; # ARMENIAN CAPITAL LETTER NOW
|
||||
0547; C; 0577; # ARMENIAN CAPITAL LETTER SHA
|
||||
0548; C; 0578; # ARMENIAN CAPITAL LETTER VO
|
||||
0549; C; 0579; # ARMENIAN CAPITAL LETTER CHA
|
||||
054A; C; 057A; # ARMENIAN CAPITAL LETTER PEH
|
||||
054B; C; 057B; # ARMENIAN CAPITAL LETTER JHEH
|
||||
054C; C; 057C; # ARMENIAN CAPITAL LETTER RA
|
||||
054D; C; 057D; # ARMENIAN CAPITAL LETTER SEH
|
||||
054E; C; 057E; # ARMENIAN CAPITAL LETTER VEW
|
||||
054F; C; 057F; # ARMENIAN CAPITAL LETTER TIWN
|
||||
0550; C; 0580; # ARMENIAN CAPITAL LETTER REH
|
||||
0551; C; 0581; # ARMENIAN CAPITAL LETTER CO
|
||||
0552; C; 0582; # ARMENIAN CAPITAL LETTER YIWN
|
||||
0553; C; 0583; # ARMENIAN CAPITAL LETTER PIWR
|
||||
0554; C; 0584; # ARMENIAN CAPITAL LETTER KEH
|
||||
0555; C; 0585; # ARMENIAN CAPITAL LETTER OH
|
||||
0556; C; 0586; # ARMENIAN CAPITAL LETTER FEH
|
||||
0587; F; 0565 0582; # ARMENIAN SMALL LIGATURE ECH YIWN
|
||||
1E00; C; 1E01; # LATIN CAPITAL LETTER A WITH RING BELOW
|
||||
1E02; C; 1E03; # LATIN CAPITAL LETTER B WITH DOT ABOVE
|
||||
1E04; C; 1E05; # LATIN CAPITAL LETTER B WITH DOT BELOW
|
||||
1E06; C; 1E07; # LATIN CAPITAL LETTER B WITH LINE BELOW
|
||||
1E08; C; 1E09; # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE
|
||||
1E0A; C; 1E0B; # LATIN CAPITAL LETTER D WITH DOT ABOVE
|
||||
1E0C; C; 1E0D; # LATIN CAPITAL LETTER D WITH DOT BELOW
|
||||
1E0E; C; 1E0F; # LATIN CAPITAL LETTER D WITH LINE BELOW
|
||||
1E10; C; 1E11; # LATIN CAPITAL LETTER D WITH CEDILLA
|
||||
1E12; C; 1E13; # LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW
|
||||
1E14; C; 1E15; # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE
|
||||
1E16; C; 1E17; # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE
|
||||
1E18; C; 1E19; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW
|
||||
1E1A; C; 1E1B; # LATIN CAPITAL LETTER E WITH TILDE BELOW
|
||||
1E1C; C; 1E1D; # LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE
|
||||
1E1E; C; 1E1F; # LATIN CAPITAL LETTER F WITH DOT ABOVE
|
||||
1E20; C; 1E21; # LATIN CAPITAL LETTER G WITH MACRON
|
||||
1E22; C; 1E23; # LATIN CAPITAL LETTER H WITH DOT ABOVE
|
||||
1E24; C; 1E25; # LATIN CAPITAL LETTER H WITH DOT BELOW
|
||||
1E26; C; 1E27; # LATIN CAPITAL LETTER H WITH DIAERESIS
|
||||
1E28; C; 1E29; # LATIN CAPITAL LETTER H WITH CEDILLA
|
||||
1E2A; C; 1E2B; # LATIN CAPITAL LETTER H WITH BREVE BELOW
|
||||
1E2C; C; 1E2D; # LATIN CAPITAL LETTER I WITH TILDE BELOW
|
||||
1E2E; C; 1E2F; # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
|
||||
1E30; C; 1E31; # LATIN CAPITAL LETTER K WITH ACUTE
|
||||
1E32; C; 1E33; # LATIN CAPITAL LETTER K WITH DOT BELOW
|
||||
1E34; C; 1E35; # LATIN CAPITAL LETTER K WITH LINE BELOW
|
||||
1E36; C; 1E37; # LATIN CAPITAL LETTER L WITH DOT BELOW
|
||||
1E38; C; 1E39; # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON
|
||||
1E3A; C; 1E3B; # LATIN CAPITAL LETTER L WITH LINE BELOW
|
||||
1E3C; C; 1E3D; # LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW
|
||||
1E3E; C; 1E3F; # LATIN CAPITAL LETTER M WITH ACUTE
|
||||
1E40; C; 1E41; # LATIN CAPITAL LETTER M WITH DOT ABOVE
|
||||
1E42; C; 1E43; # LATIN CAPITAL LETTER M WITH DOT BELOW
|
||||
1E44; C; 1E45; # LATIN CAPITAL LETTER N WITH DOT ABOVE
|
||||
1E46; C; 1E47; # LATIN CAPITAL LETTER N WITH DOT BELOW
|
||||
1E48; C; 1E49; # LATIN CAPITAL LETTER N WITH LINE BELOW
|
||||
1E4A; C; 1E4B; # LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW
|
||||
1E4C; C; 1E4D; # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE
|
||||
1E4E; C; 1E4F; # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS
|
||||
1E50; C; 1E51; # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE
|
||||
1E52; C; 1E53; # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE
|
||||
1E54; C; 1E55; # LATIN CAPITAL LETTER P WITH ACUTE
|
||||
1E56; C; 1E57; # LATIN CAPITAL LETTER P WITH DOT ABOVE
|
||||
1E58; C; 1E59; # LATIN CAPITAL LETTER R WITH DOT ABOVE
|
||||
1E5A; C; 1E5B; # LATIN CAPITAL LETTER R WITH DOT BELOW
|
||||
1E5C; C; 1E5D; # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON
|
||||
1E5E; C; 1E5F; # LATIN CAPITAL LETTER R WITH LINE BELOW
|
||||
1E60; C; 1E61; # LATIN CAPITAL LETTER S WITH DOT ABOVE
|
||||
1E62; C; 1E63; # LATIN CAPITAL LETTER S WITH DOT BELOW
|
||||
1E64; C; 1E65; # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE
|
||||
1E66; C; 1E67; # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE
|
||||
1E68; C; 1E69; # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE
|
||||
1E6A; C; 1E6B; # LATIN CAPITAL LETTER T WITH DOT ABOVE
|
||||
1E6C; C; 1E6D; # LATIN CAPITAL LETTER T WITH DOT BELOW
|
||||
1E6E; C; 1E6F; # LATIN CAPITAL LETTER T WITH LINE BELOW
|
||||
1E70; C; 1E71; # LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW
|
||||
1E72; C; 1E73; # LATIN CAPITAL LETTER U WITH DIAERESIS BELOW
|
||||
1E74; C; 1E75; # LATIN CAPITAL LETTER U WITH TILDE BELOW
|
||||
1E76; C; 1E77; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW
|
||||
1E78; C; 1E79; # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE
|
||||
1E7A; C; 1E7B; # LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS
|
||||
1E7C; C; 1E7D; # LATIN CAPITAL LETTER V WITH TILDE
|
||||
1E7E; C; 1E7F; # LATIN CAPITAL LETTER V WITH DOT BELOW
|
||||
1E80; C; 1E81; # LATIN CAPITAL LETTER W WITH GRAVE
|
||||
1E82; C; 1E83; # LATIN CAPITAL LETTER W WITH ACUTE
|
||||
1E84; C; 1E85; # LATIN CAPITAL LETTER W WITH DIAERESIS
|
||||
1E86; C; 1E87; # LATIN CAPITAL LETTER W WITH DOT ABOVE
|
||||
1E88; C; 1E89; # LATIN CAPITAL LETTER W WITH DOT BELOW
|
||||
1E8A; C; 1E8B; # LATIN CAPITAL LETTER X WITH DOT ABOVE
|
||||
1E8C; C; 1E8D; # LATIN CAPITAL LETTER X WITH DIAERESIS
|
||||
1E8E; C; 1E8F; # LATIN CAPITAL LETTER Y WITH DOT ABOVE
|
||||
1E90; C; 1E91; # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX
|
||||
1E92; C; 1E93; # LATIN CAPITAL LETTER Z WITH DOT BELOW
|
||||
1E94; C; 1E95; # LATIN CAPITAL LETTER Z WITH LINE BELOW
|
||||
1E96; F; 0068 0331; # LATIN SMALL LETTER H WITH LINE BELOW
|
||||
1E97; F; 0074 0308; # LATIN SMALL LETTER T WITH DIAERESIS
|
||||
1E98; F; 0077 030A; # LATIN SMALL LETTER W WITH RING ABOVE
|
||||
1E99; F; 0079 030A; # LATIN SMALL LETTER Y WITH RING ABOVE
|
||||
1E9A; F; 0061 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING
|
||||
1E9B; C; 1E61; # LATIN SMALL LETTER LONG S WITH DOT ABOVE
|
||||
1EA0; C; 1EA1; # LATIN CAPITAL LETTER A WITH DOT BELOW
|
||||
1EA2; C; 1EA3; # LATIN CAPITAL LETTER A WITH HOOK ABOVE
|
||||
1EA4; C; 1EA5; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
|
||||
1EA6; C; 1EA7; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
|
||||
1EA8; C; 1EA9; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
|
||||
1EAA; C; 1EAB; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
|
||||
1EAC; C; 1EAD; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
|
||||
1EAE; C; 1EAF; # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
|
||||
1EB0; C; 1EB1; # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
|
||||
1EB2; C; 1EB3; # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
|
||||
1EB4; C; 1EB5; # LATIN CAPITAL LETTER A WITH BREVE AND TILDE
|
||||
1EB6; C; 1EB7; # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
|
||||
1EB8; C; 1EB9; # LATIN CAPITAL LETTER E WITH DOT BELOW
|
||||
1EBA; C; 1EBB; # LATIN CAPITAL LETTER E WITH HOOK ABOVE
|
||||
1EBC; C; 1EBD; # LATIN CAPITAL LETTER E WITH TILDE
|
||||
1EBE; C; 1EBF; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
|
||||
1EC0; C; 1EC1; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
|
||||
1EC2; C; 1EC3; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
|
||||
1EC4; C; 1EC5; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
|
||||
1EC6; C; 1EC7; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
|
||||
1EC8; C; 1EC9; # LATIN CAPITAL LETTER I WITH HOOK ABOVE
|
||||
1ECA; C; 1ECB; # LATIN CAPITAL LETTER I WITH DOT BELOW
|
||||
1ECC; C; 1ECD; # LATIN CAPITAL LETTER O WITH DOT BELOW
|
||||
1ECE; C; 1ECF; # LATIN CAPITAL LETTER O WITH HOOK ABOVE
|
||||
1ED0; C; 1ED1; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
|
||||
1ED2; C; 1ED3; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
|
||||
1ED4; C; 1ED5; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
|
||||
1ED6; C; 1ED7; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
|
||||
1ED8; C; 1ED9; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
|
||||
1EDA; C; 1EDB; # LATIN CAPITAL LETTER O WITH HORN AND ACUTE
|
||||
1EDC; C; 1EDD; # LATIN CAPITAL LETTER O WITH HORN AND GRAVE
|
||||
1EDE; C; 1EDF; # LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE
|
||||
1EE0; C; 1EE1; # LATIN CAPITAL LETTER O WITH HORN AND TILDE
|
||||
1EE2; C; 1EE3; # LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW
|
||||
1EE4; C; 1EE5; # LATIN CAPITAL LETTER U WITH DOT BELOW
|
||||
1EE6; C; 1EE7; # LATIN CAPITAL LETTER U WITH HOOK ABOVE
|
||||
1EE8; C; 1EE9; # LATIN CAPITAL LETTER U WITH HORN AND ACUTE
|
||||
1EEA; C; 1EEB; # LATIN CAPITAL LETTER U WITH HORN AND GRAVE
|
||||
1EEC; C; 1EED; # LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE
|
||||
1EEE; C; 1EEF; # LATIN CAPITAL LETTER U WITH HORN AND TILDE
|
||||
1EF0; C; 1EF1; # LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW
|
||||
1EF2; C; 1EF3; # LATIN CAPITAL LETTER Y WITH GRAVE
|
||||
1EF4; C; 1EF5; # LATIN CAPITAL LETTER Y WITH DOT BELOW
|
||||
1EF6; C; 1EF7; # LATIN CAPITAL LETTER Y WITH HOOK ABOVE
|
||||
1EF8; C; 1EF9; # LATIN CAPITAL LETTER Y WITH TILDE
|
||||
1F08; C; 1F00; # GREEK CAPITAL LETTER ALPHA WITH PSILI
|
||||
1F09; C; 1F01; # GREEK CAPITAL LETTER ALPHA WITH DASIA
|
||||
1F0A; C; 1F02; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA
|
||||
1F0B; C; 1F03; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA
|
||||
1F0C; C; 1F04; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA
|
||||
1F0D; C; 1F05; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA
|
||||
1F0E; C; 1F06; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI
|
||||
1F0F; C; 1F07; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI
|
||||
1F18; C; 1F10; # GREEK CAPITAL LETTER EPSILON WITH PSILI
|
||||
1F19; C; 1F11; # GREEK CAPITAL LETTER EPSILON WITH DASIA
|
||||
1F1A; C; 1F12; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA
|
||||
1F1B; C; 1F13; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA
|
||||
1F1C; C; 1F14; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA
|
||||
1F1D; C; 1F15; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
|
||||
1F28; C; 1F20; # GREEK CAPITAL LETTER ETA WITH PSILI
|
||||
1F29; C; 1F21; # GREEK CAPITAL LETTER ETA WITH DASIA
|
||||
1F2A; C; 1F22; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA
|
||||
1F2B; C; 1F23; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA
|
||||
1F2C; C; 1F24; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA
|
||||
1F2D; C; 1F25; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA
|
||||
1F2E; C; 1F26; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI
|
||||
1F2F; C; 1F27; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI
|
||||
1F38; C; 1F30; # GREEK CAPITAL LETTER IOTA WITH PSILI
|
||||
1F39; C; 1F31; # GREEK CAPITAL LETTER IOTA WITH DASIA
|
||||
1F3A; C; 1F32; # GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA
|
||||
1F3B; C; 1F33; # GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA
|
||||
1F3C; C; 1F34; # GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA
|
||||
1F3D; C; 1F35; # GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA
|
||||
1F3E; C; 1F36; # GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI
|
||||
1F3F; C; 1F37; # GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI
|
||||
1F48; C; 1F40; # GREEK CAPITAL LETTER OMICRON WITH PSILI
|
||||
1F49; C; 1F41; # GREEK CAPITAL LETTER OMICRON WITH DASIA
|
||||
1F4A; C; 1F42; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA
|
||||
1F4B; C; 1F43; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA
|
||||
1F4C; C; 1F44; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA
|
||||
1F4D; C; 1F45; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
|
||||
1F50; F; 03C5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI
|
||||
1F52; F; 03C5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA
|
||||
1F54; F; 03C5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA
|
||||
1F56; F; 03C5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI
|
||||
1F59; C; 1F51; # GREEK CAPITAL LETTER UPSILON WITH DASIA
|
||||
1F5B; C; 1F53; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA
|
||||
1F5D; C; 1F55; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA
|
||||
1F5F; C; 1F57; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI
|
||||
1F68; C; 1F60; # GREEK CAPITAL LETTER OMEGA WITH PSILI
|
||||
1F69; C; 1F61; # GREEK CAPITAL LETTER OMEGA WITH DASIA
|
||||
1F6A; C; 1F62; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA
|
||||
1F6B; C; 1F63; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA
|
||||
1F6C; C; 1F64; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA
|
||||
1F6D; C; 1F65; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA
|
||||
1F6E; C; 1F66; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI
|
||||
1F6F; C; 1F67; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI
|
||||
1F80; F; 1F00 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
|
||||
1F81; F; 1F01 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI
|
||||
1F82; F; 1F02 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI
|
||||
1F83; F; 1F03 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI
|
||||
1F84; F; 1F04 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI
|
||||
1F85; F; 1F05 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI
|
||||
1F86; F; 1F06 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
|
||||
1F87; F; 1F07 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
|
||||
1F88; F; 1F00 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
|
||||
1F88; S; 1F80; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
|
||||
1F89; F; 1F01 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
|
||||
1F89; S; 1F81; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
|
||||
1F8A; F; 1F02 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
|
||||
1F8A; S; 1F82; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
|
||||
1F8B; F; 1F03 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
|
||||
1F8B; S; 1F83; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
|
||||
1F8C; F; 1F04 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
|
||||
1F8C; S; 1F84; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
|
||||
1F8D; F; 1F05 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
|
||||
1F8D; S; 1F85; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
|
||||
1F8E; F; 1F06 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1F8E; S; 1F86; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1F8F; F; 1F07 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1F8F; S; 1F87; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1F90; F; 1F20 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI
|
||||
1F91; F; 1F21 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI
|
||||
1F92; F; 1F22 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI
|
||||
1F93; F; 1F23 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI
|
||||
1F94; F; 1F24 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI
|
||||
1F95; F; 1F25 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI
|
||||
1F96; F; 1F26 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
|
||||
1F97; F; 1F27 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
|
||||
1F98; F; 1F20 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
|
||||
1F98; S; 1F90; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
|
||||
1F99; F; 1F21 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
|
||||
1F99; S; 1F91; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
|
||||
1F9A; F; 1F22 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
|
||||
1F9A; S; 1F92; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
|
||||
1F9B; F; 1F23 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
|
||||
1F9B; S; 1F93; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
|
||||
1F9C; F; 1F24 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
|
||||
1F9C; S; 1F94; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
|
||||
1F9D; F; 1F25 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
|
||||
1F9D; S; 1F95; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
|
||||
1F9E; F; 1F26 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1F9E; S; 1F96; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1F9F; F; 1F27 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1F9F; S; 1F97; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1FA0; F; 1F60 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI
|
||||
1FA1; F; 1F61 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI
|
||||
1FA2; F; 1F62 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI
|
||||
1FA3; F; 1F63 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI
|
||||
1FA4; F; 1F64 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI
|
||||
1FA5; F; 1F65 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI
|
||||
1FA6; F; 1F66 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
|
||||
1FA7; F; 1F67 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
|
||||
1FA8; F; 1F60 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
|
||||
1FA8; S; 1FA0; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
|
||||
1FA9; F; 1F61 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
|
||||
1FA9; S; 1FA1; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
|
||||
1FAA; F; 1F62 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
|
||||
1FAA; S; 1FA2; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
|
||||
1FAB; F; 1F63 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
|
||||
1FAB; S; 1FA3; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
|
||||
1FAC; F; 1F64 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
|
||||
1FAC; S; 1FA4; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
|
||||
1FAD; F; 1F65 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
|
||||
1FAD; S; 1FA5; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
|
||||
1FAE; F; 1F66 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1FAE; S; 1FA6; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1FAF; F; 1F67 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1FAF; S; 1FA7; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
1FB2; F; 1F70 03B9; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI
|
||||
1FB3; F; 03B1 03B9; # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI
|
||||
1FB4; F; 03AC 03B9; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
|
||||
1FB6; F; 03B1 0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI
|
||||
1FB7; F; 03B1 0342 03B9; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
|
||||
1FB8; C; 1FB0; # GREEK CAPITAL LETTER ALPHA WITH VRACHY
|
||||
1FB9; C; 1FB1; # GREEK CAPITAL LETTER ALPHA WITH MACRON
|
||||
1FBA; C; 1F70; # GREEK CAPITAL LETTER ALPHA WITH VARIA
|
||||
1FBB; C; 1F71; # GREEK CAPITAL LETTER ALPHA WITH OXIA
|
||||
1FBC; F; 03B1 03B9; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
|
||||
1FBC; S; 1FB3; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
|
||||
1FBE; C; 03B9; # GREEK PROSGEGRAMMENI
|
||||
1FC2; F; 1F74 03B9; # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI
|
||||
1FC3; F; 03B7 03B9; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI
|
||||
1FC4; F; 03AE 03B9; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
|
||||
1FC6; F; 03B7 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI
|
||||
1FC7; F; 03B7 0342 03B9; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
|
||||
1FC8; C; 1F72; # GREEK CAPITAL LETTER EPSILON WITH VARIA
|
||||
1FC9; C; 1F73; # GREEK CAPITAL LETTER EPSILON WITH OXIA
|
||||
1FCA; C; 1F74; # GREEK CAPITAL LETTER ETA WITH VARIA
|
||||
1FCB; C; 1F75; # GREEK CAPITAL LETTER ETA WITH OXIA
|
||||
1FCC; F; 03B7 03B9; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
|
||||
1FCC; S; 1FC3; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
|
||||
1FD2; F; 03B9 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA
|
||||
1FD3; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
|
||||
1FD6; F; 03B9 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI
|
||||
1FD7; F; 03B9 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI
|
||||
1FD8; C; 1FD0; # GREEK CAPITAL LETTER IOTA WITH VRACHY
|
||||
1FD9; C; 1FD1; # GREEK CAPITAL LETTER IOTA WITH MACRON
|
||||
1FDA; C; 1F76; # GREEK CAPITAL LETTER IOTA WITH VARIA
|
||||
1FDB; C; 1F77; # GREEK CAPITAL LETTER IOTA WITH OXIA
|
||||
1FE2; F; 03C5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA
|
||||
1FE3; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
|
||||
1FE4; F; 03C1 0313; # GREEK SMALL LETTER RHO WITH PSILI
|
||||
1FE6; F; 03C5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI
|
||||
1FE7; F; 03C5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
|
||||
1FE8; C; 1FE0; # GREEK CAPITAL LETTER UPSILON WITH VRACHY
|
||||
1FE9; C; 1FE1; # GREEK CAPITAL LETTER UPSILON WITH MACRON
|
||||
1FEA; C; 1F7A; # GREEK CAPITAL LETTER UPSILON WITH VARIA
|
||||
1FEB; C; 1F7B; # GREEK CAPITAL LETTER UPSILON WITH OXIA
|
||||
1FEC; C; 1FE5; # GREEK CAPITAL LETTER RHO WITH DASIA
|
||||
1FF2; F; 1F7C 03B9; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
|
||||
1FF3; F; 03C9 03B9; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI
|
||||
1FF4; F; 03CE 03B9; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
|
||||
1FF6; F; 03C9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI
|
||||
1FF7; F; 03C9 0342 03B9; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
|
||||
1FF8; C; 1F78; # GREEK CAPITAL LETTER OMICRON WITH VARIA
|
||||
1FF9; C; 1F79; # GREEK CAPITAL LETTER OMICRON WITH OXIA
|
||||
1FFA; C; 1F7C; # GREEK CAPITAL LETTER OMEGA WITH VARIA
|
||||
1FFB; C; 1F7D; # GREEK CAPITAL LETTER OMEGA WITH OXIA
|
||||
1FFC; F; 03C9 03B9; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
|
||||
1FFC; S; 1FF3; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
|
||||
2126; C; 03C9; # OHM SIGN
|
||||
212A; C; 006B; # KELVIN SIGN
|
||||
212B; C; 00E5; # ANGSTROM SIGN
|
||||
2160; C; 2170; # ROMAN NUMERAL ONE
|
||||
2161; C; 2171; # ROMAN NUMERAL TWO
|
||||
2162; C; 2172; # ROMAN NUMERAL THREE
|
||||
2163; C; 2173; # ROMAN NUMERAL FOUR
|
||||
2164; C; 2174; # ROMAN NUMERAL FIVE
|
||||
2165; C; 2175; # ROMAN NUMERAL SIX
|
||||
2166; C; 2176; # ROMAN NUMERAL SEVEN
|
||||
2167; C; 2177; # ROMAN NUMERAL EIGHT
|
||||
2168; C; 2178; # ROMAN NUMERAL NINE
|
||||
2169; C; 2179; # ROMAN NUMERAL TEN
|
||||
216A; C; 217A; # ROMAN NUMERAL ELEVEN
|
||||
216B; C; 217B; # ROMAN NUMERAL TWELVE
|
||||
216C; C; 217C; # ROMAN NUMERAL FIFTY
|
||||
216D; C; 217D; # ROMAN NUMERAL ONE HUNDRED
|
||||
216E; C; 217E; # ROMAN NUMERAL FIVE HUNDRED
|
||||
216F; C; 217F; # ROMAN NUMERAL ONE THOUSAND
|
||||
24B6; C; 24D0; # CIRCLED LATIN CAPITAL LETTER A
|
||||
24B7; C; 24D1; # CIRCLED LATIN CAPITAL LETTER B
|
||||
24B8; C; 24D2; # CIRCLED LATIN CAPITAL LETTER C
|
||||
24B9; C; 24D3; # CIRCLED LATIN CAPITAL LETTER D
|
||||
24BA; C; 24D4; # CIRCLED LATIN CAPITAL LETTER E
|
||||
24BB; C; 24D5; # CIRCLED LATIN CAPITAL LETTER F
|
||||
24BC; C; 24D6; # CIRCLED LATIN CAPITAL LETTER G
|
||||
24BD; C; 24D7; # CIRCLED LATIN CAPITAL LETTER H
|
||||
24BE; C; 24D8; # CIRCLED LATIN CAPITAL LETTER I
|
||||
24BF; C; 24D9; # CIRCLED LATIN CAPITAL LETTER J
|
||||
24C0; C; 24DA; # CIRCLED LATIN CAPITAL LETTER K
|
||||
24C1; C; 24DB; # CIRCLED LATIN CAPITAL LETTER L
|
||||
24C2; C; 24DC; # CIRCLED LATIN CAPITAL LETTER M
|
||||
24C3; C; 24DD; # CIRCLED LATIN CAPITAL LETTER N
|
||||
24C4; C; 24DE; # CIRCLED LATIN CAPITAL LETTER O
|
||||
24C5; C; 24DF; # CIRCLED LATIN CAPITAL LETTER P
|
||||
24C6; C; 24E0; # CIRCLED LATIN CAPITAL LETTER Q
|
||||
24C7; C; 24E1; # CIRCLED LATIN CAPITAL LETTER R
|
||||
24C8; C; 24E2; # CIRCLED LATIN CAPITAL LETTER S
|
||||
24C9; C; 24E3; # CIRCLED LATIN CAPITAL LETTER T
|
||||
24CA; C; 24E4; # CIRCLED LATIN CAPITAL LETTER U
|
||||
24CB; C; 24E5; # CIRCLED LATIN CAPITAL LETTER V
|
||||
24CC; C; 24E6; # CIRCLED LATIN CAPITAL LETTER W
|
||||
24CD; C; 24E7; # CIRCLED LATIN CAPITAL LETTER X
|
||||
24CE; C; 24E8; # CIRCLED LATIN CAPITAL LETTER Y
|
||||
24CF; C; 24E9; # CIRCLED LATIN CAPITAL LETTER Z
|
||||
FB00; F; 0066 0066; # LATIN SMALL LIGATURE FF
|
||||
FB01; F; 0066 0069; # LATIN SMALL LIGATURE FI
|
||||
FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL
|
||||
FB03; F; 0066 0066 0069; # LATIN SMALL LIGATURE FFI
|
||||
FB04; F; 0066 0066 006C; # LATIN SMALL LIGATURE FFL
|
||||
FB05; F; 0073 0074; # LATIN SMALL LIGATURE LONG S T
|
||||
FB06; F; 0073 0074; # LATIN SMALL LIGATURE ST
|
||||
FB13; F; 0574 0576; # ARMENIAN SMALL LIGATURE MEN NOW
|
||||
FB14; F; 0574 0565; # ARMENIAN SMALL LIGATURE MEN ECH
|
||||
FB15; F; 0574 056B; # ARMENIAN SMALL LIGATURE MEN INI
|
||||
FB16; F; 057E 0576; # ARMENIAN SMALL LIGATURE VEW NOW
|
||||
FB17; F; 0574 056D; # ARMENIAN SMALL LIGATURE MEN XEH
|
||||
FF21; C; FF41; # FULLWIDTH LATIN CAPITAL LETTER A
|
||||
FF22; C; FF42; # FULLWIDTH LATIN CAPITAL LETTER B
|
||||
FF23; C; FF43; # FULLWIDTH LATIN CAPITAL LETTER C
|
||||
FF24; C; FF44; # FULLWIDTH LATIN CAPITAL LETTER D
|
||||
FF25; C; FF45; # FULLWIDTH LATIN CAPITAL LETTER E
|
||||
FF26; C; FF46; # FULLWIDTH LATIN CAPITAL LETTER F
|
||||
FF27; C; FF47; # FULLWIDTH LATIN CAPITAL LETTER G
|
||||
FF28; C; FF48; # FULLWIDTH LATIN CAPITAL LETTER H
|
||||
FF29; C; FF49; # FULLWIDTH LATIN CAPITAL LETTER I
|
||||
FF2A; C; FF4A; # FULLWIDTH LATIN CAPITAL LETTER J
|
||||
FF2B; C; FF4B; # FULLWIDTH LATIN CAPITAL LETTER K
|
||||
FF2C; C; FF4C; # FULLWIDTH LATIN CAPITAL LETTER L
|
||||
FF2D; C; FF4D; # FULLWIDTH LATIN CAPITAL LETTER M
|
||||
FF2E; C; FF4E; # FULLWIDTH LATIN CAPITAL LETTER N
|
||||
FF2F; C; FF4F; # FULLWIDTH LATIN CAPITAL LETTER O
|
||||
FF30; C; FF50; # FULLWIDTH LATIN CAPITAL LETTER P
|
||||
FF31; C; FF51; # FULLWIDTH LATIN CAPITAL LETTER Q
|
||||
FF32; C; FF52; # FULLWIDTH LATIN CAPITAL LETTER R
|
||||
FF33; C; FF53; # FULLWIDTH LATIN CAPITAL LETTER S
|
||||
FF34; C; FF54; # FULLWIDTH LATIN CAPITAL LETTER T
|
||||
FF35; C; FF55; # FULLWIDTH LATIN CAPITAL LETTER U
|
||||
FF36; C; FF56; # FULLWIDTH LATIN CAPITAL LETTER V
|
||||
FF37; C; FF57; # FULLWIDTH LATIN CAPITAL LETTER W
|
||||
FF38; C; FF58; # FULLWIDTH LATIN CAPITAL LETTER X
|
||||
FF39; C; FF59; # FULLWIDTH LATIN CAPITAL LETTER Y
|
||||
FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z
|
||||
10400; C; 10428; # DESERET CAPITAL LETTER LONG I
|
||||
10401; C; 10429; # DESERET CAPITAL LETTER LONG E
|
||||
10402; C; 1042A; # DESERET CAPITAL LETTER LONG A
|
||||
10403; C; 1042B; # DESERET CAPITAL LETTER LONG AH
|
||||
10404; C; 1042C; # DESERET CAPITAL LETTER LONG O
|
||||
10405; C; 1042D; # DESERET CAPITAL LETTER LONG OO
|
||||
10406; C; 1042E; # DESERET CAPITAL LETTER SHORT I
|
||||
10407; C; 1042F; # DESERET CAPITAL LETTER SHORT E
|
||||
10408; C; 10430; # DESERET CAPITAL LETTER SHORT A
|
||||
10409; C; 10431; # DESERET CAPITAL LETTER SHORT AH
|
||||
1040A; C; 10432; # DESERET CAPITAL LETTER SHORT O
|
||||
1040B; C; 10433; # DESERET CAPITAL LETTER SHORT OO
|
||||
1040C; C; 10434; # DESERET CAPITAL LETTER AY
|
||||
1040D; C; 10435; # DESERET CAPITAL LETTER OW
|
||||
1040E; C; 10436; # DESERET CAPITAL LETTER WU
|
||||
1040F; C; 10437; # DESERET CAPITAL LETTER YEE
|
||||
10410; C; 10438; # DESERET CAPITAL LETTER H
|
||||
10411; C; 10439; # DESERET CAPITAL LETTER PEE
|
||||
10412; C; 1043A; # DESERET CAPITAL LETTER BEE
|
||||
10413; C; 1043B; # DESERET CAPITAL LETTER TEE
|
||||
10414; C; 1043C; # DESERET CAPITAL LETTER DEE
|
||||
10415; C; 1043D; # DESERET CAPITAL LETTER CHEE
|
||||
10416; C; 1043E; # DESERET CAPITAL LETTER JEE
|
||||
10417; C; 1043F; # DESERET CAPITAL LETTER KAY
|
||||
10418; C; 10440; # DESERET CAPITAL LETTER GAY
|
||||
10419; C; 10441; # DESERET CAPITAL LETTER EF
|
||||
1041A; C; 10442; # DESERET CAPITAL LETTER VEE
|
||||
1041B; C; 10443; # DESERET CAPITAL LETTER ETH
|
||||
1041C; C; 10444; # DESERET CAPITAL LETTER THEE
|
||||
1041D; C; 10445; # DESERET CAPITAL LETTER ES
|
||||
1041E; C; 10446; # DESERET CAPITAL LETTER ZEE
|
||||
1041F; C; 10447; # DESERET CAPITAL LETTER ESH
|
||||
10420; C; 10448; # DESERET CAPITAL LETTER ZHEE
|
||||
10421; C; 10449; # DESERET CAPITAL LETTER ER
|
||||
10422; C; 1044A; # DESERET CAPITAL LETTER EL
|
||||
10423; C; 1044B; # DESERET CAPITAL LETTER EM
|
||||
10424; C; 1044C; # DESERET CAPITAL LETTER EN
|
||||
10425; C; 1044D; # DESERET CAPITAL LETTER ENG
|
||||
@ -31,6 +31,7 @@ use Getopt::Long;
|
||||
|
||||
sub main {
|
||||
my($base) = "UnicodeData-@UNICODE_VERSION@.txt";
|
||||
my($cfbase) = "CaseFolding-@UNICODE_VERSION@.txt";
|
||||
my($verbose);
|
||||
my($source);
|
||||
my($reference);
|
||||
@ -39,7 +40,7 @@ sub main {
|
||||
"database=s" => \$base,
|
||||
"source!" => \$source,
|
||||
"reference!" => \$reference);
|
||||
|
||||
|
||||
my(%decomposition, %mark, %name);
|
||||
my(%ranges);
|
||||
open(FILE, "<$base") or die "cannot open $base for reading : $!";
|
||||
@ -101,8 +102,38 @@ sub main {
|
||||
}
|
||||
}
|
||||
|
||||
# Read in the casefolding file
|
||||
my(%casefold);
|
||||
open(FILE, "<$cfbase") or die "cannot open $cfbase for reading : $!";
|
||||
while(<FILE>) {
|
||||
next if(/^\s*#/); # Skip comments
|
||||
my($code_value,
|
||||
$foldstatus,
|
||||
$folded) = split(/;/, $_);
|
||||
if ($foldstatus =~ /C|F/) {
|
||||
$casefold{$code_value} = $folded;
|
||||
}
|
||||
}
|
||||
close(FILE);
|
||||
|
||||
#showcasefold(\%casefold);
|
||||
reference(\%decomposition, $verbose) if($reference);
|
||||
source(\%decomposition, \%name, $verbose) if($source);
|
||||
source(\%decomposition, \%name, \%casefold, $verbose) if($source);
|
||||
}
|
||||
|
||||
#
# Debugging aid: walk every code point from 0000 to FFFF, print it as
# four uppercase hexadecimal digits and, when the case-folding table
# given as a hash reference has an entry under that key, append
# " => <entry>". Each code point ends with a newline.
#
sub showcasefold {
    my($casefold) = @_;

    for my $n (0 .. 0xFFFF) {
	# Table keys are uppercase hex strings, zero padded to 4 digits.
	my $key = sprintf("%04X", $n);
	print $key;
	print(exists($casefold->{$key}) ? " => $casefold->{$key}\n" : "\n");
    }
}
|
||||
|
||||
#
|
||||
@ -202,7 +233,7 @@ sub spit {
|
||||
# The unac.c and unac.h files are substituted in place.
|
||||
#
|
||||
sub source {
|
||||
my($decomposition, $name, $verbose) = @_;
|
||||
my($decomposition, $name, $casefold, $verbose) = @_;
|
||||
|
||||
my($csource) = slurp("unac.c");
|
||||
my($hsource) = slurp("unac.h");
|
||||
@ -277,6 +308,31 @@ sub source {
|
||||
} else {
|
||||
push(@values, "FFFF");
|
||||
}
|
||||
# We also push the case-folded version of the unaccented char
|
||||
# Note that by pushing the case-folded version of the original
|
||||
# char, we'd have the possibility of independent unaccenting and
|
||||
# case folding, but with less performance.
|
||||
# We could also keep the three chunks, using a little more memory
|
||||
if(exists($decomposition->{$code_value})) {
|
||||
my($cv);
|
||||
my(@vl);
|
||||
foreach $cv (split(' ', $decomposition->{$code_value})) {
|
||||
if(exists($casefold->{$cv})) {
|
||||
push(@vl, $casefold->{$cv});
|
||||
} else {
|
||||
push(@vl, $cv);
|
||||
}
|
||||
}
|
||||
#print STDERR "Pushing " . join(" ", @vl) . " for " .
|
||||
#$code_value . "\n";
|
||||
push(@values, join(" ", @vl));
|
||||
} else {
|
||||
if(exists($casefold->{$code_value})) {
|
||||
push(@values, $casefold->{$code_value});
|
||||
} else {
|
||||
push(@values, "FFFF");
|
||||
}
|
||||
}
|
||||
}
|
||||
print STDERR scalar(@blocks) . " blocks of " . $block_count . " entries, factorized $duplicate blocks\n\t" if($verbose);
|
||||
my($block_size) = 0;
|
||||
@ -372,7 +428,7 @@ EOF
|
||||
$block_number++;
|
||||
}
|
||||
my($position_type) = $highest_position >= 256 ? "short" : "char";
|
||||
my($positions_out) = "unsigned $position_type unac_positions[UNAC_BLOCK_COUNT][UNAC_BLOCK_SIZE + 1] = {\n";
|
||||
my($positions_out) = "unsigned $position_type unac_positions[UNAC_BLOCK_COUNT][2*UNAC_BLOCK_SIZE + 1] = {\n";
|
||||
|
||||
$positions_out .= join(",\n", @positions_out);
|
||||
$positions_out .= "\n};\n";
|
||||
@ -387,7 +443,7 @@ EOF
|
||||
my($declarations);
|
||||
$declarations = <<EOF;
|
||||
extern unsigned short unac_indexes[UNAC_INDEXES_SIZE];
|
||||
extern unsigned $position_type unac_positions[UNAC_BLOCK_COUNT][UNAC_BLOCK_SIZE + 1];
|
||||
extern unsigned $position_type unac_positions[UNAC_BLOCK_COUNT][2*UNAC_BLOCK_SIZE + 1];
|
||||
extern unsigned short* unac_data_table[UNAC_BLOCK_COUNT];
|
||||
EOF
|
||||
for($block_number = 0; $block_number < $block_count; $block_number++) {
|
||||
|
||||
1161
unac/unac.c
1161
unac/unac.c
File diff suppressed because it is too large
Load Diff
165
unac/unac.h
165
unac/unac.h
@ -32,10 +32,10 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
/* Generated by builder. Do not modify. Start defines */
|
||||
#define UNAC_BLOCK_SHIFT 5
|
||||
#define UNAC_BLOCK_SHIFT 4
|
||||
#define UNAC_BLOCK_MASK ((1 << UNAC_BLOCK_SHIFT) - 1)
|
||||
#define UNAC_BLOCK_SIZE (1 << UNAC_BLOCK_SHIFT)
|
||||
#define UNAC_BLOCK_COUNT 178
|
||||
#define UNAC_BLOCK_COUNT 315
|
||||
#define UNAC_INDEXES_SIZE (0x10000 >> UNAC_BLOCK_SHIFT)
|
||||
/* Generated by builder. Do not modify. End defines */
|
||||
|
||||
@ -53,7 +53,7 @@ extern "C" {
|
||||
#define unac_char_utf16(c,p,l) \
|
||||
{ \
|
||||
unsigned short index = unac_indexes[(c) >> UNAC_BLOCK_SHIFT]; \
|
||||
unsigned char position = (c) & UNAC_BLOCK_MASK; \
|
||||
unsigned char position = 2*((c) & UNAC_BLOCK_MASK); \
|
||||
(p) = &(unac_data_table[index][unac_positions[index][position]]); \
|
||||
(l) = unac_positions[index][position + 1] - unac_positions[index][position]; \
|
||||
if((l) == 1 && *(p) == 0xFFFF) { \
|
||||
@ -62,6 +62,21 @@ extern "C" {
|
||||
} \
|
||||
}
|
||||
|
||||
/*
 * Same as unac_char_utf16, but reads the case-folded entry.
 *
 * The block for (c) is selected through unac_indexes, then the odd
 * slot 2*((c) & UNAC_BLOCK_MASK)+1 of unac_positions is used (the
 * non-folding macro reads the even slot). Per the builder script that
 * slot holds the case-folded form of the unaccented character.
 *
 * On exit (p) points into unac_data_table[index] at the start of the
 * replacement and (l) is the distance to the next position, i.e. the
 * number of unsigned short values in the replacement. When the entry
 * is the single marker 0xFFFF, both (p) and (l) are set to 0.
 *
 * Note: (c) is expanded twice, and (p) and (l) are assigned to, so
 * they must be assignable expressions.
 */
|
||||
#define unacfold_char_utf16(c,p,l) \
|
||||
{ \
|
||||
unsigned short index = unac_indexes[(c) >> UNAC_BLOCK_SHIFT]; \
|
||||
unsigned char position = 2*((c) & UNAC_BLOCK_MASK)+1; \
|
||||
(p) = &(unac_data_table[index][unac_positions[index][position]]); \
|
||||
(l) = unac_positions[index][position + 1] - unac_positions[index][position]; \
|
||||
if((l) == 1 && *(p) == 0xFFFF) { \
|
||||
(p) = 0; \
|
||||
(l) = 0; \
|
||||
} \
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the unaccented equivalent of the UTF-16 string <in> of
|
||||
* length <in_length> in the pointer <out>. The length of the UTF-16
|
||||
@ -77,6 +92,8 @@ extern "C" {
|
||||
*/
|
||||
int unac_string_utf16(const char* in, size_t in_length,
|
||||
char** out, size_t* out_length);
|
||||
int unacfold_string_utf16(const char* in, size_t in_length,
|
||||
char** out, size_t* out_length);
|
||||
|
||||
/*
|
||||
* The semantics of this function are strictly equal to the function
|
||||
@ -92,6 +109,9 @@ int unac_string_utf16(const char* in, size_t in_length,
|
||||
int unac_string(const char* charset,
|
||||
const char* in, size_t in_length,
|
||||
char** out, size_t* out_length);
|
||||
int unacfold_string(const char* charset,
|
||||
const char* in, size_t in_length,
|
||||
char** out, size_t* out_length);
|
||||
|
||||
/*
|
||||
* Return unac version number.
|
||||
@ -141,7 +161,7 @@ void unac_debug_callback(int level, unac_debug_print_t function, void* data);
|
||||
|
||||
/* Generated by builder. Do not modify. Start declarations */
|
||||
extern unsigned short unac_indexes[UNAC_INDEXES_SIZE];
|
||||
extern unsigned char unac_positions[UNAC_BLOCK_COUNT][UNAC_BLOCK_SIZE + 1];
|
||||
extern unsigned char unac_positions[UNAC_BLOCK_COUNT][2*UNAC_BLOCK_SIZE + 1];
|
||||
extern unsigned short* unac_data_table[UNAC_BLOCK_COUNT];
|
||||
extern unsigned short unac_data0[];
|
||||
extern unsigned short unac_data1[];
|
||||
@ -321,6 +341,143 @@ extern unsigned short unac_data174[];
|
||||
extern unsigned short unac_data175[];
|
||||
extern unsigned short unac_data176[];
|
||||
extern unsigned short unac_data177[];
|
||||
extern unsigned short unac_data178[];
|
||||
extern unsigned short unac_data179[];
|
||||
extern unsigned short unac_data180[];
|
||||
extern unsigned short unac_data181[];
|
||||
extern unsigned short unac_data182[];
|
||||
extern unsigned short unac_data183[];
|
||||
extern unsigned short unac_data184[];
|
||||
extern unsigned short unac_data185[];
|
||||
extern unsigned short unac_data186[];
|
||||
extern unsigned short unac_data187[];
|
||||
extern unsigned short unac_data188[];
|
||||
extern unsigned short unac_data189[];
|
||||
extern unsigned short unac_data190[];
|
||||
extern unsigned short unac_data191[];
|
||||
extern unsigned short unac_data192[];
|
||||
extern unsigned short unac_data193[];
|
||||
extern unsigned short unac_data194[];
|
||||
extern unsigned short unac_data195[];
|
||||
extern unsigned short unac_data196[];
|
||||
extern unsigned short unac_data197[];
|
||||
extern unsigned short unac_data198[];
|
||||
extern unsigned short unac_data199[];
|
||||
extern unsigned short unac_data200[];
|
||||
extern unsigned short unac_data201[];
|
||||
extern unsigned short unac_data202[];
|
||||
extern unsigned short unac_data203[];
|
||||
extern unsigned short unac_data204[];
|
||||
extern unsigned short unac_data205[];
|
||||
extern unsigned short unac_data206[];
|
||||
extern unsigned short unac_data207[];
|
||||
extern unsigned short unac_data208[];
|
||||
extern unsigned short unac_data209[];
|
||||
extern unsigned short unac_data210[];
|
||||
extern unsigned short unac_data211[];
|
||||
extern unsigned short unac_data212[];
|
||||
extern unsigned short unac_data213[];
|
||||
extern unsigned short unac_data214[];
|
||||
extern unsigned short unac_data215[];
|
||||
extern unsigned short unac_data216[];
|
||||
extern unsigned short unac_data217[];
|
||||
extern unsigned short unac_data218[];
|
||||
extern unsigned short unac_data219[];
|
||||
extern unsigned short unac_data220[];
|
||||
extern unsigned short unac_data221[];
|
||||
extern unsigned short unac_data222[];
|
||||
extern unsigned short unac_data223[];
|
||||
extern unsigned short unac_data224[];
|
||||
extern unsigned short unac_data225[];
|
||||
extern unsigned short unac_data226[];
|
||||
extern unsigned short unac_data227[];
|
||||
extern unsigned short unac_data228[];
|
||||
extern unsigned short unac_data229[];
|
||||
extern unsigned short unac_data230[];
|
||||
extern unsigned short unac_data231[];
|
||||
extern unsigned short unac_data232[];
|
||||
extern unsigned short unac_data233[];
|
||||
extern unsigned short unac_data234[];
|
||||
extern unsigned short unac_data235[];
|
||||
extern unsigned short unac_data236[];
|
||||
extern unsigned short unac_data237[];
|
||||
extern unsigned short unac_data238[];
|
||||
extern unsigned short unac_data239[];
|
||||
extern unsigned short unac_data240[];
|
||||
extern unsigned short unac_data241[];
|
||||
extern unsigned short unac_data242[];
|
||||
extern unsigned short unac_data243[];
|
||||
extern unsigned short unac_data244[];
|
||||
extern unsigned short unac_data245[];
|
||||
extern unsigned short unac_data246[];
|
||||
extern unsigned short unac_data247[];
|
||||
extern unsigned short unac_data248[];
|
||||
extern unsigned short unac_data249[];
|
||||
extern unsigned short unac_data250[];
|
||||
extern unsigned short unac_data251[];
|
||||
extern unsigned short unac_data252[];
|
||||
extern unsigned short unac_data253[];
|
||||
extern unsigned short unac_data254[];
|
||||
extern unsigned short unac_data255[];
|
||||
extern unsigned short unac_data256[];
|
||||
extern unsigned short unac_data257[];
|
||||
extern unsigned short unac_data258[];
|
||||
extern unsigned short unac_data259[];
|
||||
extern unsigned short unac_data260[];
|
||||
extern unsigned short unac_data261[];
|
||||
extern unsigned short unac_data262[];
|
||||
extern unsigned short unac_data263[];
|
||||
extern unsigned short unac_data264[];
|
||||
extern unsigned short unac_data265[];
|
||||
extern unsigned short unac_data266[];
|
||||
extern unsigned short unac_data267[];
|
||||
extern unsigned short unac_data268[];
|
||||
extern unsigned short unac_data269[];
|
||||
extern unsigned short unac_data270[];
|
||||
extern unsigned short unac_data271[];
|
||||
extern unsigned short unac_data272[];
|
||||
extern unsigned short unac_data273[];
|
||||
extern unsigned short unac_data274[];
|
||||
extern unsigned short unac_data275[];
|
||||
extern unsigned short unac_data276[];
|
||||
extern unsigned short unac_data277[];
|
||||
extern unsigned short unac_data278[];
|
||||
extern unsigned short unac_data279[];
|
||||
extern unsigned short unac_data280[];
|
||||
extern unsigned short unac_data281[];
|
||||
extern unsigned short unac_data282[];
|
||||
extern unsigned short unac_data283[];
|
||||
extern unsigned short unac_data284[];
|
||||
extern unsigned short unac_data285[];
|
||||
extern unsigned short unac_data286[];
|
||||
extern unsigned short unac_data287[];
|
||||
extern unsigned short unac_data288[];
|
||||
extern unsigned short unac_data289[];
|
||||
extern unsigned short unac_data290[];
|
||||
extern unsigned short unac_data291[];
|
||||
extern unsigned short unac_data292[];
|
||||
extern unsigned short unac_data293[];
|
||||
extern unsigned short unac_data294[];
|
||||
extern unsigned short unac_data295[];
|
||||
extern unsigned short unac_data296[];
|
||||
extern unsigned short unac_data297[];
|
||||
extern unsigned short unac_data298[];
|
||||
extern unsigned short unac_data299[];
|
||||
extern unsigned short unac_data300[];
|
||||
extern unsigned short unac_data301[];
|
||||
extern unsigned short unac_data302[];
|
||||
extern unsigned short unac_data303[];
|
||||
extern unsigned short unac_data304[];
|
||||
extern unsigned short unac_data305[];
|
||||
extern unsigned short unac_data306[];
|
||||
extern unsigned short unac_data307[];
|
||||
extern unsigned short unac_data308[];
|
||||
extern unsigned short unac_data309[];
|
||||
extern unsigned short unac_data310[];
|
||||
extern unsigned short unac_data311[];
|
||||
extern unsigned short unac_data312[];
|
||||
extern unsigned short unac_data313[];
|
||||
extern unsigned short unac_data314[];
|
||||
/* Generated by builder. Do not modify. End declarations */
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user