From 66878ddf150e4e2040bb44f0d4b62bfd724ec0ca Mon Sep 17 00:00:00 2001 From: dockes Date: Thu, 5 Jan 2006 16:16:14 +0000 Subject: [PATCH] *** empty log message *** --- src/utils/CaseFolding.txt | 1064 ++++++++++++++++++++++ src/utils/caseconvert.cpp | 1765 +++++++++++++++++++++++++++++++++++++ src/utils/caseconvert.h | 10 + src/utils/gencasefold.sh | 121 +++ 4 files changed, 2960 insertions(+) create mode 100644 src/utils/CaseFolding.txt create mode 100644 src/utils/caseconvert.cpp create mode 100644 src/utils/caseconvert.h create mode 100644 src/utils/gencasefold.sh diff --git a/src/utils/CaseFolding.txt b/src/utils/CaseFolding.txt new file mode 100644 index 00000000..f25d9bfe --- /dev/null +++ b/src/utils/CaseFolding.txt @@ -0,0 +1,1064 @@ +# CaseFolding-4.1.0.txt +# Date: 2005-03-26, 00:24:43 GMT [MD] +# +# Unicode Character Database +# Copyright (c) 1991-2005 Unicode, Inc. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# For documentation, see UCD.html +# +# Case Folding Properties +# +# This file is a supplement to the UnicodeData file. +# It provides a case folding mapping generated from the Unicode Character Database. +# If all characters are mapped according to the full mapping below, then +# case differences (according to UnicodeData.txt and SpecialCasing.txt) +# are eliminated. +# +# The data supports both implementations that require simple case foldings +# (where string lengths don't change), and implementations that allow full case folding +# (where string lengths may grow). Note that where they can be supported, the +# full case foldings are superior: for example, they allow "MASSE" and "Maße" to match. +# +# All code points not listed in this file map to themselves. +# +# NOTE: case folding does not preserve normalization formats! 
+# +# For information on case folding, see +# UTR #21 Case Mappings, at http://www.unicode.org/unicode/reports/tr21/ +# +# ================================================================================ +# Format +# ================================================================================ +# The entries in this file are in the following machine-readable format: +# +# <code>; <status>; <mapping>; # <name> +# +# The status field is: +# C: common case folding, common mappings shared by both simple and full mappings. +# F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces. +# S: simple case folding, mappings to single characters where different from F. +# T: special case for uppercase I and dotted uppercase I +# - For non-Turkic languages, this mapping is normally not used. +# - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters. +# Note that the Turkic mappings do not maintain canonical equivalence without additional processing. +# See the discussions of case mapping in the Unicode Standard for more information. +# +# Usage: +# A. To do a simple case folding, use the mappings with status C + S. +# B. To do a full case folding, use the mappings with status C + F. +# +# The mappings with status T can be used or omitted depending on the desired case-folding +# behavior. (The default option is to exclude them.) 
+# +# ================================================================= + +0041; C; 0061; # LATIN CAPITAL LETTER A +0042; C; 0062; # LATIN CAPITAL LETTER B +0043; C; 0063; # LATIN CAPITAL LETTER C +0044; C; 0064; # LATIN CAPITAL LETTER D +0045; C; 0065; # LATIN CAPITAL LETTER E +0046; C; 0066; # LATIN CAPITAL LETTER F +0047; C; 0067; # LATIN CAPITAL LETTER G +0048; C; 0068; # LATIN CAPITAL LETTER H +0049; C; 0069; # LATIN CAPITAL LETTER I +0049; T; 0131; # LATIN CAPITAL LETTER I +004A; C; 006A; # LATIN CAPITAL LETTER J +004B; C; 006B; # LATIN CAPITAL LETTER K +004C; C; 006C; # LATIN CAPITAL LETTER L +004D; C; 006D; # LATIN CAPITAL LETTER M +004E; C; 006E; # LATIN CAPITAL LETTER N +004F; C; 006F; # LATIN CAPITAL LETTER O +0050; C; 0070; # LATIN CAPITAL LETTER P +0051; C; 0071; # LATIN CAPITAL LETTER Q +0052; C; 0072; # LATIN CAPITAL LETTER R +0053; C; 0073; # LATIN CAPITAL LETTER S +0054; C; 0074; # LATIN CAPITAL LETTER T +0055; C; 0075; # LATIN CAPITAL LETTER U +0056; C; 0076; # LATIN CAPITAL LETTER V +0057; C; 0077; # LATIN CAPITAL LETTER W +0058; C; 0078; # LATIN CAPITAL LETTER X +0059; C; 0079; # LATIN CAPITAL LETTER Y +005A; C; 007A; # LATIN CAPITAL LETTER Z +00B5; C; 03BC; # MICRO SIGN +00C0; C; 00E0; # LATIN CAPITAL LETTER A WITH GRAVE +00C1; C; 00E1; # LATIN CAPITAL LETTER A WITH ACUTE +00C2; C; 00E2; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX +00C3; C; 00E3; # LATIN CAPITAL LETTER A WITH TILDE +00C4; C; 00E4; # LATIN CAPITAL LETTER A WITH DIAERESIS +00C5; C; 00E5; # LATIN CAPITAL LETTER A WITH RING ABOVE +00C6; C; 00E6; # LATIN CAPITAL LETTER AE +00C7; C; 00E7; # LATIN CAPITAL LETTER C WITH CEDILLA +00C8; C; 00E8; # LATIN CAPITAL LETTER E WITH GRAVE +00C9; C; 00E9; # LATIN CAPITAL LETTER E WITH ACUTE +00CA; C; 00EA; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX +00CB; C; 00EB; # LATIN CAPITAL LETTER E WITH DIAERESIS +00CC; C; 00EC; # LATIN CAPITAL LETTER I WITH GRAVE +00CD; C; 00ED; # LATIN CAPITAL LETTER I WITH ACUTE +00CE; C; 00EE; # LATIN CAPITAL LETTER I WITH 
CIRCUMFLEX +00CF; C; 00EF; # LATIN CAPITAL LETTER I WITH DIAERESIS +00D0; C; 00F0; # LATIN CAPITAL LETTER ETH +00D1; C; 00F1; # LATIN CAPITAL LETTER N WITH TILDE +00D2; C; 00F2; # LATIN CAPITAL LETTER O WITH GRAVE +00D3; C; 00F3; # LATIN CAPITAL LETTER O WITH ACUTE +00D4; C; 00F4; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +00D5; C; 00F5; # LATIN CAPITAL LETTER O WITH TILDE +00D6; C; 00F6; # LATIN CAPITAL LETTER O WITH DIAERESIS +00D8; C; 00F8; # LATIN CAPITAL LETTER O WITH STROKE +00D9; C; 00F9; # LATIN CAPITAL LETTER U WITH GRAVE +00DA; C; 00FA; # LATIN CAPITAL LETTER U WITH ACUTE +00DB; C; 00FB; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX +00DC; C; 00FC; # LATIN CAPITAL LETTER U WITH DIAERESIS +00DD; C; 00FD; # LATIN CAPITAL LETTER Y WITH ACUTE +00DE; C; 00FE; # LATIN CAPITAL LETTER THORN +00DF; F; 0073 0073; # LATIN SMALL LETTER SHARP S +0100; C; 0101; # LATIN CAPITAL LETTER A WITH MACRON +0102; C; 0103; # LATIN CAPITAL LETTER A WITH BREVE +0104; C; 0105; # LATIN CAPITAL LETTER A WITH OGONEK +0106; C; 0107; # LATIN CAPITAL LETTER C WITH ACUTE +0108; C; 0109; # LATIN CAPITAL LETTER C WITH CIRCUMFLEX +010A; C; 010B; # LATIN CAPITAL LETTER C WITH DOT ABOVE +010C; C; 010D; # LATIN CAPITAL LETTER C WITH CARON +010E; C; 010F; # LATIN CAPITAL LETTER D WITH CARON +0110; C; 0111; # LATIN CAPITAL LETTER D WITH STROKE +0112; C; 0113; # LATIN CAPITAL LETTER E WITH MACRON +0114; C; 0115; # LATIN CAPITAL LETTER E WITH BREVE +0116; C; 0117; # LATIN CAPITAL LETTER E WITH DOT ABOVE +0118; C; 0119; # LATIN CAPITAL LETTER E WITH OGONEK +011A; C; 011B; # LATIN CAPITAL LETTER E WITH CARON +011C; C; 011D; # LATIN CAPITAL LETTER G WITH CIRCUMFLEX +011E; C; 011F; # LATIN CAPITAL LETTER G WITH BREVE +0120; C; 0121; # LATIN CAPITAL LETTER G WITH DOT ABOVE +0122; C; 0123; # LATIN CAPITAL LETTER G WITH CEDILLA +0124; C; 0125; # LATIN CAPITAL LETTER H WITH CIRCUMFLEX +0126; C; 0127; # LATIN CAPITAL LETTER H WITH STROKE +0128; C; 0129; # LATIN CAPITAL LETTER I WITH TILDE +012A; C; 012B; # LATIN 
CAPITAL LETTER I WITH MACRON +012C; C; 012D; # LATIN CAPITAL LETTER I WITH BREVE +012E; C; 012F; # LATIN CAPITAL LETTER I WITH OGONEK +0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE +0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE +0132; C; 0133; # LATIN CAPITAL LIGATURE IJ +0134; C; 0135; # LATIN CAPITAL LETTER J WITH CIRCUMFLEX +0136; C; 0137; # LATIN CAPITAL LETTER K WITH CEDILLA +0139; C; 013A; # LATIN CAPITAL LETTER L WITH ACUTE +013B; C; 013C; # LATIN CAPITAL LETTER L WITH CEDILLA +013D; C; 013E; # LATIN CAPITAL LETTER L WITH CARON +013F; C; 0140; # LATIN CAPITAL LETTER L WITH MIDDLE DOT +0141; C; 0142; # LATIN CAPITAL LETTER L WITH STROKE +0143; C; 0144; # LATIN CAPITAL LETTER N WITH ACUTE +0145; C; 0146; # LATIN CAPITAL LETTER N WITH CEDILLA +0147; C; 0148; # LATIN CAPITAL LETTER N WITH CARON +0149; F; 02BC 006E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE +014A; C; 014B; # LATIN CAPITAL LETTER ENG +014C; C; 014D; # LATIN CAPITAL LETTER O WITH MACRON +014E; C; 014F; # LATIN CAPITAL LETTER O WITH BREVE +0150; C; 0151; # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE +0152; C; 0153; # LATIN CAPITAL LIGATURE OE +0154; C; 0155; # LATIN CAPITAL LETTER R WITH ACUTE +0156; C; 0157; # LATIN CAPITAL LETTER R WITH CEDILLA +0158; C; 0159; # LATIN CAPITAL LETTER R WITH CARON +015A; C; 015B; # LATIN CAPITAL LETTER S WITH ACUTE +015C; C; 015D; # LATIN CAPITAL LETTER S WITH CIRCUMFLEX +015E; C; 015F; # LATIN CAPITAL LETTER S WITH CEDILLA +0160; C; 0161; # LATIN CAPITAL LETTER S WITH CARON +0162; C; 0163; # LATIN CAPITAL LETTER T WITH CEDILLA +0164; C; 0165; # LATIN CAPITAL LETTER T WITH CARON +0166; C; 0167; # LATIN CAPITAL LETTER T WITH STROKE +0168; C; 0169; # LATIN CAPITAL LETTER U WITH TILDE +016A; C; 016B; # LATIN CAPITAL LETTER U WITH MACRON +016C; C; 016D; # LATIN CAPITAL LETTER U WITH BREVE +016E; C; 016F; # LATIN CAPITAL LETTER U WITH RING ABOVE +0170; C; 0171; # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE +0172; C; 0173; # LATIN CAPITAL LETTER U 
WITH OGONEK +0174; C; 0175; # LATIN CAPITAL LETTER W WITH CIRCUMFLEX +0176; C; 0177; # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX +0178; C; 00FF; # LATIN CAPITAL LETTER Y WITH DIAERESIS +0179; C; 017A; # LATIN CAPITAL LETTER Z WITH ACUTE +017B; C; 017C; # LATIN CAPITAL LETTER Z WITH DOT ABOVE +017D; C; 017E; # LATIN CAPITAL LETTER Z WITH CARON +017F; C; 0073; # LATIN SMALL LETTER LONG S +0181; C; 0253; # LATIN CAPITAL LETTER B WITH HOOK +0182; C; 0183; # LATIN CAPITAL LETTER B WITH TOPBAR +0184; C; 0185; # LATIN CAPITAL LETTER TONE SIX +0186; C; 0254; # LATIN CAPITAL LETTER OPEN O +0187; C; 0188; # LATIN CAPITAL LETTER C WITH HOOK +0189; C; 0256; # LATIN CAPITAL LETTER AFRICAN D +018A; C; 0257; # LATIN CAPITAL LETTER D WITH HOOK +018B; C; 018C; # LATIN CAPITAL LETTER D WITH TOPBAR +018E; C; 01DD; # LATIN CAPITAL LETTER REVERSED E +018F; C; 0259; # LATIN CAPITAL LETTER SCHWA +0190; C; 025B; # LATIN CAPITAL LETTER OPEN E +0191; C; 0192; # LATIN CAPITAL LETTER F WITH HOOK +0193; C; 0260; # LATIN CAPITAL LETTER G WITH HOOK +0194; C; 0263; # LATIN CAPITAL LETTER GAMMA +0196; C; 0269; # LATIN CAPITAL LETTER IOTA +0197; C; 0268; # LATIN CAPITAL LETTER I WITH STROKE +0198; C; 0199; # LATIN CAPITAL LETTER K WITH HOOK +019C; C; 026F; # LATIN CAPITAL LETTER TURNED M +019D; C; 0272; # LATIN CAPITAL LETTER N WITH LEFT HOOK +019F; C; 0275; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE +01A0; C; 01A1; # LATIN CAPITAL LETTER O WITH HORN +01A2; C; 01A3; # LATIN CAPITAL LETTER OI +01A4; C; 01A5; # LATIN CAPITAL LETTER P WITH HOOK +01A6; C; 0280; # LATIN LETTER YR +01A7; C; 01A8; # LATIN CAPITAL LETTER TONE TWO +01A9; C; 0283; # LATIN CAPITAL LETTER ESH +01AC; C; 01AD; # LATIN CAPITAL LETTER T WITH HOOK +01AE; C; 0288; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK +01AF; C; 01B0; # LATIN CAPITAL LETTER U WITH HORN +01B1; C; 028A; # LATIN CAPITAL LETTER UPSILON +01B2; C; 028B; # LATIN CAPITAL LETTER V WITH HOOK +01B3; C; 01B4; # LATIN CAPITAL LETTER Y WITH HOOK +01B5; C; 01B6; # LATIN CAPITAL 
LETTER Z WITH STROKE +01B7; C; 0292; # LATIN CAPITAL LETTER EZH +01B8; C; 01B9; # LATIN CAPITAL LETTER EZH REVERSED +01BC; C; 01BD; # LATIN CAPITAL LETTER TONE FIVE +01C4; C; 01C6; # LATIN CAPITAL LETTER DZ WITH CARON +01C5; C; 01C6; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON +01C7; C; 01C9; # LATIN CAPITAL LETTER LJ +01C8; C; 01C9; # LATIN CAPITAL LETTER L WITH SMALL LETTER J +01CA; C; 01CC; # LATIN CAPITAL LETTER NJ +01CB; C; 01CC; # LATIN CAPITAL LETTER N WITH SMALL LETTER J +01CD; C; 01CE; # LATIN CAPITAL LETTER A WITH CARON +01CF; C; 01D0; # LATIN CAPITAL LETTER I WITH CARON +01D1; C; 01D2; # LATIN CAPITAL LETTER O WITH CARON +01D3; C; 01D4; # LATIN CAPITAL LETTER U WITH CARON +01D5; C; 01D6; # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON +01D7; C; 01D8; # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE +01D9; C; 01DA; # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON +01DB; C; 01DC; # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE +01DE; C; 01DF; # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON +01E0; C; 01E1; # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON +01E2; C; 01E3; # LATIN CAPITAL LETTER AE WITH MACRON +01E4; C; 01E5; # LATIN CAPITAL LETTER G WITH STROKE +01E6; C; 01E7; # LATIN CAPITAL LETTER G WITH CARON +01E8; C; 01E9; # LATIN CAPITAL LETTER K WITH CARON +01EA; C; 01EB; # LATIN CAPITAL LETTER O WITH OGONEK +01EC; C; 01ED; # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON +01EE; C; 01EF; # LATIN CAPITAL LETTER EZH WITH CARON +01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON +01F1; C; 01F3; # LATIN CAPITAL LETTER DZ +01F2; C; 01F3; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z +01F4; C; 01F5; # LATIN CAPITAL LETTER G WITH ACUTE +01F6; C; 0195; # LATIN CAPITAL LETTER HWAIR +01F7; C; 01BF; # LATIN CAPITAL LETTER WYNN +01F8; C; 01F9; # LATIN CAPITAL LETTER N WITH GRAVE +01FA; C; 01FB; # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE +01FC; C; 01FD; # LATIN CAPITAL LETTER AE WITH ACUTE +01FE; C; 01FF; # LATIN CAPITAL LETTER O 
WITH STROKE AND ACUTE +0200; C; 0201; # LATIN CAPITAL LETTER A WITH DOUBLE GRAVE +0202; C; 0203; # LATIN CAPITAL LETTER A WITH INVERTED BREVE +0204; C; 0205; # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE +0206; C; 0207; # LATIN CAPITAL LETTER E WITH INVERTED BREVE +0208; C; 0209; # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE +020A; C; 020B; # LATIN CAPITAL LETTER I WITH INVERTED BREVE +020C; C; 020D; # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE +020E; C; 020F; # LATIN CAPITAL LETTER O WITH INVERTED BREVE +0210; C; 0211; # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE +0212; C; 0213; # LATIN CAPITAL LETTER R WITH INVERTED BREVE +0214; C; 0215; # LATIN CAPITAL LETTER U WITH DOUBLE GRAVE +0216; C; 0217; # LATIN CAPITAL LETTER U WITH INVERTED BREVE +0218; C; 0219; # LATIN CAPITAL LETTER S WITH COMMA BELOW +021A; C; 021B; # LATIN CAPITAL LETTER T WITH COMMA BELOW +021C; C; 021D; # LATIN CAPITAL LETTER YOGH +021E; C; 021F; # LATIN CAPITAL LETTER H WITH CARON +0220; C; 019E; # LATIN CAPITAL LETTER N WITH LONG RIGHT LEG +0222; C; 0223; # LATIN CAPITAL LETTER OU +0224; C; 0225; # LATIN CAPITAL LETTER Z WITH HOOK +0226; C; 0227; # LATIN CAPITAL LETTER A WITH DOT ABOVE +0228; C; 0229; # LATIN CAPITAL LETTER E WITH CEDILLA +022A; C; 022B; # LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON +022C; C; 022D; # LATIN CAPITAL LETTER O WITH TILDE AND MACRON +022E; C; 022F; # LATIN CAPITAL LETTER O WITH DOT ABOVE +0230; C; 0231; # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON +0232; C; 0233; # LATIN CAPITAL LETTER Y WITH MACRON +023B; C; 023C; # LATIN CAPITAL LETTER C WITH STROKE +023D; C; 019A; # LATIN CAPITAL LETTER L WITH BAR +0241; C; 0294; # LATIN CAPITAL LETTER GLOTTAL STOP +0345; C; 03B9; # COMBINING GREEK YPOGEGRAMMENI +0386; C; 03AC; # GREEK CAPITAL LETTER ALPHA WITH TONOS +0388; C; 03AD; # GREEK CAPITAL LETTER EPSILON WITH TONOS +0389; C; 03AE; # GREEK CAPITAL LETTER ETA WITH TONOS +038A; C; 03AF; # GREEK CAPITAL LETTER IOTA WITH TONOS +038C; C; 03CC; # GREEK CAPITAL LETTER OMICRON 
WITH TONOS +038E; C; 03CD; # GREEK CAPITAL LETTER UPSILON WITH TONOS +038F; C; 03CE; # GREEK CAPITAL LETTER OMEGA WITH TONOS +0390; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS +0391; C; 03B1; # GREEK CAPITAL LETTER ALPHA +0392; C; 03B2; # GREEK CAPITAL LETTER BETA +0393; C; 03B3; # GREEK CAPITAL LETTER GAMMA +0394; C; 03B4; # GREEK CAPITAL LETTER DELTA +0395; C; 03B5; # GREEK CAPITAL LETTER EPSILON +0396; C; 03B6; # GREEK CAPITAL LETTER ZETA +0397; C; 03B7; # GREEK CAPITAL LETTER ETA +0398; C; 03B8; # GREEK CAPITAL LETTER THETA +0399; C; 03B9; # GREEK CAPITAL LETTER IOTA +039A; C; 03BA; # GREEK CAPITAL LETTER KAPPA +039B; C; 03BB; # GREEK CAPITAL LETTER LAMDA +039C; C; 03BC; # GREEK CAPITAL LETTER MU +039D; C; 03BD; # GREEK CAPITAL LETTER NU +039E; C; 03BE; # GREEK CAPITAL LETTER XI +039F; C; 03BF; # GREEK CAPITAL LETTER OMICRON +03A0; C; 03C0; # GREEK CAPITAL LETTER PI +03A1; C; 03C1; # GREEK CAPITAL LETTER RHO +03A3; C; 03C3; # GREEK CAPITAL LETTER SIGMA +03A4; C; 03C4; # GREEK CAPITAL LETTER TAU +03A5; C; 03C5; # GREEK CAPITAL LETTER UPSILON +03A6; C; 03C6; # GREEK CAPITAL LETTER PHI +03A7; C; 03C7; # GREEK CAPITAL LETTER CHI +03A8; C; 03C8; # GREEK CAPITAL LETTER PSI +03A9; C; 03C9; # GREEK CAPITAL LETTER OMEGA +03AA; C; 03CA; # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA +03AB; C; 03CB; # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA +03B0; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS +03C2; C; 03C3; # GREEK SMALL LETTER FINAL SIGMA +03D0; C; 03B2; # GREEK BETA SYMBOL +03D1; C; 03B8; # GREEK THETA SYMBOL +03D5; C; 03C6; # GREEK PHI SYMBOL +03D6; C; 03C0; # GREEK PI SYMBOL +03D8; C; 03D9; # GREEK LETTER ARCHAIC KOPPA +03DA; C; 03DB; # GREEK LETTER STIGMA +03DC; C; 03DD; # GREEK LETTER DIGAMMA +03DE; C; 03DF; # GREEK LETTER KOPPA +03E0; C; 03E1; # GREEK LETTER SAMPI +03E2; C; 03E3; # COPTIC CAPITAL LETTER SHEI +03E4; C; 03E5; # COPTIC CAPITAL LETTER FEI +03E6; C; 03E7; # COPTIC CAPITAL LETTER KHEI +03E8; C; 03E9; 
# COPTIC CAPITAL LETTER HORI +03EA; C; 03EB; # COPTIC CAPITAL LETTER GANGIA +03EC; C; 03ED; # COPTIC CAPITAL LETTER SHIMA +03EE; C; 03EF; # COPTIC CAPITAL LETTER DEI +03F0; C; 03BA; # GREEK KAPPA SYMBOL +03F1; C; 03C1; # GREEK RHO SYMBOL +03F4; C; 03B8; # GREEK CAPITAL THETA SYMBOL +03F5; C; 03B5; # GREEK LUNATE EPSILON SYMBOL +03F7; C; 03F8; # GREEK CAPITAL LETTER SHO +03F9; C; 03F2; # GREEK CAPITAL LUNATE SIGMA SYMBOL +03FA; C; 03FB; # GREEK CAPITAL LETTER SAN +0400; C; 0450; # CYRILLIC CAPITAL LETTER IE WITH GRAVE +0401; C; 0451; # CYRILLIC CAPITAL LETTER IO +0402; C; 0452; # CYRILLIC CAPITAL LETTER DJE +0403; C; 0453; # CYRILLIC CAPITAL LETTER GJE +0404; C; 0454; # CYRILLIC CAPITAL LETTER UKRAINIAN IE +0405; C; 0455; # CYRILLIC CAPITAL LETTER DZE +0406; C; 0456; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I +0407; C; 0457; # CYRILLIC CAPITAL LETTER YI +0408; C; 0458; # CYRILLIC CAPITAL LETTER JE +0409; C; 0459; # CYRILLIC CAPITAL LETTER LJE +040A; C; 045A; # CYRILLIC CAPITAL LETTER NJE +040B; C; 045B; # CYRILLIC CAPITAL LETTER TSHE +040C; C; 045C; # CYRILLIC CAPITAL LETTER KJE +040D; C; 045D; # CYRILLIC CAPITAL LETTER I WITH GRAVE +040E; C; 045E; # CYRILLIC CAPITAL LETTER SHORT U +040F; C; 045F; # CYRILLIC CAPITAL LETTER DZHE +0410; C; 0430; # CYRILLIC CAPITAL LETTER A +0411; C; 0431; # CYRILLIC CAPITAL LETTER BE +0412; C; 0432; # CYRILLIC CAPITAL LETTER VE +0413; C; 0433; # CYRILLIC CAPITAL LETTER GHE +0414; C; 0434; # CYRILLIC CAPITAL LETTER DE +0415; C; 0435; # CYRILLIC CAPITAL LETTER IE +0416; C; 0436; # CYRILLIC CAPITAL LETTER ZHE +0417; C; 0437; # CYRILLIC CAPITAL LETTER ZE +0418; C; 0438; # CYRILLIC CAPITAL LETTER I +0419; C; 0439; # CYRILLIC CAPITAL LETTER SHORT I +041A; C; 043A; # CYRILLIC CAPITAL LETTER KA +041B; C; 043B; # CYRILLIC CAPITAL LETTER EL +041C; C; 043C; # CYRILLIC CAPITAL LETTER EM +041D; C; 043D; # CYRILLIC CAPITAL LETTER EN +041E; C; 043E; # CYRILLIC CAPITAL LETTER O +041F; C; 043F; # CYRILLIC CAPITAL LETTER PE +0420; C; 0440; # 
CYRILLIC CAPITAL LETTER ER +0421; C; 0441; # CYRILLIC CAPITAL LETTER ES +0422; C; 0442; # CYRILLIC CAPITAL LETTER TE +0423; C; 0443; # CYRILLIC CAPITAL LETTER U +0424; C; 0444; # CYRILLIC CAPITAL LETTER EF +0425; C; 0445; # CYRILLIC CAPITAL LETTER HA +0426; C; 0446; # CYRILLIC CAPITAL LETTER TSE +0427; C; 0447; # CYRILLIC CAPITAL LETTER CHE +0428; C; 0448; # CYRILLIC CAPITAL LETTER SHA +0429; C; 0449; # CYRILLIC CAPITAL LETTER SHCHA +042A; C; 044A; # CYRILLIC CAPITAL LETTER HARD SIGN +042B; C; 044B; # CYRILLIC CAPITAL LETTER YERU +042C; C; 044C; # CYRILLIC CAPITAL LETTER SOFT SIGN +042D; C; 044D; # CYRILLIC CAPITAL LETTER E +042E; C; 044E; # CYRILLIC CAPITAL LETTER YU +042F; C; 044F; # CYRILLIC CAPITAL LETTER YA +0460; C; 0461; # CYRILLIC CAPITAL LETTER OMEGA +0462; C; 0463; # CYRILLIC CAPITAL LETTER YAT +0464; C; 0465; # CYRILLIC CAPITAL LETTER IOTIFIED E +0466; C; 0467; # CYRILLIC CAPITAL LETTER LITTLE YUS +0468; C; 0469; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS +046A; C; 046B; # CYRILLIC CAPITAL LETTER BIG YUS +046C; C; 046D; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS +046E; C; 046F; # CYRILLIC CAPITAL LETTER KSI +0470; C; 0471; # CYRILLIC CAPITAL LETTER PSI +0472; C; 0473; # CYRILLIC CAPITAL LETTER FITA +0474; C; 0475; # CYRILLIC CAPITAL LETTER IZHITSA +0476; C; 0477; # CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT +0478; C; 0479; # CYRILLIC CAPITAL LETTER UK +047A; C; 047B; # CYRILLIC CAPITAL LETTER ROUND OMEGA +047C; C; 047D; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO +047E; C; 047F; # CYRILLIC CAPITAL LETTER OT +0480; C; 0481; # CYRILLIC CAPITAL LETTER KOPPA +048A; C; 048B; # CYRILLIC CAPITAL LETTER SHORT I WITH TAIL +048C; C; 048D; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN +048E; C; 048F; # CYRILLIC CAPITAL LETTER ER WITH TICK +0490; C; 0491; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN +0492; C; 0493; # CYRILLIC CAPITAL LETTER GHE WITH STROKE +0494; C; 0495; # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK +0496; C; 0497; # CYRILLIC CAPITAL LETTER 
ZHE WITH DESCENDER +0498; C; 0499; # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER +049A; C; 049B; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER +049C; C; 049D; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE +049E; C; 049F; # CYRILLIC CAPITAL LETTER KA WITH STROKE +04A0; C; 04A1; # CYRILLIC CAPITAL LETTER BASHKIR KA +04A2; C; 04A3; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER +04A4; C; 04A5; # CYRILLIC CAPITAL LIGATURE EN GHE +04A6; C; 04A7; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK +04A8; C; 04A9; # CYRILLIC CAPITAL LETTER ABKHASIAN HA +04AA; C; 04AB; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER +04AC; C; 04AD; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER +04AE; C; 04AF; # CYRILLIC CAPITAL LETTER STRAIGHT U +04B0; C; 04B1; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE +04B2; C; 04B3; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER +04B4; C; 04B5; # CYRILLIC CAPITAL LIGATURE TE TSE +04B6; C; 04B7; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER +04B8; C; 04B9; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE +04BA; C; 04BB; # CYRILLIC CAPITAL LETTER SHHA +04BC; C; 04BD; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE +04BE; C; 04BF; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER +04C1; C; 04C2; # CYRILLIC CAPITAL LETTER ZHE WITH BREVE +04C3; C; 04C4; # CYRILLIC CAPITAL LETTER KA WITH HOOK +04C5; C; 04C6; # CYRILLIC CAPITAL LETTER EL WITH TAIL +04C7; C; 04C8; # CYRILLIC CAPITAL LETTER EN WITH HOOK +04C9; C; 04CA; # CYRILLIC CAPITAL LETTER EN WITH TAIL +04CB; C; 04CC; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE +04CD; C; 04CE; # CYRILLIC CAPITAL LETTER EM WITH TAIL +04D0; C; 04D1; # CYRILLIC CAPITAL LETTER A WITH BREVE +04D2; C; 04D3; # CYRILLIC CAPITAL LETTER A WITH DIAERESIS +04D4; C; 04D5; # CYRILLIC CAPITAL LIGATURE A IE +04D6; C; 04D7; # CYRILLIC CAPITAL LETTER IE WITH BREVE +04D8; C; 04D9; # CYRILLIC CAPITAL LETTER SCHWA +04DA; C; 04DB; # CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS +04DC; C; 04DD; # CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS +04DE; C; 04DF; # 
CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS +04E0; C; 04E1; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE +04E2; C; 04E3; # CYRILLIC CAPITAL LETTER I WITH MACRON +04E4; C; 04E5; # CYRILLIC CAPITAL LETTER I WITH DIAERESIS +04E6; C; 04E7; # CYRILLIC CAPITAL LETTER O WITH DIAERESIS +04E8; C; 04E9; # CYRILLIC CAPITAL LETTER BARRED O +04EA; C; 04EB; # CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS +04EC; C; 04ED; # CYRILLIC CAPITAL LETTER E WITH DIAERESIS +04EE; C; 04EF; # CYRILLIC CAPITAL LETTER U WITH MACRON +04F0; C; 04F1; # CYRILLIC CAPITAL LETTER U WITH DIAERESIS +04F2; C; 04F3; # CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE +04F4; C; 04F5; # CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS +04F6; C; 04F7; # CYRILLIC CAPITAL LETTER GHE WITH DESCENDER +04F8; C; 04F9; # CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS +0500; C; 0501; # CYRILLIC CAPITAL LETTER KOMI DE +0502; C; 0503; # CYRILLIC CAPITAL LETTER KOMI DJE +0504; C; 0505; # CYRILLIC CAPITAL LETTER KOMI ZJE +0506; C; 0507; # CYRILLIC CAPITAL LETTER KOMI DZJE +0508; C; 0509; # CYRILLIC CAPITAL LETTER KOMI LJE +050A; C; 050B; # CYRILLIC CAPITAL LETTER KOMI NJE +050C; C; 050D; # CYRILLIC CAPITAL LETTER KOMI SJE +050E; C; 050F; # CYRILLIC CAPITAL LETTER KOMI TJE +0531; C; 0561; # ARMENIAN CAPITAL LETTER AYB +0532; C; 0562; # ARMENIAN CAPITAL LETTER BEN +0533; C; 0563; # ARMENIAN CAPITAL LETTER GIM +0534; C; 0564; # ARMENIAN CAPITAL LETTER DA +0535; C; 0565; # ARMENIAN CAPITAL LETTER ECH +0536; C; 0566; # ARMENIAN CAPITAL LETTER ZA +0537; C; 0567; # ARMENIAN CAPITAL LETTER EH +0538; C; 0568; # ARMENIAN CAPITAL LETTER ET +0539; C; 0569; # ARMENIAN CAPITAL LETTER TO +053A; C; 056A; # ARMENIAN CAPITAL LETTER ZHE +053B; C; 056B; # ARMENIAN CAPITAL LETTER INI +053C; C; 056C; # ARMENIAN CAPITAL LETTER LIWN +053D; C; 056D; # ARMENIAN CAPITAL LETTER XEH +053E; C; 056E; # ARMENIAN CAPITAL LETTER CA +053F; C; 056F; # ARMENIAN CAPITAL LETTER KEN +0540; C; 0570; # ARMENIAN CAPITAL LETTER HO +0541; C; 0571; # ARMENIAN CAPITAL LETTER JA +0542; 
C; 0572; # ARMENIAN CAPITAL LETTER GHAD +0543; C; 0573; # ARMENIAN CAPITAL LETTER CHEH +0544; C; 0574; # ARMENIAN CAPITAL LETTER MEN +0545; C; 0575; # ARMENIAN CAPITAL LETTER YI +0546; C; 0576; # ARMENIAN CAPITAL LETTER NOW +0547; C; 0577; # ARMENIAN CAPITAL LETTER SHA +0548; C; 0578; # ARMENIAN CAPITAL LETTER VO +0549; C; 0579; # ARMENIAN CAPITAL LETTER CHA +054A; C; 057A; # ARMENIAN CAPITAL LETTER PEH +054B; C; 057B; # ARMENIAN CAPITAL LETTER JHEH +054C; C; 057C; # ARMENIAN CAPITAL LETTER RA +054D; C; 057D; # ARMENIAN CAPITAL LETTER SEH +054E; C; 057E; # ARMENIAN CAPITAL LETTER VEW +054F; C; 057F; # ARMENIAN CAPITAL LETTER TIWN +0550; C; 0580; # ARMENIAN CAPITAL LETTER REH +0551; C; 0581; # ARMENIAN CAPITAL LETTER CO +0552; C; 0582; # ARMENIAN CAPITAL LETTER YIWN +0553; C; 0583; # ARMENIAN CAPITAL LETTER PIWR +0554; C; 0584; # ARMENIAN CAPITAL LETTER KEH +0555; C; 0585; # ARMENIAN CAPITAL LETTER OH +0556; C; 0586; # ARMENIAN CAPITAL LETTER FEH +0587; F; 0565 0582; # ARMENIAN SMALL LIGATURE ECH YIWN +10A0; C; 2D00; # GEORGIAN CAPITAL LETTER AN +10A1; C; 2D01; # GEORGIAN CAPITAL LETTER BAN +10A2; C; 2D02; # GEORGIAN CAPITAL LETTER GAN +10A3; C; 2D03; # GEORGIAN CAPITAL LETTER DON +10A4; C; 2D04; # GEORGIAN CAPITAL LETTER EN +10A5; C; 2D05; # GEORGIAN CAPITAL LETTER VIN +10A6; C; 2D06; # GEORGIAN CAPITAL LETTER ZEN +10A7; C; 2D07; # GEORGIAN CAPITAL LETTER TAN +10A8; C; 2D08; # GEORGIAN CAPITAL LETTER IN +10A9; C; 2D09; # GEORGIAN CAPITAL LETTER KAN +10AA; C; 2D0A; # GEORGIAN CAPITAL LETTER LAS +10AB; C; 2D0B; # GEORGIAN CAPITAL LETTER MAN +10AC; C; 2D0C; # GEORGIAN CAPITAL LETTER NAR +10AD; C; 2D0D; # GEORGIAN CAPITAL LETTER ON +10AE; C; 2D0E; # GEORGIAN CAPITAL LETTER PAR +10AF; C; 2D0F; # GEORGIAN CAPITAL LETTER ZHAR +10B0; C; 2D10; # GEORGIAN CAPITAL LETTER RAE +10B1; C; 2D11; # GEORGIAN CAPITAL LETTER SAN +10B2; C; 2D12; # GEORGIAN CAPITAL LETTER TAR +10B3; C; 2D13; # GEORGIAN CAPITAL LETTER UN +10B4; C; 2D14; # GEORGIAN CAPITAL LETTER PHAR +10B5; C; 2D15; # 
GEORGIAN CAPITAL LETTER KHAR +10B6; C; 2D16; # GEORGIAN CAPITAL LETTER GHAN +10B7; C; 2D17; # GEORGIAN CAPITAL LETTER QAR +10B8; C; 2D18; # GEORGIAN CAPITAL LETTER SHIN +10B9; C; 2D19; # GEORGIAN CAPITAL LETTER CHIN +10BA; C; 2D1A; # GEORGIAN CAPITAL LETTER CAN +10BB; C; 2D1B; # GEORGIAN CAPITAL LETTER JIL +10BC; C; 2D1C; # GEORGIAN CAPITAL LETTER CIL +10BD; C; 2D1D; # GEORGIAN CAPITAL LETTER CHAR +10BE; C; 2D1E; # GEORGIAN CAPITAL LETTER XAN +10BF; C; 2D1F; # GEORGIAN CAPITAL LETTER JHAN +10C0; C; 2D20; # GEORGIAN CAPITAL LETTER HAE +10C1; C; 2D21; # GEORGIAN CAPITAL LETTER HE +10C2; C; 2D22; # GEORGIAN CAPITAL LETTER HIE +10C3; C; 2D23; # GEORGIAN CAPITAL LETTER WE +10C4; C; 2D24; # GEORGIAN CAPITAL LETTER HAR +10C5; C; 2D25; # GEORGIAN CAPITAL LETTER HOE +1E00; C; 1E01; # LATIN CAPITAL LETTER A WITH RING BELOW +1E02; C; 1E03; # LATIN CAPITAL LETTER B WITH DOT ABOVE +1E04; C; 1E05; # LATIN CAPITAL LETTER B WITH DOT BELOW +1E06; C; 1E07; # LATIN CAPITAL LETTER B WITH LINE BELOW +1E08; C; 1E09; # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE +1E0A; C; 1E0B; # LATIN CAPITAL LETTER D WITH DOT ABOVE +1E0C; C; 1E0D; # LATIN CAPITAL LETTER D WITH DOT BELOW +1E0E; C; 1E0F; # LATIN CAPITAL LETTER D WITH LINE BELOW +1E10; C; 1E11; # LATIN CAPITAL LETTER D WITH CEDILLA +1E12; C; 1E13; # LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW +1E14; C; 1E15; # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE +1E16; C; 1E17; # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE +1E18; C; 1E19; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW +1E1A; C; 1E1B; # LATIN CAPITAL LETTER E WITH TILDE BELOW +1E1C; C; 1E1D; # LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE +1E1E; C; 1E1F; # LATIN CAPITAL LETTER F WITH DOT ABOVE +1E20; C; 1E21; # LATIN CAPITAL LETTER G WITH MACRON +1E22; C; 1E23; # LATIN CAPITAL LETTER H WITH DOT ABOVE +1E24; C; 1E25; # LATIN CAPITAL LETTER H WITH DOT BELOW +1E26; C; 1E27; # LATIN CAPITAL LETTER H WITH DIAERESIS +1E28; C; 1E29; # LATIN CAPITAL LETTER H WITH CEDILLA +1E2A; C; 
1E2B; # LATIN CAPITAL LETTER H WITH BREVE BELOW +1E2C; C; 1E2D; # LATIN CAPITAL LETTER I WITH TILDE BELOW +1E2E; C; 1E2F; # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE +1E30; C; 1E31; # LATIN CAPITAL LETTER K WITH ACUTE +1E32; C; 1E33; # LATIN CAPITAL LETTER K WITH DOT BELOW +1E34; C; 1E35; # LATIN CAPITAL LETTER K WITH LINE BELOW +1E36; C; 1E37; # LATIN CAPITAL LETTER L WITH DOT BELOW +1E38; C; 1E39; # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON +1E3A; C; 1E3B; # LATIN CAPITAL LETTER L WITH LINE BELOW +1E3C; C; 1E3D; # LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW +1E3E; C; 1E3F; # LATIN CAPITAL LETTER M WITH ACUTE +1E40; C; 1E41; # LATIN CAPITAL LETTER M WITH DOT ABOVE +1E42; C; 1E43; # LATIN CAPITAL LETTER M WITH DOT BELOW +1E44; C; 1E45; # LATIN CAPITAL LETTER N WITH DOT ABOVE +1E46; C; 1E47; # LATIN CAPITAL LETTER N WITH DOT BELOW +1E48; C; 1E49; # LATIN CAPITAL LETTER N WITH LINE BELOW +1E4A; C; 1E4B; # LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW +1E4C; C; 1E4D; # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE +1E4E; C; 1E4F; # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS +1E50; C; 1E51; # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE +1E52; C; 1E53; # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE +1E54; C; 1E55; # LATIN CAPITAL LETTER P WITH ACUTE +1E56; C; 1E57; # LATIN CAPITAL LETTER P WITH DOT ABOVE +1E58; C; 1E59; # LATIN CAPITAL LETTER R WITH DOT ABOVE +1E5A; C; 1E5B; # LATIN CAPITAL LETTER R WITH DOT BELOW +1E5C; C; 1E5D; # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON +1E5E; C; 1E5F; # LATIN CAPITAL LETTER R WITH LINE BELOW +1E60; C; 1E61; # LATIN CAPITAL LETTER S WITH DOT ABOVE +1E62; C; 1E63; # LATIN CAPITAL LETTER S WITH DOT BELOW +1E64; C; 1E65; # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE +1E66; C; 1E67; # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE +1E68; C; 1E69; # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE +1E6A; C; 1E6B; # LATIN CAPITAL LETTER T WITH DOT ABOVE +1E6C; C; 1E6D; # LATIN CAPITAL LETTER T WITH DOT BELOW 
+1E6E; C; 1E6F; # LATIN CAPITAL LETTER T WITH LINE BELOW +1E70; C; 1E71; # LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW +1E72; C; 1E73; # LATIN CAPITAL LETTER U WITH DIAERESIS BELOW +1E74; C; 1E75; # LATIN CAPITAL LETTER U WITH TILDE BELOW +1E76; C; 1E77; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW +1E78; C; 1E79; # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE +1E7A; C; 1E7B; # LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS +1E7C; C; 1E7D; # LATIN CAPITAL LETTER V WITH TILDE +1E7E; C; 1E7F; # LATIN CAPITAL LETTER V WITH DOT BELOW +1E80; C; 1E81; # LATIN CAPITAL LETTER W WITH GRAVE +1E82; C; 1E83; # LATIN CAPITAL LETTER W WITH ACUTE +1E84; C; 1E85; # LATIN CAPITAL LETTER W WITH DIAERESIS +1E86; C; 1E87; # LATIN CAPITAL LETTER W WITH DOT ABOVE +1E88; C; 1E89; # LATIN CAPITAL LETTER W WITH DOT BELOW +1E8A; C; 1E8B; # LATIN CAPITAL LETTER X WITH DOT ABOVE +1E8C; C; 1E8D; # LATIN CAPITAL LETTER X WITH DIAERESIS +1E8E; C; 1E8F; # LATIN CAPITAL LETTER Y WITH DOT ABOVE +1E90; C; 1E91; # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX +1E92; C; 1E93; # LATIN CAPITAL LETTER Z WITH DOT BELOW +1E94; C; 1E95; # LATIN CAPITAL LETTER Z WITH LINE BELOW +1E96; F; 0068 0331; # LATIN SMALL LETTER H WITH LINE BELOW +1E97; F; 0074 0308; # LATIN SMALL LETTER T WITH DIAERESIS +1E98; F; 0077 030A; # LATIN SMALL LETTER W WITH RING ABOVE +1E99; F; 0079 030A; # LATIN SMALL LETTER Y WITH RING ABOVE +1E9A; F; 0061 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING +1E9B; C; 1E61; # LATIN SMALL LETTER LONG S WITH DOT ABOVE +1EA0; C; 1EA1; # LATIN CAPITAL LETTER A WITH DOT BELOW +1EA2; C; 1EA3; # LATIN CAPITAL LETTER A WITH HOOK ABOVE +1EA4; C; 1EA5; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE +1EA6; C; 1EA7; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE +1EA8; C; 1EA9; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE +1EAA; C; 1EAB; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE +1EAC; C; 1EAD; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW +1EAE; C; 1EAF; # LATIN 
CAPITAL LETTER A WITH BREVE AND ACUTE +1EB0; C; 1EB1; # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE +1EB2; C; 1EB3; # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE +1EB4; C; 1EB5; # LATIN CAPITAL LETTER A WITH BREVE AND TILDE +1EB6; C; 1EB7; # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW +1EB8; C; 1EB9; # LATIN CAPITAL LETTER E WITH DOT BELOW +1EBA; C; 1EBB; # LATIN CAPITAL LETTER E WITH HOOK ABOVE +1EBC; C; 1EBD; # LATIN CAPITAL LETTER E WITH TILDE +1EBE; C; 1EBF; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE +1EC0; C; 1EC1; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE +1EC2; C; 1EC3; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE +1EC4; C; 1EC5; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE +1EC6; C; 1EC7; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW +1EC8; C; 1EC9; # LATIN CAPITAL LETTER I WITH HOOK ABOVE +1ECA; C; 1ECB; # LATIN CAPITAL LETTER I WITH DOT BELOW +1ECC; C; 1ECD; # LATIN CAPITAL LETTER O WITH DOT BELOW +1ECE; C; 1ECF; # LATIN CAPITAL LETTER O WITH HOOK ABOVE +1ED0; C; 1ED1; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE +1ED2; C; 1ED3; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE +1ED4; C; 1ED5; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE +1ED6; C; 1ED7; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE +1ED8; C; 1ED9; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW +1EDA; C; 1EDB; # LATIN CAPITAL LETTER O WITH HORN AND ACUTE +1EDC; C; 1EDD; # LATIN CAPITAL LETTER O WITH HORN AND GRAVE +1EDE; C; 1EDF; # LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE +1EE0; C; 1EE1; # LATIN CAPITAL LETTER O WITH HORN AND TILDE +1EE2; C; 1EE3; # LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW +1EE4; C; 1EE5; # LATIN CAPITAL LETTER U WITH DOT BELOW +1EE6; C; 1EE7; # LATIN CAPITAL LETTER U WITH HOOK ABOVE +1EE8; C; 1EE9; # LATIN CAPITAL LETTER U WITH HORN AND ACUTE +1EEA; C; 1EEB; # LATIN CAPITAL LETTER U WITH HORN AND GRAVE +1EEC; C; 1EED; # LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE +1EEE; C; 
1EEF; # LATIN CAPITAL LETTER U WITH HORN AND TILDE +1EF0; C; 1EF1; # LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW +1EF2; C; 1EF3; # LATIN CAPITAL LETTER Y WITH GRAVE +1EF4; C; 1EF5; # LATIN CAPITAL LETTER Y WITH DOT BELOW +1EF6; C; 1EF7; # LATIN CAPITAL LETTER Y WITH HOOK ABOVE +1EF8; C; 1EF9; # LATIN CAPITAL LETTER Y WITH TILDE +1F08; C; 1F00; # GREEK CAPITAL LETTER ALPHA WITH PSILI +1F09; C; 1F01; # GREEK CAPITAL LETTER ALPHA WITH DASIA +1F0A; C; 1F02; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA +1F0B; C; 1F03; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA +1F0C; C; 1F04; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA +1F0D; C; 1F05; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA +1F0E; C; 1F06; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI +1F0F; C; 1F07; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI +1F18; C; 1F10; # GREEK CAPITAL LETTER EPSILON WITH PSILI +1F19; C; 1F11; # GREEK CAPITAL LETTER EPSILON WITH DASIA +1F1A; C; 1F12; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA +1F1B; C; 1F13; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA +1F1C; C; 1F14; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA +1F1D; C; 1F15; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F28; C; 1F20; # GREEK CAPITAL LETTER ETA WITH PSILI +1F29; C; 1F21; # GREEK CAPITAL LETTER ETA WITH DASIA +1F2A; C; 1F22; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA +1F2B; C; 1F23; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA +1F2C; C; 1F24; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA +1F2D; C; 1F25; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA +1F2E; C; 1F26; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI +1F2F; C; 1F27; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI +1F38; C; 1F30; # GREEK CAPITAL LETTER IOTA WITH PSILI +1F39; C; 1F31; # GREEK CAPITAL LETTER IOTA WITH DASIA +1F3A; C; 1F32; # GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA +1F3B; C; 1F33; # GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA +1F3C; C; 1F34; # 
GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA +1F3D; C; 1F35; # GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA +1F3E; C; 1F36; # GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI +1F3F; C; 1F37; # GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI +1F48; C; 1F40; # GREEK CAPITAL LETTER OMICRON WITH PSILI +1F49; C; 1F41; # GREEK CAPITAL LETTER OMICRON WITH DASIA +1F4A; C; 1F42; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA +1F4B; C; 1F43; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA +1F4C; C; 1F44; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA +1F4D; C; 1F45; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F50; F; 03C5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI +1F52; F; 03C5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA +1F54; F; 03C5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA +1F56; F; 03C5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI +1F59; C; 1F51; # GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B; C; 1F53; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D; C; 1F55; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F; C; 1F57; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F68; C; 1F60; # GREEK CAPITAL LETTER OMEGA WITH PSILI +1F69; C; 1F61; # GREEK CAPITAL LETTER OMEGA WITH DASIA +1F6A; C; 1F62; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA +1F6B; C; 1F63; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA +1F6C; C; 1F64; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA +1F6D; C; 1F65; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA +1F6E; C; 1F66; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI +1F6F; C; 1F67; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI +1F80; F; 1F00 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI +1F81; F; 1F01 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI +1F82; F; 1F02 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI +1F83; F; 1F03 03B9; # GREEK SMALL LETTER 
ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI +1F84; F; 1F04 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI +1F85; F; 1F05 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI +1F86; F; 1F06 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI +1F87; F; 1F07 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1F88; F; 1F00 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI +1F88; S; 1F80; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI +1F89; F; 1F01 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI +1F89; S; 1F81; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI +1F8A; F; 1F02 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1F8A; S; 1F82; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1F8B; F; 1F03 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1F8B; S; 1F83; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1F8C; F; 1F04 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1F8C; S; 1F84; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1F8D; F; 1F05 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1F8D; S; 1F85; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1F8E; F; 1F06 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1F8E; S; 1F86; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1F8F; F; 1F07 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1F8F; S; 1F87; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1F90; F; 1F20 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI +1F91; F; 1F21 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI +1F92; F; 1F22 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA 
AND YPOGEGRAMMENI +1F93; F; 1F23 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI +1F94; F; 1F24 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI +1F95; F; 1F25 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI +1F96; F; 1F26 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI +1F97; F; 1F27 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1F98; F; 1F20 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI +1F98; S; 1F90; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI +1F99; F; 1F21 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI +1F99; S; 1F91; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI +1F9A; F; 1F22 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1F9A; S; 1F92; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1F9B; F; 1F23 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1F9B; S; 1F93; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1F9C; F; 1F24 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1F9C; S; 1F94; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1F9D; F; 1F25 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1F9D; S; 1F95; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1F9E; F; 1F26 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1F9E; S; 1F96; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1F9F; F; 1F27 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1F9F; S; 1F97; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1FA0; F; 1F60 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI +1FA1; F; 1F61 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI +1FA2; F; 1F62 03B9; # GREEK SMALL LETTER 
OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI +1FA3; F; 1F63 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI +1FA4; F; 1F64 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI +1FA5; F; 1F65 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI +1FA6; F; 1F66 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI +1FA7; F; 1F67 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1FA8; F; 1F60 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI +1FA8; S; 1FA0; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI +1FA9; F; 1F61 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI +1FA9; S; 1FA1; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI +1FAA; F; 1F62 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1FAA; S; 1FA2; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1FAB; F; 1F63 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1FAB; S; 1FA3; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1FAC; F; 1F64 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1FAC; S; 1FA4; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1FAD; F; 1F65 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1FAD; S; 1FA5; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1FAE; F; 1F66 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1FAE; S; 1FA6; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1FAF; F; 1F67 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1FAF; S; 1FA7; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1FB2; F; 1F70 03B9; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI +1FB3; F; 03B1 03B9; # GREEK SMALL LETTER ALPHA WITH 
YPOGEGRAMMENI +1FB4; F; 03AC 03B9; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6; F; 03B1 0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI +1FB7; F; 03B1 0342 03B9; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI +1FB8; C; 1FB0; # GREEK CAPITAL LETTER ALPHA WITH VRACHY +1FB9; C; 1FB1; # GREEK CAPITAL LETTER ALPHA WITH MACRON +1FBA; C; 1F70; # GREEK CAPITAL LETTER ALPHA WITH VARIA +1FBB; C; 1F71; # GREEK CAPITAL LETTER ALPHA WITH OXIA +1FBC; F; 03B1 03B9; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBC; S; 1FB3; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBE; C; 03B9; # GREEK PROSGEGRAMMENI +1FC2; F; 1F74 03B9; # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI +1FC3; F; 03B7 03B9; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI +1FC4; F; 03AE 03B9; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6; F; 03B7 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI +1FC7; F; 03B7 0342 03B9; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI +1FC8; C; 1F72; # GREEK CAPITAL LETTER EPSILON WITH VARIA +1FC9; C; 1F73; # GREEK CAPITAL LETTER EPSILON WITH OXIA +1FCA; C; 1F74; # GREEK CAPITAL LETTER ETA WITH VARIA +1FCB; C; 1F75; # GREEK CAPITAL LETTER ETA WITH OXIA +1FCC; F; 03B7 03B9; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FCC; S; 1FC3; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FD2; F; 03B9 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA +1FD3; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6; F; 03B9 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI +1FD7; F; 03B9 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI +1FD8; C; 1FD0; # GREEK CAPITAL LETTER IOTA WITH VRACHY +1FD9; C; 1FD1; # GREEK CAPITAL LETTER IOTA WITH MACRON +1FDA; C; 1F76; # GREEK CAPITAL LETTER IOTA WITH VARIA +1FDB; C; 1F77; # GREEK CAPITAL LETTER IOTA WITH OXIA +1FE2; F; 03C5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA +1FE3; F; 03C5 0308 0301; # GREEK SMALL 
LETTER UPSILON WITH DIALYTIKA AND OXIA +1FE4; F; 03C1 0313; # GREEK SMALL LETTER RHO WITH PSILI +1FE6; F; 03C5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI +1FE7; F; 03C5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI +1FE8; C; 1FE0; # GREEK CAPITAL LETTER UPSILON WITH VRACHY +1FE9; C; 1FE1; # GREEK CAPITAL LETTER UPSILON WITH MACRON +1FEA; C; 1F7A; # GREEK CAPITAL LETTER UPSILON WITH VARIA +1FEB; C; 1F7B; # GREEK CAPITAL LETTER UPSILON WITH OXIA +1FEC; C; 1FE5; # GREEK CAPITAL LETTER RHO WITH DASIA +1FF2; F; 1F7C 03B9; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI +1FF3; F; 03C9 03B9; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI +1FF4; F; 03CE 03B9; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6; F; 03C9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI +1FF7; F; 03C9 0342 03B9; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI +1FF8; C; 1F78; # GREEK CAPITAL LETTER OMICRON WITH VARIA +1FF9; C; 1F79; # GREEK CAPITAL LETTER OMICRON WITH OXIA +1FFA; C; 1F7C; # GREEK CAPITAL LETTER OMEGA WITH VARIA +1FFB; C; 1F7D; # GREEK CAPITAL LETTER OMEGA WITH OXIA +1FFC; F; 03C9 03B9; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +1FFC; S; 1FF3; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +2126; C; 03C9; # OHM SIGN +212A; C; 006B; # KELVIN SIGN +212B; C; 00E5; # ANGSTROM SIGN +2160; C; 2170; # ROMAN NUMERAL ONE +2161; C; 2171; # ROMAN NUMERAL TWO +2162; C; 2172; # ROMAN NUMERAL THREE +2163; C; 2173; # ROMAN NUMERAL FOUR +2164; C; 2174; # ROMAN NUMERAL FIVE +2165; C; 2175; # ROMAN NUMERAL SIX +2166; C; 2176; # ROMAN NUMERAL SEVEN +2167; C; 2177; # ROMAN NUMERAL EIGHT +2168; C; 2178; # ROMAN NUMERAL NINE +2169; C; 2179; # ROMAN NUMERAL TEN +216A; C; 217A; # ROMAN NUMERAL ELEVEN +216B; C; 217B; # ROMAN NUMERAL TWELVE +216C; C; 217C; # ROMAN NUMERAL FIFTY +216D; C; 217D; # ROMAN NUMERAL ONE HUNDRED +216E; C; 217E; # ROMAN NUMERAL FIVE HUNDRED +216F; C; 217F; # ROMAN NUMERAL ONE THOUSAND +24B6; C; 24D0; # 
CIRCLED LATIN CAPITAL LETTER A +24B7; C; 24D1; # CIRCLED LATIN CAPITAL LETTER B +24B8; C; 24D2; # CIRCLED LATIN CAPITAL LETTER C +24B9; C; 24D3; # CIRCLED LATIN CAPITAL LETTER D +24BA; C; 24D4; # CIRCLED LATIN CAPITAL LETTER E +24BB; C; 24D5; # CIRCLED LATIN CAPITAL LETTER F +24BC; C; 24D6; # CIRCLED LATIN CAPITAL LETTER G +24BD; C; 24D7; # CIRCLED LATIN CAPITAL LETTER H +24BE; C; 24D8; # CIRCLED LATIN CAPITAL LETTER I +24BF; C; 24D9; # CIRCLED LATIN CAPITAL LETTER J +24C0; C; 24DA; # CIRCLED LATIN CAPITAL LETTER K +24C1; C; 24DB; # CIRCLED LATIN CAPITAL LETTER L +24C2; C; 24DC; # CIRCLED LATIN CAPITAL LETTER M +24C3; C; 24DD; # CIRCLED LATIN CAPITAL LETTER N +24C4; C; 24DE; # CIRCLED LATIN CAPITAL LETTER O +24C5; C; 24DF; # CIRCLED LATIN CAPITAL LETTER P +24C6; C; 24E0; # CIRCLED LATIN CAPITAL LETTER Q +24C7; C; 24E1; # CIRCLED LATIN CAPITAL LETTER R +24C8; C; 24E2; # CIRCLED LATIN CAPITAL LETTER S +24C9; C; 24E3; # CIRCLED LATIN CAPITAL LETTER T +24CA; C; 24E4; # CIRCLED LATIN CAPITAL LETTER U +24CB; C; 24E5; # CIRCLED LATIN CAPITAL LETTER V +24CC; C; 24E6; # CIRCLED LATIN CAPITAL LETTER W +24CD; C; 24E7; # CIRCLED LATIN CAPITAL LETTER X +24CE; C; 24E8; # CIRCLED LATIN CAPITAL LETTER Y +24CF; C; 24E9; # CIRCLED LATIN CAPITAL LETTER Z +2C00; C; 2C30; # GLAGOLITIC CAPITAL LETTER AZU +2C01; C; 2C31; # GLAGOLITIC CAPITAL LETTER BUKY +2C02; C; 2C32; # GLAGOLITIC CAPITAL LETTER VEDE +2C03; C; 2C33; # GLAGOLITIC CAPITAL LETTER GLAGOLI +2C04; C; 2C34; # GLAGOLITIC CAPITAL LETTER DOBRO +2C05; C; 2C35; # GLAGOLITIC CAPITAL LETTER YESTU +2C06; C; 2C36; # GLAGOLITIC CAPITAL LETTER ZHIVETE +2C07; C; 2C37; # GLAGOLITIC CAPITAL LETTER DZELO +2C08; C; 2C38; # GLAGOLITIC CAPITAL LETTER ZEMLJA +2C09; C; 2C39; # GLAGOLITIC CAPITAL LETTER IZHE +2C0A; C; 2C3A; # GLAGOLITIC CAPITAL LETTER INITIAL IZHE +2C0B; C; 2C3B; # GLAGOLITIC CAPITAL LETTER I +2C0C; C; 2C3C; # GLAGOLITIC CAPITAL LETTER DJERVI +2C0D; C; 2C3D; # GLAGOLITIC CAPITAL LETTER KAKO +2C0E; C; 2C3E; # GLAGOLITIC CAPITAL 
LETTER LJUDIJE +2C0F; C; 2C3F; # GLAGOLITIC CAPITAL LETTER MYSLITE +2C10; C; 2C40; # GLAGOLITIC CAPITAL LETTER NASHI +2C11; C; 2C41; # GLAGOLITIC CAPITAL LETTER ONU +2C12; C; 2C42; # GLAGOLITIC CAPITAL LETTER POKOJI +2C13; C; 2C43; # GLAGOLITIC CAPITAL LETTER RITSI +2C14; C; 2C44; # GLAGOLITIC CAPITAL LETTER SLOVO +2C15; C; 2C45; # GLAGOLITIC CAPITAL LETTER TVRIDO +2C16; C; 2C46; # GLAGOLITIC CAPITAL LETTER UKU +2C17; C; 2C47; # GLAGOLITIC CAPITAL LETTER FRITU +2C18; C; 2C48; # GLAGOLITIC CAPITAL LETTER HERU +2C19; C; 2C49; # GLAGOLITIC CAPITAL LETTER OTU +2C1A; C; 2C4A; # GLAGOLITIC CAPITAL LETTER PE +2C1B; C; 2C4B; # GLAGOLITIC CAPITAL LETTER SHTA +2C1C; C; 2C4C; # GLAGOLITIC CAPITAL LETTER TSI +2C1D; C; 2C4D; # GLAGOLITIC CAPITAL LETTER CHRIVI +2C1E; C; 2C4E; # GLAGOLITIC CAPITAL LETTER SHA +2C1F; C; 2C4F; # GLAGOLITIC CAPITAL LETTER YERU +2C20; C; 2C50; # GLAGOLITIC CAPITAL LETTER YERI +2C21; C; 2C51; # GLAGOLITIC CAPITAL LETTER YATI +2C22; C; 2C52; # GLAGOLITIC CAPITAL LETTER SPIDERY HA +2C23; C; 2C53; # GLAGOLITIC CAPITAL LETTER YU +2C24; C; 2C54; # GLAGOLITIC CAPITAL LETTER SMALL YUS +2C25; C; 2C55; # GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL +2C26; C; 2C56; # GLAGOLITIC CAPITAL LETTER YO +2C27; C; 2C57; # GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS +2C28; C; 2C58; # GLAGOLITIC CAPITAL LETTER BIG YUS +2C29; C; 2C59; # GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS +2C2A; C; 2C5A; # GLAGOLITIC CAPITAL LETTER FITA +2C2B; C; 2C5B; # GLAGOLITIC CAPITAL LETTER IZHITSA +2C2C; C; 2C5C; # GLAGOLITIC CAPITAL LETTER SHTAPIC +2C2D; C; 2C5D; # GLAGOLITIC CAPITAL LETTER TROKUTASTI A +2C2E; C; 2C5E; # GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE +2C80; C; 2C81; # COPTIC CAPITAL LETTER ALFA +2C82; C; 2C83; # COPTIC CAPITAL LETTER VIDA +2C84; C; 2C85; # COPTIC CAPITAL LETTER GAMMA +2C86; C; 2C87; # COPTIC CAPITAL LETTER DALDA +2C88; C; 2C89; # COPTIC CAPITAL LETTER EIE +2C8A; C; 2C8B; # COPTIC CAPITAL LETTER SOU +2C8C; C; 2C8D; # COPTIC CAPITAL LETTER ZATA +2C8E; C; 2C8F; # 
COPTIC CAPITAL LETTER HATE +2C90; C; 2C91; # COPTIC CAPITAL LETTER THETHE +2C92; C; 2C93; # COPTIC CAPITAL LETTER IAUDA +2C94; C; 2C95; # COPTIC CAPITAL LETTER KAPA +2C96; C; 2C97; # COPTIC CAPITAL LETTER LAULA +2C98; C; 2C99; # COPTIC CAPITAL LETTER MI +2C9A; C; 2C9B; # COPTIC CAPITAL LETTER NI +2C9C; C; 2C9D; # COPTIC CAPITAL LETTER KSI +2C9E; C; 2C9F; # COPTIC CAPITAL LETTER O +2CA0; C; 2CA1; # COPTIC CAPITAL LETTER PI +2CA2; C; 2CA3; # COPTIC CAPITAL LETTER RO +2CA4; C; 2CA5; # COPTIC CAPITAL LETTER SIMA +2CA6; C; 2CA7; # COPTIC CAPITAL LETTER TAU +2CA8; C; 2CA9; # COPTIC CAPITAL LETTER UA +2CAA; C; 2CAB; # COPTIC CAPITAL LETTER FI +2CAC; C; 2CAD; # COPTIC CAPITAL LETTER KHI +2CAE; C; 2CAF; # COPTIC CAPITAL LETTER PSI +2CB0; C; 2CB1; # COPTIC CAPITAL LETTER OOU +2CB2; C; 2CB3; # COPTIC CAPITAL LETTER DIALECT-P ALEF +2CB4; C; 2CB5; # COPTIC CAPITAL LETTER OLD COPTIC AIN +2CB6; C; 2CB7; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE +2CB8; C; 2CB9; # COPTIC CAPITAL LETTER DIALECT-P KAPA +2CBA; C; 2CBB; # COPTIC CAPITAL LETTER DIALECT-P NI +2CBC; C; 2CBD; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI +2CBE; C; 2CBF; # COPTIC CAPITAL LETTER OLD COPTIC OOU +2CC0; C; 2CC1; # COPTIC CAPITAL LETTER SAMPI +2CC2; C; 2CC3; # COPTIC CAPITAL LETTER CROSSED SHEI +2CC4; C; 2CC5; # COPTIC CAPITAL LETTER OLD COPTIC SHEI +2CC6; C; 2CC7; # COPTIC CAPITAL LETTER OLD COPTIC ESH +2CC8; C; 2CC9; # COPTIC CAPITAL LETTER AKHMIMIC KHEI +2CCA; C; 2CCB; # COPTIC CAPITAL LETTER DIALECT-P HORI +2CCC; C; 2CCD; # COPTIC CAPITAL LETTER OLD COPTIC HORI +2CCE; C; 2CCF; # COPTIC CAPITAL LETTER OLD COPTIC HA +2CD0; C; 2CD1; # COPTIC CAPITAL LETTER L-SHAPED HA +2CD2; C; 2CD3; # COPTIC CAPITAL LETTER OLD COPTIC HEI +2CD4; C; 2CD5; # COPTIC CAPITAL LETTER OLD COPTIC HAT +2CD6; C; 2CD7; # COPTIC CAPITAL LETTER OLD COPTIC GANGIA +2CD8; C; 2CD9; # COPTIC CAPITAL LETTER OLD COPTIC DJA +2CDA; C; 2CDB; # COPTIC CAPITAL LETTER OLD COPTIC SHIMA +2CDC; C; 2CDD; # COPTIC CAPITAL LETTER OLD NUBIAN SHIMA +2CDE; C; 2CDF; # 
COPTIC CAPITAL LETTER OLD NUBIAN NGI +2CE0; C; 2CE1; # COPTIC CAPITAL LETTER OLD NUBIAN NYI +2CE2; C; 2CE3; # COPTIC CAPITAL LETTER OLD NUBIAN WAU +FB00; F; 0066 0066; # LATIN SMALL LIGATURE FF +FB01; F; 0066 0069; # LATIN SMALL LIGATURE FI +FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL +FB03; F; 0066 0066 0069; # LATIN SMALL LIGATURE FFI +FB04; F; 0066 0066 006C; # LATIN SMALL LIGATURE FFL +FB05; F; 0073 0074; # LATIN SMALL LIGATURE LONG S T +FB06; F; 0073 0074; # LATIN SMALL LIGATURE ST +FB13; F; 0574 0576; # ARMENIAN SMALL LIGATURE MEN NOW +FB14; F; 0574 0565; # ARMENIAN SMALL LIGATURE MEN ECH +FB15; F; 0574 056B; # ARMENIAN SMALL LIGATURE MEN INI +FB16; F; 057E 0576; # ARMENIAN SMALL LIGATURE VEW NOW +FB17; F; 0574 056D; # ARMENIAN SMALL LIGATURE MEN XEH +FF21; C; FF41; # FULLWIDTH LATIN CAPITAL LETTER A +FF22; C; FF42; # FULLWIDTH LATIN CAPITAL LETTER B +FF23; C; FF43; # FULLWIDTH LATIN CAPITAL LETTER C +FF24; C; FF44; # FULLWIDTH LATIN CAPITAL LETTER D +FF25; C; FF45; # FULLWIDTH LATIN CAPITAL LETTER E +FF26; C; FF46; # FULLWIDTH LATIN CAPITAL LETTER F +FF27; C; FF47; # FULLWIDTH LATIN CAPITAL LETTER G +FF28; C; FF48; # FULLWIDTH LATIN CAPITAL LETTER H +FF29; C; FF49; # FULLWIDTH LATIN CAPITAL LETTER I +FF2A; C; FF4A; # FULLWIDTH LATIN CAPITAL LETTER J +FF2B; C; FF4B; # FULLWIDTH LATIN CAPITAL LETTER K +FF2C; C; FF4C; # FULLWIDTH LATIN CAPITAL LETTER L +FF2D; C; FF4D; # FULLWIDTH LATIN CAPITAL LETTER M +FF2E; C; FF4E; # FULLWIDTH LATIN CAPITAL LETTER N +FF2F; C; FF4F; # FULLWIDTH LATIN CAPITAL LETTER O +FF30; C; FF50; # FULLWIDTH LATIN CAPITAL LETTER P +FF31; C; FF51; # FULLWIDTH LATIN CAPITAL LETTER Q +FF32; C; FF52; # FULLWIDTH LATIN CAPITAL LETTER R +FF33; C; FF53; # FULLWIDTH LATIN CAPITAL LETTER S +FF34; C; FF54; # FULLWIDTH LATIN CAPITAL LETTER T +FF35; C; FF55; # FULLWIDTH LATIN CAPITAL LETTER U +FF36; C; FF56; # FULLWIDTH LATIN CAPITAL LETTER V +FF37; C; FF57; # FULLWIDTH LATIN CAPITAL LETTER W +FF38; C; FF58; # FULLWIDTH LATIN CAPITAL LETTER X +FF39; 
C; FF59; # FULLWIDTH LATIN CAPITAL LETTER Y +FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z +10400; C; 10428; # DESERET CAPITAL LETTER LONG I +10401; C; 10429; # DESERET CAPITAL LETTER LONG E +10402; C; 1042A; # DESERET CAPITAL LETTER LONG A +10403; C; 1042B; # DESERET CAPITAL LETTER LONG AH +10404; C; 1042C; # DESERET CAPITAL LETTER LONG O +10405; C; 1042D; # DESERET CAPITAL LETTER LONG OO +10406; C; 1042E; # DESERET CAPITAL LETTER SHORT I +10407; C; 1042F; # DESERET CAPITAL LETTER SHORT E +10408; C; 10430; # DESERET CAPITAL LETTER SHORT A +10409; C; 10431; # DESERET CAPITAL LETTER SHORT AH +1040A; C; 10432; # DESERET CAPITAL LETTER SHORT O +1040B; C; 10433; # DESERET CAPITAL LETTER SHORT OO +1040C; C; 10434; # DESERET CAPITAL LETTER AY +1040D; C; 10435; # DESERET CAPITAL LETTER OW +1040E; C; 10436; # DESERET CAPITAL LETTER WU +1040F; C; 10437; # DESERET CAPITAL LETTER YEE +10410; C; 10438; # DESERET CAPITAL LETTER H +10411; C; 10439; # DESERET CAPITAL LETTER PEE +10412; C; 1043A; # DESERET CAPITAL LETTER BEE +10413; C; 1043B; # DESERET CAPITAL LETTER TEE +10414; C; 1043C; # DESERET CAPITAL LETTER DEE +10415; C; 1043D; # DESERET CAPITAL LETTER CHEE +10416; C; 1043E; # DESERET CAPITAL LETTER JEE +10417; C; 1043F; # DESERET CAPITAL LETTER KAY +10418; C; 10440; # DESERET CAPITAL LETTER GAY +10419; C; 10441; # DESERET CAPITAL LETTER EF +1041A; C; 10442; # DESERET CAPITAL LETTER VEE +1041B; C; 10443; # DESERET CAPITAL LETTER ETH +1041C; C; 10444; # DESERET CAPITAL LETTER THEE +1041D; C; 10445; # DESERET CAPITAL LETTER ES +1041E; C; 10446; # DESERET CAPITAL LETTER ZEE +1041F; C; 10447; # DESERET CAPITAL LETTER ESH +10420; C; 10448; # DESERET CAPITAL LETTER ZHEE +10421; C; 10449; # DESERET CAPITAL LETTER ER +10422; C; 1044A; # DESERET CAPITAL LETTER EL +10423; C; 1044B; # DESERET CAPITAL LETTER EM +10424; C; 1044C; # DESERET CAPITAL LETTER EN +10425; C; 1044D; # DESERET CAPITAL LETTER ENG +10426; C; 1044E; # DESERET CAPITAL LETTER OI +10427; C; 1044F; # DESERET 
CAPITAL LETTER EW diff --git a/src/utils/caseconvert.cpp b/src/utils/caseconvert.cpp new file mode 100644 index 00000000..83f275b7 --- /dev/null +++ b/src/utils/caseconvert.cpp @@ -0,0 +1,1765 @@ +/* C++ code produced by gperf version 3.0.1 */ +/* Command-line: gperf -I -n -LC++ -t */ +/* Computed positions: -k'1-4' */ + +#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ + && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ + && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \ + && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \ + && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \ + && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \ + && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \ + && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \ + && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ + && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \ + && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \ + && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \ + && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \ + && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \ + && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \ + && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \ + && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ + && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ + && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ + && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ + && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ + && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ + && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)) +/* The character set is not based on ISO-646. */ +#error "gperf generated tables don't work with this execution character set. Please report a bug to ." 
+#endif + + +// Automatically generated by gencasefold.sh, do not edit +#ifndef TEST_CASECONVERT +struct mapping { char *name; unsigned short value; }; +#include + +#define TOTAL_KEYWORDS 865 +#define MIN_WORD_LENGTH 4 +#define MAX_WORD_LENGTH 4 +#define MIN_HASH_VALUE 60 +#define MAX_HASH_VALUE 3775 +/* maximum key range = 3716, duplicates = 0 */ + +class Perfect_Hash +{ +private: + static inline unsigned int hash (const char *str, unsigned int len); +public: + static struct mapping *in_word_set (const char *str, unsigned int len); +}; + +inline /*ARGSUSED*/ +unsigned int +Perfect_Hash::hash (register const char *str, register unsigned int len) +{ + static unsigned short asso_values[] = + { + 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, + 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, + 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, + 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, + 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 80, + 60, 130, 906, 1499, 91, 2014, 950, 1369, 96, 1974, + 5, 30, 800, 1719, 828, 45, 847, 3776, 3776, 0, + 1054, 10, 1439, 680, 1609, 366, 7, 560, 537, 473, + 95, 647, 0, 15, 8, 3776, 3776, 3776, 3776, 3776, + 3776, 3776, 1, 195, 375, 302, 857, 319, 3776, 3776, + 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, + 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, + 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, + 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, + 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, + 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, + 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, + 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, + 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, + 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, + 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, + 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 
3776, 3776, + 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, + 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, + 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, + 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, + 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, + 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, 3776, + 3776, 3776, 3776 + }; + return asso_values[(unsigned char)str[3]+4] + asso_values[(unsigned char)str[2]+27] + asso_values[(unsigned char)str[1]+13] + asso_values[(unsigned char)str[0]+1]; +} + +struct mapping * +Perfect_Hash::in_word_set (register const char *str, register unsigned int len) +{ + static struct mapping wordlist[] = + { + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"1E7A", 0x1E7B}, + {"1EAA", 0x1EAB}, + {""}, {""}, {""}, + {"1E78", 0x1E79}, + {"1EA8", 0x1EA9}, + {"1E1A", 0x1E1B}, + {""}, {""}, + {"1E7C", 0x1E7D}, + {"1EAC", 0x1EAD}, + {"1E18", 0x1E19}, + {""}, {""}, + {"1E8A", 0x1E8B}, + {"1FAA", 0x1FA2}, + {"1E1C", 0x1E1D}, + {""}, {""}, + {"1E88", 0x1E89}, + {"1FA8", 0x1FA0}, + {"1F1A", 0x1F12}, + {"1F9A", 0x1F92}, + {""}, + {"1E8C", 0x1E8D}, + {"1FAC", 0x1FA4}, + {"1F18", 0x1F10}, + {"1F98", 0x1F90}, + {""}, + {"1F8A", 0x1F82}, + {"10AA", 0x2D0A}, + {"1F1C", 0x1F14}, + {"1F9C", 0x1F94}, + {""}, + {"1F88", 0x1F80}, + {"10A8", 0x2D08}, + {""}, {""}, {""}, + {"1F8C", 0x1F84}, + {"10AC", 0x2D0C}, + {""}, {""}, {""}, {""}, + {"1FA9", 0x1FA1}, + {""}, {""}, {""}, {""}, {""}, + {"1F19", 0x1F11}, + {"1F99", 0x1F91}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"1F89", 0x1F81}, + {"10A9", 0x2D09}, + {""}, {""}, {""}, + {"047A", 0x047B}, + {"04AA", 0x04AB}, + 
{""}, {""}, {""}, + {"0478", 0x0479}, + {"04A8", 0x04A9}, + {"041A", 0x043A}, + {"049A", 0x049B}, + {""}, + {"047C", 0x047D}, + {"04AC", 0x04AD}, + {"0418", 0x0438}, + {"0498", 0x0499}, + {""}, + {"048A", 0x048B}, + {""}, + {"041C", 0x043C}, + {"049C", 0x049D}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"048C", 0x048D}, + {"1E72", 0x1E73}, + {"1EA2", 0x1EA3}, + {""}, {""}, + {"1E5A", 0x1E5B}, + {"1E76", 0x1E77}, + {"1EA6", 0x1EA7}, + {"1E12", 0x1E13}, + {"1E92", 0x1E93}, + {"1E58", 0x1E59}, + {""}, + {"0419", 0x0439}, + {"1E16", 0x1E17}, + {""}, + {"1E5C", 0x1E5D}, + {"1E82", 0x1E83}, + {""}, {""}, {""}, {""}, + {"1E86", 0x1E87}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, + {"10A2", 0x2D02}, + {""}, {""}, {""}, {""}, + {"10A6", 0x2D06}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, + {"1F59", 0x1F51}, + {""}, {""}, {""}, {""}, + {"005A", 0x007A}, + {""}, {""}, {""}, {""}, + {"0058", 0x0078}, + {""}, {""}, {""}, {""}, {""}, + {"0472", 0x0473}, + {"04A2", 0x04A3}, + {""}, {""}, {""}, + {"0476", 0x0477}, + {"04A6", 0x04A7}, + {"0412", 0x0432}, + {"0492", 0x0493}, + {""}, + {"2CAA", 0x2CAB}, + {""}, + {"0416", 0x0436}, + {"0496", 0x0497}, + {""}, + {"2CA8", 0x2CA9}, + {"2C1A", 0x2C4A}, + {"2C9A", 0x2C9B}, + {""}, + {"0059", 0x0079}, + {"2CAC", 0x2CAD}, + {"2C18", 0x2C48}, + {"2C98", 0x2C99}, + {""}, + {"2C8A", 0x2C8B}, + {""}, + {"2C1C", 0x2C4C}, + {"2C9C", 0x2C9D}, + {""}, + {"2C88", 0x2C89}, + {"1E52", 0x1E53}, + {""}, {""}, {""}, + {"2C8C", 0x2C8D}, + {"1E56", 0x1E57}, + {""}, {""}, {""}, + {"1EBA", 0x1EBB}, + {""}, {""}, {""}, {""}, + {"1EB8", 0x1EB9}, + {""}, + {"2C19", 0x2C49}, + {""}, {""}, + {"1EBC", 0x1EBD}, + {""}, {""}, {""}, {""}, + {"1FBA", 0x1F70}, + {""}, {""}, {""}, {""}, + {"1FB8", 0x1FB0}, + {""}, {""}, {""}, {""}, + {"1FBC", 0x1FB3}, + {""}, {""}, {""}, {""}, + {"10BA", 0x2D1A}, + {""}, {""}, {""}, {""}, + {"10B8", 0x2D18}, + {""}, {""}, {""}, {""}, + {"10BC", 0x2D1C}, + {"0052", 0x0072}, + {""}, {""}, 
{""}, + {"1FB9", 0x1FB1}, + {"0056", 0x0076}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, + {"10B9", 0x2D19}, + {""}, + {"2CA2", 0x2CA3}, + {""}, {""}, + {"04BA", 0x04BB}, + {""}, + {"2CA6", 0x2CA7}, + {"2C12", 0x2C42}, + {"2C92", 0x2C93}, + {"04B8", 0x04B9}, + {""}, {""}, + {"2C16", 0x2C46}, + {"2C96", 0x2C97}, + {"04BC", 0x04BD}, + {"2C82", 0x2C83}, + {""}, {""}, {""}, {""}, + {"2C86", 0x2C87}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"1EB2", 0x1EB3}, + {""}, {""}, {""}, {""}, + {"1EB6", 0x1EB7}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, + {"1EDA", 0x1EDB}, + {""}, {""}, {""}, {""}, + {"1ED8", 0x1ED9}, + {""}, {""}, + {"24BA", 0x24D4}, + {""}, + {"1EDC", 0x1EDD}, + {""}, {""}, + {"24B8", 0x24D2}, + {"10B2", 0x2D12}, + {"1FDA", 0x1F76}, + {""}, {""}, + {"24BC", 0x24D6}, + {"10B6", 0x2D16}, + {"1FD8", 0x1FD0}, + {""}, + {"1EF8", 0x1EF9}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"1FFA", 0x1F7C}, + {""}, {""}, {""}, {""}, + {"1FF8", 0x1F78}, + {"24B9", 0x24D3}, + {""}, {""}, {""}, + {"1FFC", 0x1FF3}, + {""}, {""}, + {"1FD9", 0x1FD1}, + {""}, {""}, {""}, + {"04B2", 0x04B3}, + {"00DA", 0x00FA}, + {""}, {""}, {""}, + {"04B6", 0x04B7}, + {"00D8", 0x00F8}, + {""}, {""}, + {"2CBA", 0x2CBB}, + {""}, + {"00DC", 0x00FC}, + {""}, + {"1FF9", 0x1F79}, + {"2CB8", 0x2CB9}, + {"1E0A", 0x1E0B}, + {"04DA", 0x04DB}, + {""}, {""}, + {"2CBC", 0x2CBD}, + {"1E08", 0x1E09}, + {"04D8", 0x04D9}, + {""}, {""}, + {"1ECA", 0x1ECB}, + {"1E0C", 0x1E0D}, + {"04DC", 0x04DD}, + {""}, {""}, + {"1EC8", 0x1EC9}, + {"1F0A", 0x1F02}, + {"00D9", 0x00F9}, + {""}, {""}, + {"1ECC", 0x1ECD}, + {"1F08", 0x1F00}, + {""}, {""}, + {"04F8", 0x04F9}, + {"1FCA", 0x1F74}, + {"1F0C", 0x1F04}, + {""}, + {"1ED2", 0x1ED3}, + {""}, + {"1FC8", 0x1F72}, + {""}, {""}, + {"1ED6", 0x1ED7}, + {""}, + {"1FCC", 0x1FC3}, + {""}, {""}, {""}, {""}, {""}, + {"24B6", 0x24D0}, + {""}, {""}, {""}, + {"1EF2", 0x1EF3}, + {"1F09", 0x1F01}, + 
{""}, {""}, {""}, + {"1EF6", 0x1EF7}, + {""}, {""}, {""}, {""}, + {"1FC9", 0x1F73}, + {""}, {""}, {""}, {""}, + {"00CA", 0x00EA}, + {""}, {""}, {""}, {""}, + {"00C8", 0x00E8}, + {"040A", 0x045A}, + {""}, {""}, {""}, + {"00CC", 0x00EC}, + {"0408", 0x0458}, + {""}, {""}, {""}, {""}, + {"040C", 0x045C}, + {""}, + {"00D2", 0x00F2}, + {""}, {""}, {""}, {""}, + {"00D6", 0x00F6}, + {""}, {""}, + {"2CB2", 0x2CB3}, + {""}, {""}, {""}, + {"00C9", 0x00E9}, + {"2CB6", 0x2CB7}, + {"1E02", 0x1E03}, + {"04D2", 0x04D3}, + {""}, {""}, + {"0409", 0x0459}, + {"1E06", 0x1E07}, + {"04D6", 0x04D7}, + {""}, {""}, + {"1EC2", 0x1EC3}, + {"2CDA", 0x2CDB}, + {""}, {""}, + {"04C9", 0x04CA}, + {"1EC6", 0x1EC7}, + {"2CD8", 0x2CD9}, + {"1E4A", 0x1E4B}, + {""}, + {"04F2", 0x04F3}, + {""}, + {"2CDC", 0x2CDD}, + {"1E48", 0x1E49}, + {""}, + {"04F6", 0x04F7}, + {""}, {""}, + {"1E4C", 0x1E4D}, + {""}, {""}, {""}, {""}, + {"1F4A", 0x1F42}, + {""}, + {"24CA", 0x24E4}, + {""}, {""}, + {"1F48", 0x1F40}, + {""}, + {"24C8", 0x24E2}, + {"10C2", 0x2D22}, + {""}, + {"1F4C", 0x1F44}, + {""}, + {"24CC", 0x24E6}, + {""}, + {"FF3A", 0xFF5A}, + {""}, {""}, {""}, {""}, + {"FF38", 0xFF58}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"00C2", 0x00E2}, + {""}, + {"1F49", 0x1F41}, + {""}, + {"24C9", 0x24E3}, + {"00C6", 0x00E6}, + {"0402", 0x0452}, + {"004A", 0x006A}, + {""}, + {"FF2A", 0xFF4A}, + {""}, + {"0406", 0x0456}, + {"0048", 0x0068}, + {""}, + {"FF28", 0xFF48}, + {"2C0A", 0x2C3A}, + {"FF39", 0xFF59}, + {"004C", 0x006C}, + {""}, + {"FF2C", 0xFF4C}, + {"2C08", 0x2C38}, + {"1E3A", 0x1E3B}, + {""}, {""}, + {"2CCA", 0x2CCB}, + {"2C0C", 0x2C3C}, + {"1E38", 0x1E39}, + {""}, {""}, + {"2CC8", 0x2CC9}, + {""}, + {"1E3C", 0x1E3D}, + {""}, {""}, + {"2CCC", 0x2CCD}, + {""}, + {"1F3A", 0x1F32}, + {"0049", 0x0069}, + {""}, + {"FF29", 0xFF49}, + {""}, + {"1F38", 0x1F30}, + {"2CD2", 0x2CD3}, + {""}, + {"1E2A", 0x1E2B}, + {"2C09", 0x2C39}, + {"1F3C", 0x1F34}, + {"2CD6", 0x2CD7}, + {"1E42", 0x1E43}, + {"1E28", 0x1E29}, + 
{""}, {""}, {""}, + {"1E46", 0x1E47}, + {"1E2C", 0x1E2D}, + {""}, {""}, {""}, {""}, + {"1F2A", 0x1F22}, + {""}, {""}, {""}, {""}, + {"1F28", 0x1F20}, + {"24C2", 0x24DC}, + {"1F39", 0x1F31}, + {""}, {""}, + {"1F2C", 0x1F24}, + {"24C6", 0x24E0}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"FF32", 0xFF52}, + {""}, {""}, {""}, {""}, + {"FF36", 0xFF56}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"1F29", 0x1F21}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"0042", 0x0062}, + {""}, + {"FF22", 0xFF42}, + {""}, {""}, + {"0046", 0x0066}, + {""}, + {"FF26", 0xFF46}, + {"2C02", 0x2C32}, + {""}, {""}, + {"042A", 0x044A}, + {""}, + {"2C06", 0x2C36}, + {"1E32", 0x1E33}, + {""}, + {"0428", 0x0448}, + {"2CC2", 0x2CC3}, + {""}, + {"1E36", 0x1E37}, + {""}, + {"042C", 0x044C}, + {"2CC6", 0x2CC7}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, + {"1E6A", 0x1E6B}, + {""}, {""}, {""}, + {"1E22", 0x1E23}, + {"1E68", 0x1E69}, + {""}, {""}, + {"0429", 0x0449}, + {"1E26", 0x1E27}, + {"1E6C", 0x1E6D}, + {""}, {""}, {""}, {""}, + {"1F6A", 0x1F62}, + {""}, {""}, {""}, {""}, + {"1F68", 0x1F60}, + {""}, {""}, {""}, {""}, + {"1F6C", 0x1F64}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"1E7E", 0x1E7F}, + {"1EAE", 0x1EAF}, + {""}, {""}, {""}, {""}, {""}, + {"1E1E", 0x1E1F}, + {""}, {""}, {""}, {""}, + {"1F69", 0x1F61}, + {""}, {""}, + {"1E8E", 0x1E8F}, + {"1FAE", 0x1FA6}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"1F9E", 0x1F96}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"1F8E", 0x1F86}, + {"10AE", 0x2D0E}, + {"046A", 0x046B}, + {""}, {""}, {""}, + {"0422", 0x0442}, + {"0468", 0x0469}, + {""}, {""}, {""}, + {"0426", 0x0446}, + {"046C", 0x046D}, + {""}, {""}, + {"2C2A", 0x2C5A}, + {""}, {""}, {""}, {""}, + {"2C28", 0x2C58}, + {""}, {""}, {""}, {""}, + {"2C2C", 0x2C5C}, + {""}, {""}, + {"1E62", 0x1E63}, + {""}, {""}, {""}, {""}, + {"1E66", 0x1E67}, + {""}, + {"047E", 0x047F}, + {"04AE", 0x04AF}, + {""}, {""}, {""}, {""}, {""}, + {"041E", 0x043E}, + {"049E", 0x049F}, + {""}, + 
{"2C29", 0x2C59}, + {""}, {""}, {""}, {""}, + {"048E", 0x048F}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, + {"1E5E", 0x1E5F}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"0462", 0x0463}, + {""}, {""}, {""}, {""}, + {"0466", 0x0467}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"2C22", 0x2C52}, + {""}, {""}, {""}, {""}, + {"2C26", 0x2C56}, + {""}, {""}, {""}, + {"0178", 0x00FF}, + {""}, + {"011A", 0x011B}, + {""}, {""}, {""}, + {"01AC", 0x01AD}, + {"0118", 0x0119}, + {"0198", 0x0199}, + {""}, + {"018A", 0x0257}, + {""}, + {"011C", 0x011D}, + {"019C", 0x026F}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"2CAE", 0x2CAF}, + {""}, {""}, + {"03AA", 0x03CA}, + {"0179", 0x017A}, + {"01A9", 0x0283}, + {"2C1E", 0x2C4E}, + {"2C9E", 0x2C9F}, + {"03A8", 0x03C8}, + {""}, + {"039A", 0x03BA}, + {"1EEA", 0x1EEB}, + {""}, {""}, + {"2C8E", 0x2C8F}, + {"0398", 0x03B8}, + {"1EE8", 0x1EE9}, + {"038A", 0x03AF}, + {""}, + {"0189", 0x0256}, + {"039C", 0x03BC}, + {"1EEC", 0x1EED}, + {"0388", 0x03AD}, + {""}, {""}, {""}, + {"1FEA", 0x1F7A}, + {"038C", 0x03CC}, + {""}, + {"1EBE", 0x1EBF}, + {""}, + {"1FE8", 0x1FE0}, + {""}, + {"03A9", 0x03C9}, + {""}, {""}, + {"1FEC", 0x1FE5}, + {""}, {""}, {""}, + {"0399", 0x03B9}, + {""}, {""}, {""}, + {"1FBE", 0x03B9}, + {""}, {""}, + {"0389", 0x03AE}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"1FE9", 0x1FE1}, + {""}, {""}, + {"10BE", 0x2D1E}, + {"1E70", 0x1E71}, + {"1EA0", 0x1EA1}, + {""}, {""}, {""}, + {"0172", 0x0173}, + {"01A2", 0x01A3}, + {"1E10", 0x1E11}, + {"1E90", 0x1E91}, + {"015A", 0x015B}, + {"0176", 0x0177}, + {"01A6", 0x0280}, + {"0112", 0x0113}, + {""}, + {"0158", 0x0159}, + {"1E80", 0x1E81}, + {"04EA", 0x04EB}, + {"0116", 0x0117}, + {"0196", 0x0269}, + {"015C", 0x015D}, + {"0182", 0x0183}, + {"04E8", 0x04E9}, + {""}, {""}, {""}, + {"0186", 0x0254}, + 
{"04EC", 0x04ED}, + {""}, {""}, {""}, {""}, + {"10A0", 0x2D00}, + {""}, {""}, + {"04BE", 0x04BF}, + {""}, {""}, {""}, {""}, + {"03A6", 0x03C6}, + {""}, + {"0392", 0x03B2}, + {"1EE2", 0x1EE3}, + {""}, + {"1E74", 0x1E75}, + {"1EA4", 0x1EA5}, + {"0396", 0x03B6}, + {"1EE6", 0x1EE7}, + {""}, {""}, {""}, + {"1E14", 0x1E15}, + {"1E94", 0x1E95}, + {"0386", 0x03AC}, + {""}, {""}, {""}, {""}, {""}, + {"1E84", 0x1E85}, + {""}, {""}, {""}, {""}, {""}, + {"0470", 0x0471}, + {"04A0", 0x04A1}, + {""}, {""}, {""}, {""}, {""}, + {"0410", 0x0430}, + {"0490", 0x0491}, + {""}, + {"10A4", 0x2D04}, + {"1EDE", 0x1EDF}, + {""}, {""}, {""}, + {"0480", 0x0481}, + {""}, {""}, {""}, + {"24BE", 0x24D8}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, + {"1E50", 0x1E51}, + {""}, {""}, {""}, {""}, + {"0152", 0x0153}, + {""}, {""}, {""}, {""}, + {"0156", 0x0157}, + {""}, + {"04E2", 0x04E3}, + {""}, + {"0474", 0x0475}, + {"04A4", 0x04A5}, + {""}, + {"04E6", 0x04E7}, + {""}, + {"01B8", 0x01B9}, + {""}, + {"0414", 0x0434}, + {"0494", 0x0495}, + {""}, + {"01BC", 0x01BD}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"00DE", 0x00FE}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"2CBE", 0x2CBF}, + {""}, {""}, {""}, {""}, + {"1E54", 0x1E55}, + {"1E0E", 0x1E0F}, + {"04DE", 0x04DF}, + {""}, {""}, {""}, + {"0050", 0x0070}, + {""}, + {"0552", 0x0582}, + {""}, + {"1ECE", 0x1ECF}, + {""}, {""}, + {"0556", 0x0586}, + {""}, {""}, + {"1F0E", 0x1F06}, + {"1E9B", 0x1E61}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"1FAB", 0x1FA3}, + {""}, + {"2CA0", 0x2CA1}, + {""}, {""}, {""}, + {"1F1B", 0x1F13}, + {"1F9B", 0x1F93}, + {"2C10", 0x2C40}, + {"2C90", 0x2C91}, + {""}, {""}, {""}, {""}, + {"1F8B", 0x1F83}, + {"10AB", 0x2D0B}, + {"2C80", 0x2C81}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"0054", 0x0074}, + {""}, {""}, {""}, {""}, {""}, + {"1EB0", 0x1EB1}, + {""}, {""}, {""}, + {"00CE", 0x00EE}, + {"01B2", 0x028B}, + {""}, {""}, {""}, {""}, + {"040E", 0x045E}, + {""}, + {"2CE2", 0x2CE3}, + 
{""}, {""}, + {"2CA4", 0x2CA5}, + {""}, {""}, {""}, {""}, {""}, + {"2C14", 0x2C44}, + {"2C94", 0x2C95}, + {""}, {""}, + {"041B", 0x043B}, + {""}, {""}, {""}, + {"2C84", 0x2C85}, + {"10B0", 0x2D10}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"01FA", 0x01FB}, + {""}, {""}, {""}, {""}, + {"01F8", 0x01F9}, + {"1EB4", 0x1EB5}, + {""}, + {"2CDE", 0x2CDF}, + {""}, + {"01FC", 0x01FD}, + {"03DA", 0x03DB}, + {""}, + {"01D9", 0x01DA}, + {"1E4E", 0x1E4F}, + {""}, + {"03D8", 0x03D9}, + {""}, {""}, {""}, {""}, + {"03DC", 0x03DD}, + {""}, {""}, {""}, + {"1F5B", 0x1F53}, + {""}, + {"04B0", 0x04B1}, + {"03FA", 0x03FB}, + {""}, {""}, + {"24CE", 0x24E8}, + {""}, {""}, {""}, {""}, + {"10B4", 0x2D14}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, + {"010A", 0x010B}, + {""}, {""}, {""}, {""}, + {"0108", 0x0109}, + {""}, {""}, {""}, + {"01CA", 0x01CC}, + {"010C", 0x010D}, + {"03F9", 0x03F2}, + {""}, {""}, + {"01C8", 0x01C9}, + {""}, {""}, + {"004E", 0x006E}, + {""}, + {"FF2E", 0xFF4E}, + {""}, {""}, + {"1ED0", 0x1ED1}, + {""}, + {"04B4", 0x04B5}, + {"2C0E", 0x2C3E}, + {""}, {""}, {""}, {""}, {""}, + {"1E3E", 0x1E3F}, + {""}, {""}, + {"2CCE", 0x2CCF}, + {""}, {""}, {""}, {""}, + {"1EF0", 0x1EF1}, + {"2C1B", 0x2C4B}, + {""}, {""}, {""}, + {"01F2", 0x01F3}, + {""}, + {"1F3E", 0x1F36}, + {"050A", 0x050B}, + {""}, + {"01F6", 0x0195}, + {""}, {""}, + {"0508", 0x0509}, + {""}, + {"1E2E", 0x1E2F}, + {""}, {""}, + {"050C", 0x050D}, + {""}, {""}, + {"03D6", 0x03C0}, + {""}, {""}, {""}, {""}, {""}, + {"1ED4", 0x1ED5}, + {""}, {""}, + {"1F2E", 0x1F26}, + {""}, {""}, + {"00D0", 0x00F0}, + {""}, {""}, {""}, {""}, {""}, + {"1FBB", 0x1F71}, + {""}, + {"2CB0", 0x2CB1}, + {""}, {""}, + {"1EF4", 0x1EF5}, + {""}, {""}, + {"1E00", 0x1E01}, + {"04D0", 0x04D1}, + {""}, {""}, {""}, + {"0102", 0x0103}, + {""}, + {"10BB", 0x2D1B}, + {""}, + {"1EC0", 0x1EC1}, + {"0106", 0x0107}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"04F0", 0x04F1}, + {""}, {""}, + {"014A", 0x014B}, + {""}, {""}, 
{""}, {""}, {""}, {""}, {""}, {""}, + {"00D4", 0x00F4}, + {"014C", 0x014D}, + {""}, + {"042E", 0x044E}, + {""}, {""}, {""}, {""}, + {"2CB4", 0x2CB5}, + {"10C0", 0x2D20}, + {""}, {""}, + {"03C2", 0x03C3}, + {""}, + {"1E04", 0x1E05}, + {"04D4", 0x04D5}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"0502", 0x0503}, + {"1EC4", 0x1EC5}, + {""}, + {"1E6E", 0x1E6F}, + {""}, + {"0506", 0x0507}, + {""}, + {"00C0", 0x00E0}, + {""}, {""}, + {"04F4", 0x04F5}, + {""}, {""}, + {"0400", 0x0450}, + {""}, {""}, + {"054A", 0x057A}, + {""}, + {"1F6E", 0x1F66}, + {""}, {""}, + {"0548", 0x0578}, + {""}, {""}, {""}, {""}, + {"054C", 0x057C}, + {""}, {""}, {""}, {""}, + {"10C4", 0x2D24}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"24BB", 0x24D5}, + {""}, {""}, {""}, {""}, {""}, + {"0549", 0x0579}, + {"1FDB", 0x1F77}, + {""}, + {"2CD0", 0x2CD1}, + {""}, + {"00C4", 0x00E4}, + {""}, {""}, {""}, + {"1E40", 0x1E41}, + {"012A", 0x012B}, + {"0404", 0x0454}, + {""}, {""}, {""}, + {"0128", 0x0129}, + {""}, + {"0139", 0x013A}, + {"1FFB", 0x1F7D}, + {""}, + {"012C", 0x012D}, + {""}, + {"046E", 0x046F}, + {""}, {""}, {""}, + {"24C0", 0x24DA}, + {""}, {""}, {""}, + {"10A5", 0x2D05}, + {""}, {""}, {""}, + {"053A", 0x056A}, + {"2C2E", 0x2C5E}, + {"00DB", 0x00FB}, + {""}, + {"FF30", 0xFF50}, + {"0538", 0x0568}, + {""}, {""}, {""}, {""}, + {"053C", 0x056C}, + {""}, {""}, + {"2CD4", 0x2CD5}, + {""}, {""}, {""}, {""}, {""}, + {"1E44", 0x1E45}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"212A", 0x006B}, + {"0542", 0x0572}, + {""}, {""}, + {"0539", 0x0569}, + {"1F0B", 0x1F03}, + {"0546", 0x0576}, + {"2C00", 0x2C30}, + {""}, {""}, + {"24C4", 0x24DE}, + {"0415", 0x0435}, + {""}, + {"1E30", 0x1E31}, + {"1FCB", 0x1F75}, + {""}, + {"2CC0", 0x2CC1}, + {""}, + {"0132", 0x0133}, + {""}, {""}, {""}, + {"FF34", 0xFF54}, + {"0136", 0x0137}, + {""}, + {"1FAD", 0x1FA5}, + {""}, {""}, {""}, {""}, {""}, + {"1F1D", 0x1F15}, + {"1F9D", 0x1F95}, + {""}, {""}, {""}, + {"1E20", 0x1E21}, + {"016A", 0x016B}, + {""}, + 
{"1F8D", 0x1F85}, + {"10AD", 0x2D0D}, + {"0122", 0x0123}, + {"0168", 0x0169}, + {"0044", 0x0064}, + {""}, + {"FF24", 0xFF44}, + {"0126", 0x0127}, + {"016C", 0x016D}, + {""}, + {"00CB", 0x00EB}, + {""}, + {"2C04", 0x2C34}, + {""}, {""}, {""}, + {"040B", 0x045B}, + {""}, + {"1E34", 0x1E35}, + {""}, {""}, + {"2CC4", 0x2CC5}, + {""}, {""}, {""}, + {"04CB", 0x04CC}, + {"0532", 0x0562}, + {""}, {""}, {""}, {""}, + {"0536", 0x0566}, + {"01AE", 0x0288}, + {""}, {""}, {""}, {""}, {""}, + {"011E", 0x011F}, + {""}, {""}, + {"1E24", 0x1E25}, + {"041D", 0x043D}, + {""}, {""}, + {"0055", 0x0075}, + {"018E", 0x01DD}, + {""}, + {"216A", 0x217A}, + {""}, {""}, {""}, {""}, + {"2168", 0x2178}, + {""}, {""}, {""}, + {"2126", 0x03C9}, + {"216C", 0x217C}, + {""}, {""}, + {"10A1", 0x2D01}, + {"0420", 0x0440}, + {""}, {""}, {""}, {""}, + {"039E", 0x03BE}, + {"1EEE", 0x1EEF}, + {""}, {""}, {""}, + {"2C15", 0x2C45}, + {"1F4B", 0x1F43}, + {"038E", 0x03CD}, + {"24CB", 0x24E5}, + {""}, {""}, + {"2169", 0x2179}, + {""}, + {"1F5D", 0x1F55}, + {""}, {""}, {""}, + {"1E60", 0x1E61}, + {""}, {""}, {""}, {""}, + {"0162", 0x0163}, + {""}, {""}, {""}, {""}, + {"0166", 0x0167}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"0411", 0x0431}, + {""}, {""}, {""}, + {"0424", 0x0444}, + {""}, + {"004B", 0x006B}, + {""}, + {"FF2B", 0xFF4B}, + {""}, {""}, {""}, {""}, {""}, + {"2C0B", 0x2C3B}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"10B5", 0x2D15}, + {"015E", 0x015F}, + {""}, + {"1E64", 0x1E65}, + {""}, {""}, {""}, {""}, + {"04EE", 0x04EF}, + {""}, {""}, {""}, + {"1F3B", 0x1F33}, + {""}, + {"2162", 0x2172}, + {""}, {""}, + {"2C1D", 0x2C4D}, + {""}, + {"2166", 0x2176}, + {"00B5", 0x03BC}, + {""}, {""}, {""}, + {"0460", 0x0461}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"1FAF", 0x1FA7}, + {""}, {""}, {""}, + {"1F2B", 0x1F23}, + {""}, + {"2C20", 0x2C50}, + {"1F9F", 0x1F97}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"1F8F", 0x1F87}, + {"10AF", 0x2D0F}, + {""}, {""}, {""}, + {"0051", 0x0071}, + {""}, 
{""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"0464", 0x0465}, + {""}, + {"10BD", 0x2D1D}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"2C11", 0x2C41}, + {""}, {""}, {""}, + {"2C24", 0x2C54}, + {""}, + {"01EA", 0x01EB}, + {""}, + {"042B", 0x044B}, + {""}, + {"041F", 0x043F}, + {"01E8", 0x01E9}, + {""}, {""}, {""}, {""}, + {"01EC", 0x01ED}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"03EA", 0x03EB}, + {""}, {""}, {""}, {""}, + {"03E8", 0x03E9}, + {""}, {""}, {""}, {""}, + {"03EC", 0x03ED}, + {"1F6B", 0x1F63}, + {""}, {""}, + {"1F5F", 0x1F57}, + {""}, + {"00D5", 0x00F5}, + {""}, {""}, + {"10B1", 0x2D11}, + {""}, + {"0170", 0x0171}, + {"01A0", 0x01A1}, + {""}, {""}, {""}, {""}, {""}, + {"0110", 0x0111}, + {"0190", 0x025B}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, + {"021A", 0x021B}, + {""}, {""}, + {"24BD", 0x24D7}, + {""}, + {"0218", 0x0219}, + {""}, {""}, {""}, + {"03A0", 0x03C0}, + {"021C", 0x021D}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"1EE0", 0x1EE1}, + {""}, {""}, {""}, {""}, + {"01E2", 0x01E3}, + {""}, + {"0174", 0x0175}, + {"01A4", 0x01A5}, + {""}, + {"01E6", 0x01E7}, + {"10C5", 0x2D25}, + {""}, {""}, + {"0114", 0x0115}, + {"0194", 0x0263}, + {"2C2B", 0x2C5B}, + {""}, + {"2C1F", 0x2C4F}, + {""}, {""}, {""}, + {"0184", 0x0185}, + {""}, {""}, {""}, {""}, {""}, + {"00DD", 0x00FD}, + {""}, {""}, + {"00C5", 0x00E5}, + {""}, + {"03E2", 0x03E3}, + {""}, {""}, + {"03A4", 0x03C4}, + {"0405", 0x0455}, + {"03E6", 0x03E7}, + {"01DE", 0x01DF}, + {""}, {""}, {""}, + {"0394", 0x03B4}, + {"1EE4", 0x1EE5}, + {""}, + {"04C5", 0x04C6}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"01FE", 0x01FF}, + {"1F0D", 0x1F05}, + {"0150", 0x0151}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"04E0", 0x04E1}, + {""}, + {"03DE", 0x03DF}, + {""}, {""}, {""}, + {"10BF", 0x2D1F}, + {""}, {""}, + {"0212", 0x0213}, + {""}, {""}, {""}, {""}, 
+ {"0216", 0x0217}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"00D1", 0x00F1}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"24C5", 0x24DF}, + {""}, {""}, {""}, {""}, + {"00CD", 0x00ED}, + {"0154", 0x0155}, + {"010E", 0x010F}, + {""}, + {"0550", 0x0580}, + {""}, + {"040D", 0x045D}, + {"FF35", 0xFF55}, + {"04E4", 0x04E5}, + {""}, + {"017B", 0x017C}, + {""}, {""}, {""}, {""}, + {"04CD", 0x04CE}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"018B", 0x018C}, + {""}, {""}, + {"0045", 0x0065}, + {""}, + {"FF25", 0xFF45}, + {""}, {""}, {""}, {""}, {""}, + {"2C05", 0x2C35}, + {""}, {""}, + {"03AB", 0x03CB}, + {"10C1", 0x2D21}, + {""}, {""}, {""}, {""}, {""}, + {"039B", 0x03BB}, + {""}, + {"0554", 0x0584}, + {"050E", 0x050F}, + {""}, {""}, {""}, {""}, {""}, + {"24BF", 0x24D9}, + {""}, {""}, {""}, {""}, + {"00C1", 0x00E1}, + {""}, + {"1FEB", 0x1F7B}, + {"1F4D", 0x1F45}, + {"2CE0", 0x2CE1}, + {"24CD", 0x24E7}, + {"0401", 0x0451}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"04C1", 0x04C2}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, + {"004D", 0x006D}, + {""}, + {"FF2D", 0xFF4D}, + {""}, {""}, {""}, {""}, {""}, + {"2C0D", 0x2C3D}, + {""}, {""}, + {"014E", 0x014F}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"24C1", 0x24DB}, + {"1F0F", 0x1F07}, + {"1F3D", 0x1F35}, + {""}, {""}, + {"0425", 0x0445}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"FF31", 0xFF51}, + {""}, {""}, {""}, + {"10A7", 0x2D07}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"1F2D", 0x1F25}, + {""}, {""}, {""}, {""}, {""}, + {"054E", 0x057E}, + {""}, + {"0041", 0x0061}, + {""}, + {"FF21", 0xFF41}, + {""}, {""}, {""}, {""}, {""}, + {"2C01", 0x2C31}, + {""}, {""}, {""}, + {"00CF", 0x00EF}, + {""}, {""}, {""}, {""}, {""}, + {"040F", 0x045F}, + {""}, {""}, {""}, {""}, + {"10A3", 0x2D03}, + {"0417", 0x0437}, + {""}, {""}, {""}, 
{""}, {""}, {""}, {""}, {""}, {""}, + {"03D0", 0x03B2}, + {""}, {""}, {""}, + {"012E", 0x012F}, + {""}, {""}, {""}, + {"042D", 0x044D}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"03F0", 0x03BA}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, + {"053E", 0x056E}, + {""}, + {"0413", 0x0433}, + {""}, {""}, + {"01F4", 0x01F5}, + {""}, {""}, + {"0100", 0x0101}, + {""}, + {"2C25", 0x2C55}, + {""}, {""}, {""}, {""}, + {"24CF", 0x24E9}, + {""}, + {"1F6D", 0x1F65}, + {""}, {""}, {""}, + {"020A", 0x020B}, + {""}, {""}, {""}, {""}, + {"0208", 0x0209}, + {""}, {""}, {""}, {""}, + {"020C", 0x020D}, + {""}, + {"03F4", 0x03B8}, + {""}, + {"0057", 0x0077}, + {""}, {""}, {""}, {""}, + {"0421", 0x0441}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"004F", 0x006F}, + {""}, + {"FF2F", 0xFF4F}, + {""}, + {"0104", 0x0105}, + {""}, {""}, + {"0500", 0x0501}, + {"2C0F", 0x2C3F}, + {""}, {""}, {""}, {""}, + {"01C4", 0x01C6}, + {"2C17", 0x2C47}, + {"016E", 0x016F}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, + {"0053", 0x0073}, + {""}, + {"1F3F", 0x1F37}, + {""}, {""}, + {"2C2D", 0x2C5D}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, + {"01DB", 0x01DC}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"0504", 0x0505}, + {"1F2F", 0x1F27}, + {""}, + {"2C13", 0x2C43}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"0202", 0x0203}, + {"216E", 0x217E}, + {""}, + {"10B7", 0x2D17}, + {""}, + {"0206", 0x0207}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"03A5", 0x03C5}, + {""}, {""}, {""}, {""}, {""}, + {"2C21", 0x2C51}, + {"0395", 0x03B5}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"042F", 0x044F}, + {""}, {""}, {""}, {""}, + {"10B3", 0x2D13}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"0540", 0x0570}, + {""}, {""}, + {"01CB", 0x01CC}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"017D", 0x017E}, + {""}, {""}, {""}, {""}, {""}, {""}, 
{""}, + {"019D", 0x0272}, + {""}, {""}, {""}, + {"1F6F", 0x1F67}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, + {"24B7", 0x24D1}, + {""}, + {"0120", 0x0121}, + {""}, {""}, {""}, + {"0544", 0x0574}, + {""}, {""}, {""}, {""}, + {"039D", 0x03BD}, + {""}, {""}, {""}, + {"022A", 0x022B}, + {""}, {""}, {""}, {""}, + {"0228", 0x0229}, + {""}, {""}, + {"0134", 0x0135}, + {""}, + {"022C", 0x022D}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"0191", 0x0192}, + {""}, {""}, + {"0124", 0x0125}, + {"0555", 0x0585}, + {""}, {""}, + {"0181", 0x0253}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, + {"03A1", 0x03C1}, + {""}, {""}, {""}, {""}, {""}, + {"0534", 0x0564}, + {"0391", 0x03B1}, + {""}, + {"01EE", 0x01EF}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"00D3", 0x00F3}, + {"0232", 0x0233}, + {""}, {""}, {""}, {""}, {""}, + {"0160", 0x0161}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, + {"01B5", 0x01B6}, + {"03EE", 0x03EF}, + {""}, {""}, {""}, {""}, + {"0222", 0x0223}, + {""}, {""}, {""}, + {"054B", 0x057B}, + {"0226", 0x0227}, + {""}, {""}, {""}, + {"00C7", 0x00E7}, + {""}, {""}, {""}, {""}, {""}, + {"0407", 0x0457}, + {""}, {""}, {""}, {""}, {""}, + {"013B", 0x013C}, + {""}, {""}, + {"04C7", 0x04C8}, + {""}, {""}, + {"0164", 0x0165}, + {""}, + {"10C3", 0x2D23}, + {""}, {""}, {""}, + {"2160", 0x2170}, + {""}, {""}, + {"021E", 0x021F}, + {""}, {""}, + {"017F", 0x0073}, + {"01AF", 0x01B0}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"019F", 0x0275}, + {""}, + {"00C3", 0x00E3}, + {""}, {""}, {""}, {""}, + {"018F", 0x0259}, + {"0403", 0x0453}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"04C3", 0x04C4}, + {""}, {""}, {""}, + {"053B", 0x056B}, + {""}, {""}, + {"0551", 0x0581}, + {""}, {""}, + {"24C7", 0x24E1}, + {"039F", 0x03BF}, + {""}, + {"2164", 0x2174}, + {""}, {""}, {""}, {""}, + {"038F", 0x03CE}, + {""}, {""}, 
{""}, + {"FF37", 0xFF57}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"212B", 0x00E5}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"01D5", 0x01D6}, + {""}, {""}, {""}, {""}, {""}, + {"0047", 0x0067}, + {""}, + {"FF27", 0xFF47}, + {""}, {""}, {""}, {""}, + {"24C3", 0x24DD}, + {"2C07", 0x2C37}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"01B1", 0x028A}, + {""}, + {"FF33", 0xFF53}, + {""}, {""}, + {"03D5", 0x03C6}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"03F5", 0x03B5}, + {"0043", 0x0063}, + {""}, + {"FF23", 0xFF43}, + {""}, {""}, {""}, {""}, {""}, + {"2C03", 0x2C33}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"01C5", 0x01C6}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"216B", 0x217B}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, + {"01E0", 0x01E1}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"0427", 0x0447}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, + {"03E0", 0x03E1}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"01D1", 0x01D2}, + {""}, {""}, {""}, {""}, {""}, + {"01E4", 0x01E5}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"01CD", 0x01CE}, + {""}, {""}, {""}, + {"01F1", 0x01F3}, + {"0423", 0x0443}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"03D1", 0x03B8}, + {""}, {""}, + {"0210", 0x0211}, + {""}, {""}, + {"03E4", 0x03E5}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"0145", 0x0146}, + {""}, {""}, {""}, + {"03F1", 0x03C1}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, + {"0345", 0x03B9}, + {""}, {""}, {""}, {""}, {""}, + {"0214", 0x0215}, + {""}, {""}, + {"2C27", 0x2C57}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"0545", 0x0575}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, 
+ {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, + {"2C23", 0x2C53}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"0535", 0x0565}, + {""}, {""}, {""}, {""}, {""}, + {"054D", 0x057D}, + {""}, {""}, {""}, {""}, {""}, + {"020E", 0x020F}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"0141", 0x0142}, + {""}, {""}, + {"01A7", 0x01A8}, + {"013D", 0x013E}, + {""}, {""}, {""}, {""}, {""}, + {"0197", 0x0268}, + {""}, + {"01CF", 0x01D0}, + {""}, {""}, {""}, {""}, + {"0187", 0x0188}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, + {"03A7", 0x03C7}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"0397", 0x03B7}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"0541", 0x0571}, + {""}, {""}, + {"0193", 0x0260}, + {"053D", 0x056D}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, + {"03A3", 0x03C3}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"0393", 0x03B3}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"2165", 0x2175}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"0531", 0x0561}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"054F", 0x057F}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"216D", 0x217D}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"013F", 0x0140}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"0553", 0x0583}, + {""}, {""}, + {"022E", 0x022F}, + {""}, {""}, {""}, {""}, 
{""}, {""}, {""}, {""}, {""}, + {"01B7", 0x0292}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, + {"0200", 0x0201}, + {""}, + {"053F", 0x056F}, + {""}, {""}, + {"2161", 0x2171}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, + {"01B3", 0x01B4}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"0204", 0x0205}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, + {"01D7", 0x01D8}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"01F7", 0x01BF}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, + {"216F", 0x217F}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"01D3", 0x01D4}, + {""}, {""}, {""}, {""}, + {"03F7", 0x03F8}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"01C7", 0x01C9}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, + {"0230", 0x0231}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, + {"0220", 0x019E}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"0224", 0x0225}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"0147", 0x0148}, 
+ {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, + {"0143", 0x0144}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"0547", 0x0577}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, + {"023B", 0x023C}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, + {"0543", 0x0573}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, + {"0537", 0x0567}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, + {"0533", 0x0563}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"2167", 0x2177}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, + {"2163", 0x2173}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, 
{""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {"0241", 0x0294}, + {""}, {""}, {""}, + {"023D", 0x019A} + }; + + if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) + { + register int key = hash (str, len); + + if (key <= MAX_HASH_VALUE && key >= 0) + { + register const char *s = wordlist[key].name; + + if (*str == *s && !strcmp (str + 1, s + 1)) + return &wordlist[key]; + } + } + return 0; +} +#include +#include +#include "caseconvert.h" + +using std::string; + +// Input and output must be utf-16 +bool ucs2lower(const string &in, string &out) +{ + if (in.length() < 2) { + out.erase(); + return true; + } + static const char hex[]="0123456789ABCDEF"; + char key[5]; + key[4] = 0; + for (unsigned int i = 0; i < in.length() - 1; i += 2) { + struct mapping *m; + // Convert 16 bits to 4 hex chars as key + key[0] = hex[(in[i]&0xf0) >> 4]; + key[1] = hex[in[i] & 0x0f]; + key[2] = hex[(in[i+1]&0xf0) >> 4]; + key[3] = hex[in[i+1] & 0x0f]; + //fprintf(stderr, "Key: %s\n", key); + if ((m = Perfect_Hash::in_word_set(key, 4)) && m->name[0]) { +#if 0 + char sval[50]; + sprintf(sval, "%X", (unsigned int)(m->value)); + fprintf(stderr, "svalue: %s\n", sval); +#endif 
+            out += char((m->value & 0xff00) >> 8); // high byte first (UTF-16BE)
+            out += char(m->value & 0x00ff);        // then the low byte
+        } else
+        {
+            out += in[i];   // no folding entry: copy the unit unchanged
+            out += in[i+1];
+        }
+    }
+    return true;
+}
+#else // TEST_CASECONVERT is defined: build the test driver only
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <iostream>
+#include <string>
+
+using namespace std;
+
+#include "readfile.h"
+#include "caseconvert.h"
+// Test driver: case-fold file argv[1] with ucs2lower() and write it to argv[2]
+int main(int argc, char **argv)
+{
+    if (argc != 3) {
+        cerr << "Usage: trcaseconvert ifilename ofilename" << endl;
+        cerr << "Input and output must be utf16be" << endl;
+        exit(1);
+    }
+    const string ifilename = argv[1];
+    const string ofilename = argv[2];
+
+    string text;
+    if (!file_to_string(ifilename, text)) {
+        cerr << "Couldnt read file, errno " << errno << endl;
+        exit(1);
+    }
+    string out;
+    if (!ucs2lower(text, out)) {
+        cerr << "ucs2lower failed" << endl;
+        exit(1);
+    }
+    int fd = open(ofilename.c_str(), O_CREAT|O_TRUNC|O_WRONLY, 0666);
+    if (fd < 0) {
+        perror("Open/create output");
+        exit(1);
+    }
+    if (write(fd, out.c_str(), out.length()) != (int)out.length()) {
+        perror("write");
+        exit(1);
+    }
+    close(fd);
+    exit(0);
+}
+#endif // TEST_CASECONVERT
diff --git a/src/utils/caseconvert.h b/src/utils/caseconvert.h
new file mode 100644
index 00000000..748a0b06
--- /dev/null
+++ b/src/utils/caseconvert.h
@@ -0,0 +1,10 @@
+#ifndef _CASECONVERT_H_INCLUDED_
+#define _CASECONVERT_H_INCLUDED_
+/* @(#$Id: caseconvert.h,v 1.1 2006-01-05 16:16:14 dockes Exp $ (C) 2005 J.F.Dockes */
+#include <string>
+
+// Case-fold a string one 16-bit unit at a time; units without a mapping are copied
+// Input and output must be utf-16be
+extern bool ucs2lower(const std::string &in, std::string &out);
+
+#endif /* _CASECONVERT_H_INCLUDED_ */
diff --git a/src/utils/gencasefold.sh b/src/utils/gencasefold.sh
new file mode 100644
index 00000000..30892cac
--- /dev/null
+++ b/src/utils/gencasefold.sh
@@ -0,0 +1,121 @@
+#!/bin/sh
+
+###############
+## Use awk and gperf to generate an efficient case-folding lookup function
+awk -F';' \
+'
+BEGIN {
+    printf "%%{\n"
+    printf "// Automatically generated by gencasefold.sh, do not edit\n"
+    printf "#ifndef TEST_CASECONVERT\n"
+    printf "%%}\n"
+    printf "struct mapping { char *name; unsigned short value; };\n\n"
+    printf("%%%%\n");
+}
+/^#/{next}
+/^$/{next}
+{
+    if (length($1) <= 4 && ($2 ~ "C" || $2 ~ "S")) {
+        gsub(" ", "", $3);
+        printf "%s, 0x%s\n", $1, $3
+    }
+    #else {printf "T/F/higher plane line: %s\n", $0}
+}
+' \
+< CaseFolding.txt | \
+gperf -I -n -LC++ -t > caseconvert.cpp
+
+
+#############
+## Append wrapper function (quoted delimiter: the text below is copied verbatim)
+
+cat <<'EOF' >> caseconvert.cpp
+#include <stdio.h>
+#include <string>
+#include "caseconvert.h"
+
+using std::string;
+
+// Input and output must be utf-16be: each unit is read and written high byte first
+bool ucs2lower(const string &in, string &out)
+{
+    if (in.length() < 2) {
+        out.erase();
+        return true;
+    }
+    static const char hex[]="0123456789ABCDEF";
+    char key[5];
+    key[4] = 0;
+    for (unsigned int i = 0; i < in.length() - 1; i += 2) {
+        struct mapping *m;
+        // Convert 16 bits to 4 hex chars as key
+        key[0] = hex[(in[i]&0xf0) >> 4];
+        key[1] = hex[in[i] & 0x0f];
+        key[2] = hex[(in[i+1]&0xf0) >> 4];
+        key[3] = hex[in[i+1] & 0x0f];
+        //fprintf(stderr, "Key: %s\n", key);
+        if ((m = Perfect_Hash::in_word_set(key, 4)) && m->name[0]) {
+#if 0
+            char sval[50];
+            sprintf(sval, "%X", (unsigned int)(m->value));
+            fprintf(stderr, "svalue: %s\n", sval);
+#endif
+            out += char((m->value & 0xff00) >> 8); // high byte first (UTF-16BE)
+            out += char(m->value & 0x00ff);        // then the low byte
+        } else
+        {
+            out += in[i];   // no folding entry: copy the unit unchanged
+            out += in[i+1];
+        }
+    }
+    return true;
+}
+#else // TEST_CASECONVERT is defined: build the test driver only
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <iostream>
+#include <string>
+
+using namespace std;
+
+#include "readfile.h"
+#include "caseconvert.h"
+// Test driver: case-fold file argv[1] with ucs2lower() and write it to argv[2]
+int main(int argc, char **argv)
+{
+    if (argc != 3) {
+        cerr << "Usage: trcaseconvert ifilename ofilename" << endl;
+        cerr << "Input and output must be utf16be" << endl;
+        exit(1);
+    }
+    const string ifilename = argv[1];
+    const string ofilename = argv[2];
+
+    string text;
+    if (!file_to_string(ifilename, text)) {
+        cerr << "Couldnt read file, errno " << errno << endl;
+        exit(1);
+    }
+    string out;
+    if (!ucs2lower(text, out)) {
+        cerr << "ucs2lower failed" << endl;
+        exit(1);
+    }
+    int fd = open(ofilename.c_str(), O_CREAT|O_TRUNC|O_WRONLY, 0666);
+    if (fd < 0) {
+        perror("Open/create output");
+        exit(1);
+    }
+    if (write(fd, out.c_str(), out.length()) != (int)out.length()) {
+        perror("write");
+        exit(1);
+    }
+    close(fd);
+    exit(0);
+}
+#endif // TEST_CASECONVERT
+EOF