*** empty log message ***
This commit is contained in:
parent
58a36b35c2
commit
66878ddf15
1064
src/utils/CaseFolding.txt
Normal file
1064
src/utils/CaseFolding.txt
Normal file
File diff suppressed because it is too large
Load Diff
1765
src/utils/caseconvert.cpp
Normal file
1765
src/utils/caseconvert.cpp
Normal file
File diff suppressed because it is too large
Load Diff
10
src/utils/caseconvert.h
Normal file
10
src/utils/caseconvert.h
Normal file
@ -0,0 +1,10 @@
|
||||
#ifndef _CASECONVERT_H_INCLUDED_
#define _CASECONVERT_H_INCLUDED_
/* @(#$Id: caseconvert.h,v 1.1 2006-01-05 16:16:14 dockes Exp $ (C) 2005 J.F.Dockes */

#include <string>

/**
 * Lower-case a string.
 *
 * Both the input and the output are utf-16be byte strings.
 *
 * @param in  text to convert
 * @param out receives the converted text
 * @return success indicator
 */
bool ucs2lower(const std::string &in, std::string &out);

#endif /* _CASECONVERT_H_INCLUDED_ */
|
||||
121
src/utils/gencasefold.sh
Normal file
121
src/utils/gencasefold.sh
Normal file
@ -0,0 +1,121 @@
|
||||
#!/bin/sh
|
||||
|
||||
###############
## Generate an efficient case-folding lookup function with awk and gperf.
## awk turns the 16-bit, single-character (status C or S) entries of
## CaseFolding.txt into a gperf keyword list; gperf then writes the
## perfect-hash lookup code to caseconvert.cpp.
awk -F';' \
'
BEGIN {
    # gperf declarations section, copied verbatim into the generated file
    printf "%%{\n"
    printf "// Automatically generated by gencasefold.sh, do not edit\n"
    printf "#ifndef TEST_CASECONVERT\n"
    printf "%%}\n"
    # name must be const: gperf initializes it from string literals
    printf "struct mapping { const char *name; unsigned short value; };\n\n"
    printf("%%%%\n");
}
# Skip comment and empty lines
/^#/{next}
/^$/{next}
{
    # Keep code points of at most 4 hex digits with a C or S status
    if (length($1) <= 4 && ($2 ~ "C" || $2 ~ "S")) {
        gsub(" ", "", $3);
        printf "%s, 0x%s\n", $1, $3
    }
    #else {printf "T/F/higher plane line: %s\n", $0}
}
' \
< CaseFolding.txt | \
gperf -I -n -LC++ -t > caseconvert.cpp
|
||||
|
||||
|
||||
#############
## Append wrapper function
## The here-document delimiter is quoted so that the shell copies the C++
## text verbatim, without parameter, command or backslash processing.

cat <<'EOF' >> caseconvert.cpp
|
||||
#include <stdio.h>
|
||||
#include <string>
|
||||
#include "caseconvert.h"
|
||||
|
||||
using std::string;
|
||||
|
||||
// Input and output must be utf-16
|
||||
bool ucs2lower(const string &in, string &out)
|
||||
{
|
||||
if (in.length() < 2) {
|
||||
out.erase();
|
||||
return true;
|
||||
}
|
||||
static const char hex[]="0123456789ABCDEF";
|
||||
char key[5];
|
||||
key[4] = 0;
|
||||
for (unsigned int i = 0; i < in.length() - 1; i += 2) {
|
||||
struct mapping *m;
|
||||
// Convert 16 bits to 4 hex chars as key
|
||||
key[0] = hex[(in[i]&0xf0) >> 4];
|
||||
key[1] = hex[in[i] & 0x0f];
|
||||
key[2] = hex[(in[i+1]&0xf0) >> 4];
|
||||
key[3] = hex[in[i+1] & 0x0f];
|
||||
//fprintf(stderr, "Key: %s\n", key);
|
||||
if ((m = Perfect_Hash::in_word_set(key, 4)) && m->name[0]) {
|
||||
#if 0
|
||||
char sval[50];
|
||||
sprintf(sval, "%X", (unsigned int)(m->value));
|
||||
fprintf(stderr, "svalue: %s\n", sval);
|
||||
#endif
|
||||
out += char((m->value & 0xff00) >> 16);
|
||||
out += char(m->value & 0x00ff);
|
||||
} else
|
||||
{
|
||||
out += in[i];
|
||||
out += in[i+1];
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
#else // TEST_CASECONVERT defined: build the standalone test driver
|
||||
|
||||
#include <errno.h>
|
||||
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
using namespace std;
|
||||
|
||||
#include "readfile.h"
|
||||
#include "caseconvert.h"
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
if (argc != 3) {
|
||||
cerr << "Usage: trcaseconvert ifilename ofilename" << endl;
|
||||
cerr << "Input and output must be utf16be" << endl;
|
||||
exit(1);
|
||||
}
|
||||
const string ifilename = argv[1];
|
||||
const string ofilename = argv[2];
|
||||
|
||||
string text;
|
||||
if (!file_to_string(ifilename, text)) {
|
||||
cerr << "Couldnt read file, errno " << errno << endl;
|
||||
exit(1);
|
||||
}
|
||||
string out;
|
||||
if (!ucs2lower(text, out)) {
|
||||
cerr << "ucs2lower failed" << endl;
|
||||
exit(1);
|
||||
}
|
||||
int fd = open(ofilename.c_str(), O_CREAT|O_TRUNC|O_WRONLY, 0666);
|
||||
if (fd < 0) {
|
||||
perror("Open/create output");
|
||||
exit(1);
|
||||
}
|
||||
if (write(fd, out.c_str(), out.length()) != (int)out.length()) {
|
||||
perror("write");
|
||||
exit(1);
|
||||
}
|
||||
close(fd);
|
||||
exit(0);
|
||||
}
|
||||
#endif // TEST_CASECONVERT
|
||||
EOF
|
||||
Loading…
x
Reference in New Issue
Block a user