From 262e7260d808adcb6b4fa20fb1cab85c196aca53 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Fri, 5 Jul 2019 11:33:29 +0200 Subject: [PATCH] utf8iter driver: read from stdin --- src/testmains/trutf8iter.cpp | 168 ++++++++++++++++++----------------- 1 file changed, 87 insertions(+), 81 deletions(-) diff --git a/src/testmains/trutf8iter.cpp b/src/testmains/trutf8iter.cpp index 3209d7f7..4613d9da 100644 --- a/src/testmains/trutf8iter.cpp +++ b/src/testmains/trutf8iter.cpp @@ -44,17 +44,19 @@ void tryempty() const char *thisprog; static char usage [] = - "utf8iter [opts] infile outfile\n" - " converts infile to 32 bits unicode (processor order), for testing\n" - "-v : print stuff as we go\n" - ; + "utf8iter [opts] infile outfile\n" + " converts infile to 32 bits unicode (processor order), for testing\n" + "-v : print stuff as we go\n" + ; void Usage() { fprintf(stderr, "%s:%s\n", thisprog, usage); exit(1); } static int op_flags; -#define OPT_v 0x2 +#define OPT_v 0x2 + +FILE *infout = stdout; int main(int argc, char **argv) { @@ -62,124 +64,128 @@ int main(int argc, char **argv) argc--; argv++; while (argc > 0 && **argv == '-') { - (*argv)++; - if (!(**argv)) - Usage(); - while (**argv) - switch (*(*argv)++) { - case 'v': op_flags |= OPT_v; break; + (*argv)++; + if (!(**argv)) + Usage(); + while (**argv) + switch (*(*argv)++) { + case 'v': op_flags |= OPT_v; break; - default: Usage(); break; - } - argc--;argv++; + default: Usage(); break; + } + argc--;argv++; } - - if (argc != 2) { - Usage(); + string infile, outfile; + if (argc == 2) { + infile = *argv++;argc--; + outfile = *argv++;argc--; + Usage(); + } else if (argc != 0) { + Usage(); } - const char *infile = *argv++;argc--; - const char *outfile = *argv++;argc--; string in; if (!file_to_string(infile, in)) { - cerr << "Cant read file\n" << endl; - exit(1); + cerr << "Cant read file\n" << endl; + exit(1); } vectorucsout1; string out, out1; Utf8Iter it(in); - FILE *fp = fopen(outfile, "w"); - if (fp == 0) { - fprintf(stderr, "cant create %s\n", outfile); - exit(1); + FILE *fp = 0; + if (!outfile.empty()) { + fp = fopen(outfile.c_str(), "w"); + if (fp == 0) { + cerr << "Can't create " << outfile << endl; + exit(1); + } } int nchars = 0; for (;!it.eof(); it++) { - unsigned int value = *it; - if (value == (unsigned int)-1) { - cerr << "Conversion error occurred\n" << endl; - exit(1); - } - if (op_flags & OPT_v) { - printf("Value: 0x%x", value); - if (value < 0x7f) - printf(" (%c) ", value); - printf("\n"); - } - // UTF-32LE or BE array - ucsout1.push_back(value); - // UTF-32LE or BE file - fwrite(&value, 4, 1, fp); + unsigned int value = *it; + if (value == (unsigned int)-1) { + cerr << "Conversion error occurred at position " << it.getBpos() + << endl; + exit(1); + } + if (op_flags & OPT_v) { + fprintf(infout, "Value: 0x%04x", value); + if (value < 0x7f) + fprintf(stdout, " (%c) ", value); + fprintf(infout, "\n"); + } + // UTF-32LE or BE array + ucsout1.push_back(value); + if (fp) { + // UTF-32LE or BE file + fwrite(&value, 4, 1, fp); + } - // Reconstructed utf8 strings (2 methods) - if (!it.appendchartostring(out)) - break; - // conversion to string - out1 += it; - - // fprintf(stderr, "%s", string(it).c_str()); - nchars++; + // Reconstructed utf8 strings (2 methods) + if (!it.appendchartostring(out)) + break; + // conversion to string + out1 += it; + + // fprintf(stderr, "%s", string(it).c_str()); + nchars++; + } + if (fp) { + fclose(fp); } - fclose(fp); - fprintf(stderr, "nchars %d\n", nchars); + fprintf(infout, "Found %d Unicode characters\n", nchars); if (in.compare(out)) { - fprintf(stderr, "error: out != in\n"); - exit(1); + fprintf(stderr, "error: out != in\n"); + exit(1); } if (in != out1) { - fprintf(stderr, "error: out1 != in\n"); - exit(1); + fprintf(stderr, "error: out1 != in\n"); + exit(1); } // Rewind and do it a second time vectorucsout2; it.rewind(); for (int i = 0; ; i++) { - unsigned int value; - if ((value = it[i]) == (unsigned int)-1) { - fprintf(stderr, "%d chars\n", i); - break; - } - it++; - ucsout2.push_back(value); + unsigned int value; + if ((value = it[i]) == (unsigned int)-1) { + break; + } + it++; + ucsout2.push_back(value); } if (ucsout1 != ucsout2) { - fprintf(stderr, "error: ucsout1 != ucsout2\n"); - exit(1); + fprintf(stderr, "error: ucsout1 != ucsout2\n"); + exit(1); } ucsout2.clear(); int ercnt; const char *encoding = "UTF-32LE"; // note : use BE on high-endian machine string ucs, ucs1; - for (vector::iterator it = ucsout1.begin(); - it != ucsout1.end(); it++) { - unsigned int i = *it; - ucs.append((const char *)&i, 4); + for (const unsigned int i : ucsout1) { + ucs.append((const char *)&i, 4); } - if (!transcode(ucs, ucs1, - encoding, encoding, &ercnt) || ercnt) { - fprintf(stderr, "Transcode check failed, ercount: %d\n", ercnt); - exit(1); + if (!transcode(ucs, ucs1, encoding, encoding, &ercnt) || ercnt) { + fprintf(stderr, "Transcode check failed, ercount: %d\n", ercnt); + exit(1); } if (ucs.compare(ucs1)) { - fprintf(stderr, "error: ucsout1 != ucsout2 after iconv\n"); - exit(1); + fprintf(stderr, "error: ucsout1 != ucsout2 after iconv\n"); + exit(1); } - if (!transcode(ucs, ucs1, - encoding, "UTF-8", &ercnt) || ercnt) { - fprintf(stderr, "Transcode back to utf-8 check failed, ercount: %d\n", - ercnt); - exit(1); + if (!transcode(ucs, ucs1, encoding, "UTF-8", &ercnt) || ercnt) { + fprintf(stderr, "Transcode back to utf-8 check failed, ercount: %d\n", + ercnt); + exit(1); } if (ucs1.compare(in)) { - fprintf(stderr, "Transcode back to utf-8 compare to in failed\n"); - exit(1); + fprintf(stderr, "Transcode back to utf-8 compare to in failed\n"); + exit(1); } exit(0); } -