From 2cac8aa0ab2342435195c3179993a8d792a4ee52 Mon Sep 17 00:00:00 2001
From: dockes
Date: Sat, 30 Aug 2008 07:31:16 +0000
Subject: [PATCH] utf8truncate

---
Review notes (this section is ignored when the patch is applied):
 - utf8truncate() now returns early when the string already fits in maxlen
   bytes, accepts a character boundary located exactly at maxlen, stops
   scanning once the limit is passed, and keeps the cut position in a
   string::size_type instead of an int.
 - Hunk headers and the diffstat were adjusted for the added lines; the
   "index" blob ids are still those of the originally submitted revision.
 - Bare "#include" lines and "map&" appear to have lost their <...> part in
   the text this patch was recovered from; they are kept as found.

 src/utils/smallut.cpp | 39 ++++++++++++++++++++++++++++++++++++---
 src/utils/smallut.h   |  8 ++++++--
 2 files changed, 42 insertions(+), 5 deletions(-)

diff --git a/src/utils/smallut.cpp b/src/utils/smallut.cpp
index 55111e64..c343add1 100644
--- a/src/utils/smallut.cpp
+++ b/src/utils/smallut.cpp
@@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: smallut.cpp,v 1.29 2008-07-01 11:51:51 dockes Exp $ (C) 2004 J.F.Dockes";
+static char rcsid[] = "@(#$Id: smallut.cpp,v 1.30 2008-08-30 07:30:55 dockes Exp $ (C) 2004 J.F.Dockes";
 #endif
 /*
  * This program is free software; you can redistribute it and/or modify
@@ -31,6 +31,7 @@ static char rcsid[] = "@(#$Id: smallut.cpp,v 1.29 2008-07-01 11:51:51 dockes Exp
 #include
 
 #include "smallut.h"
+#include "utf8iter.h"
 
 #ifndef NO_NAMESPACES
 using namespace std;
@@ -374,7 +375,7 @@ string neutchars(const string &str, string delims)
  * we have enough, this would be cleanly utf8-aware but would remove
  * punctuation */
 static const string SEPAR = " \t\n\r-:.;,/[]{}";
-string truncate_to_word(string & input, string::size_type maxlen)
+string truncate_to_word(const string &input, string::size_type maxlen)
 {
     string output;
     if (input.length() <= maxlen) {
@@ -398,6 +399,29 @@ string truncate_to_word(string & input, string::size_type maxlen)
     return output;
 }
 
+/* Truncate s in place to at most maxlen bytes, cutting only at a UTF-8
+ * character boundary. A negative maxlen converts to a huge unsigned value,
+ * so the string is then left untouched. */
+void utf8truncate(string &s, int maxlen)
+{
+    const string::size_type limit = string::size_type(maxlen);
+    // Already short enough: the last character must not be dropped.
+    if (s.length() <= limit)
+        return;
+
+    Utf8Iter iter(s);
+    string::size_type pos = 0;
+    // After each increment getBpos() is used as the byte offset of a
+    // character boundary; keep the last one not beyond the limit and stop
+    // as soon as we pass it instead of walking the whole string.
+    while (iter++ != string::npos) {
+        if (iter.getBpos() > limit)
+            break;
+        pos = iter.getBpos();
+    }
+    s.erase(pos);
+}
+
 // Escape things that would look like markup
 string escapeHtml(const string &in)
 {
@@ -563,6 +587,9 @@ float Chrono::secs(int frozen)
 #else
 
 #include
+using namespace std;
+#include
+
 #include "smallut.h"
 
 struct spair {
@@ -618,12 +645,18 @@ int main(int argc, char **argv)
         }
         printf("\n");
     }
-#else
+#elif 0
     for (int i = 0; i < nsuffpairs; i++) {
         int c = stringisuffcmp(suffpairs[i].s1, suffpairs[i].s2);
         printf("[%s] %s [%s] \n", suffpairs[i].s1,
                c == 0 ? "matches" : c < 0 ? "<" : ">", suffpairs[i].s2);
     }
+#elif 1
+    std::string testit("\303\251l\303\251gant");
+    for (int sz = 10; sz >= 0; sz--) {
+        utf8truncate(testit, sz);
+        cout << testit << endl;
+    }
 #endif
 
 }
diff --git a/src/utils/smallut.h b/src/utils/smallut.h
index 3ce1fd92..0dedebcf 100644
--- a/src/utils/smallut.h
+++ b/src/utils/smallut.h
@@ -16,7 +16,7 @@
  */
 #ifndef _SMALLUT_H_INCLUDED_
 #define _SMALLUT_H_INCLUDED_
-/* @(#$Id: smallut.h,v 1.28 2008-05-08 09:57:29 dockes Exp $ (C) 2004 J.F.Dockes */
+/* @(#$Id: smallut.h,v 1.29 2008-08-30 07:30:55 dockes Exp $ (C) 2004 J.F.Dockes */
 #include
 #include
 #include
@@ -81,7 +81,11 @@ extern string escapeShell(const string &str);
 
 /** Truncate a string to a given maxlength, avoiding cutting off midword
  * if reasonably possible. */
-extern string truncate_to_word(string &input, string::size_type maxlen);
+extern string truncate_to_word(const string &input, string::size_type maxlen);
+
+/** Truncate in place to at most maxlen bytes, never cutting inside a
+ * multibyte UTF-8 character. Strings already short enough are unchanged. */
+extern void utf8truncate(string &s, int maxlen);
 
 /** Small utility to substitute printf-like percents cmds in a string */
 bool pcSubst(const string& in, string& out, map& subs);