utf8truncate

This commit is contained in:
dockes 2008-08-30 07:31:16 +00:00
parent eb1214c5a9
commit 2cac8aa0ab
2 changed files with 29 additions and 5 deletions

View File

@ -1,5 +1,5 @@
#ifndef lint #ifndef lint
static char rcsid[] = "@(#$Id: smallut.cpp,v 1.29 2008-07-01 11:51:51 dockes Exp $ (C) 2004 J.F.Dockes"; static char rcsid[] = "@(#$Id: smallut.cpp,v 1.30 2008-08-30 07:30:55 dockes Exp $ (C) 2004 J.F.Dockes";
#endif #endif
/* /*
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
@ -31,6 +31,7 @@ static char rcsid[] = "@(#$Id: smallut.cpp,v 1.29 2008-07-01 11:51:51 dockes Exp
#include <string> #include <string>
#include "smallut.h" #include "smallut.h"
#include "utf8iter.h"
#ifndef NO_NAMESPACES #ifndef NO_NAMESPACES
using namespace std; using namespace std;
@ -374,7 +375,7 @@ string neutchars(const string &str, string delims)
* we have enough, this would be cleanly utf8-aware but would remove * we have enough, this would be cleanly utf8-aware but would remove
* punctuation */ * punctuation */
static const string SEPAR = " \t\n\r-:.;,/[]{}"; static const string SEPAR = " \t\n\r-:.;,/[]{}";
string truncate_to_word(string & input, string::size_type maxlen) string truncate_to_word(const string &input, string::size_type maxlen)
{ {
string output; string output;
if (input.length() <= maxlen) { if (input.length() <= maxlen) {
@ -398,6 +399,17 @@ string truncate_to_word(string & input, string::size_type maxlen)
return output; return output;
} }
/**
 * Truncate a UTF-8 string in place so that it holds at most maxlen bytes,
 * without cutting a multi-byte character in the middle.
 *
 * @param s      String to shorten, taken to be UTF-8 encoded. Modified in
 *               place; left untouched if it already fits in maxlen bytes.
 * @param maxlen Maximum length of the result, in bytes (not characters).
 *               Zero or a negative value empties the string.
 *
 * The result is the longest prefix of s that is no longer than maxlen
 * bytes and ends on a character boundary. Only the bytes around the cut
 * point are examined, so the cost does not depend on the string length.
 */
void utf8truncate(std::string &s, int maxlen)
{
    // Handle non-positive limits explicitly: converting a negative int to
    // the unsigned size_type would wrap to a huge value and disable the
    // truncation altogether.
    if (maxlen <= 0) {
        s.clear();
        return;
    }

    const std::string::size_type limit =
        static_cast<std::string::size_type>(maxlen);

    // A string of exactly maxlen bytes already fits: keep all of it.
    if (s.size() <= limit)
        return;

    // s[cut] is the first byte that would be dropped. If it is a UTF-8
    // continuation byte (bit pattern 10xxxxxx), cutting here would split a
    // character, so back up to the lead byte of that character and drop
    // the whole character instead.
    std::string::size_type cut = limit;
    while (cut > 0 && (static_cast<unsigned char>(s[cut]) & 0xC0) == 0x80)
        --cut;

    s.erase(cut);
}
// Escape things that would look like markup // Escape things that would look like markup
string escapeHtml(const string &in) string escapeHtml(const string &in)
{ {
@ -563,6 +575,9 @@ float Chrono::secs(int frozen)
#else #else
#include <string> #include <string>
using namespace std;
#include <iostream>
#include "smallut.h" #include "smallut.h"
struct spair { struct spair {
@ -618,12 +633,18 @@ int main(int argc, char **argv)
} }
printf("\n"); printf("\n");
} }
#else #elif 0
for (int i = 0; i < nsuffpairs; i++) { for (int i = 0; i < nsuffpairs; i++) {
int c = stringisuffcmp(suffpairs[i].s1, suffpairs[i].s2); int c = stringisuffcmp(suffpairs[i].s1, suffpairs[i].s2);
printf("[%s] %s [%s] \n", suffpairs[i].s1, printf("[%s] %s [%s] \n", suffpairs[i].s1,
c == 0 ? "matches" : c < 0 ? "<" : ">", suffpairs[i].s2); c == 0 ? "matches" : c < 0 ? "<" : ">", suffpairs[i].s2);
} }
#elif 1
std::string testit("\303\251l\303\251gant");
for (int sz = 10; sz >= 0; sz--) {
utf8truncate(testit, sz);
cout << testit << endl;
}
#endif #endif
} }

View File

@ -16,7 +16,7 @@
*/ */
#ifndef _SMALLUT_H_INCLUDED_ #ifndef _SMALLUT_H_INCLUDED_
#define _SMALLUT_H_INCLUDED_ #define _SMALLUT_H_INCLUDED_
/* @(#$Id: smallut.h,v 1.28 2008-05-08 09:57:29 dockes Exp $ (C) 2004 J.F.Dockes */ /* @(#$Id: smallut.h,v 1.29 2008-08-30 07:30:55 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string> #include <string>
#include <list> #include <list>
#include <vector> #include <vector>
@ -81,7 +81,10 @@ extern string escapeShell(const string &str);
/** Truncate a string to a given maxlength, avoiding cutting off midword /** Truncate a string to a given maxlength, avoiding cutting off midword
* if reasonably possible. */ * if reasonably possible. */
extern string truncate_to_word(string &input, string::size_type maxlen); extern string truncate_to_word(const string &input, string::size_type maxlen);
/** Truncate in place to at most maxlen bytes, in a UTF-8-legal way (never cutting a multi-byte character in the middle) */
extern void utf8truncate(string &s, int maxlen);
/** Small utility to substitute printf-like percents cmds in a string */ /** Small utility to substitute printf-like percents cmds in a string */
bool pcSubst(const string& in, string& out, map<char, string>& subs); bool pcSubst(const string& in, string& out, map<char, string>& subs);