utf8truncate

This commit is contained in:
dockes 2008-08-30 07:31:16 +00:00
parent eb1214c5a9
commit 2cac8aa0ab
2 changed files with 29 additions and 5 deletions

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: smallut.cpp,v 1.29 2008-07-01 11:51:51 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: smallut.cpp,v 1.30 2008-08-30 07:30:55 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -31,6 +31,7 @@ static char rcsid[] = "@(#$Id: smallut.cpp,v 1.29 2008-07-01 11:51:51 dockes Exp
#include <string>
#include "smallut.h"
#include "utf8iter.h"
#ifndef NO_NAMESPACES
using namespace std;
@ -374,7 +375,7 @@ string neutchars(const string &str, string delims)
* we have enough, this would be cleanly utf8-aware but would remove
* punctuation */
static const string SEPAR = " \t\n\r-:.;,/[]{}";
string truncate_to_word(string & input, string::size_type maxlen)
string truncate_to_word(const string &input, string::size_type maxlen)
{
string output;
if (input.length() <= maxlen) {
@ -398,6 +399,17 @@ string truncate_to_word(string & input, string::size_type maxlen)
return output;
}
/**
 * Truncate s in place to at most maxlen bytes, cutting only at a position
 * reported by Utf8Iter so that a multibyte character is never split.
 *
 * @param s      string to shorten; modified in place.
 * @param maxlen maximum byte length to keep. The value is converted to
 *               string::size_type for the comparison, so a negative value
 *               becomes a very large limit and nothing is cut because of
 *               it.
 *
 * The text is cut at the last position returned by the iterator that does
 * not exceed maxlen. A boundary falling exactly at maxlen is accepted, so
 * a string which already fits is not shortened by one character.
 */
void utf8truncate(string &s, int maxlen)
{
    const string::size_type limit = string::size_type(maxlen);
    Utf8Iter iter(s);
    // Byte offset of the last acceptable cut point seen so far
    string::size_type pos = 0;
    while (iter++ != string::npos) {
        // Offsets only grow while stepping forward: once past the limit
        // no later position can qualify, so stop scanning.
        if (iter.getBpos() > limit)
            break;
        pos = iter.getBpos();
    }
    s.erase(pos);
}
// Escape things that would look like markup
string escapeHtml(const string &in)
{
@ -563,6 +575,9 @@ float Chrono::secs(int frozen)
#else
#include <string>
using namespace std;
#include <iostream>
#include "smallut.h"
struct spair {
@ -618,12 +633,18 @@ int main(int argc, char **argv)
}
printf("\n");
}
#else
#elif 0
for (int i = 0; i < nsuffpairs; i++) {
int c = stringisuffcmp(suffpairs[i].s1, suffpairs[i].s2);
printf("[%s] %s [%s] \n", suffpairs[i].s1,
c == 0 ? "matches" : c < 0 ? "<" : ">", suffpairs[i].s2);
}
#elif 1
std::string testit("\303\251l\303\251gant");
for (int sz = 10; sz >= 0; sz--) {
utf8truncate(testit, sz);
cout << testit << endl;
}
#endif
}

View File

@ -16,7 +16,7 @@
*/
#ifndef _SMALLUT_H_INCLUDED_
#define _SMALLUT_H_INCLUDED_
/* @(#$Id: smallut.h,v 1.28 2008-05-08 09:57:29 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: smallut.h,v 1.29 2008-08-30 07:30:55 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string>
#include <list>
#include <vector>
@ -81,7 +81,10 @@ extern string escapeShell(const string &str);
/** Truncate a string to a given maxlength, avoiding cutting off midword
* if reasonably possible. */
extern string truncate_to_word(string &input, string::size_type maxlen);
extern string truncate_to_word(const string &input, string::size_type maxlen);
/** Truncate in place to at most maxlen bytes, without splitting a UTF-8 character */
extern void utf8truncate(string &s, int maxlen);
/** Small utility to substitute printf-like percents cmds in a string */
bool pcSubst(const string& in, string& out, map<char, string>& subs);