This commit is contained in:
Jean-Francois Dockes 2020-03-21 10:24:26 +01:00
parent 5be3ed89c5
commit d83bb8cf69

View File

@ -48,7 +48,7 @@ public:
}; };
/** Construct a splitter, storing the given flags (TXTS_NONE when omitted). */
TextSplit(Flags flags = Flags(TXTS_NONE))
    : m_flags(flags)
{
}
/** Virtual destructor with an empty body, so derived classes can be
 *  destroyed through a TextSplit pointer. */
virtual ~TextSplit()
{
}
/** Call at program initialization to read non default values from the /** Call at program initialization to read non default values from the
@ -60,10 +60,10 @@ public:
/** Process one output word: to be implemented by the actual user class */ /** Process one output word: to be implemented by the actual user class */
virtual bool takeword(const std::string& term, virtual bool takeword(const std::string& term,
int pos, // term pos int pos, // term pos
int bts, // byte offset of first char in term int bts, // byte offset of first char in term
int bte // byte offset of first char after term int bte // byte offset of first char after term
) = 0; ) = 0;
/** Called when we encounter formfeed \f 0x0c. Override to use the event. /** Called when we encounter formfeed \f 0x0c. Override to use the event.
* Mostly or exclusively used with pdftoxx output. Other filters mostly * Mostly or exclusively used with pdftoxx output. Other filters mostly
@ -108,49 +108,43 @@ public:
#ifdef TEXTSPLIT_STATS #ifdef TEXTSPLIT_STATS
/**
 * Running statistics over a stream of sample lengths: sample count,
 * average length and (population) standard deviation.
 */
class Stats {
public:
    Stats() {
        reset();
    }

    /** Forget all samples seen so far. */
    void reset() {
        count = 0;
        totlen = 0;
        sigma_acc = 0;
    }

    /**
     * Record one sample.
     *
     * @param len length of the new sample.
     */
    void newsamp(unsigned int len) {
        const double x = double(len);
        // Mean before this sample. For the very first sample use the sample
        // itself, so that its contribution to the accumulator is zero.
        const double prevavg =
            count > 0 ? double(totlen) / double(count) : x;
        ++count;
        totlen += len;
        const double avglen = double(totlen) / double(count);
        // Welford's update: accumulating (x - old mean) * (x - new mean)
        // yields the exact sum of squared deviations from the final mean.
        // Squaring the distance to the running mean alone would
        // under-estimate the spread.
        sigma_acc += (x - prevavg) * (x - avglen);
    }

    /** Snapshot of the accumulated statistics. */
    struct Values {
        int count;      ///< number of samples recorded
        double avglen;  ///< average sample length
        double sigma;   ///< standard deviation of the sample lengths
    };

    /**
     * Compute the current statistics.
     *
     * @return count, average and standard deviation; all zero when no
     *         sample has been recorded (avoids a 0/0 division yielding NaN).
     */
    Values get() const {
        Values v;
        v.count = count;
        if (count <= 0) {
            v.avglen = 0.0;
            v.sigma = 0.0;
            return v;
        }
        v.avglen = double(totlen) / double(count);
        v.sigma = sqrt(sigma_acc / count);
        return v;
    }

private:
    int count;          // number of samples
    long long totlen;   // sum of sample lengths; wide type to avoid overflow
    double sigma_acc;   // sum of squared deviations from the mean
};
/** Return the values currently computed by the m_stats accumulator. */
Stats::Values getStats()
{
    Stats::Values snapshot = m_stats.get();
    return snapshot;
}
void resetStats() void resetStats() {
{ m_stats.reset();
m_stats.reset();
} }
#endif // TEXTSPLIT_STATS #endif // TEXTSPLIT_STATS