diff --git a/src/qtgui/plaintorich.cpp b/src/qtgui/plaintorich.cpp new file mode 100644 index 00000000..3e97d0ec --- /dev/null +++ b/src/qtgui/plaintorich.cpp @@ -0,0 +1,142 @@ + + +#include +#include +#include +using std::list; +using std::pair; + +#include "rcldb.h" +#include "rclconfig.h" +#include "debuglog.h" +#include "textsplit.h" +#include "utf8iter.h" +#include "transcode.h" +#include "smallut.h" + +// Text splitter callback used to take note of the position of query terms +// inside the result text. This is then used to post highlight tags. +// For every word reported through takeword() that matches one of the query +// terms, the (bts, bte) pair passed by the splitter is appended to tboffs. +// NOTE(review): template arguments (e.g. on list and pair) and the names of +// the first three #include lines look stripped from this copy of the text; +// confirm the element types against the repository. +class myTextSplitCB : public TextSplitCB { + public: + const list *terms; // in: query terms; points at the list passed to the + // constructor (not copied), so that list must outlive + // this object + list > tboffs; // out: begin and end positions of + // query terms in text + + // Only the address of the list is stored; nothing is copied. + myTextSplitCB(const list& terms) + : terms(&terms) { + } + + // Callback called by the text-to-words breaker for each word. + // term: the word; pos: not used here; bts, bte: recorded as a pair in + // tboffs when the word matches (plaintorich later compares them with + // byte positions in the input). Always returns true. + // The output of Rcl::dumb_string() is compared with each query term via + // stringlowercmp(); a zero result is taken as a match (presumably a + // case-insensitive compare against a normalized form of the word - + // confirm). At most one pair is recorded per word. + virtual bool takeword(const std::string& term, int pos, int bts, int bte) { + string dumb; + Rcl::dumb_string(term, dumb); + //LOGDEB(("Input dumbbed term: '%s' %d %d %d\n", dumb.c_str(), + // pos, bts, bte)); + for (list::const_iterator it = terms->begin(); + it != terms->end(); it++) { + if (!stringlowercmp(*it, dumb)) { + tboffs.push_back(pair(bts, bte)); + break; + } + } + + return true; + } +}; + + +// Fix result text for display inside the gui text window. +// +// in: input text; terms: query terms to highlight; termoffsets: emptied +// first, then filled with one (begin, end) pair of output character +// positions for each matched term whose end position is reached. +// Returns the generated rich text. +// +// To compute the term character positions in the output text, we have +// to emulate how qt's textedit counts chars (ignoring tags and +// duplicate whitespace etc...). This is tricky business and it might +// be better to insert the text char by char, taking note of where qt +// thinks it is at each term. 
+string plaintorich(const string &in, const list& terms, + list >&termoffsets) +{ + LOGDEB(("plaintorich: terms: %s\n", + stringlistdisp(terms).c_str())); + + termoffsets.erase(termoffsets.begin(), termoffsets.end()); + + myTextSplitCB cb(terms); + TextSplit splitter(&cb, true); + // Note that the splitter reports the term locations as byte offsets, + // not character offsets. The matches end up in cb.tboffs. + splitter.text_to_words(in); + + + + // Rich text output. + // NOTE(review): this literal and the other markup literals appended to + // out further down look emptied of their markup in this copy of the + // text - confirm against the repository. + // Overview of the loop that follows: it walks the input with a Utf8Iter; + // when the byte position equals the begin or the end of the current + // term it appends a markup literal and records outcpos (the end case + // also pushes the pair to termoffsets and advances to the next term); + // at most two consecutive newlines are emitted, carriage returns are + // dropped, and a run of blanks advances outcpos only once. + string out = "

"; + + // Iterator for the list of input term positions. We use it to + // output highlight tags and to compute term positions in the + // output text + list >::iterator it = cb.tboffs.begin(); + + // Storage for the current term _character_ position in output. + pair otermcpos; + // Current char position in output, excluding tags + int outcpos=0; + // Input character iterator + Utf8Iter chariter(in); + // State variable used to limitate the number of consecutive empty lines + int ateol = 0; + // State variable to update the char pos only for the first of + // consecutive blank chars + int atblank = 0; + for (string::size_type pos = 0; pos != string::npos; pos = chariter++) { + // If we still have terms, check (byte) position + if (it != cb.tboffs.end()) { + int ibyteidx = chariter.getBpos(); + if (ibyteidx == it->first) { + out += ""; + otermcpos.first = outcpos; + } else if (ibyteidx == it->second) { + if (it != cb.tboffs.end()) + it++; + otermcpos.second = outcpos; + termoffsets.push_back(otermcpos); + out += ""; + } + } + switch(*chariter) { + case '\n': + if (ateol < 2) { + out += "
\n"; + ateol++; + outcpos++; + } + break; + case '\r': + break; + case '<': + ateol = 0; + out += "<"; + outcpos++; + break; + default: + // We don't change the eol status for whitespace, want a real line + if (*chariter == ' ' || *chariter == ' ') { + if (!atblank) + outcpos++; + atblank = 1; + } else { + ateol = 0; + atblank = 0; + outcpos++; + } + chariter.appendchartostring(out); + } + } +#if 0 + { + FILE *fp = fopen("/tmp/debugplaintorich", "w"); + fprintf(fp, "%s\n", out.c_str()); + fclose(fp); + } +#endif + return out; +} diff --git a/src/qtgui/plaintorich.h b/src/qtgui/plaintorich.h new file mode 100644 index 00000000..5729e77b --- /dev/null +++ b/src/qtgui/plaintorich.h @@ -0,0 +1,18 @@ +#ifndef _PLAINTORICH_H_INCLUDED_ +#define _PLAINTORICH_H_INCLUDED_ +/* @(#$Id: plaintorich.h,v 1.1 2005-09-22 16:22:34 dockes Exp $ (C) 2004 J.F.Dockes */ + +#include + +/** + * Fix result text for display inside the gui text window. + * + * @param in raw text out of internfile. + * @param terms list of query terms + * @param termoffsets character offsets where we find terms + */ +extern string plaintorich(const string &in, + const list& terms, + list >&termoffsets); + +#endif /* _PLAINTORICH_H_INCLUDED_ */