*** empty log message ***
This commit is contained in:
parent
dcb19d4fee
commit
dd441d4e58
142
src/qtgui/plaintorich.cpp
Normal file
142
src/qtgui/plaintorich.cpp
Normal file
@ -0,0 +1,142 @@
|
|||||||
|
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <utility>
|
||||||
|
#include <list>
|
||||||
|
using std::list;
|
||||||
|
using std::pair;
|
||||||
|
|
||||||
|
#include "rcldb.h"
|
||||||
|
#include "rclconfig.h"
|
||||||
|
#include "debuglog.h"
|
||||||
|
#include "textsplit.h"
|
||||||
|
#include "utf8iter.h"
|
||||||
|
#include "transcode.h"
|
||||||
|
#include "smallut.h"
|
||||||
|
|
||||||
|
// Text splitter callback used to take note of the position of query terms
|
||||||
|
// inside the result text. This is then used to post highlight tags.
|
||||||
|
class myTextSplitCB : public TextSplitCB {
|
||||||
|
public:
|
||||||
|
const list<string> *terms; // in: query terms
|
||||||
|
list<pair<int, int> > tboffs; // out: begin and end positions of
|
||||||
|
// query terms in text
|
||||||
|
|
||||||
|
myTextSplitCB(const list<string>& terms)
|
||||||
|
: terms(&terms) {
|
||||||
|
}
|
||||||
|
|
||||||
|
// Callback called by the text-to-words breaker for each word
|
||||||
|
virtual bool takeword(const std::string& term, int pos, int bts, int bte) {
|
||||||
|
string dumb;
|
||||||
|
Rcl::dumb_string(term, dumb);
|
||||||
|
//LOGDEB(("Input dumbbed term: '%s' %d %d %d\n", dumb.c_str(),
|
||||||
|
// pos, bts, bte));
|
||||||
|
for (list<string>::const_iterator it = terms->begin();
|
||||||
|
it != terms->end(); it++) {
|
||||||
|
if (!stringlowercmp(*it, dumb)) {
|
||||||
|
tboffs.push_back(pair<int, int>(bts, bte));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// Fix result text for display inside the gui text window.
|
||||||
|
//
|
||||||
|
// To compute the term character positions in the output text, we have
|
||||||
|
// to emulate how qt's textedit counts chars (ignoring tags and
|
||||||
|
// duplicate whitespace etc...). This is tricky business and it might
|
||||||
|
// be better to insert the text char by char, taking note of where qt
|
||||||
|
// thinks it is at each term.
|
||||||
|
string plaintorich(const string &in, const list<string>& terms,
|
||||||
|
list<pair<int, int> >&termoffsets)
|
||||||
|
{
|
||||||
|
LOGDEB(("plaintorich: terms: %s\n",
|
||||||
|
stringlistdisp(terms).c_str()));
|
||||||
|
|
||||||
|
termoffsets.erase(termoffsets.begin(), termoffsets.end());
|
||||||
|
|
||||||
|
myTextSplitCB cb(terms);
|
||||||
|
TextSplit splitter(&cb, true);
|
||||||
|
// Note that splitter returns the term locations in byte, not
|
||||||
|
// character offset
|
||||||
|
splitter.text_to_words(in);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Rich text output
|
||||||
|
string out = "<qt><head><title></title></head><body><p>";
|
||||||
|
|
||||||
|
// Iterator for the list of input term positions. We use it to
|
||||||
|
// output highlight tags and to compute term positions in the
|
||||||
|
// output text
|
||||||
|
list<pair<int, int> >::iterator it = cb.tboffs.begin();
|
||||||
|
|
||||||
|
// Storage for the current term _character_ position in output.
|
||||||
|
pair<int, int> otermcpos;
|
||||||
|
// Current char position in output, excluding tags
|
||||||
|
int outcpos=0;
|
||||||
|
// Input character iterator
|
||||||
|
Utf8Iter chariter(in);
|
||||||
|
// State variable used to limitate the number of consecutive empty lines
|
||||||
|
int ateol = 0;
|
||||||
|
// State variable to update the char pos only for the first of
|
||||||
|
// consecutive blank chars
|
||||||
|
int atblank = 0;
|
||||||
|
for (string::size_type pos = 0; pos != string::npos; pos = chariter++) {
|
||||||
|
// If we still have terms, check (byte) position
|
||||||
|
if (it != cb.tboffs.end()) {
|
||||||
|
int ibyteidx = chariter.getBpos();
|
||||||
|
if (ibyteidx == it->first) {
|
||||||
|
out += "<termtag>";
|
||||||
|
otermcpos.first = outcpos;
|
||||||
|
} else if (ibyteidx == it->second) {
|
||||||
|
if (it != cb.tboffs.end())
|
||||||
|
it++;
|
||||||
|
otermcpos.second = outcpos;
|
||||||
|
termoffsets.push_back(otermcpos);
|
||||||
|
out += "</termtag>";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
switch(*chariter) {
|
||||||
|
case '\n':
|
||||||
|
if (ateol < 2) {
|
||||||
|
out += "<br>\n";
|
||||||
|
ateol++;
|
||||||
|
outcpos++;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case '\r':
|
||||||
|
break;
|
||||||
|
case '<':
|
||||||
|
ateol = 0;
|
||||||
|
out += "<";
|
||||||
|
outcpos++;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
// We don't change the eol status for whitespace, want a real line
|
||||||
|
if (*chariter == ' ' || *chariter == ' ') {
|
||||||
|
if (!atblank)
|
||||||
|
outcpos++;
|
||||||
|
atblank = 1;
|
||||||
|
} else {
|
||||||
|
ateol = 0;
|
||||||
|
atblank = 0;
|
||||||
|
outcpos++;
|
||||||
|
}
|
||||||
|
chariter.appendchartostring(out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#if 0
|
||||||
|
{
|
||||||
|
FILE *fp = fopen("/tmp/debugplaintorich", "w");
|
||||||
|
fprintf(fp, "%s\n", out.c_str());
|
||||||
|
fclose(fp);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return out;
|
||||||
|
}
|
||||||
18
src/qtgui/plaintorich.h
Normal file
18
src/qtgui/plaintorich.h
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
#ifndef _PLAINTORICH_H_INCLUDED_
|
||||||
|
#define _PLAINTORICH_H_INCLUDED_
|
||||||
|
/* @(#$Id: plaintorich.h,v 1.1 2005-09-22 16:22:34 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
|
#include <list>
#include <string>
#include <utility>
|
||||||
|
|
||||||
|
/**
 * Fix result text for display inside the gui text window.
 *
 * Produces qt rich text from plain text, enclosing the query terms in
 * highlight tags.
 *
 * @param in raw text out of internfile.
 * @param terms list of query terms
 * @param termoffsets output: emptied, then filled with the (begin, end)
 *        character offsets, in the displayed text, where we find terms
 * @return the rich text to hand to the text window
 */
extern std::string plaintorich(const std::string &in,
                               const std::list<std::string>& terms,
                               std::list<std::pair<int, int> >& termoffsets);
|
||||||
|
|
||||||
|
#endif /* _PLAINTORICH_H_INCLUDED_ */
|
||||||
Loading…
x
Reference in New Issue
Block a user