*** empty log message ***

2005-09-22 16:22:34 +00:00 · 2005-09-22 16:22:34 +00:00 · dd441d4e58
commit dd441d4e58
parent dcb19d4fee
2 changed files with 160 additions and 0 deletions
--- a/src/qtgui/plaintorich.cpp
+++ b/src/qtgui/plaintorich.cpp
@ -0,0 +1,142 @@
+
+
+#include <string>
+#include <utility>
+#include <list>
+using std::list;
+using std::pair;
+
+#include "rcldb.h"
+#include "rclconfig.h"
+#include "debuglog.h"
+#include "textsplit.h"
+#include "utf8iter.h"
+#include "transcode.h"
+#include "smallut.h"
+
+// Text splitter callback used to take note of the position of query terms 
+// inside the result text. This is then used to post highlight tags. 
+class myTextSplitCB : public TextSplitCB {
+ public:
+    const list<string>    *terms;  // in: query terms
+    list<pair<int, int> > tboffs;  // out: begin and end positions of
+                                   // query terms in text
+
+    myTextSplitCB(const list<string>& terms) 
+	: terms(&terms) {
+    }
+
+    // Callback called by the text-to-words breaker for each word
+    virtual bool takeword(const std::string& term, int pos, int bts, int bte) {
+	string dumb;
+	Rcl::dumb_string(term, dumb);
+	//LOGDEB(("Input dumbbed term: '%s' %d %d %d\n", dumb.c_str(), 
+	// pos, bts, bte));
+	for (list<string>::const_iterator it = terms->begin(); 
+	     it != terms->end(); it++) {
+	    if (!stringlowercmp(*it, dumb)) {
+		tboffs.push_back(pair<int, int>(bts, bte));
+		break;
+	    }
+	}
+	     
+	return true;
+    }
+};
+
+
+// Fix result text for display inside the gui text window.
+//
+// To compute the term character positions in the output text, we have
+// to emulate how qt's textedit counts chars (ignoring tags and
+// duplicate whitespace etc...). This is tricky business and it might
+// be better to insert the text char by char, taking note of where qt
+// thinks it is at each term.
+string plaintorich(const string &in,  const list<string>& terms,
+		   list<pair<int, int> >&termoffsets)
+{
+    LOGDEB(("plaintorich: terms: %s\n", 
+	    stringlistdisp(terms).c_str()));
+
+    termoffsets.erase(termoffsets.begin(), termoffsets.end());
+
+    myTextSplitCB cb(terms);
+    TextSplit splitter(&cb, true);
+    // Note that splitter returns the term locations in byte, not
+    // character offset
+    splitter.text_to_words(in);
+
+
+
+    // Rich text output
+    string out = "<qt><head><title></title></head><body><p>";
+
+    // Iterator for the list of input term positions. We use it to
+    // output highlight tags and to compute term positions in the
+    // output text
+    list<pair<int, int> >::iterator it = cb.tboffs.begin();
+
+    // Storage for the current term _character_ position in output.
+    pair<int, int> otermcpos;
+    // Current char position in output, excluding tags
+    int outcpos=0; 
+    // Input character iterator
+    Utf8Iter chariter(in);
+    // State variable used to limitate the number of consecutive empty lines 
+    int ateol = 0;
+    // State variable to update the char pos only for the first of
+    // consecutive blank chars
+    int atblank = 0;
+    for (string::size_type pos = 0; pos != string::npos; pos = chariter++) {
+	// If we still have terms, check (byte) position
+	if (it != cb.tboffs.end()) {
+	    int ibyteidx = chariter.getBpos();
+	    if (ibyteidx == it->first) {
+		out += "<termtag>";
+		otermcpos.first = outcpos;
+	    } else if (ibyteidx == it->second) {
+		if (it != cb.tboffs.end())
+		    it++;
+		otermcpos.second = outcpos;
+		termoffsets.push_back(otermcpos);
+		out += "</termtag>";
+	    }
+	}
+	switch(*chariter) {
+	case '\n':
+	    if (ateol < 2) {
+		out += "<br>\n";
+		ateol++;
+		outcpos++;
+	    }
+	    break;
+	case '\r': 
+	    break;
+	case '<':
+	    ateol = 0;
+	    out += "&lt;";
+	    outcpos++;
+	    break;
+	default:
+	    // We don't change the eol status for whitespace, want a real line
+	    if (*chariter == ' ' || *chariter == '	') {
+		if (!atblank)
+		    outcpos++;
+		atblank = 1;
+	    } else {
+		ateol = 0;
+		atblank = 0;
+		outcpos++;
+	    }
+	    chariter.appendchartostring(out);
+	}
+    }
+#if 0
+    {
+	FILE *fp = fopen("/tmp/debugplaintorich", "w");
+	fprintf(fp, "%s\n", out.c_str());
+	fclose(fp);
+    }
+#endif
+    return out;
+}
--- a/src/qtgui/plaintorich.h
+++ b/src/qtgui/plaintorich.h
@ -0,0 +1,18 @@
+#ifndef _PLAINTORICH_H_INCLUDED_
+#define _PLAINTORICH_H_INCLUDED_
+/* @(#$Id: plaintorich.h,v 1.1 2005-09-22 16:22:34 dockes Exp $  (C) 2004 J.F.Dockes */
+
+#include <string>
+
+/**
+ * Fix result text for display inside the gui text window.
+ * 
+ * @param in          raw text out of internfile.
+ * @param terms       list of query terms
+ * @param termoffsets character offsets where we find terms
+ */
+extern string plaintorich(const string &in,
+			  const list<string>& terms,
+			  list<pair<int, int> >&termoffsets);
+
+#endif /* _PLAINTORICH_H_INCLUDED_ */