add option to preview html instead of plain text

This commit is contained in:
dockes 2008-10-03 08:09:36 +00:00
parent 31b841de7b
commit 2c27cbb504
9 changed files with 155 additions and 69 deletions

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.58 2008-09-16 08:18:30 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.59 2008-10-03 08:09:35 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -509,7 +509,7 @@ bool RclConfig::readFieldsConfig(const string& cnferrloc)
for (list<string>::const_iterator it = sl.begin();
it != sl.end(); it++) {
string fld = fieldCanon(stringtolower(*it));
LOGDEB(("Inserting [%s] in stored list\n", fld.c_str()));
LOGDEB0(("Inserting [%s] in stored list\n", fld.c_str()));
m_storedFields.insert(fld);
}
}

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: guiutils.cpp,v 1.38 2008-09-28 14:20:50 dockes Exp $ (C) 2005 Jean-Francois Dockes";
static char rcsid[] = "@(#$Id: guiutils.cpp,v 1.39 2008-10-03 08:09:35 dockes Exp $ (C) 2005 Jean-Francois Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -147,6 +147,8 @@ void rwSettings(bool writing)
"/Recoll/prefs/startWithAdvSearchOpen", Bool, false);
SETTING_RW(prefs.startWithSortToolOpen,
"/Recoll/prefs/startWithSortToolOpen", Bool, false);
SETTING_RW(prefs.previewHtml,
"/Recoll/prefs/previewHtml", Bool, true);
QString advSearchClauses;
QString ascdflt;

View File

@ -17,7 +17,7 @@
#ifndef _GUIUTILS_H_INCLUDED_
#define _GUIUTILS_H_INCLUDED_
/*
* @(#$Id: guiutils.h,v 1.28 2008-09-28 14:20:50 dockes Exp $ (C) 2005 Jean-Francois Dockes
* @(#$Id: guiutils.h,v 1.29 2008-10-03 08:09:35 dockes Exp $ (C) 2005 Jean-Francois Dockes
* jean-francois.dockes@wanadoo.fr
*
* This program is free software; you can redistribute it and/or modify
@ -81,6 +81,7 @@ class PrefsPack {
bool queryReplaceAbstract;
bool startWithAdvSearchOpen;
bool startWithSortToolOpen;
bool previewHtml;
// Extra query indexes. These are encoded to base64 before storing
// to the qt settings file to avoid any binary string / charset conversion issues
list<string> allExtraDbs;

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: plaintorich.cpp,v 1.32 2008-07-04 09:29:50 dockes Exp $ (C) 2005 J.F.Dockes";
static char rcsid[] = "@(#$Id: plaintorich.cpp,v 1.33 2008-10-03 08:09:35 dockes Exp $ (C) 2005 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -296,10 +296,12 @@ bool myTextSplitCB::matchGroups()
// duplicate whitespace etc...). This was tricky business, dependent
// on qtextedit internals, and we don't do it any more, so we finally
// don't know the term par/car positions in the editor text.
// Instead, we mark the search term positions either with html anchor
// (qt currently has problems with them), or a special string, and the
// caller will use the editor's find() function to position on it
bool PlainToRich::plaintorich(const string& in, list<string>& out,
// Instead, we now mark the search term positions with html anchors
//
// We output the result in chunks, arranging not to cut in the middle of
// a tag, which would confuse qtextedit.
bool PlainToRich::plaintorich(const string& in,
list<string>& out, // Output chunk list
const HiliteData& hdata,
int chunksize)
{
@ -323,16 +325,17 @@ bool PlainToRich::plaintorich(const string& in, list<string>& out,
LOGDEB0((" %s", sterms.c_str()));
}
// We first use the text splitter to break the text into words,
// and compare the words to the search terms, which yields the
// query terms positions inside the text
// Compute the positions for the query terms. We use the text
// splitter to break the text into words, and compare the words to
// the search terms,
myTextSplitCB cb(terms, groups, slacks);
TextSplit splitter(&cb);
// Note that splitter returns the term locations in byte, not
// character offset
// Note: the splitter returns the term locations in byte, not
// character, offsets.
splitter.text_to_words(in);
LOGDEB0(("plaintorich: split done %d mS\n", chron.millis()));
// Compute the positions for NEAR and PHRASE groups.
cb.matchGroups();
out.clear();
@ -346,7 +349,7 @@ bool PlainToRich::plaintorich(const string& in, list<string>& out,
// output highlight tags and to compute term positions in the
// output text
vector<pair<int, int> >::iterator tPosIt = cb.tboffs.begin();
vector<pair<int, int> >::iterator tboffsend = cb.tboffs.end();
vector<pair<int, int> >::iterator tPosEnd = cb.tboffs.end();
#if 0
for (vector<pair<int, int> >::const_iterator it = cb.tboffs.begin();
@ -357,12 +360,21 @@ bool PlainToRich::plaintorich(const string& in, list<string>& out,
// Input character iterator
Utf8Iter chariter(in);
// State variable used to limitate the number of consecutive empty lines
// State variable used to limit the number of consecutive empty lines
int ateol = 0;
// Value for numbered anchors at each term match
int anchoridx = 1;
// html state
bool intag = false, inparamvalue = false;
unsigned int headend = 0;
if (m_inputhtml) {
headend = in.find("</head>");
if (headend == string::npos)
headend = in.find("</HEAD>");
if (headend != string::npos)
headend += 7;
}
for (string::size_type pos = 0; pos != string::npos; pos = chariter++) {
// Check from time to time if we need to stop
if ((pos & 0xfff) == 0) {
@ -371,51 +383,77 @@ bool PlainToRich::plaintorich(const string& in, list<string>& out,
// If we still have terms positions, check (byte) position. If
// we are at or after a term match, mark.
if (tPosIt != tboffsend) {
if (tPosIt != tPosEnd) {
int ibyteidx = chariter.getBpos();
if (ibyteidx == tPosIt->first) {
*olit += startAnchor(anchoridx++);
*olit += startMatch();
if (!intag && ibyteidx > headend) {
*olit += startAnchor(anchoridx);
*olit += startMatch();
}
anchoridx++;
} else if (ibyteidx == tPosIt->second) {
// Output end tag, then skip all highlight areas that
// would overlap this one
*olit += endMatch();
*olit += endAnchor();
// Output end of match region tags
if (!intag && ibyteidx > headend) {
*olit += endMatch();
*olit += endAnchor();
}
// Skip all highlight areas that would overlap this one
int crend = tPosIt->second;
while (tPosIt != cb.tboffs.end() && tPosIt->first < crend)
tPosIt++;
// Maybe end this chunk, begin next
if (olit->size() > (unsigned int)chunksize) {
// Maybe end this chunk, begin next. Don't do it on html:
// there is just no way to do it right (qtextedit can't grok
// chunks cut in the middle of <a></a> for example).
if (!m_inputhtml && olit->size() > (unsigned int)chunksize) {
out.push_back("");
olit++;
}
}
}
switch(*chariter) {
case '\n':
if (ateol < 2) {
*olit += "<br>\n";
ateol++;
}
break;
case '\r':
break;
case '<':
ateol = 0;
*olit += "&lt;";
break;
case '&':
ateol = 0;
*olit += "&amp;";
break;
default:
// We don't change the eol status for whitespace, want a real line
if (!(*chariter == ' ' || *chariter == '\t')) {
ateol = 0;
if (m_inputhtml) {
switch (*chariter) {
case '<':
if (!inparamvalue)
intag = true;
break;
case '>':
if (!inparamvalue)
intag = false;
break;
case '"':
if (intag) {
inparamvalue = !inparamvalue;
}
break;
}
chariter.appendchartostring(*olit);
}
} else switch (*chariter) {
case '\n':
if (ateol < 2) {
*olit += "<br>\n";
ateol++;
}
break;
case '\r':
break;
case '<':
ateol = 0;
*olit += "&lt;";
break;
case '&':
ateol = 0;
*olit += "&amp;";
break;
default:
// We don't change the eol status for whitespace, want
// a real line
if (!(*chariter == ' ' || *chariter == '\t')) {
ateol = 0;
}
chariter.appendchartostring(*olit);
}
}
#if 0
{

View File

@ -16,7 +16,7 @@
*/
#ifndef _PLAINTORICH_H_INCLUDED_
#define _PLAINTORICH_H_INCLUDED_
/* @(#$Id: plaintorich.h,v 1.17 2008-07-01 08:27:58 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: plaintorich.h,v 1.18 2008-10-03 08:09:35 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string>
#include <list>
@ -37,12 +37,15 @@ struct HiliteData {
/**
* A class for highlighting search results. Overridable methods allow
* for different styles
* for different styles. We can handle plain text or html input. In the latter
* case, we may fail to highlight term groups if they are mixed with html tags.
*/
class PlainToRich {
public:
static const string snull;
PlainToRich(bool inputhtml = false) : m_inputhtml(inputhtml) {}
virtual ~PlainToRich() {}
void set_inputhtml(bool v) {m_inputhtml = v;}
/**
* Transform plain text for highlighting search terms, ie in the
* preview window or result list entries.
@ -75,6 +78,10 @@ public:
virtual string endMatch() {return snull;}
virtual string startAnchor(int) {return snull;}
virtual string endAnchor() {return snull;}
protected:
static const string snull;
bool m_inputhtml;
};
#endif /* _PLAINTORICH_H_INCLUDED_ */

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: preview_w.cpp,v 1.36 2008-09-08 16:49:10 dockes Exp $ (C) 2005 J.F.Dockes";
static char rcsid[] = "@(#$Id: preview_w.cpp,v 1.37 2008-10-03 08:09:35 dockes Exp $ (C) 2005 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -672,6 +672,10 @@ class LoadThread : public QThread {
}
FileInterner interner(filename, &st, rclconfig, tmpdir, mtype);
// We don't set the interner's target mtype to html because we
// do want the html filter to do its work: we won't use the
// text, but we need the conversion to utf-8
// interner.setTargetMType("text/html");
try {
FileInterner::Status ret = interner.internfile(*out, ipath);
if (ret == FileInterner::FIDone || ret == FileInterner::FIAgain) {
@ -682,6 +686,10 @@ class LoadThread : public QThread {
// a mysterious error. Happens when the file name matches a
// search term of course.
*statusp = 0;
if (prefs.previewHtml && !interner.get_html().empty()) {
out->text = interner.get_html();
out->mimetype = "text/html";
}
} else {
out->mimetype = interner.getMimetype();
interner.getMissingExternal(missing);
@ -820,13 +828,20 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc,
// somehow slipped through previous processing.
bool highlightTerms = fdoc.text.length() <
(unsigned long)prefs.maxhltextmbs * 1024 * 1024;
// Final text is produced in chunks so that we can display the top
// while still inserting at bottom
list<QString> qrichlst;
bool inputishtml = !fdoc.mimetype.compare("text/html");
if (highlightTerms) {
progress.setLabelText(tr("Creating preview text"));
qApp->processEvents();
if (inputishtml) {
LOGDEB(("Preview: got html %s\n", fdoc.text.c_str()));
m_plaintorich.set_inputhtml(true);
} else {
m_plaintorich.set_inputhtml(false);
}
list<string> richlst;
ToRichThread rthr(fdoc.text, m_hData, richlst, m_plaintorich);
rthr.start();
@ -855,23 +870,29 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc,
richlst.back() += "<b>Cancelled !</b>";
}
}
// Convert to QString list
// Convert C++ string list to QString list
for (list<string>::iterator it = richlst.begin();
it != richlst.end(); it++) {
qrichlst.push_back(QString::fromUtf8(it->c_str(), it->length()));
}
} else {
// No plaintorich() call.
// In this case, the text will no be identified as
// richtxt/html (no <html> or <qt> etc. at the beginning), and
// there is no need to escape special characters.
// Also we need to split in chunks (so that the top is displayed faster),
// and we must do it on a QString (to avoid utf8 issues).
LOGDEB(("Preview: no hilighting\n"));
// No plaintorich() call. In this case, either the text is
// html and the html quoting is hopefully correct, or it's
// plain-text and there is no need to escape special
// characters. We'd still want to split in chunks (so that the
// top is displayed faster), but we must not cut tags, and
// it's too difficult on html. For text we do the splitting on
// a QString to avoid utf8 issues.
QString qr = QString::fromUtf8(fdoc.text.c_str(), fdoc.text.length());
int l = 0;
for (int pos = 0; pos < (int)qr.length(); pos += l) {
l = MIN(CHUNKL, qr.length() - pos);
qrichlst.push_back(qr.mid(pos, l));
if (inputishtml) {
qrichlst.push_back(qr);
} else {
for (int pos = 0; pos < (int)qr.length(); pos += l) {
l = MIN(CHUNKL, qr.length() - pos);
qrichlst.push_back(qr.mid(pos, l));
}
}
}
@ -895,7 +916,8 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc,
qApp->processEvents();
editor->append(*it);
LOGDEB(("Preview:: loaded: [%s]\n",
string((const char *)(*it).utf8()).c_str()));
// Stay at top
if (instep < 5) {
editor->setCursorPosition(0,0);

View File

@ -1,6 +1,6 @@
#ifndef _PREVIEW_W_H_INCLUDED_
#define _PREVIEW_W_H_INCLUDED_
/* @(#$Id: preview_w.h,v 1.18 2008-07-01 08:27:58 dockes Exp $ (C) 2006 J.F.Dockes */
/* @(#$Id: preview_w.h,v 1.19 2008-10-03 08:09:35 dockes Exp $ (C) 2006 J.F.Dockes */
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@ -69,12 +69,16 @@ class TabData {
class PlainToRichQtPreview : public PlainToRich {
public:
int lastanchor;
PlainToRichQtPreview() {
PlainToRichQtPreview(bool inputhtml = false) : PlainToRich(inputhtml) {
lastanchor = 0;
}
virtual ~PlainToRichQtPreview() {}
virtual string header() {
return string("<qt><head><title></title></head><body><p>");
if (m_inputhtml) {
return snull;
} else {
return string("<qt><head><title></title></head><body><p>");
}
}
virtual string startMatch() {return string("<termtag>");}
virtual string endMatch() {return string("</termtag>");}

View File

@ -388,6 +388,17 @@
<bool>false</bool>
</property>
</widget>
<widget class="QCheckBox">
<property name="name">
<cstring>previewHtmlCB</cstring>
</property>
<property name="text">
<string>Prefer Html to plain text for preview.</string>
</property>
<property name="checked">
<bool>false</bool>
</property>
</widget>
</vbox>
</widget>
</vbox>

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: uiprefs_w.cpp,v 1.25 2008-07-28 08:42:52 dockes Exp $ (C) 2005 J.F.Dockes";
static char rcsid[] = "@(#$Id: uiprefs_w.cpp,v 1.26 2008-10-03 08:09:36 dockes Exp $ (C) 2005 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -106,7 +106,7 @@ void UIPrefsDialog::setFromPrefs()
initStartSortCB->setChecked(prefs.startWithSortToolOpen);
useDesktopOpenCB->setChecked(prefs.useDesktopOpen);
keepSortCB->setChecked(prefs.keepSort);
previewHtmlCB->setChecked(prefs.previewHtml);
// Query terms color
qtermColorLE->setText(prefs.qtermcolor);
@ -209,6 +209,7 @@ void UIPrefsDialog::accept()
prefs.startWithSortToolOpen = initStartSortCB->isChecked();
prefs.useDesktopOpen = useDesktopOpenCB->isChecked();
prefs.keepSort = keepSortCB->isChecked();
prefs.previewHtml = previewHtmlCB->isChecked();
prefs.syntAbsLen = syntlenSB->value();
prefs.syntAbsCtx = syntctxSB->value();