add option to preview html instead of plain text

This commit is contained in:
dockes 2008-10-03 08:09:36 +00:00
parent 31b841de7b
commit 2c27cbb504
9 changed files with 155 additions and 69 deletions

View File

@ -1,5 +1,5 @@
#ifndef lint #ifndef lint
static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.58 2008-09-16 08:18:30 dockes Exp $ (C) 2004 J.F.Dockes"; static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.59 2008-10-03 08:09:35 dockes Exp $ (C) 2004 J.F.Dockes";
#endif #endif
/* /*
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
@ -509,7 +509,7 @@ bool RclConfig::readFieldsConfig(const string& cnferrloc)
for (list<string>::const_iterator it = sl.begin(); for (list<string>::const_iterator it = sl.begin();
it != sl.end(); it++) { it != sl.end(); it++) {
string fld = fieldCanon(stringtolower(*it)); string fld = fieldCanon(stringtolower(*it));
LOGDEB(("Inserting [%s] in stored list\n", fld.c_str())); LOGDEB0(("Inserting [%s] in stored list\n", fld.c_str()));
m_storedFields.insert(fld); m_storedFields.insert(fld);
} }
} }

View File

@ -1,5 +1,5 @@
#ifndef lint #ifndef lint
static char rcsid[] = "@(#$Id: guiutils.cpp,v 1.38 2008-09-28 14:20:50 dockes Exp $ (C) 2005 Jean-Francois Dockes"; static char rcsid[] = "@(#$Id: guiutils.cpp,v 1.39 2008-10-03 08:09:35 dockes Exp $ (C) 2005 Jean-Francois Dockes";
#endif #endif
/* /*
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
@ -147,6 +147,8 @@ void rwSettings(bool writing)
"/Recoll/prefs/startWithAdvSearchOpen", Bool, false); "/Recoll/prefs/startWithAdvSearchOpen", Bool, false);
SETTING_RW(prefs.startWithSortToolOpen, SETTING_RW(prefs.startWithSortToolOpen,
"/Recoll/prefs/startWithSortToolOpen", Bool, false); "/Recoll/prefs/startWithSortToolOpen", Bool, false);
SETTING_RW(prefs.previewHtml,
"/Recoll/prefs/previewHtml", Bool, true);
QString advSearchClauses; QString advSearchClauses;
QString ascdflt; QString ascdflt;

View File

@ -17,7 +17,7 @@
#ifndef _GUIUTILS_H_INCLUDED_ #ifndef _GUIUTILS_H_INCLUDED_
#define _GUIUTILS_H_INCLUDED_ #define _GUIUTILS_H_INCLUDED_
/* /*
* @(#$Id: guiutils.h,v 1.28 2008-09-28 14:20:50 dockes Exp $ (C) 2005 Jean-Francois Dockes * @(#$Id: guiutils.h,v 1.29 2008-10-03 08:09:35 dockes Exp $ (C) 2005 Jean-Francois Dockes
* jean-francois.dockes@wanadoo.fr * jean-francois.dockes@wanadoo.fr
* *
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
@ -81,6 +81,7 @@ class PrefsPack {
bool queryReplaceAbstract; bool queryReplaceAbstract;
bool startWithAdvSearchOpen; bool startWithAdvSearchOpen;
bool startWithSortToolOpen; bool startWithSortToolOpen;
bool previewHtml;
// Extra query indexes. These are encoded to base64 before storing // Extra query indexes. These are encoded to base64 before storing
// to the qt settings file to avoid any bin string/ charset conv issues // to the qt settings file to avoid any bin string/ charset conv issues
list<string> allExtraDbs; list<string> allExtraDbs;

View File

@ -1,5 +1,5 @@
#ifndef lint #ifndef lint
static char rcsid[] = "@(#$Id: plaintorich.cpp,v 1.32 2008-07-04 09:29:50 dockes Exp $ (C) 2005 J.F.Dockes"; static char rcsid[] = "@(#$Id: plaintorich.cpp,v 1.33 2008-10-03 08:09:35 dockes Exp $ (C) 2005 J.F.Dockes";
#endif #endif
/* /*
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
@ -296,10 +296,12 @@ bool myTextSplitCB::matchGroups()
// duplicate whitespace etc...). This was tricky business, dependent // duplicate whitespace etc...). This was tricky business, dependent
// on qtextedit internals, and we don't do it any more, so we finally // on qtextedit internals, and we don't do it any more, so we finally
// don't know the term par/car positions in the editor text. // don't know the term par/car positions in the editor text.
// Instead, we mark the search term positions either with html anchor // Instead, we now mark the search term positions with html anchors
// (qt currently has problems with them), or a special string, and the //
// caller will use the editor's find() function to position on it // We output the result in chunks, arranging not to cut in the middle of
bool PlainToRich::plaintorich(const string& in, list<string>& out, // a tag, which would confuse qtextedit.
bool PlainToRich::plaintorich(const string& in,
list<string>& out, // Output chunk list
const HiliteData& hdata, const HiliteData& hdata,
int chunksize) int chunksize)
{ {
@ -323,16 +325,17 @@ bool PlainToRich::plaintorich(const string& in, list<string>& out,
LOGDEB0((" %s", sterms.c_str())); LOGDEB0((" %s", sterms.c_str()));
} }
// We first use the text splitter to break the text into words, // Compute the positions for the query terms. We use the text
// and compare the words to the search terms, which yields the // splitter to break the text into words, and compare the words to
// query terms positions inside the text // the search terms.
myTextSplitCB cb(terms, groups, slacks); myTextSplitCB cb(terms, groups, slacks);
TextSplit splitter(&cb); TextSplit splitter(&cb);
// Note that splitter returns the term locations in byte, not // Note: the splitter returns the term locations in byte, not
// character offset // character, offsets.
splitter.text_to_words(in); splitter.text_to_words(in);
LOGDEB0(("plaintorich: split done %d mS\n", chron.millis())); LOGDEB0(("plaintorich: split done %d mS\n", chron.millis()));
// Compute the positions for NEAR and PHRASE groups.
cb.matchGroups(); cb.matchGroups();
out.clear(); out.clear();
@ -346,7 +349,7 @@ bool PlainToRich::plaintorich(const string& in, list<string>& out,
// output highlight tags and to compute term positions in the // output highlight tags and to compute term positions in the
// output text // output text
vector<pair<int, int> >::iterator tPosIt = cb.tboffs.begin(); vector<pair<int, int> >::iterator tPosIt = cb.tboffs.begin();
vector<pair<int, int> >::iterator tboffsend = cb.tboffs.end(); vector<pair<int, int> >::iterator tPosEnd = cb.tboffs.end();
#if 0 #if 0
for (vector<pair<int, int> >::const_iterator it = cb.tboffs.begin(); for (vector<pair<int, int> >::const_iterator it = cb.tboffs.begin();
@ -357,12 +360,21 @@ bool PlainToRich::plaintorich(const string& in, list<string>& out,
// Input character iterator // Input character iterator
Utf8Iter chariter(in); Utf8Iter chariter(in);
// State variable used to limitate the number of consecutive empty lines // State variable used to limit the number of consecutive empty lines
int ateol = 0; int ateol = 0;
// Value for numbered anchors at each term match // Value for numbered anchors at each term match
int anchoridx = 1; int anchoridx = 1;
// html state
bool intag = false, inparamvalue = false;
unsigned int headend = 0;
if (m_inputhtml) {
headend = in.find("</head>");
if (headend == string::npos)
headend = in.find("</HEAD>");
if (headend != string::npos)
headend += 7;
}
for (string::size_type pos = 0; pos != string::npos; pos = chariter++) { for (string::size_type pos = 0; pos != string::npos; pos = chariter++) {
// Check from time to time if we need to stop // Check from time to time if we need to stop
if ((pos & 0xfff) == 0) { if ((pos & 0xfff) == 0) {
@ -371,51 +383,77 @@ bool PlainToRich::plaintorich(const string& in, list<string>& out,
// If we still have terms positions, check (byte) position. If // If we still have terms positions, check (byte) position. If
// we are at or after a term match, mark. // we are at or after a term match, mark.
if (tPosIt != tboffsend) { if (tPosIt != tPosEnd) {
int ibyteidx = chariter.getBpos(); int ibyteidx = chariter.getBpos();
if (ibyteidx == tPosIt->first) { if (ibyteidx == tPosIt->first) {
*olit += startAnchor(anchoridx++); if (!intag && ibyteidx > headend) {
*olit += startMatch(); *olit += startAnchor(anchoridx);
*olit += startMatch();
}
anchoridx++;
} else if (ibyteidx == tPosIt->second) { } else if (ibyteidx == tPosIt->second) {
// Output end tag, then skip all highlight areas that // Output end of match region tags
// would overlap this one if (!intag && ibyteidx > headend) {
*olit += endMatch(); *olit += endMatch();
*olit += endAnchor(); *olit += endAnchor();
}
// Skip all highlight areas that would overlap this one
int crend = tPosIt->second; int crend = tPosIt->second;
while (tPosIt != cb.tboffs.end() && tPosIt->first < crend) while (tPosIt != cb.tboffs.end() && tPosIt->first < crend)
tPosIt++; tPosIt++;
// Maybe end this chunk, begin next
if (olit->size() > (unsigned int)chunksize) { // Maybe end this chunk, begin next. Don't do it on html
// there is just no way to do it right (qtextedit can't grok
// chunks cut in the middle of <a></a> for example).
if (!m_inputhtml && olit->size() > (unsigned int)chunksize) {
out.push_back(""); out.push_back("");
olit++; olit++;
} }
} }
} }
switch(*chariter) { if (m_inputhtml) {
case '\n': switch (*chariter) {
if (ateol < 2) { case '<':
*olit += "<br>\n"; if (!inparamvalue)
ateol++; intag = true;
} break;
break; case '>':
case '\r': if (!inparamvalue)
break; intag = false;
case '<': break;
ateol = 0; case '"':
*olit += "&lt;"; if (intag) {
break; inparamvalue = !inparamvalue;
case '&': }
ateol = 0; break;
*olit += "&amp;";
break;
default:
// We don't change the eol status for whitespace, want a real line
if (!(*chariter == ' ' || *chariter == '\t')) {
ateol = 0;
} }
chariter.appendchartostring(*olit); chariter.appendchartostring(*olit);
} } else switch (*chariter) {
case '\n':
if (ateol < 2) {
*olit += "<br>\n";
ateol++;
}
break;
case '\r':
break;
case '<':
ateol = 0;
*olit += "&lt;";
break;
case '&':
ateol = 0;
*olit += "&amp;";
break;
default:
// We don't change the eol status for whitespace, want
// a real line
if (!(*chariter == ' ' || *chariter == '\t')) {
ateol = 0;
}
chariter.appendchartostring(*olit);
}
} }
#if 0 #if 0
{ {

View File

@ -16,7 +16,7 @@
*/ */
#ifndef _PLAINTORICH_H_INCLUDED_ #ifndef _PLAINTORICH_H_INCLUDED_
#define _PLAINTORICH_H_INCLUDED_ #define _PLAINTORICH_H_INCLUDED_
/* @(#$Id: plaintorich.h,v 1.17 2008-07-01 08:27:58 dockes Exp $ (C) 2004 J.F.Dockes */ /* @(#$Id: plaintorich.h,v 1.18 2008-10-03 08:09:35 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string> #include <string>
#include <list> #include <list>
@ -37,12 +37,15 @@ struct HiliteData {
/** /**
* A class for highlighting search results. Overridable methods allow * A class for highlighting search results. Overridable methods allow
* for different styles * for different styles. We can handle plain text or html input. In the latter
* case, we may fail to highlight term groups if they are mixed with html tags.
*/ */
class PlainToRich { class PlainToRich {
public: public:
static const string snull; PlainToRich(bool inputhtml = false) : m_inputhtml(inputhtml) {}
virtual ~PlainToRich() {} virtual ~PlainToRich() {}
void set_inputhtml(bool v) {m_inputhtml = v;}
/** /**
* Transform plain text for highlighting search terms, ie in the * Transform plain text for highlighting search terms, ie in the
* preview window or result list entries. * preview window or result list entries.
@ -75,6 +78,10 @@ public:
virtual string endMatch() {return snull;} virtual string endMatch() {return snull;}
virtual string startAnchor(int) {return snull;} virtual string startAnchor(int) {return snull;}
virtual string endAnchor() {return snull;} virtual string endAnchor() {return snull;}
protected:
static const string snull;
bool m_inputhtml;
}; };
#endif /* _PLAINTORICH_H_INCLUDED_ */ #endif /* _PLAINTORICH_H_INCLUDED_ */

View File

@ -1,5 +1,5 @@
#ifndef lint #ifndef lint
static char rcsid[] = "@(#$Id: preview_w.cpp,v 1.36 2008-09-08 16:49:10 dockes Exp $ (C) 2005 J.F.Dockes"; static char rcsid[] = "@(#$Id: preview_w.cpp,v 1.37 2008-10-03 08:09:35 dockes Exp $ (C) 2005 J.F.Dockes";
#endif #endif
/* /*
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
@ -672,6 +672,10 @@ class LoadThread : public QThread {
} }
FileInterner interner(filename, &st, rclconfig, tmpdir, mtype); FileInterner interner(filename, &st, rclconfig, tmpdir, mtype);
// We don't set the interner's target mtype to html because we
// do want the html filter to do its work: we won't use the
// text, but we need the conversion to utf-8
// interner.setTargetMType("text/html");
try { try {
FileInterner::Status ret = interner.internfile(*out, ipath); FileInterner::Status ret = interner.internfile(*out, ipath);
if (ret == FileInterner::FIDone || ret == FileInterner::FIAgain) { if (ret == FileInterner::FIDone || ret == FileInterner::FIAgain) {
@ -682,6 +686,10 @@ class LoadThread : public QThread {
// a mysterious error. Happens when the file name matches a // a mysterious error. Happens when the file name matches a
// search term of course. // search term of course.
*statusp = 0; *statusp = 0;
if (prefs.previewHtml && !interner.get_html().empty()) {
out->text = interner.get_html();
out->mimetype = "text/html";
}
} else { } else {
out->mimetype = interner.getMimetype(); out->mimetype = interner.getMimetype();
interner.getMissingExternal(missing); interner.getMissingExternal(missing);
@ -820,13 +828,20 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc,
// somehow slipped through previous processing. // somehow slipped through previous processing.
bool highlightTerms = fdoc.text.length() < bool highlightTerms = fdoc.text.length() <
(unsigned long)prefs.maxhltextmbs * 1024 * 1024; (unsigned long)prefs.maxhltextmbs * 1024 * 1024;
// Final text is produced in chunks so that we can display the top // Final text is produced in chunks so that we can display the top
// while still inserting at bottom // while still inserting at bottom
list<QString> qrichlst; list<QString> qrichlst;
bool inputishtml = !fdoc.mimetype.compare("text/html");
if (highlightTerms) { if (highlightTerms) {
progress.setLabelText(tr("Creating preview text")); progress.setLabelText(tr("Creating preview text"));
qApp->processEvents(); qApp->processEvents();
if (inputishtml) {
LOGDEB(("Preview: got html %s\n", fdoc.text.c_str()));
m_plaintorich.set_inputhtml(true);
} else {
m_plaintorich.set_inputhtml(false);
}
list<string> richlst; list<string> richlst;
ToRichThread rthr(fdoc.text, m_hData, richlst, m_plaintorich); ToRichThread rthr(fdoc.text, m_hData, richlst, m_plaintorich);
rthr.start(); rthr.start();
@ -855,23 +870,29 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc,
richlst.back() += "<b>Cancelled !</b>"; richlst.back() += "<b>Cancelled !</b>";
} }
} }
// Convert to QString list // Convert C++ string list to QString list
for (list<string>::iterator it = richlst.begin(); for (list<string>::iterator it = richlst.begin();
it != richlst.end(); it++) { it != richlst.end(); it++) {
qrichlst.push_back(QString::fromUtf8(it->c_str(), it->length())); qrichlst.push_back(QString::fromUtf8(it->c_str(), it->length()));
} }
} else { } else {
// No plaintorich() call. LOGDEB(("Preview: no hilighting\n"));
// In this case, the text will no be identified as // No plaintorich() call. In this case, either the text is
// richtxt/html (no <html> or <qt> etc. at the beginning), and // html and the html quoting is hopefully correct, or it's
// there is no need to escape special characters. // plain-text and there is no need to escape special
// Also we need to split in chunks (so that the top is displayed faster), // characters. We'd still want to split in chunks (so that the
// and we must do it on a QString (to avoid utf8 issues). // top is displayed faster), but we must not cut tags, and
// it's too difficult on html. For text we do the splitting on
// a QString to avoid utf8 issues.
QString qr = QString::fromUtf8(fdoc.text.c_str(), fdoc.text.length()); QString qr = QString::fromUtf8(fdoc.text.c_str(), fdoc.text.length());
int l = 0; int l = 0;
for (int pos = 0; pos < (int)qr.length(); pos += l) { if (inputishtml) {
l = MIN(CHUNKL, qr.length() - pos); qrichlst.push_back(qr);
qrichlst.push_back(qr.mid(pos, l)); } else {
for (int pos = 0; pos < (int)qr.length(); pos += l) {
l = MIN(CHUNKL, qr.length() - pos);
qrichlst.push_back(qr.mid(pos, l));
}
} }
} }
@ -895,7 +916,8 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc,
qApp->processEvents(); qApp->processEvents();
editor->append(*it); editor->append(*it);
LOGDEB(("Preview:: loaded: [%s]\n",
string((const char *)(*it).utf8()).c_str()));
// Stay at top // Stay at top
if (instep < 5) { if (instep < 5) {
editor->setCursorPosition(0,0); editor->setCursorPosition(0,0);

View File

@ -1,6 +1,6 @@
#ifndef _PREVIEW_W_H_INCLUDED_ #ifndef _PREVIEW_W_H_INCLUDED_
#define _PREVIEW_W_H_INCLUDED_ #define _PREVIEW_W_H_INCLUDED_
/* @(#$Id: preview_w.h,v 1.18 2008-07-01 08:27:58 dockes Exp $ (C) 2006 J.F.Dockes */ /* @(#$Id: preview_w.h,v 1.19 2008-10-03 08:09:35 dockes Exp $ (C) 2006 J.F.Dockes */
/* /*
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@ -69,12 +69,16 @@ class TabData {
class PlainToRichQtPreview : public PlainToRich { class PlainToRichQtPreview : public PlainToRich {
public: public:
int lastanchor; int lastanchor;
PlainToRichQtPreview() { PlainToRichQtPreview(bool inputhtml = false) : PlainToRich(inputhtml) {
lastanchor = 0; lastanchor = 0;
} }
virtual ~PlainToRichQtPreview() {} virtual ~PlainToRichQtPreview() {}
virtual string header() { virtual string header() {
return string("<qt><head><title></title></head><body><p>"); if (m_inputhtml) {
return snull;
} else {
return string("<qt><head><title></title></head><body><p>");
}
} }
virtual string startMatch() {return string("<termtag>");} virtual string startMatch() {return string("<termtag>");}
virtual string endMatch() {return string("</termtag>");} virtual string endMatch() {return string("</termtag>");}

View File

@ -388,6 +388,17 @@
<bool>false</bool> <bool>false</bool>
</property> </property>
</widget> </widget>
<widget class="QCheckBox">
<property name="name">
<cstring>previewHtmlCB</cstring>
</property>
<property name="text">
<string>Prefer Html to plain text for preview.</string>
</property>
<property name="checked">
<bool>false</bool>
</property>
</widget>
</vbox> </vbox>
</widget> </widget>
</vbox> </vbox>

View File

@ -1,5 +1,5 @@
#ifndef lint #ifndef lint
static char rcsid[] = "@(#$Id: uiprefs_w.cpp,v 1.25 2008-07-28 08:42:52 dockes Exp $ (C) 2005 J.F.Dockes"; static char rcsid[] = "@(#$Id: uiprefs_w.cpp,v 1.26 2008-10-03 08:09:36 dockes Exp $ (C) 2005 J.F.Dockes";
#endif #endif
/* /*
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
@ -106,7 +106,7 @@ void UIPrefsDialog::setFromPrefs()
initStartSortCB->setChecked(prefs.startWithSortToolOpen); initStartSortCB->setChecked(prefs.startWithSortToolOpen);
useDesktopOpenCB->setChecked(prefs.useDesktopOpen); useDesktopOpenCB->setChecked(prefs.useDesktopOpen);
keepSortCB->setChecked(prefs.keepSort); keepSortCB->setChecked(prefs.keepSort);
previewHtmlCB->setChecked(prefs.previewHtml);
// Query terms color // Query terms color
qtermColorLE->setText(prefs.qtermcolor); qtermColorLE->setText(prefs.qtermcolor);
@ -209,6 +209,7 @@ void UIPrefsDialog::accept()
prefs.startWithSortToolOpen = initStartSortCB->isChecked(); prefs.startWithSortToolOpen = initStartSortCB->isChecked();
prefs.useDesktopOpen = useDesktopOpenCB->isChecked(); prefs.useDesktopOpen = useDesktopOpenCB->isChecked();
prefs.keepSort = keepSortCB->isChecked(); prefs.keepSort = keepSortCB->isChecked();
prefs.previewHtml = previewHtmlCB->isChecked();
prefs.syntAbsLen = syntlenSB->value(); prefs.syntAbsLen = syntlenSB->value();
prefs.syntAbsCtx = syntctxSB->value(); prefs.syntAbsCtx = syntctxSB->value();