From 2c27cbb5040414201996459fe47e300762d579fc Mon Sep 17 00:00:00 2001 From: dockes Date: Fri, 3 Oct 2008 08:09:36 +0000 Subject: [PATCH] add option to preview html instead of plain text --- src/common/rclconfig.cpp | 4 +- src/qtgui/guiutils.cpp | 4 +- src/qtgui/guiutils.h | 3 +- src/qtgui/plaintorich.cpp | 126 +++++++++++++++++++++++++------------- src/qtgui/plaintorich.h | 13 +++- src/qtgui/preview_w.cpp | 48 +++++++++++---- src/qtgui/preview_w.h | 10 ++- src/qtgui/uiprefs.ui | 11 ++++ src/qtgui/uiprefs_w.cpp | 5 +- 9 files changed, 155 insertions(+), 69 deletions(-) diff --git a/src/common/rclconfig.cpp b/src/common/rclconfig.cpp index ed50768e..8001c011 100644 --- a/src/common/rclconfig.cpp +++ b/src/common/rclconfig.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.58 2008-09-16 08:18:30 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.59 2008-10-03 08:09:35 dockes Exp $ (C) 2004 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -509,7 +509,7 @@ bool RclConfig::readFieldsConfig(const string& cnferrloc) for (list::const_iterator it = sl.begin(); it != sl.end(); it++) { string fld = fieldCanon(stringtolower(*it)); - LOGDEB(("Inserting [%s] in stored list\n", fld.c_str())); + LOGDEB0(("Inserting [%s] in stored list\n", fld.c_str())); m_storedFields.insert(fld); } } diff --git a/src/qtgui/guiutils.cpp b/src/qtgui/guiutils.cpp index 01b3800b..25f840bf 100644 --- a/src/qtgui/guiutils.cpp +++ b/src/qtgui/guiutils.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: guiutils.cpp,v 1.38 2008-09-28 14:20:50 dockes Exp $ (C) 2005 Jean-Francois Dockes"; +static char rcsid[] = "@(#$Id: guiutils.cpp,v 1.39 2008-10-03 08:09:35 dockes Exp $ (C) 2005 Jean-Francois Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -147,6 +147,8 @@ void rwSettings(bool writing) "/Recoll/prefs/startWithAdvSearchOpen", Bool, false); SETTING_RW(prefs.startWithSortToolOpen, "/Recoll/prefs/startWithSortToolOpen", Bool, false); + SETTING_RW(prefs.previewHtml, + "/Recoll/prefs/previewHtml", Bool, true); QString advSearchClauses; QString ascdflt; diff --git a/src/qtgui/guiutils.h b/src/qtgui/guiutils.h index 618bc982..93e992a9 100644 --- a/src/qtgui/guiutils.h +++ b/src/qtgui/guiutils.h @@ -17,7 +17,7 @@ #ifndef _GUIUTILS_H_INCLUDED_ #define _GUIUTILS_H_INCLUDED_ /* - * @(#$Id: guiutils.h,v 1.28 2008-09-28 14:20:50 dockes Exp $ (C) 2005 Jean-Francois Dockes + * @(#$Id: guiutils.h,v 1.29 2008-10-03 08:09:35 dockes Exp $ (C) 2005 Jean-Francois Dockes * jean-francois.dockes@wanadoo.fr * * This program is free software; you can redistribute it and/or modify @@ -81,6 +81,7 @@ class PrefsPack { bool queryReplaceAbstract; bool startWithAdvSearchOpen; bool startWithSortToolOpen; + bool previewHtml; // Extra query indexes. This are encoded to base64 before storing // to the qt settings file to avoid any bin string/ charset conv issues list allExtraDbs; diff --git a/src/qtgui/plaintorich.cpp b/src/qtgui/plaintorich.cpp index 575cd8b9..cef33fb3 100644 --- a/src/qtgui/plaintorich.cpp +++ b/src/qtgui/plaintorich.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: plaintorich.cpp,v 1.32 2008-07-04 09:29:50 dockes Exp $ (C) 2005 J.F.Dockes"; +static char rcsid[] = "@(#$Id: plaintorich.cpp,v 1.33 2008-10-03 08:09:35 dockes Exp $ (C) 2005 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -296,10 +296,12 @@ bool myTextSplitCB::matchGroups() // duplicate whitespace etc...). This was tricky business, dependant // on qtextedit internals, and we don't do it any more, so we finally // don't know the term par/car positions in the editor text. -// Instead, we mark the search term positions either with html anchor -// (qt currently has problems with them), or a special string, and the -// caller will use the editor's find() function to position on it -bool PlainToRich::plaintorich(const string& in, list& out, +// Instead, we now mark the search term positions with html anchors +// +// We output the result in chunks, arranging not to cut in the middle of +// a tag, which would confuse qtextedit. +bool PlainToRich::plaintorich(const string& in, + list& out, // Output chunk list const HiliteData& hdata, int chunksize) { @@ -323,16 +325,17 @@ bool PlainToRich::plaintorich(const string& in, list& out, LOGDEB0((" %s", sterms.c_str())); } - // We first use the text splitter to break the text into words, - // and compare the words to the search terms, which yields the - // query terms positions inside the text + // Compute the positions for the query terms. We use the text + // splitter to break the text into words, and compare the words to + // the search terms, myTextSplitCB cb(terms, groups, slacks); TextSplit splitter(&cb); - // Note that splitter returns the term locations in byte, not - // character offset + // Note: the splitter returns the term locations in byte, not + // character, offsets. splitter.text_to_words(in); LOGDEB0(("plaintorich: split done %d mS\n", chron.millis())); + // Compute the positions for NEAR and PHRASE groups. cb.matchGroups(); out.clear(); @@ -346,7 +349,7 @@ bool PlainToRich::plaintorich(const string& in, list& out, // output highlight tags and to compute term positions in the // output text vector >::iterator tPosIt = cb.tboffs.begin(); - vector >::iterator tboffsend = cb.tboffs.end(); + vector >::iterator tPosEnd = cb.tboffs.end(); #if 0 for (vector >::const_iterator it = cb.tboffs.begin(); @@ -357,12 +360,21 @@ bool PlainToRich::plaintorich(const string& in, list& out, // Input character iterator Utf8Iter chariter(in); - // State variable used to limitate the number of consecutive empty lines + // State variable used to limit the number of consecutive empty lines int ateol = 0; // Value for numbered anchors at each term match int anchoridx = 1; - + // html state + bool intag = false, inparamvalue = false; + unsigned int headend = 0; + if (m_inputhtml) { + headend = in.find(""); + if (headend == string::npos) + headend = in.find(""); + if (headend != string::npos) + headend += 7; + } for (string::size_type pos = 0; pos != string::npos; pos = chariter++) { // Check from time to time if we need to stop if ((pos & 0xfff) == 0) { @@ -371,51 +383,77 @@ bool PlainToRich::plaintorich(const string& in, list& out, // If we still have terms positions, check (byte) position. If // we are at or after a term match, mark. - if (tPosIt != tboffsend) { + if (tPosIt != tPosEnd) { int ibyteidx = chariter.getBpos(); if (ibyteidx == tPosIt->first) { - *olit += startAnchor(anchoridx++); - *olit += startMatch(); + if (!intag && ibyteidx > headend) { + *olit += startAnchor(anchoridx); + *olit += startMatch(); + } + anchoridx++; } else if (ibyteidx == tPosIt->second) { - // Output end tag, then skip all highlight areas that - // would overlap this one - *olit += endMatch(); - *olit += endAnchor(); + // Output end or match region tags + if (!intag && ibyteidx > headend) { + *olit += endMatch(); + *olit += endAnchor(); + } + // Skip all highlight areas that would overlap this one int crend = tPosIt->second; while (tPosIt != cb.tboffs.end() && tPosIt->first < crend) tPosIt++; - // Maybe end this chunk, begin next - if (olit->size() > (unsigned int)chunksize) { + + // Maybe end this chunk, begin next. Don't do it on html + // there is just no way to do it right (qtextedit cant grok + // chunks cut in the middle of for example). + if (!m_inputhtml && olit->size() > (unsigned int)chunksize) { out.push_back(""); olit++; } } } - switch(*chariter) { - case '\n': - if (ateol < 2) { - *olit += "
\n"; - ateol++; - } - break; - case '\r': - break; - case '<': - ateol = 0; - *olit += "<"; - break; - case '&': - ateol = 0; - *olit += "&"; - break; - default: - // We don't change the eol status for whitespace, want a real line - if (!(*chariter == ' ' || *chariter == '\t')) { - ateol = 0; + if (m_inputhtml) { + switch (*chariter) { + case '<': + if (!inparamvalue) + intag = true; + break; + case '>': + if (!inparamvalue) + intag = false; + break; + case '"': + if (intag) { + inparamvalue = !inparamvalue; + } + break; } chariter.appendchartostring(*olit); - } + } else switch (*chariter) { + case '\n': + if (ateol < 2) { + *olit += "
\n"; + ateol++; + } + break; + case '\r': + break; + case '<': + ateol = 0; + *olit += "<"; + break; + case '&': + ateol = 0; + *olit += "&"; + break; + default: + // We don't change the eol status for whitespace, want + // a real line + if (!(*chariter == ' ' || *chariter == '\t')) { + ateol = 0; + } + chariter.appendchartostring(*olit); + } } #if 0 { diff --git a/src/qtgui/plaintorich.h b/src/qtgui/plaintorich.h index 29ee4c93..2caccb59 100644 --- a/src/qtgui/plaintorich.h +++ b/src/qtgui/plaintorich.h @@ -16,7 +16,7 @@ */ #ifndef _PLAINTORICH_H_INCLUDED_ #define _PLAINTORICH_H_INCLUDED_ -/* @(#$Id: plaintorich.h,v 1.17 2008-07-01 08:27:58 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: plaintorich.h,v 1.18 2008-10-03 08:09:35 dockes Exp $ (C) 2004 J.F.Dockes */ #include #include @@ -37,12 +37,15 @@ struct HiliteData { /** * A class for highlighting search results. Overridable methods allow - * for different styles + * for different styles. We can handle plain text or html input. In the latter + * case, we may fail to highligt term groups if they are mixed with html tags. */ class PlainToRich { public: - static const string snull; + PlainToRich(bool inputhtml = false) : m_inputhtml(inputhtml) {} virtual ~PlainToRich() {} + void set_inputhtml(bool v) {m_inputhtml = v;} + /** * Transform plain text for highlighting search terms, ie in the * preview window or result list entries. @@ -75,6 +78,10 @@ public: virtual string endMatch() {return snull;} virtual string startAnchor(int) {return snull;} virtual string endAnchor() {return snull;} + +protected: + static const string snull; + bool m_inputhtml; }; #endif /* _PLAINTORICH_H_INCLUDED_ */ diff --git a/src/qtgui/preview_w.cpp b/src/qtgui/preview_w.cpp index 29b82a0d..7d963019 100644 --- a/src/qtgui/preview_w.cpp +++ b/src/qtgui/preview_w.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: preview_w.cpp,v 1.36 2008-09-08 16:49:10 dockes Exp $ (C) 2005 J.F.Dockes"; +static char rcsid[] = "@(#$Id: preview_w.cpp,v 1.37 2008-10-03 08:09:35 dockes Exp $ (C) 2005 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -672,6 +672,10 @@ class LoadThread : public QThread { } FileInterner interner(filename, &st, rclconfig, tmpdir, mtype); + // We don't set the interner's target mtype to html because we + // do want the html filter to do its work: we won't use the + // text, but we need the conversion to utf-8 + // interner.setTargetMType("text/html"); try { FileInterner::Status ret = interner.internfile(*out, ipath); if (ret == FileInterner::FIDone || ret == FileInterner::FIAgain) { @@ -682,6 +686,10 @@ class LoadThread : public QThread { // a mysterious error. Happens when the file name matches a // a search term of course. *statusp = 0; + if (prefs.previewHtml && !interner.get_html().empty()) { + out->text = interner.get_html(); + out->mimetype = "text/html"; + } } else { out->mimetype = interner.getMimetype(); interner.getMissingExternal(missing); @@ -820,13 +828,20 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc, // somehow slipped through previous processing. bool highlightTerms = fdoc.text.length() < (unsigned long)prefs.maxhltextmbs * 1024 * 1024; + // Final text is produced in chunks so that we can display the top // while still inserting at bottom list qrichlst; - + bool inputishtml = !fdoc.mimetype.compare("text/html"); if (highlightTerms) { progress.setLabelText(tr("Creating preview text")); qApp->processEvents(); + if (inputishtml) { + LOGDEB(("Preview: got html %s\n", fdoc.text.c_str())); + m_plaintorich.set_inputhtml(true); + } else { + m_plaintorich.set_inputhtml(false); + } list richlst; ToRichThread rthr(fdoc.text, m_hData, richlst, m_plaintorich); rthr.start(); @@ -855,23 +870,29 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc, richlst.back() += "Cancelled !"; } } - // Convert to QString list + // Convert C++ string list to QString list for (list::iterator it = richlst.begin(); it != richlst.end(); it++) { qrichlst.push_back(QString::fromUtf8(it->c_str(), it->length())); } } else { - // No plaintorich() call. - // In this case, the text will no be identified as - // richtxt/html (no or etc. at the beginning), and - // there is no need to escape special characters. - // Also we need to split in chunks (so that the top is displayed faster), - // and we must do it on a QString (to avoid utf8 issues). + LOGDEB(("Preview: no hilighting\n")); + // No plaintorich() call. In this case, either the text is + // html and the html quoting is hopefully correct, or it's + // plain-text and there is no need to escape special + // characters. We'd still want to split in chunks (so that the + // top is displayed faster), but we must not cut tags, and + // it's too difficult on html. For text we do the splitting on + // a QString to avoid utf8 issues. QString qr = QString::fromUtf8(fdoc.text.c_str(), fdoc.text.length()); int l = 0; - for (int pos = 0; pos < (int)qr.length(); pos += l) { - l = MIN(CHUNKL, qr.length() - pos); - qrichlst.push_back(qr.mid(pos, l)); + if (inputishtml) { + qrichlst.push_back(qr); + } else { + for (int pos = 0; pos < (int)qr.length(); pos += l) { + l = MIN(CHUNKL, qr.length() - pos); + qrichlst.push_back(qr.mid(pos, l)); + } } } @@ -895,7 +916,8 @@ bool Preview::loadFileInCurrentTab(string fn, size_t sz, const Rcl::Doc &idoc, qApp->processEvents(); editor->append(*it); - + LOGDEB(("Preview:: loaded: [%s]\n", + string((const char *)(*it).utf8()).c_str())); // Stay at top if (instep < 5) { editor->setCursorPosition(0,0); diff --git a/src/qtgui/preview_w.h b/src/qtgui/preview_w.h index 7e4c230b..c6c0fcf6 100644 --- a/src/qtgui/preview_w.h +++ b/src/qtgui/preview_w.h @@ -1,6 +1,6 @@ #ifndef _PREVIEW_W_H_INCLUDED_ #define _PREVIEW_W_H_INCLUDED_ -/* @(#$Id: preview_w.h,v 1.18 2008-07-01 08:27:58 dockes Exp $ (C) 2006 J.F.Dockes */ +/* @(#$Id: preview_w.h,v 1.19 2008-10-03 08:09:35 dockes Exp $ (C) 2006 J.F.Dockes */ /* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -69,12 +69,16 @@ class TabData { class PlainToRichQtPreview : public PlainToRich { public: int lastanchor; - PlainToRichQtPreview() { + PlainToRichQtPreview(bool inputhtml = false) : PlainToRich(inputhtml) { lastanchor = 0; } virtual ~PlainToRichQtPreview() {} virtual string header() { - return string("

"); + if (m_inputhtml) { + return snull; + } else { + return string("

"); + } } virtual string startMatch() {return string("");} virtual string endMatch() {return string("");} diff --git a/src/qtgui/uiprefs.ui b/src/qtgui/uiprefs.ui index 8440ad83..334c177f 100644 --- a/src/qtgui/uiprefs.ui +++ b/src/qtgui/uiprefs.ui @@ -388,6 +388,17 @@ false + + + previewHtmlCB + + + Prefer Html to plain text for preview. + + + false + + diff --git a/src/qtgui/uiprefs_w.cpp b/src/qtgui/uiprefs_w.cpp index 9b78f3ce..cffe42f6 100644 --- a/src/qtgui/uiprefs_w.cpp +++ b/src/qtgui/uiprefs_w.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: uiprefs_w.cpp,v 1.25 2008-07-28 08:42:52 dockes Exp $ (C) 2005 J.F.Dockes"; +static char rcsid[] = "@(#$Id: uiprefs_w.cpp,v 1.26 2008-10-03 08:09:36 dockes Exp $ (C) 2005 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -106,7 +106,7 @@ void UIPrefsDialog::setFromPrefs() initStartSortCB->setChecked(prefs.startWithSortToolOpen); useDesktopOpenCB->setChecked(prefs.useDesktopOpen); keepSortCB->setChecked(prefs.keepSort); - + previewHtmlCB->setChecked(prefs.previewHtml); // Query terms color qtermColorLE->setText(prefs.qtermcolor); @@ -209,6 +209,7 @@ void UIPrefsDialog::accept() prefs.startWithSortToolOpen = initStartSortCB->isChecked(); prefs.useDesktopOpen = useDesktopOpenCB->isChecked(); prefs.keepSort = keepSortCB->isChecked(); + prefs.previewHtml = previewHtmlCB->isChecked(); prefs.syntAbsLen = syntlenSB->value(); prefs.syntAbsCtx = syntctxSB->value();