Support explicit HTML markup in fields when the markup="html" attribute is present

This commit is contained in:
Jean-Francois Dockes 2012-10-25 14:22:20 +02:00
parent e28a6186dc
commit 17f8b652d4
5 changed files with 66 additions and 14 deletions

View File

@ -56,7 +56,8 @@ DEF_CSTR(null, "");
DEF_CSTR(plus, "+");
DEF_CSTR(textplain, "text/plain");
DEF_CSTR(url, "url");
// Marker for HTML format fields
DEF_CSTR(fldhtm, "\007");
// Values used as keys inside Dijon::Filter::metaData[]. This structure is
// used to store all data generated by format-translating filters. It is

View File

@ -360,9 +360,7 @@ MyHtmlParser::opening_tag(const string &tag)
if (get_parameter("name", name)) {
lowercase_term(name);
if (name == "date") {
// Yes this doesnt exist. It's output by filters
// And the format isn't even standard http/html
// FIXME
// Specific to Recoll filters.
decode_entities(content);
struct tm tm;
if (strptime(content.c_str(),
@ -376,10 +374,22 @@ MyHtmlParser::opening_tag(const string &tag)
}
} else if (name == "robots") {
} else {
string markup;
bool ishtml = false;
if (get_parameter("markup", markup)) {
if (!stringlowercmp("html", markup)) {
ishtml = true;
}
}
if (!meta[name].empty())
meta[name] += ' ';
decode_entities(content);
meta[name] += content;
if (ishtml &&
meta[name].compare(0, cstr_fldhtm.size(),
cstr_fldhtm)) {
meta[name].insert(0, cstr_fldhtm);
}
}
}
string hdr;
@ -417,8 +427,7 @@ MyHtmlParser::opening_tag(const string &tag)
}
}
break;
}
if (tag == "marquee" || tag == "menu" || tag == "multicol")
} else if (tag == "marquee" || tag == "menu" || tag == "multicol")
pending_space = true;
break;
case 'o':
@ -441,12 +450,11 @@ MyHtmlParser::opening_tag(const string &tag)
if (tag == "style") {
in_style_tag = true;
break;
}
if (tag == "script") {
} else if (tag == "script") {
in_script_tag = true;
break;
}
if (tag == "select") pending_space = true;
} else if (tag == "select")
pending_space = true;
break;
case 't':
if (tag == "table" || tag == "td" || tag == "textarea" ||

View File

@ -110,6 +110,14 @@ void ResListPager::resultPageNext()
m_resultsInCurrentPage = pagelen;
m_respage = npage;
}
// Prepare a metadata field value for insertion into the HTML result list.
// A value that starts with the cstr_fldhtm marker has the marker removed
// and is returned without further processing; any other value is passed
// through escapeHtml().
static string maybeEscapeHtml(const string& fld)
{
    const string::size_type markerlen = cstr_fldhtm.size();
    if (fld.compare(0, markerlen, cstr_fldhtm) == 0) {
        // Marked field: drop the marker, keep the rest untouched.
        return fld.substr(markerlen);
    }
    return escapeHtml(fld);
}
void ResListPager::resultPageFor(int docnum)
{
@ -263,21 +271,21 @@ void ResListPager::displayDoc(RclConfig *config, int i, Rcl::Doc& doc,
subs["I"] = iconurl;
subs["i"] = doc.ipath;
subs["K"] = !doc.meta[Rcl::Doc::keykw].empty() ?
string("[") + escapeHtml(doc.meta[Rcl::Doc::keykw]) + "]" : "";
string("[") + maybeEscapeHtml(doc.meta[Rcl::Doc::keykw]) + "]" : "";
subs["L"] = linksbuf.str();
subs["N"] = numbuf;
subs["M"] = doc.mimetype;
subs["R"] = doc.meta[Rcl::Doc::keyrr];
subs["S"] = sizebuf;
subs["T"] = escapeHtml(titleOrFilename);
subs["t"] = escapeHtml(doc.meta[Rcl::Doc::keytt]);
subs["T"] = maybeEscapeHtml(titleOrFilename);
subs["t"] = maybeEscapeHtml(doc.meta[Rcl::Doc::keytt]);
subs["U"] = url;
// Let %(xx) access all metadata. HTML-neuter everything, except values
// carrying the HTML field marker, which are inserted as is:
for (map<string,string>::iterator it = doc.meta.begin();
it != doc.meta.end(); it++) {
if (!it->first.empty())
subs[it->first] = escapeHtml(it->second);
subs[it->first] = maybeEscapeHtml(it->second);
}
string formatted;

View File

@ -84,6 +84,22 @@
<h2>News</h2>
<div class="news">
<ul>
<li>2012-10-25: a problem with a simple workaround has caused
several reported <span class="important">recollindex
crashes</span> recently. If you store and index
Mozilla/Thunderbird email out of the standard location
(~/.thunderbird), you should add the following at the end of
your configuration file (e.g.:
~/.recoll/recoll.conf): <pre><tt>
[/path/to/my/mozilla/mail]
mhmboxquirks = tbird
</tt></pre> Adjust the path to your local value of course...
Without this hint, recollindex has trouble finding the
message delimiters inside the folder files, and will
possibly use all the computer's memory and crash. Apart from
crashes, which only occur for very big folders, this also
causes incorrect mail indexing.
</li>
<li>2012-10-19: the source for <a href="recoll-1.18.001.tar.gz">
recoll 1.18.001</a> is available, and this is a call to
volunteers to test it. There are binary

View File

@ -100,6 +100,25 @@
<h2>Nouvelles: </h2>
<ul>
<li>2012-10-25: Un problème avec une solution simple peut provoquer
des <span class="important">plantages de
recollindex</span>.
Si vous indexez des messages mail Mozilla/Thunderbird
ailleurs qu'à l'endroit standard (~/.thunderbird), vous
devriez ajouter les lignes qui suivent à la fin de votre
fichier de configuration (~/.recoll/recoll.conf):
<pre><tt>
[/path/to/my/mozilla/mail]
mhmboxquirks = tbird
</tt></pre> Changez le chemin d'accès pour le vôtre bien
sûr. Sans cette indication, recollindex a des difficultés à
déterminer les limites de message dans les fichiers mailbox,
et peut arriver à utiliser toute la mémoire de la machine,
et à se planter. Dans les cas moins graves (avec des
fichiers de taille "raisonnable"), cela provoque aussi une
indexation incorrecte des messages.
</li>
<li>2012-10-16: <a href="filters/filters.html">un nouveau filtre
pour les documents EPUB</a>.</li>
<li>2012-05-24: Sortie de la