Support explicit HTML markup in fields when the markup="html" attribute is present
This commit is contained in:
parent
e28a6186dc
commit
17f8b652d4
@ -56,7 +56,8 @@ DEF_CSTR(null, "");
|
||||
DEF_CSTR(plus, "+");
|
||||
DEF_CSTR(textplain, "text/plain");
|
||||
DEF_CSTR(url, "url");
|
||||
|
||||
// Marker prepended to a field value to flag that it contains HTML markup
|
||||
DEF_CSTR(fldhtm, "\007");
|
||||
|
||||
// Values used as keys inside Dijon::Filter::metaData[]. This structure is
|
||||
// used to store all data generated by format-translating filters. It is
|
||||
|
||||
@ -360,9 +360,7 @@ MyHtmlParser::opening_tag(const string &tag)
|
||||
if (get_parameter("name", name)) {
|
||||
lowercase_term(name);
|
||||
if (name == "date") {
|
||||
// Yes, this doesn't exist. It's output by filters
|
||||
// And the format isn't even standard http/html
|
||||
// FIXME
|
||||
// Specific to Recoll filters.
|
||||
decode_entities(content);
|
||||
struct tm tm;
|
||||
if (strptime(content.c_str(),
|
||||
@ -376,10 +374,22 @@ MyHtmlParser::opening_tag(const string &tag)
|
||||
}
|
||||
} else if (name == "robots") {
|
||||
} else {
|
||||
string markup;
|
||||
bool ishtml = false;
|
||||
if (get_parameter("markup", markup)) {
|
||||
if (!stringlowercmp("html", markup)) {
|
||||
ishtml = true;
|
||||
}
|
||||
}
|
||||
if (!meta[name].empty())
|
||||
meta[name] += ' ';
|
||||
decode_entities(content);
|
||||
meta[name] += content;
|
||||
if (ishtml &&
|
||||
meta[name].compare(0, cstr_fldhtm.size(),
|
||||
cstr_fldhtm)) {
|
||||
meta[name].insert(0, cstr_fldhtm);
|
||||
}
|
||||
}
|
||||
}
|
||||
string hdr;
|
||||
@ -417,8 +427,7 @@ MyHtmlParser::opening_tag(const string &tag)
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (tag == "marquee" || tag == "menu" || tag == "multicol")
|
||||
} else if (tag == "marquee" || tag == "menu" || tag == "multicol")
|
||||
pending_space = true;
|
||||
break;
|
||||
case 'o':
|
||||
@ -441,12 +450,11 @@ MyHtmlParser::opening_tag(const string &tag)
|
||||
if (tag == "style") {
|
||||
in_style_tag = true;
|
||||
break;
|
||||
}
|
||||
if (tag == "script") {
|
||||
} else if (tag == "script") {
|
||||
in_script_tag = true;
|
||||
break;
|
||||
}
|
||||
if (tag == "select") pending_space = true;
|
||||
} else if (tag == "select")
|
||||
pending_space = true;
|
||||
break;
|
||||
case 't':
|
||||
if (tag == "table" || tag == "td" || tag == "textarea" ||
|
||||
|
||||
@ -110,6 +110,14 @@ void ResListPager::resultPageNext()
|
||||
m_resultsInCurrentPage = pagelen;
|
||||
m_respage = npage;
|
||||
}
|
||||
// Prepare a metadata field value for insertion into the HTML result list.
// A value that begins with the cstr_fldhtm marker is returned with the
// marker removed and without escaping; any other value is passed through
// escapeHtml().
static string maybeEscapeHtml(const string& fld)
{
    const bool markedAsHtml =
        fld.compare(0, cstr_fldhtm.size(), cstr_fldhtm) == 0;
    if (markedAsHtml) {
        // Drop the leading marker and keep the remaining text untouched.
        return fld.substr(cstr_fldhtm.size());
    }
    return escapeHtml(fld);
}
|
||||
|
||||
|
||||
void ResListPager::resultPageFor(int docnum)
|
||||
{
|
||||
@ -263,21 +271,21 @@ void ResListPager::displayDoc(RclConfig *config, int i, Rcl::Doc& doc,
|
||||
subs["I"] = iconurl;
|
||||
subs["i"] = doc.ipath;
|
||||
subs["K"] = !doc.meta[Rcl::Doc::keykw].empty() ?
|
||||
string("[") + escapeHtml(doc.meta[Rcl::Doc::keykw]) + "]" : "";
|
||||
string("[") + maybeEscapeHtml(doc.meta[Rcl::Doc::keykw]) + "]" : "";
|
||||
subs["L"] = linksbuf.str();
|
||||
subs["N"] = numbuf;
|
||||
subs["M"] = doc.mimetype;
|
||||
subs["R"] = doc.meta[Rcl::Doc::keyrr];
|
||||
subs["S"] = sizebuf;
|
||||
subs["T"] = escapeHtml(titleOrFilename);
|
||||
subs["t"] = escapeHtml(doc.meta[Rcl::Doc::keytt]);
|
||||
subs["T"] = maybeEscapeHtml(titleOrFilename);
|
||||
subs["t"] = maybeEscapeHtml(doc.meta[Rcl::Doc::keytt]);
|
||||
subs["U"] = url;
|
||||
|
||||
// Let %(xx) access all metadata. HTML-neuter everything, except fields carrying the HTML marker, which are output with only the marker removed:
|
||||
for (map<string,string>::iterator it = doc.meta.begin();
|
||||
it != doc.meta.end(); it++) {
|
||||
if (!it->first.empty())
|
||||
subs[it->first] = escapeHtml(it->second);
|
||||
subs[it->first] = maybeEscapeHtml(it->second);
|
||||
}
|
||||
|
||||
string formatted;
|
||||
|
||||
@ -84,6 +84,22 @@
|
||||
<h2>News</h2>
|
||||
<div class="news">
|
||||
<ul>
|
||||
<li>2012-10-25: a problem with a simple workaround has caused
|
||||
several reported <span class="important">recollindex
|
||||
crashes</span> recently. If you store and index
|
||||
Mozilla/Thunderbird email out of the standard location
|
||||
(~/.thunderbird), you should add the following at the end of
|
||||
your configuration file (e.g.:
|
||||
~/.recoll/recoll.conf): <pre><tt>
|
||||
[/path/to/my/mozilla/mail]
|
||||
mhmboxquirks = tbird
|
||||
</tt></pre> Adjust the path to your local value of course...
|
||||
Without this hint, recollindex has trouble finding the
|
||||
message delimiters inside the folder files, and will
|
||||
possibly use all the computer's memory and crash. Apart from
|
||||
crashes, which only occur for very big folders, this also
|
||||
causes incorrect mail indexing.
|
||||
</li>
|
||||
<li>2012-10-19: the source for <a href="recoll-1.18.001.tar.gz">
|
||||
recoll 1.18.001</a> is available, and this is a call to
|
||||
volunteers to test it. There are binary
|
||||
|
||||
@ -100,6 +100,25 @@
|
||||
<h2>Nouvelles: </h2>
|
||||
<ul>
|
||||
|
||||
<li>2012-10-25: Un problème avec une solution simple peut provoquer
|
||||
des <span class="important">plantages de
|
||||
recollindex</span>.
|
||||
Si vous indexez des messages mail Mozilla/Thunderbird
|
||||
ailleurs qu'à l'endroit standard (~/.thunderbird), vous
|
||||
devriez ajouter les lignes qui suivent à la fin de votre
|
||||
fichier de configuration (~/.recoll/recoll.conf):
|
||||
<pre><tt>
|
||||
[/path/to/my/mozilla/mail]
|
||||
mhmboxquirks = tbird
|
||||
</tt></pre> Changez le chemin d'accès pour le vôtre bien
|
||||
sûr. Sans cette indication, recollindex a des difficultés à
|
||||
déterminer les limites de message dans les fichiers mailbox,
|
||||
et peut arriver à utiliser toute la mémoire de la machine,
|
||||
et à se planter. Dans les cas moins graves (avec des
|
||||
fichiers de taille "raisonnable"), cela provoque aussi une
|
||||
indexation incorrecte des messages.
|
||||
</li>
|
||||
|
||||
<li>2012-10-16: <a href="filters/filters.html">un nouveau filtre
|
||||
pour les documents EPUB</a>.</li>
|
||||
<li>2012-05-24: Sortie de la
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user