Support explicit HTML markup in fields when the markup="html" attribute is present
This commit is contained in:
parent
e28a6186dc
commit
17f8b652d4
@ -56,7 +56,8 @@ DEF_CSTR(null, "");
|
|||||||
DEF_CSTR(plus, "+");
|
DEF_CSTR(plus, "+");
|
||||||
DEF_CSTR(textplain, "text/plain");
|
DEF_CSTR(textplain, "text/plain");
|
||||||
DEF_CSTR(url, "url");
|
DEF_CSTR(url, "url");
|
||||||
|
// Marker prepended to a metadata field value to flag it as holding HTML markup rather than plain text
|
||||||
|
DEF_CSTR(fldhtm, "\007");
|
||||||
|
|
||||||
// Values used as keys inside Dijon::Filter::metaData[]. This structure is
|
// Values used as keys inside Dijon::Filter::metaData[]. This structure is
|
||||||
// used to store all data generated by format-translating filters. It is
|
// used to store all data generated by format-translating filters. It is
|
||||||
|
|||||||
@ -360,9 +360,7 @@ MyHtmlParser::opening_tag(const string &tag)
|
|||||||
if (get_parameter("name", name)) {
|
if (get_parameter("name", name)) {
|
||||||
lowercase_term(name);
|
lowercase_term(name);
|
||||||
if (name == "date") {
|
if (name == "date") {
|
||||||
// Yes this doesnt exist. It's output by filters
|
// Specific to Recoll filters.
|
||||||
// And the format isn't even standard http/html
|
|
||||||
// FIXME
|
|
||||||
decode_entities(content);
|
decode_entities(content);
|
||||||
struct tm tm;
|
struct tm tm;
|
||||||
if (strptime(content.c_str(),
|
if (strptime(content.c_str(),
|
||||||
@ -376,10 +374,22 @@ MyHtmlParser::opening_tag(const string &tag)
|
|||||||
}
|
}
|
||||||
} else if (name == "robots") {
|
} else if (name == "robots") {
|
||||||
} else {
|
} else {
|
||||||
|
string markup;
|
||||||
|
bool ishtml = false;
|
||||||
|
if (get_parameter("markup", markup)) {
|
||||||
|
if (!stringlowercmp("html", markup)) {
|
||||||
|
ishtml = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
if (!meta[name].empty())
|
if (!meta[name].empty())
|
||||||
meta[name] += ' ';
|
meta[name] += ' ';
|
||||||
decode_entities(content);
|
decode_entities(content);
|
||||||
meta[name] += content;
|
meta[name] += content;
|
||||||
|
if (ishtml &&
|
||||||
|
meta[name].compare(0, cstr_fldhtm.size(),
|
||||||
|
cstr_fldhtm)) {
|
||||||
|
meta[name].insert(0, cstr_fldhtm);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
string hdr;
|
string hdr;
|
||||||
@ -417,8 +427,7 @@ MyHtmlParser::opening_tag(const string &tag)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
} else if (tag == "marquee" || tag == "menu" || tag == "multicol")
|
||||||
if (tag == "marquee" || tag == "menu" || tag == "multicol")
|
|
||||||
pending_space = true;
|
pending_space = true;
|
||||||
break;
|
break;
|
||||||
case 'o':
|
case 'o':
|
||||||
@ -441,12 +450,11 @@ MyHtmlParser::opening_tag(const string &tag)
|
|||||||
if (tag == "style") {
|
if (tag == "style") {
|
||||||
in_style_tag = true;
|
in_style_tag = true;
|
||||||
break;
|
break;
|
||||||
}
|
} else if (tag == "script") {
|
||||||
if (tag == "script") {
|
|
||||||
in_script_tag = true;
|
in_script_tag = true;
|
||||||
break;
|
break;
|
||||||
}
|
} else if (tag == "select")
|
||||||
if (tag == "select") pending_space = true;
|
pending_space = true;
|
||||||
break;
|
break;
|
||||||
case 't':
|
case 't':
|
||||||
if (tag == "table" || tag == "td" || tag == "textarea" ||
|
if (tag == "table" || tag == "td" || tag == "textarea" ||
|
||||||
|
|||||||
@ -110,6 +110,14 @@ void ResListPager::resultPageNext()
|
|||||||
m_resultsInCurrentPage = pagelen;
|
m_resultsInCurrentPage = pagelen;
|
||||||
m_respage = npage;
|
m_respage = npage;
|
||||||
}
|
}
|
||||||
|
// Prepare a metadata field value for insertion into the result list HTML.
// A value which begins with the cstr_fldhtm marker is returned with the
// marker removed and the rest left untouched. Any other value is returned
// after going through escapeHtml().
static string maybeEscapeHtml(const string& fld)
{
    const string::size_type markerlen = cstr_fldhtm.size();
    const bool hasmarker = fld.compare(0, markerlen, cstr_fldhtm) == 0;
    if (hasmarker) {
        return fld.substr(markerlen);
    }
    return escapeHtml(fld);
}
|
||||||
|
|
||||||
|
|
||||||
void ResListPager::resultPageFor(int docnum)
|
void ResListPager::resultPageFor(int docnum)
|
||||||
{
|
{
|
||||||
@ -263,21 +271,21 @@ void ResListPager::displayDoc(RclConfig *config, int i, Rcl::Doc& doc,
|
|||||||
subs["I"] = iconurl;
|
subs["I"] = iconurl;
|
||||||
subs["i"] = doc.ipath;
|
subs["i"] = doc.ipath;
|
||||||
subs["K"] = !doc.meta[Rcl::Doc::keykw].empty() ?
|
subs["K"] = !doc.meta[Rcl::Doc::keykw].empty() ?
|
||||||
string("[") + escapeHtml(doc.meta[Rcl::Doc::keykw]) + "]" : "";
|
string("[") + maybeEscapeHtml(doc.meta[Rcl::Doc::keykw]) + "]" : "";
|
||||||
subs["L"] = linksbuf.str();
|
subs["L"] = linksbuf.str();
|
||||||
subs["N"] = numbuf;
|
subs["N"] = numbuf;
|
||||||
subs["M"] = doc.mimetype;
|
subs["M"] = doc.mimetype;
|
||||||
subs["R"] = doc.meta[Rcl::Doc::keyrr];
|
subs["R"] = doc.meta[Rcl::Doc::keyrr];
|
||||||
subs["S"] = sizebuf;
|
subs["S"] = sizebuf;
|
||||||
subs["T"] = escapeHtml(titleOrFilename);
|
subs["T"] = maybeEscapeHtml(titleOrFilename);
|
||||||
subs["t"] = escapeHtml(doc.meta[Rcl::Doc::keytt]);
|
subs["t"] = maybeEscapeHtml(doc.meta[Rcl::Doc::keytt]);
|
||||||
subs["U"] = url;
|
subs["U"] = url;
|
||||||
|
|
||||||
// Let %(xx) access all metadata. HTML-neuter everything:
|
// Let %(xx) access all metadata. HTML-neuter everything:
|
||||||
for (map<string,string>::iterator it = doc.meta.begin();
|
for (map<string,string>::iterator it = doc.meta.begin();
|
||||||
it != doc.meta.end(); it++) {
|
it != doc.meta.end(); it++) {
|
||||||
if (!it->first.empty())
|
if (!it->first.empty())
|
||||||
subs[it->first] = escapeHtml(it->second);
|
subs[it->first] = maybeEscapeHtml(it->second);
|
||||||
}
|
}
|
||||||
|
|
||||||
string formatted;
|
string formatted;
|
||||||
|
|||||||
@ -84,6 +84,22 @@
|
|||||||
<h2>News</h2>
|
<h2>News</h2>
|
||||||
<div class="news">
|
<div class="news">
|
||||||
<ul>
|
<ul>
|
||||||
|
<li>2012-10-25: a problem with a simple workaround has caused
|
||||||
|
several reported <span class="important">recollindex
|
||||||
|
crashes</span> recently. If you store and index
|
||||||
|
Mozilla/Thunderbird email out of the standard location
|
||||||
|
(~/.thunderbird), you should add the following at the end of
|
||||||
|
your configuration file (e.g.
|
||||||
|
~/.recoll/recoll.conf): <pre><tt>
|
||||||
|
[/path/to/my/mozilla/mail]
|
||||||
|
mhmboxquirks = tbird
|
||||||
|
</tt></pre> Adjust the path to your local value of course...
|
||||||
|
Without this hint, recollindex has trouble finding the
|
||||||
|
message delimiters inside the folder files, and will
|
||||||
|
possibly use all the computer's memory and crash. Apart from
|
||||||
|
crashes, which only occur for very big folders, this also
|
||||||
|
causes incorrect mail indexing.
|
||||||
|
</li>
|
||||||
<li>2012-10-19: the source for <a href="recoll-1.18.001.tar.gz">
|
<li>2012-10-19: the source for <a href="recoll-1.18.001.tar.gz">
|
||||||
recoll 1.18.001</a> is available, and this is a call to
|
recoll 1.18.001</a> is available, and this is a call to
|
||||||
volunteers to test it. There are binary
|
volunteers to test it. There are binary
|
||||||
|
|||||||
@ -100,6 +100,25 @@
|
|||||||
<h2>Nouvelles: </h2>
|
<h2>Nouvelles: </h2>
|
||||||
<ul>
|
<ul>
|
||||||
|
|
||||||
|
<li>2012-10-25: Un problème avec une solution simple peut provoquer
|
||||||
|
des <span class="important">plantages de
|
||||||
|
recollindex</span>.
|
||||||
|
Si vous indexez des messages mail Mozilla/Thunderbird
|
||||||
|
ailleurs qu'à l'endroit standard (~/.thunderbird), vous
|
||||||
|
devriez ajouter les lignes qui suivent à la fin de votre
|
||||||
|
fichier de configuration (~/.recoll/recoll.conf):
|
||||||
|
<pre><tt>
|
||||||
|
[/path/to/my/mozilla/mail]
|
||||||
|
mhmboxquirks = tbird
|
||||||
|
</tt></pre> Changez le chemin d'accès pour le vôtre bien
|
||||||
|
sûr. Sans cette indication, recollindex a des difficultés à
|
||||||
|
déterminer les limites de message dans les fichiers mailbox,
|
||||||
|
et peut arriver à utiliser toute la mémoire de la machine,
|
||||||
|
et à se planter. Dans les cas moins graves (avec des
|
||||||
|
fichiers de taille "raisonnable"), cela provoque aussi une
|
||||||
|
indexation incorrecte des messages.
|
||||||
|
</li>
|
||||||
|
|
||||||
<li>2012-10-16: <a href="filters/filters.html">un nouveau filtre
|
<li>2012-10-16: <a href="filters/filters.html">un nouveau filtre
|
||||||
pour les documents EPUB</a>.</li>
|
pour les documents EPUB</a>.</li>
|
||||||
<li>2012-05-24: Sortie de la
|
<li>2012-05-24: Sortie de la
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user