html/xml meta: avoid appending a value that is already present in the string
This commit is contained in:
parent 552510db06
commit e5af1651fa
@@ -1,4 +1,4 @@
|
|||||||
/* Copyright (C) 2005 J.F.Dockes
|
/* Copyright (C) 2005 J.F.Dockes
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
* the Free Software Foundation; either version 2 of the License, or
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
@@ -38,10 +38,10 @@ bool MimeHandlerExecMultiple::startCmd()
|
|||||||
{
|
{
|
||||||
LOGDEB("MimeHandlerExecMultiple::startCmd\n");
|
LOGDEB("MimeHandlerExecMultiple::startCmd\n");
|
||||||
if (params.empty()) {
|
if (params.empty()) {
|
||||||
// Hu ho
|
// Hu ho
|
||||||
LOGERR("MHExecMultiple::startCmd: empty params\n");
|
LOGERR("MHExecMultiple::startCmd: empty params\n");
|
||||||
m_reason = "RECFILTERROR BADCONFIG";
|
m_reason = "RECFILTERROR BADCONFIG";
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Command name
|
// Command name
|
||||||
@@ -55,7 +55,7 @@ bool MimeHandlerExecMultiple::startCmd()
|
|||||||
|
|
||||||
m_cmd.putenv("RECOLL_CONFDIR", m_config->getConfDir());
|
m_cmd.putenv("RECOLL_CONFDIR", m_config->getConfDir());
|
||||||
m_cmd.putenv(m_forPreview ? "RECOLL_FILTER_FORPREVIEW=yes" :
|
m_cmd.putenv(m_forPreview ? "RECOLL_FILTER_FORPREVIEW=yes" :
|
||||||
"RECOLL_FILTER_FORPREVIEW=no");
|
"RECOLL_FILTER_FORPREVIEW=no");
|
||||||
|
|
||||||
m_cmd.setrlimit_as(m_filtermaxmbytes);
|
m_cmd.setrlimit_as(m_filtermaxmbytes);
|
||||||
m_adv.setmaxsecs(m_filtermaxseconds);
|
m_adv.setmaxsecs(m_filtermaxseconds);
|
||||||
@@ -156,11 +156,11 @@ bool MimeHandlerExecMultiple::next_document()
|
|||||||
{
|
{
|
||||||
LOGDEB("MimeHandlerExecMultiple::next_document(): [" << m_fn << "]\n");
|
LOGDEB("MimeHandlerExecMultiple::next_document(): [" << m_fn << "]\n");
|
||||||
if (m_havedoc == false)
|
if (m_havedoc == false)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (missingHelper) {
|
if (missingHelper) {
|
||||||
LOGDEB("MHExecMultiple::next_document(): helper known missing\n");
|
LOGDEB("MHExecMultiple::next_document(): helper known missing\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m_cmd.getChildPid() <= 0 && !startCmd()) {
|
if (m_cmd.getChildPid() <= 0 && !startCmd()) {
|
||||||
@@ -178,15 +178,15 @@ bool MimeHandlerExecMultiple::next_document()
|
|||||||
ostringstream obuf;
|
ostringstream obuf;
|
||||||
string file_md5;
|
string file_md5;
|
||||||
if (m_filefirst) {
|
if (m_filefirst) {
|
||||||
if (!m_forPreview && !m_nomd5) {
|
if (!m_forPreview && !m_nomd5) {
|
||||||
string md5, xmd5, reason;
|
string md5, xmd5, reason;
|
||||||
if (MD5File(m_fn, md5, &reason)) {
|
if (MD5File(m_fn, md5, &reason)) {
|
||||||
file_md5 = MD5HexPrint(md5, xmd5);
|
file_md5 = MD5HexPrint(md5, xmd5);
|
||||||
} else {
|
} else {
|
||||||
LOGERR("MimeHandlerExecM: cant compute md5 for [" << m_fn <<
|
LOGERR("MimeHandlerExecM: cant compute md5 for [" << m_fn <<
|
||||||
"]: " << reason << "\n");
|
"]: " << reason << "\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
obuf << "FileName: " << m_fn.length() << "\n" << m_fn;
|
obuf << "FileName: " << m_fn.length() << "\n" << m_fn;
|
||||||
// m_filefirst is set to true by set_document_file()
|
// m_filefirst is set to true by set_document_file()
|
||||||
m_filefirst = false;
|
m_filefirst = false;
|
||||||
@@ -194,13 +194,13 @@ bool MimeHandlerExecMultiple::next_document()
|
|||||||
obuf << "Filename: " << 0 << "\n";
|
obuf << "Filename: " << 0 << "\n";
|
||||||
}
|
}
|
||||||
if (!m_ipath.empty()) {
|
if (!m_ipath.empty()) {
|
||||||
LOGDEB("next_doc: sending ipath " << m_ipath.length() << " val [" <<
|
LOGDEB("next_doc: sending ipath " << m_ipath.length() << " val [" <<
|
||||||
m_ipath << "]\n");
|
m_ipath << "]\n");
|
||||||
obuf << "Ipath: " << m_ipath.length() << "\n" << m_ipath;
|
obuf << "Ipath: " << m_ipath.length() << "\n" << m_ipath;
|
||||||
}
|
}
|
||||||
if (!m_dfltInputCharset.empty()) {
|
if (!m_dfltInputCharset.empty()) {
|
||||||
obuf << "DflInCS: " << m_dfltInputCharset.length() << "\n"
|
obuf << "DflInCS: " << m_dfltInputCharset.length() << "\n"
|
||||||
<< m_dfltInputCharset;
|
<< m_dfltInputCharset;
|
||||||
}
|
}
|
||||||
obuf << "Mimetype: " << m_mimeType.length() << "\n" << m_mimeType;
|
obuf << "Mimetype: " << m_mimeType.length() << "\n" << m_mimeType;
|
||||||
obuf << "\n";
|
obuf << "\n";
|
||||||
@@ -247,10 +247,10 @@ bool MimeHandlerExecMultiple::next_document()
|
|||||||
eofnow_received = true;
|
eofnow_received = true;
|
||||||
} else if (!stringlowercmp("fileerror:", name)) {
|
} else if (!stringlowercmp("fileerror:", name)) {
|
||||||
LOGDEB("MHExecMultiple: got FILEERROR\n");
|
LOGDEB("MHExecMultiple: got FILEERROR\n");
|
||||||
fileerror_received = true;
|
fileerror_received = true;
|
||||||
} else if (!stringlowercmp("subdocerror:", name)) {
|
} else if (!stringlowercmp("subdocerror:", name)) {
|
||||||
LOGDEB("MHExecMultiple: got SUBDOCERROR\n");
|
LOGDEB("MHExecMultiple: got SUBDOCERROR\n");
|
||||||
subdocerror_received = true;
|
subdocerror_received = true;
|
||||||
} else if (!stringlowercmp("ipath:", name)) {
|
} else if (!stringlowercmp("ipath:", name)) {
|
||||||
ipath = data;
|
ipath = data;
|
||||||
LOGDEB("MHExecMultiple: got ipath [" << data << "]\n");
|
LOGDEB("MHExecMultiple: got ipath [" << data << "]\n");
|
||||||
@@ -264,7 +264,11 @@ bool MimeHandlerExecMultiple::next_document()
|
|||||||
string nm = stringtolower((const string&)name);
|
string nm = stringtolower((const string&)name);
|
||||||
trimstring(nm, ":");
|
trimstring(nm, ":");
|
||||||
LOGDEB("MHExecMultiple: got [" << nm << "] -> [" << data << "]\n");
|
LOGDEB("MHExecMultiple: got [" << nm << "] -> [" << data << "]\n");
|
||||||
m_metaData[nm] += data;
|
auto it = m_metaData.find(nm);
|
||||||
|
if (it == m_metaData.end() ||
|
||||||
|
it->second.find(data) == std::string::npos) {
|
||||||
|
m_metaData[nm] += data;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (loop == 200) {
|
if (loop == 200) {
|
||||||
// ??
|
// ??
|
||||||
@@ -279,7 +283,7 @@ bool MimeHandlerExecMultiple::next_document()
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (subdocerror_received) {
|
if (subdocerror_received) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// It used to be that eof could be signalled just by an empty document, but
|
// It used to be that eof could be signalled just by an empty document, but
|
||||||
@@ -291,13 +295,13 @@ bool MimeHandlerExecMultiple::next_document()
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!ipath.empty()) {
|
if (!ipath.empty()) {
|
||||||
// If this has an ipath, it is an internal doc from a
|
// If this has an ipath, it is an internal doc from a
|
||||||
// multi-document file. In this case, either the filter
|
// multi-document file. In this case, either the filter
|
||||||
// supplies the mimetype, or the ipath MUST be a filename-like
|
// supplies the mimetype, or the ipath MUST be a filename-like
|
||||||
// string which we can use to compute a mime type
|
// string which we can use to compute a mime type
|
||||||
m_metaData[cstr_dj_keyipath] = ipath;
|
m_metaData[cstr_dj_keyipath] = ipath;
|
||||||
if (mtype.empty()) {
|
if (mtype.empty()) {
|
||||||
LOGDEB0("MHExecMultiple: no mime type from filter, using ipath "
|
LOGDEB0("MHExecMultiple: no mime type from filter, using ipath "
|
||||||
"for a guess\n");
|
"for a guess\n");
|
||||||
mtype = mimetype(ipath, 0, m_config, false);
|
mtype = mimetype(ipath, 0, m_config, false);
|
||||||
if (mtype.empty()) {
|
if (mtype.empty()) {
|
||||||
@@ -313,16 +317,16 @@ bool MimeHandlerExecMultiple::next_document()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
m_metaData[cstr_dj_keymt] = mtype;
|
m_metaData[cstr_dj_keymt] = mtype;
|
||||||
if (!m_forPreview) {
|
if (!m_forPreview) {
|
||||||
string md5, xmd5;
|
string md5, xmd5;
|
||||||
MD5String(m_metaData[cstr_dj_keycontent], md5);
|
MD5String(m_metaData[cstr_dj_keycontent], md5);
|
||||||
m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5);
|
m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// "Self" document.
|
// "Self" document.
|
||||||
m_metaData[cstr_dj_keymt] = mtype.empty() ? cstr_texthtml : mtype;
|
m_metaData[cstr_dj_keymt] = mtype.empty() ? cstr_texthtml : mtype;
|
||||||
m_metaData.erase(cstr_dj_keyipath);
|
m_metaData.erase(cstr_dj_keyipath);
|
||||||
if (!m_forPreview) {
|
if (!m_forPreview) {
|
||||||
m_metaData[cstr_dj_keymd5] = file_md5;
|
m_metaData[cstr_dj_keymd5] = file_md5;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -339,4 +343,3 @@ bool MimeHandlerExecMultiple::next_document()
|
|||||||
LOGDEB2("MHExecMultiple: metadata: \n" << metadataAsString());
|
LOGDEB2("MHExecMultiple: metadata: \n" << metadataAsString());
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -38,7 +38,7 @@ bool MimeHandlerHtml::set_document_file_impl(const string& mt, const string &fn)
|
|||||||
string reason;
|
string reason;
|
||||||
if (!file_to_string(fn, otext, &reason)) {
|
if (!file_to_string(fn, otext, &reason)) {
|
||||||
LOGERR("textHtmlToDoc: cant read: " << fn << ": " << reason << "\n");
|
LOGERR("textHtmlToDoc: cant read: " << fn << ": " << reason << "\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
m_filename = fn;
|
m_filename = fn;
|
||||||
return set_document_string(mt, otext);
|
return set_document_string(mt, otext);
|
||||||
@@ -51,10 +51,10 @@ bool MimeHandlerHtml::set_document_string_impl(const string& mt,
|
|||||||
m_havedoc = true;
|
m_havedoc = true;
|
||||||
|
|
||||||
if (!m_forPreview) {
|
if (!m_forPreview) {
|
||||||
// We want to compute the md5 now because we may modify m_html later
|
// We want to compute the md5 now because we may modify m_html later
|
||||||
string md5, xmd5;
|
string md5, xmd5;
|
||||||
MD5String(htext, md5);
|
MD5String(htext, md5);
|
||||||
m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5);
|
m_metaData[cstr_dj_keymd5] = MD5HexPrint(md5, xmd5);
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@@ -62,7 +62,7 @@ bool MimeHandlerHtml::set_document_string_impl(const string& mt,
|
|||||||
bool MimeHandlerHtml::next_document()
|
bool MimeHandlerHtml::next_document()
|
||||||
{
|
{
|
||||||
if (m_havedoc == false)
|
if (m_havedoc == false)
|
||||||
return false;
|
return false;
|
||||||
m_havedoc = false;
|
m_havedoc = false;
|
||||||
// If set_doc(fn), take note of file name.
|
// If set_doc(fn), take note of file name.
|
||||||
string fn = m_filename;
|
string fn = m_filename;
|
||||||
@@ -70,12 +70,12 @@ bool MimeHandlerHtml::next_document()
|
|||||||
|
|
||||||
string charset = m_dfltInputCharset;
|
string charset = m_dfltInputCharset;
|
||||||
LOGDEB("MHHtml::next_doc.: default supposed input charset: [" << charset
|
LOGDEB("MHHtml::next_doc.: default supposed input charset: [" << charset
|
||||||
<< "]\n");
|
<< "]\n");
|
||||||
// Override default input charset if someone took care to set one:
|
// Override default input charset if someone took care to set one:
|
||||||
map<string,string>::const_iterator it = m_metaData.find(cstr_dj_keycharset);
|
map<string,string>::const_iterator it = m_metaData.find(cstr_dj_keycharset);
|
||||||
if (it != m_metaData.end() && !it->second.empty()) {
|
if (it != m_metaData.end() && !it->second.empty()) {
|
||||||
charset = it->second;
|
charset = it->second;
|
||||||
LOGDEB("MHHtml: next_doc.: input charset from ext. metadata: [" <<
|
LOGDEB("MHHtml: next_doc.: input charset from ext. metadata: [" <<
|
||||||
charset << "]\n");
|
charset << "]\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -88,78 +88,78 @@ bool MimeHandlerHtml::next_document()
|
|||||||
|
|
||||||
MyHtmlParser result;
|
MyHtmlParser result;
|
||||||
for (int pass = 0; pass < 2; pass++) {
|
for (int pass = 0; pass < 2; pass++) {
|
||||||
string transcoded;
|
string transcoded;
|
||||||
LOGDEB("Html::mkDoc: pass " << pass << "\n");
|
LOGDEB("Html::mkDoc: pass " << pass << "\n");
|
||||||
MyHtmlParser p;
|
MyHtmlParser p;
|
||||||
|
|
||||||
// Try transcoding. If it fails, use original text.
|
// Try transcoding. If it fails, use original text.
|
||||||
int ecnt;
|
int ecnt;
|
||||||
if (!transcode(m_html, transcoded, charset, "UTF-8", &ecnt)) {
|
if (!transcode(m_html, transcoded, charset, "UTF-8", &ecnt)) {
|
||||||
LOGDEB("textHtmlToDoc: transcode failed from cs '" <<
|
LOGDEB("textHtmlToDoc: transcode failed from cs '" <<
|
||||||
charset << "' to UTF-8 for[" << (fn.empty()?"unknown":fn) <<
|
charset << "' to UTF-8 for[" << (fn.empty()?"unknown":fn) <<
|
||||||
"]");
|
"]");
|
||||||
transcoded = m_html;
|
transcoded = m_html;
|
||||||
// We don't know the charset, at all
|
// We don't know the charset, at all
|
||||||
p.reset_charsets();
|
p.reset_charsets();
|
||||||
charset.clear();
|
charset.clear();
|
||||||
} else {
|
} else {
|
||||||
if (ecnt) {
|
if (ecnt) {
|
||||||
if (pass == 0) {
|
if (pass == 0) {
|
||||||
LOGDEB("textHtmlToDoc: init transcode had " << ecnt <<
|
LOGDEB("textHtmlToDoc: init transcode had " << ecnt <<
|
||||||
" errors for ["<<(fn.empty()?"unknown":fn)<< "]\n");
|
" errors for ["<<(fn.empty()?"unknown":fn)<< "]\n");
|
||||||
} else {
|
} else {
|
||||||
LOGERR("textHtmlToDoc: final transcode had " << ecnt <<
|
LOGERR("textHtmlToDoc: final transcode had " << ecnt <<
|
||||||
" errors for ["<< (fn.empty()?"unknown":fn)<< "]\n");
|
" errors for ["<< (fn.empty()?"unknown":fn)<< "]\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// charset has the putative source charset, transcoded is now
|
// charset has the putative source charset, transcoded is now
|
||||||
// in utf-8
|
// in utf-8
|
||||||
p.set_charsets(charset, "utf-8");
|
p.set_charsets(charset, "utf-8");
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
p.parse_html(transcoded);
|
p.parse_html(transcoded);
|
||||||
// No exception: ok? But throw true to use the same
|
// No exception: ok? But throw true to use the same
|
||||||
// code path as if an exception had been thrown by parse_html
|
// code path as if an exception had been thrown by parse_html
|
||||||
throw true;
|
throw true;
|
||||||
break;
|
break;
|
||||||
} catch (bool diag) {
|
} catch (bool diag) {
|
||||||
result = p;
|
result = p;
|
||||||
if (diag == true) {
|
if (diag == true) {
|
||||||
// Parser throws true at end of text. ok
|
// Parser throws true at end of text. ok
|
||||||
|
|
||||||
if (m_forPreview) {
|
if (m_forPreview) {
|
||||||
// Save the html text
|
// Save the html text
|
||||||
m_html = transcoded;
|
m_html = transcoded;
|
||||||
// In many cases, we need to change the charset decl,
|
// In many cases, we need to change the charset decl,
|
||||||
// because the file was transcoded. It seems that just
|
// because the file was transcoded. It seems that just
|
||||||
// inserting one is enough (only the 1st one seems to
|
// inserting one is enough (only the 1st one seems to
|
||||||
// be used by browsers/qtextedit).
|
// be used by browsers/qtextedit).
|
||||||
string::size_type idx = m_html.find("<head>");
|
string::size_type idx = m_html.find("<head>");
|
||||||
if (idx == string::npos)
|
if (idx == string::npos)
|
||||||
idx = m_html.find("<HEAD>");
|
idx = m_html.find("<HEAD>");
|
||||||
if (idx != string::npos)
|
if (idx != string::npos)
|
||||||
m_html.replace(idx+6, 0,
|
m_html.replace(idx+6, 0,
|
||||||
"<meta http-equiv=\"content-type\" "
|
"<meta http-equiv=\"content-type\" "
|
||||||
"content=\"text/html; charset=utf-8\">");
|
"content=\"text/html; charset=utf-8\">");
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
LOGDEB("textHtmlToDoc: charset [" << charset << "] doc charset ["<<
|
LOGDEB("textHtmlToDoc: charset [" << charset << "] doc charset ["<<
|
||||||
result.get_charset() << "]\n");
|
result.get_charset() << "]\n");
|
||||||
if (!result.get_charset().empty() &&
|
if (!result.get_charset().empty() &&
|
||||||
!samecharset(result.get_charset(), result.fromcharset)) {
|
!samecharset(result.get_charset(), result.fromcharset)) {
|
||||||
LOGDEB("textHtmlToDoc: reparse for charsets\n");
|
LOGDEB("textHtmlToDoc: reparse for charsets\n");
|
||||||
// Set the origin charset as specified in document before
|
// Set the origin charset as specified in document before
|
||||||
// transcoding again
|
// transcoding again
|
||||||
charset = result.get_charset();
|
charset = result.get_charset();
|
||||||
} else {
|
} else {
|
||||||
LOGERR("textHtmlToDoc:: error: non charset exception\n");
|
LOGERR("textHtmlToDoc:: error: non charset exception\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
m_metaData[cstr_dj_keyorigcharset] = result.get_charset();
|
m_metaData[cstr_dj_keyorigcharset] = result.get_charset();
|
||||||
@@ -168,13 +168,13 @@ bool MimeHandlerHtml::next_document()
|
|||||||
// Avoid setting empty values which would crush ones possibly inherited
|
// Avoid setting empty values which would crush ones possibly inherited
|
||||||
// from parent (if we're an attachment)
|
// from parent (if we're an attachment)
|
||||||
if (!result.dmtime.empty())
|
if (!result.dmtime.empty())
|
||||||
m_metaData[cstr_dj_keymd] = result.dmtime;
|
m_metaData[cstr_dj_keymd] = result.dmtime;
|
||||||
m_metaData[cstr_dj_keymt] = cstr_textplain;
|
m_metaData[cstr_dj_keymt] = cstr_textplain;
|
||||||
|
|
||||||
for (map<string,string>::const_iterator it = result.meta.begin();
|
for (const auto& entry : result.meta) {
|
||||||
it != result.meta.end(); it++) {
|
if (!entry.second.empty()) {
|
||||||
if (!it->second.empty())
|
m_metaData[entry.first] = entry.second;
|
||||||
m_metaData[it->first] = it->second;
|
}
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -161,19 +161,19 @@ map<string, string> my_named_ents;
|
|||||||
class NamedEntsInitializer {
|
class NamedEntsInitializer {
|
||||||
public:
|
public:
|
||||||
NamedEntsInitializer()
|
NamedEntsInitializer()
|
||||||
{
|
{
|
||||||
for (int i = 0;;) {
|
for (int i = 0;;) {
|
||||||
const char *ent;
|
const char *ent;
|
||||||
const char *val;
|
const char *val;
|
||||||
ent = epairs[i++];
|
ent = epairs[i++];
|
||||||
if (ent == 0)
|
if (ent == 0)
|
||||||
break;
|
break;
|
||||||
val = epairs[i++];
|
val = epairs[i++];
|
||||||
if (val == 0)
|
if (val == 0)
|
||||||
break;
|
break;
|
||||||
my_named_ents[string(ent)] = val;
|
my_named_ents[string(ent)] = val;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
static NamedEntsInitializer namedEntsInitializerInstance;
|
static NamedEntsInitializer namedEntsInitializerInstance;
|
||||||
|
|
||||||
@@ -198,58 +198,58 @@ void MyHtmlParser::decode_entities(string &s)
|
|||||||
// so don't do it. If charset known, caller has converted text to utf-8,
|
// so don't do it. If charset known, caller has converted text to utf-8,
|
||||||
// and this is also how we translate entities
|
// and this is also how we translate entities
|
||||||
// if (tocharset != "utf-8")
|
// if (tocharset != "utf-8")
|
||||||
// return;
|
// return;
|
||||||
|
|
||||||
// We need a const_iterator version of s.end() - otherwise the
|
// We need a const_iterator version of s.end() - otherwise the
|
||||||
// find() and find_if() templates don't work...
|
// find() and find_if() templates don't work...
|
||||||
string::const_iterator amp = s.begin(), s_end = s.end();
|
string::const_iterator amp = s.begin(), s_end = s.end();
|
||||||
while ((amp = find(amp, s_end, '&')) != s_end) {
|
while ((amp = find(amp, s_end, '&')) != s_end) {
|
||||||
unsigned int val = 0;
|
unsigned int val = 0;
|
||||||
string::const_iterator end, p = amp + 1;
|
string::const_iterator end, p = amp + 1;
|
||||||
string subs;
|
string subs;
|
||||||
if (p != s_end && *p == '#') {
|
if (p != s_end && *p == '#') {
|
||||||
p++;
|
p++;
|
||||||
if (p != s_end && (*p == 'x' || *p == 'X')) {
|
if (p != s_end && (*p == 'x' || *p == 'X')) {
|
||||||
// hex
|
// hex
|
||||||
p++;
|
p++;
|
||||||
end = find_if(p, s_end, p_notxdigit);
|
end = find_if(p, s_end, p_notxdigit);
|
||||||
sscanf(s.substr(p - s.begin(), end - p).c_str(), "%x", &val);
|
sscanf(s.substr(p - s.begin(), end - p).c_str(), "%x", &val);
|
||||||
} else {
|
} else {
|
||||||
// number
|
// number
|
||||||
end = find_if(p, s_end, p_notdigit);
|
end = find_if(p, s_end, p_notdigit);
|
||||||
val = atoi(s.substr(p - s.begin(), end - p).c_str());
|
val = atoi(s.substr(p - s.begin(), end - p).c_str());
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
end = find_if(p, s_end, p_notalnum);
|
end = find_if(p, s_end, p_notalnum);
|
||||||
string code = s.substr(p - s.begin(), end - p);
|
string code = s.substr(p - s.begin(), end - p);
|
||||||
map<string, string>::const_iterator i;
|
map<string, string>::const_iterator i;
|
||||||
i = my_named_ents.find(code);
|
i = my_named_ents.find(code);
|
||||||
if (i != my_named_ents.end())
|
if (i != my_named_ents.end())
|
||||||
subs = i->second;
|
subs = i->second;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (end < s_end && *end == ';')
|
if (end < s_end && *end == ';')
|
||||||
end++;
|
end++;
|
||||||
|
|
||||||
if (val) {
|
if (val) {
|
||||||
// The code is the code position for a unicode char. We need
|
// The code is the code position for a unicode char. We need
|
||||||
// to translate it to an utf-8 string.
|
// to translate it to an utf-8 string.
|
||||||
string utf16be;
|
string utf16be;
|
||||||
utf16be += char(val / 256);
|
utf16be += char(val / 256);
|
||||||
utf16be += char(val % 256);
|
utf16be += char(val % 256);
|
||||||
transcode(utf16be, subs, "UTF-16BE", "UTF-8");
|
transcode(utf16be, subs, "UTF-16BE", "UTF-8");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (subs.length() > 0) {
|
if (subs.length() > 0) {
|
||||||
string::size_type amp_pos = amp - s.begin();
|
string::size_type amp_pos = amp - s.begin();
|
||||||
s.replace(amp_pos, end - amp, subs);
|
s.replace(amp_pos, end - amp, subs);
|
||||||
s_end = s.end();
|
s_end = s.end();
|
||||||
// We've modified the string, so the iterators are no longer
|
// We've modified the string, so the iterators are no longer
|
||||||
// valid...
|
// valid...
|
||||||
amp = s.begin() + amp_pos + subs.length();
|
amp = s.begin() + amp_pos + subs.length();
|
||||||
} else {
|
} else {
|
||||||
amp = end;
|
amp = end;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -265,35 +265,35 @@ MyHtmlParser::process_text(const string &text)
|
|||||||
CancelCheck::instance().checkCancel();
|
CancelCheck::instance().checkCancel();
|
||||||
|
|
||||||
if (!in_script_tag && !in_style_tag) {
|
if (!in_script_tag && !in_style_tag) {
|
||||||
if (in_title_tag) {
|
if (in_title_tag) {
|
||||||
titledump += text;
|
titledump += text;
|
||||||
} else if (!in_pre_tag) {
|
} else if (!in_pre_tag) {
|
||||||
string::size_type b = 0;
|
string::size_type b = 0;
|
||||||
bool only_space = true;
|
bool only_space = true;
|
||||||
while ((b = text.find_first_not_of(WHITESPACE, b)) != string::npos) {
|
while ((b = text.find_first_not_of(WHITESPACE, b)) != string::npos) {
|
||||||
only_space = false;
|
only_space = false;
|
||||||
// If space specifically needed or chunk begins with
|
// If space specifically needed or chunk begins with
|
||||||
// whitespace, add exactly one space
|
// whitespace, add exactly one space
|
||||||
if (pending_space || b != 0) {
|
if (pending_space || b != 0) {
|
||||||
dump += ' ';
|
dump += ' ';
|
||||||
}
|
}
|
||||||
pending_space = true;
|
pending_space = true;
|
||||||
string::size_type e = text.find_first_of(WHITESPACE, b);
|
string::size_type e = text.find_first_of(WHITESPACE, b);
|
||||||
if (e == string::npos) {
|
if (e == string::npos) {
|
||||||
dump += text.substr(b);
|
dump += text.substr(b);
|
||||||
pending_space = false;
|
pending_space = false;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
dump += text.substr(b, e - b);
|
dump += text.substr(b, e - b);
|
||||||
b = e + 1;
|
b = e + 1;
|
||||||
}
|
}
|
||||||
if (only_space)
|
if (only_space)
|
||||||
pending_space = true;
|
pending_space = true;
|
||||||
} else {
|
} else {
|
||||||
if (pending_space)
|
if (pending_space)
|
||||||
dump += ' ';
|
dump += ' ';
|
||||||
dump += text;
|
dump += text;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -305,175 +305,186 @@ MyHtmlParser::opening_tag(const string &tag)
|
|||||||
cout << "TAG: " << tag << ": " << endl;
|
cout << "TAG: " << tag << ": " << endl;
|
||||||
map<string, string>::const_iterator x;
|
map<string, string>::const_iterator x;
|
||||||
for (x = p.begin(); x != p.end(); x++) {
|
for (x = p.begin(); x != p.end(); x++) {
|
||||||
cout << " " << x->first << " -> '" << x->second << "'" << endl;
|
cout << " " << x->first << " -> '" << x->second << "'" << endl;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
if (tag.empty()) return true;
|
if (tag.empty()) return true;
|
||||||
switch (tag[0]) {
|
switch (tag[0]) {
|
||||||
case 'a':
|
case 'a':
|
||||||
if (tag == "address") pending_space = true;
|
if (tag == "address") pending_space = true;
|
||||||
break;
|
break;
|
||||||
case 'b':
|
case 'b':
|
||||||
// body: some bad docs have several opening body tags and
|
// body: some bad docs have several opening body tags and
|
||||||
// even text before the body is displayed by Opera and
|
// even text before the body is displayed by Opera and
|
||||||
// Firefox. We used to reset the dump each time we saw a
|
// Firefox. We used to reset the dump each time we saw a
|
||||||
// body tag, but I can't see any reason to do so.
|
// body tag, but I can't see any reason to do so.
|
||||||
|
|
||||||
if (tag == "blockquote" || tag == "br") {
|
if (tag == "blockquote" || tag == "br") {
|
||||||
dump += '\n';
|
dump += '\n';
|
||||||
pending_space = true;
|
pending_space = true;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 'c':
|
case 'c':
|
||||||
if (tag == "center") pending_space = true;
|
if (tag == "center") pending_space = true;
|
||||||
break;
|
break;
|
||||||
case 'd':
|
case 'd':
|
||||||
if (tag == "dd" || tag == "dir" || tag == "div" || tag == "dl" ||
|
if (tag == "dd" || tag == "dir" || tag == "div" || tag == "dl" ||
|
||||||
tag == "dt") pending_space = true;
|
tag == "dt") pending_space = true;
|
||||||
if (tag == "dt")
|
if (tag == "dt")
|
||||||
dump += '\n';
|
dump += '\n';
|
||||||
break;
|
break;
|
||||||
case 'e':
|
case 'e':
|
||||||
if (tag == "embed") pending_space = true;
|
if (tag == "embed") pending_space = true;
|
||||||
break;
|
break;
|
||||||
case 'f':
|
case 'f':
|
||||||
if (tag == "fieldset" || tag == "form") pending_space = true;
|
if (tag == "fieldset" || tag == "form") pending_space = true;
|
||||||
break;
|
break;
|
||||||
case 'h':
|
case 'h':
|
||||||
// hr, and h1, ..., h6
|
// hr, and h1, ..., h6
|
||||||
if (tag.length() == 2 && strchr("r123456", tag[1])) {
|
if (tag.length() == 2 && strchr("r123456", tag[1])) {
|
||||||
dump += '\n';
|
dump += '\n';
|
||||||
pending_space = true;
|
pending_space = true;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 'i':
|
case 'i':
|
||||||
if (tag == "iframe" || tag == "img" || tag == "isindex" ||
|
if (tag == "iframe" || tag == "img" || tag == "isindex" ||
|
||||||
tag == "input") pending_space = true;
|
tag == "input") pending_space = true;
|
||||||
break;
|
break;
|
||||||
case 'k':
|
case 'k':
|
||||||
if (tag == "keygen") pending_space = true;
|
if (tag == "keygen") pending_space = true;
|
||||||
break;
|
break;
|
||||||
case 'l':
|
case 'l':
|
||||||
if (tag == "legend" || tag == "li" || tag == "listing") {
|
if (tag == "legend" || tag == "li" || tag == "listing") {
|
||||||
dump += '\n';
|
dump += '\n';
|
||||||
pending_space = true;
|
pending_space = true;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 'm':
|
case 'm':
|
||||||
if (tag == "meta") {
|
if (tag == "meta") {
|
||||||
string content;
|
string content;
|
||||||
if (get_parameter(cstr_html_content, content)) {
|
if (get_parameter(cstr_html_content, content)) {
|
||||||
string name;
|
string name;
|
||||||
if (get_parameter("name", name)) {
|
if (get_parameter("name", name)) {
|
||||||
lowercase_term(name);
|
lowercase_term(name);
|
||||||
if (name == "date") {
|
if (name == "date") {
|
||||||
// Specific to Recoll filters.
|
// Specific to Recoll filters.
|
||||||
decode_entities(content);
|
decode_entities(content);
|
||||||
struct tm tm;
|
struct tm tm;
|
||||||
memset(&tm, 0, sizeof(tm));
|
memset(&tm, 0, sizeof(tm));
|
||||||
if (strptime(content.c_str(),
|
if (strptime(content.c_str(),
|
||||||
" %Y-%m-%d %H:%M:%S ", &tm) ||
|
" %Y-%m-%d %H:%M:%S ", &tm) ||
|
||||||
strptime(content.c_str(),
|
strptime(content.c_str(),
|
||||||
"%Y-%m-%dT%H:%M:%S", &tm)
|
"%Y-%m-%dT%H:%M:%S", &tm)
|
||||||
) {
|
) {
|
||||||
char ascuxtime[100];
|
char ascuxtime[100];
|
||||||
sprintf(ascuxtime, "%ld", (long)mktime(&tm));
|
sprintf(ascuxtime, "%ld", (long)mktime(&tm));
|
||||||
dmtime = ascuxtime;
|
dmtime = ascuxtime;
|
||||||
}
|
}
|
||||||
} else if (name == "robots") {
|
} else if (name == "robots") {
|
||||||
} else {
|
} else {
|
||||||
string markup;
|
string markup;
|
||||||
bool ishtml = false;
|
bool ishtml = false;
|
||||||
if (get_parameter("markup", markup)) {
|
if (get_parameter("markup", markup)) {
|
||||||
if (!stringlowercmp("html", markup)) {
|
if (!stringlowercmp("html", markup)) {
|
||||||
ishtml = true;
|
ishtml = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!meta[name].empty())
|
decode_entities(content);
|
||||||
meta[name] += ' ';
|
// Set metadata field, avoid appending
|
||||||
decode_entities(content);
|
// multiple identical instances.
|
||||||
meta[name] += content;
|
auto it = meta.find(name);
|
||||||
if (ishtml &&
|
if (it == meta.end() || it->second.find(content) ==
|
||||||
meta[name].compare(0, cstr_fldhtm.size(),
|
string::npos) {
|
||||||
cstr_fldhtm)) {
|
if (it != meta.end()) {
|
||||||
meta[name].insert(0, cstr_fldhtm);
|
it->second += ' ';
|
||||||
}
|
it->second += content;
|
||||||
}
|
} else {
|
||||||
}
|
meta[name] = content;
|
||||||
string hdr;
|
}
|
||||||
if (get_parameter("http-equiv", hdr)) {
|
}
|
||||||
lowercase_term(hdr);
|
if (ishtml &&
|
||||||
if (hdr == "content-type") {
|
meta[name].compare(0, cstr_fldhtm.size(),
|
||||||
MimeHeaderValue p;
|
cstr_fldhtm)) {
|
||||||
parseMimeHeaderValue(content, p);
|
meta[name].insert(0, cstr_fldhtm);
|
||||||
map<string, string>::const_iterator k;
|
}
|
||||||
if ((k = p.params.find(cstr_html_charset)) !=
|
}
|
||||||
p.params.end()) {
|
}
|
||||||
charset = k->second;
|
string hdr;
|
||||||
if (!charset.empty() &&
|
if (get_parameter("http-equiv", hdr)) {
|
||||||
!samecharset(charset, fromcharset)) {
|
lowercase_term(hdr);
|
||||||
LOGDEB1("Doc http-equiv charset '" << (charset) << "' differs from dir deflt '" << (fromcharset) << "'\n" );
|
if (hdr == "content-type") {
|
||||||
throw false;
|
MimeHeaderValue p;
|
||||||
}
|
parseMimeHeaderValue(content, p);
|
||||||
}
|
map<string, string>::const_iterator k;
|
||||||
}
|
if ((k = p.params.find(cstr_html_charset)) !=
|
||||||
}
|
p.params.end()) {
|
||||||
}
|
charset = k->second;
|
||||||
string newcharset;
|
if (!charset.empty() &&
|
||||||
if (get_parameter(cstr_html_charset, newcharset)) {
|
!samecharset(charset, fromcharset)) {
|
||||||
// HTML5 added: <meta charset="...">
|
LOGDEB1("Doc http-equiv charset '" << charset <<
|
||||||
lowercase_term(newcharset);
|
"' differs from dir deflt '" <<
|
||||||
charset = newcharset;
|
fromcharset << "'\n");
|
||||||
if (!charset.empty() &&
|
throw false;
|
||||||
!samecharset(charset, fromcharset)) {
|
}
|
||||||
LOGDEB1("Doc html5 charset '" << (charset) << "' differs from dir deflt '" << (fromcharset) << "'\n" );
|
}
|
||||||
throw false;
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
string newcharset;
|
||||||
} else if (tag == "marquee" || tag == "menu" || tag == "multicol")
|
if (get_parameter(cstr_html_charset, newcharset)) {
|
||||||
pending_space = true;
|
// HTML5 added: <meta charset="...">
|
||||||
break;
|
lowercase_term(newcharset);
|
||||||
case 'o':
|
charset = newcharset;
|
||||||
if (tag == "ol" || tag == "option") pending_space = true;
|
if (!charset.empty() &&
|
||||||
break;
|
!samecharset(charset, fromcharset)) {
|
||||||
case 'p':
|
LOGDEB1("Doc html5 charset '" << (charset) << "' differs from dir deflt '" << (fromcharset) << "'\n" );
|
||||||
if (tag == "p" || tag == "plaintext") {
|
throw false;
|
||||||
dump += '\n';
|
}
|
||||||
pending_space = true;
|
}
|
||||||
} else if (tag == "pre") {
|
break;
|
||||||
in_pre_tag = true;
|
} else if (tag == "marquee" || tag == "menu" || tag == "multicol")
|
||||||
dump += '\n';
|
pending_space = true;
|
||||||
pending_space = true;
|
break;
|
||||||
}
|
case 'o':
|
||||||
break;
|
if (tag == "ol" || tag == "option") pending_space = true;
|
||||||
case 'q':
|
break;
|
||||||
if (tag == "q") pending_space = true;
|
case 'p':
|
||||||
break;
|
if (tag == "p" || tag == "plaintext") {
|
||||||
case 's':
|
dump += '\n';
|
||||||
if (tag == "style") {
|
pending_space = true;
|
||||||
in_style_tag = true;
|
} else if (tag == "pre") {
|
||||||
break;
|
in_pre_tag = true;
|
||||||
} else if (tag == "script") {
|
dump += '\n';
|
||||||
in_script_tag = true;
|
pending_space = true;
|
||||||
break;
|
}
|
||||||
} else if (tag == "select")
|
break;
|
||||||
pending_space = true;
|
case 'q':
|
||||||
break;
|
if (tag == "q") pending_space = true;
|
||||||
case 't':
|
break;
|
||||||
if (tag == "table" || tag == "td" || tag == "textarea" ||
|
case 's':
|
||||||
tag == "th") {
|
if (tag == "style") {
|
||||||
pending_space = true;
|
in_style_tag = true;
|
||||||
} else if (tag == "title") {
|
break;
|
||||||
in_title_tag = true;
|
} else if (tag == "script") {
|
||||||
}
|
in_script_tag = true;
|
||||||
break;
|
break;
|
||||||
case 'u':
|
} else if (tag == "select")
|
||||||
if (tag == "ul") pending_space = true;
|
pending_space = true;
|
||||||
break;
|
break;
|
||||||
case 'x':
|
case 't':
|
||||||
if (tag == "xmp") pending_space = true;
|
if (tag == "table" || tag == "td" || tag == "textarea" ||
|
||||||
break;
|
tag == "th") {
|
||||||
|
pending_space = true;
|
||||||
|
} else if (tag == "title") {
|
||||||
|
in_title_tag = true;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 'u':
|
||||||
|
if (tag == "ul") pending_space = true;
|
||||||
|
break;
|
||||||
|
case 'x':
|
||||||
|
if (tag == "xmp") pending_space = true;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -484,85 +495,85 @@ MyHtmlParser::closing_tag(const string &tag)
|
|||||||
LOGDEB2("closing_tag: [" << (tag) << "]\n" );
|
LOGDEB2("closing_tag: [" << (tag) << "]\n" );
|
||||||
if (tag.empty()) return true;
|
if (tag.empty()) return true;
|
||||||
switch (tag[0]) {
|
switch (tag[0]) {
|
||||||
case 'a':
|
case 'a':
|
||||||
if (tag == "address") pending_space = true;
|
if (tag == "address") pending_space = true;
|
||||||
break;
|
break;
|
||||||
case 'b':
|
case 'b':
|
||||||
// body: We used to signal and end of doc here by returning
|
// body: We used to signal and end of doc here by returning
|
||||||
// false but the browsers just ignore body and html
|
// false but the browsers just ignore body and html
|
||||||
// closing tags if there is further text, so it seems right
|
// closing tags if there is further text, so it seems right
|
||||||
// to do the same
|
// to do the same
|
||||||
|
|
||||||
if (tag == "blockquote" || tag == "br") pending_space = true;
|
if (tag == "blockquote" || tag == "br") pending_space = true;
|
||||||
break;
|
break;
|
||||||
case 'c':
|
case 'c':
|
||||||
if (tag == "center") pending_space = true;
|
if (tag == "center") pending_space = true;
|
||||||
break;
|
break;
|
||||||
case 'd':
|
case 'd':
|
||||||
if (tag == "dd" || tag == "dir" || tag == "div" || tag == "dl" ||
|
if (tag == "dd" || tag == "dir" || tag == "div" || tag == "dl" ||
|
||||||
tag == "dt") pending_space = true;
|
tag == "dt") pending_space = true;
|
||||||
break;
|
break;
|
||||||
case 'f':
|
case 'f':
|
||||||
if (tag == "fieldset" || tag == "form") pending_space = true;
|
if (tag == "fieldset" || tag == "form") pending_space = true;
|
||||||
break;
|
break;
|
||||||
case 'h':
|
case 'h':
|
||||||
// hr, and h1, ..., h6
|
// hr, and h1, ..., h6
|
||||||
if (tag.length() == 2 && strchr("r123456", tag[1]))
|
if (tag.length() == 2 && strchr("r123456", tag[1]))
|
||||||
pending_space = true;
|
pending_space = true;
|
||||||
break;
|
break;
|
||||||
case 'i':
|
case 'i':
|
||||||
if (tag == "iframe") pending_space = true;
|
if (tag == "iframe") pending_space = true;
|
||||||
break;
|
break;
|
||||||
case 'l':
|
case 'l':
|
||||||
if (tag == "legend" || tag == "li" || tag == "listing")
|
if (tag == "legend" || tag == "li" || tag == "listing")
|
||||||
pending_space = true;
|
pending_space = true;
|
||||||
break;
|
break;
|
||||||
case 'm':
|
case 'm':
|
||||||
if (tag == "marquee" || tag == "menu") pending_space = true;
|
if (tag == "marquee" || tag == "menu") pending_space = true;
|
||||||
break;
|
break;
|
||||||
case 'o':
|
case 'o':
|
||||||
if (tag == "ol" || tag == "option") pending_space = true;
|
if (tag == "ol" || tag == "option") pending_space = true;
|
||||||
break;
|
break;
|
||||||
case 'p':
|
case 'p':
|
||||||
if (tag == "p") {
|
if (tag == "p") {
|
||||||
pending_space = true;
|
pending_space = true;
|
||||||
} else if (tag == "pre") {
|
} else if (tag == "pre") {
|
||||||
pending_space = true;
|
pending_space = true;
|
||||||
in_pre_tag = false;
|
in_pre_tag = false;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 'q':
|
case 'q':
|
||||||
if (tag == "q") pending_space = true;
|
if (tag == "q") pending_space = true;
|
||||||
break;
|
break;
|
||||||
case 's':
|
case 's':
|
||||||
if (tag == "style") {
|
if (tag == "style") {
|
||||||
in_style_tag = false;
|
in_style_tag = false;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (tag == "script") {
|
if (tag == "script") {
|
||||||
in_script_tag = false;
|
in_script_tag = false;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (tag == "select") pending_space = true;
|
if (tag == "select") pending_space = true;
|
||||||
break;
|
break;
|
||||||
case 't':
|
case 't':
|
||||||
if (tag == "title") {
|
if (tag == "title") {
|
||||||
in_title_tag = false;
|
in_title_tag = false;
|
||||||
if (meta.find("title") == meta.end()|| meta["title"].empty()) {
|
if (meta.find("title") == meta.end()|| meta["title"].empty()) {
|
||||||
meta["title"] = titledump;
|
meta["title"] = titledump;
|
||||||
titledump.clear();
|
titledump.clear();
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (tag == "table" || tag == "td" || tag == "textarea" ||
|
if (tag == "table" || tag == "td" || tag == "textarea" ||
|
||||||
tag == "th") pending_space = true;
|
tag == "th") pending_space = true;
|
||||||
break;
|
break;
|
||||||
case 'u':
|
case 'u':
|
||||||
if (tag == "ul") pending_space = true;
|
if (tag == "ul") pending_space = true;
|
||||||
break;
|
break;
|
||||||
case 'x':
|
case 'x':
|
||||||
if (tag == "xmp") pending_space = true;
|
if (tag == "xmp") pending_space = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user