html/xml meta: avoid appending a value that is already present in the string

This commit is contained in:
Jean-Francois Dockes 2020-01-30 08:37:46 +01:00
parent 552510db06
commit e5af1651fa
3 changed files with 454 additions and 440 deletions

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2005 J.F.Dockes /* Copyright (C) 2005 J.F.Dockes
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or * the Free Software Foundation; either version 2 of the License, or
@ -264,8 +264,12 @@ bool MimeHandlerExecMultiple::next_document()
string nm = stringtolower((const string&)name); string nm = stringtolower((const string&)name);
trimstring(nm, ":"); trimstring(nm, ":");
LOGDEB("MHExecMultiple: got [" << nm << "] -> [" << data << "]\n"); LOGDEB("MHExecMultiple: got [" << nm << "] -> [" << data << "]\n");
auto it = m_metaData.find(nm);
if (it == m_metaData.end() ||
it->second.find(data) == std::string::npos) {
m_metaData[nm] += data; m_metaData[nm] += data;
} }
}
if (loop == 200) { if (loop == 200) {
// ?? // ??
LOGERR("MHExecMultiple: handler sent more than 200 attributes\n"); LOGERR("MHExecMultiple: handler sent more than 200 attributes\n");
@ -339,4 +343,3 @@ bool MimeHandlerExecMultiple::next_document()
LOGDEB2("MHExecMultiple: metadata: \n" << metadataAsString()); LOGDEB2("MHExecMultiple: metadata: \n" << metadataAsString());
return true; return true;
} }

View File

@ -171,10 +171,10 @@ bool MimeHandlerHtml::next_document()
m_metaData[cstr_dj_keymd] = result.dmtime; m_metaData[cstr_dj_keymd] = result.dmtime;
m_metaData[cstr_dj_keymt] = cstr_textplain; m_metaData[cstr_dj_keymt] = cstr_textplain;
for (map<string,string>::const_iterator it = result.meta.begin(); for (const auto& entry : result.meta) {
it != result.meta.end(); it++) { if (!entry.second.empty()) {
if (!it->second.empty()) m_metaData[entry.first] = entry.second;
m_metaData[it->first] = it->second; }
} }
return true; return true;
} }

View File

@ -389,10 +389,19 @@ MyHtmlParser::opening_tag(const string &tag)
ishtml = true; ishtml = true;
} }
} }
if (!meta[name].empty())
meta[name] += ' ';
decode_entities(content); decode_entities(content);
meta[name] += content; // Set metadata field, avoid appending
// multiple identical instances.
auto it = meta.find(name);
if (it == meta.end() || it->second.find(content) ==
string::npos) {
if (it != meta.end()) {
it->second += ' ';
it->second += content;
} else {
meta[name] = content;
}
}
if (ishtml && if (ishtml &&
meta[name].compare(0, cstr_fldhtm.size(), meta[name].compare(0, cstr_fldhtm.size(),
cstr_fldhtm)) { cstr_fldhtm)) {
@ -412,7 +421,9 @@ MyHtmlParser::opening_tag(const string &tag)
charset = k->second; charset = k->second;
if (!charset.empty() && if (!charset.empty() &&
!samecharset(charset, fromcharset)) { !samecharset(charset, fromcharset)) {
LOGDEB1("Doc http-equiv charset '" << (charset) << "' differs from dir deflt '" << (fromcharset) << "'\n" ); LOGDEB1("Doc http-equiv charset '" << charset <<
"' differs from dir deflt '" <<
fromcharset << "'\n");
throw false; throw false;
} }
} }