Use a common method when concatenating multiple values for a metadata element. Use a comma as the separator.

This commit is contained in:
Jean-Francois Dockes 2020-08-11 11:39:22 +02:00
parent fd0cf698a1
commit 13333e6512
6 changed files with 40 additions and 37 deletions

View File

@ -32,6 +32,7 @@ using namespace std;
#include "rclconfig.h"
#include "mimetype.h"
#include "idfile.h"
#include "rclutil.h"
#include <sys/types.h>
#include "safesyswait.h"
@ -266,11 +267,7 @@ bool MimeHandlerExecMultiple::next_document()
string nm = stringtolower((const string&)name);
trimstring(nm, ":");
LOGDEB("MHExecMultiple: got [" << nm << "] -> [" << data << "]\n");
auto it = m_metaData.find(nm);
if (it == m_metaData.end() ||
it->second.find(data) == std::string::npos) {
m_metaData[nm] += data;
}
addmeta(m_metaData, nm, data);
}
if (loop == 200) {
// ??

View File

@ -73,7 +73,7 @@ bool MimeHandlerHtml::next_document()
LOGDEB("MHHtml::next_doc.: default supposed input charset: [" << charset
<< "]\n");
// Override default input charset if someone took care to set one:
map<string,string>::const_iterator it = m_metaData.find(cstr_dj_keycharset);
const auto it = m_metaData.find(cstr_dj_keycharset);
if (it != m_metaData.end() && !it->second.empty()) {
charset = it->second;
LOGDEB("MHHtml: next_doc.: input charset from ext. metadata: [" <<

View File

@ -39,6 +39,7 @@
#include "cancelcheck.h"
#include "log.h"
#include "transcode.h"
#include "rclutil.h"
static const string cstr_html_charset("charset");
static const string cstr_html_content("content");
@ -193,7 +194,7 @@ MyHtmlParser::MyHtmlParser()
void MyHtmlParser::decode_entities(string &s)
{
LOGDEB2("MyHtmlParser::decode_entities\n" );
LOGDEB2("MyHtmlParser::decode_entities\n");
// This has no meaning whatsoever if the character encoding is unknown,
// so don't do it. If charset known, caller has converted text to utf-8,
// and this is also how we translate entities
@ -261,7 +262,10 @@ void MyHtmlParser::decode_entities(string &s)
void
MyHtmlParser::process_text(const string &text)
{
LOGDEB2("process_text: title " << (in_title_tag) << " script " << (in_script_tag) << " style " << (in_style_tag) << " pre " << (in_pre_tag) << " pending_space " << (pending_space) << " txt [" << (text) << "]\n" );
LOGDEB2("process_text: title " << in_title_tag << " script " <<
in_script_tag << " style " << in_style_tag << " pre " <<
in_pre_tag << " pending_space " << pending_space << " txt [" <<
text << "]\n");
CancelCheck::instance().checkCancel();
if (!in_script_tag && !in_style_tag) {
@ -300,7 +304,7 @@ MyHtmlParser::process_text(const string &text)
bool
MyHtmlParser::opening_tag(const string &tag)
{
LOGDEB2("opening_tag: [" << (tag) << "]\n" );
LOGDEB2("opening_tag: [" << tag << "]\n");
#if 0
cout << "TAG: " << tag << ": " << endl;
map<string, string>::const_iterator x;
@ -390,23 +394,12 @@ MyHtmlParser::opening_tag(const string &tag)
}
}
decode_entities(content);
// Set metadata field, avoid appending
// multiple identical instances.
auto it = meta.find(name);
if (it == meta.end() || it->second.find(content) ==
string::npos) {
if (it != meta.end()) {
it->second += ' ';
it->second += content;
} else {
meta[name] = content;
}
}
if (ishtml &&
meta[name].compare(0, cstr_fldhtm.size(),
cstr_fldhtm)) {
meta[name].insert(0, cstr_fldhtm);
content.compare(0, cstr_fldhtm.size(),
cstr_fldhtm)) {
content.insert(0, cstr_fldhtm);
}
addmeta(meta, name, content);
}
}
string hdr;
@ -437,7 +430,8 @@ MyHtmlParser::opening_tag(const string &tag)
charset = newcharset;
if (!charset.empty() &&
!samecharset(charset, fromcharset)) {
LOGDEB1("Doc html5 charset '" << (charset) << "' differs from dir deflt '" << (fromcharset) << "'\n" );
LOGDEB1("Doc html5 charset '" << charset <<
"' differs from dir deflt '"<<fromcharset <<"'\n");
throw false;
}
}
@ -492,7 +486,7 @@ MyHtmlParser::opening_tag(const string &tag)
bool
MyHtmlParser::closing_tag(const string &tag)
{
LOGDEB2("closing_tag: [" << (tag) << "]\n" );
LOGDEB2("closing_tag: [" << tag << "]\n");
if (tag.empty()) return true;
switch (tag[0]) {
case 'a':

View File

@ -22,6 +22,7 @@
#include <vector>
#include "smallut.h"
#include "rclutil.h"
namespace Rcl {
@ -191,17 +192,7 @@ public:
// Create entry or append text to existing entry.
bool addmeta(const std::string& nm, const std::string& value) {
    // Delegate to the shared ::addmeta() helper declared in rclutil.h so
    // that this metadata map concatenates multiple values the same way
    // as the other metadata stores: comma-separated, with values which
    // are already present skipped.
    // The older inline code joined values with " - " and ran before the
    // helper, which then found the value already in the field and did
    // nothing, so the common comma-separated format was never applied.
    ::addmeta(meta, nm, value);
    // This method has no failure path: it always reports success.
    return true;
}

View File

@ -66,6 +66,24 @@ template void map_ss_cp_noshr<map<string, string> >(
template void map_ss_cp_noshr<unordered_map<string, string> >(
unordered_map<string,string> s, unordered_map<string,string>*d);
// Add data to a metadata field. Multiple values for the same field are
// stored as a comma-separated list, and a value which is already in the
// list is not appended again.
//
// store: map-like container with string keys and values (must provide
//        find(), end() and operator[]).
// nm:    name of the metadata field.
// value: value to set or append.
//
// - If the field does not exist or is empty, it is set to value.
// - An empty value is never appended to a non-empty field.
// - Otherwise value is appended after a ',' unless it is already present
//   as a complete item, i.e. delimited by commas or by the field
//   boundaries. A plain substring search would wrongly drop a new value
//   which happens to be contained in a longer one ("art" in "cart").
template <class T> void addmeta(
    T& store, const std::string& nm, const std::string& value)
{
    auto it = store.find(nm);
    if (it == store.end()) {
        store[nm] = value;
        return;
    }

    // Work on the stored string directly: no further lookups needed.
    std::string& current = it->second;
    if (current.empty()) {
        current = value;
        return;
    }
    if (value.empty()) {
        // Nothing to add, and we don't want a dangling separator.
        return;
    }

    // Check every occurrence of value: it only counts as a duplicate if
    // it starts and ends on an item boundary. This also works for values
    // which themselves contain commas.
    for (std::string::size_type pos = current.find(value);
         pos != std::string::npos; pos = current.find(value, pos + 1)) {
        const std::string::size_type end = pos + value.size();
        const bool starts_item = (pos == 0) || (current[pos - 1] == ',');
        const bool ends_item =
            (end == current.size()) || (current[end] == ',');
        if (starts_item && ends_item) {
            return;
        }
    }

    current += ',';
    current += value;
}
// Explicit instantiations of addmeta() for map and unordered_map with
// string keys and values. The header only declares the template, so the
// code for these container types has to be generated here, next to the
// definition.
template void addmeta<map<string, string>>(
map<string, string>&, const string&, const string&);
template void addmeta<unordered_map<string, string>>(
unordered_map<string, string>&, const string&, const string&);
#ifdef _WIN32
static bool path_hasdrive(const string& s)
{

View File

@ -115,5 +115,8 @@ extern bool thumbPathForUrl(const std::string& url, int size,
// string data (to pass to other thread):
template <class T> void map_ss_cp_noshr(T s, T *d);
// Set or extend a metadata field in a map-like store with string keys
// and values. A missing or empty field is set to the value; otherwise
// the value is appended after a comma (the field data is stored as CSV),
// unless it is already present in the field.
// Only the declaration is here: the template is defined and explicitly
// instantiated for std::map and std::unordered_map of strings in the
// implementation file.
template <class T> void addmeta(T& store, const std::string& nm,
const std::string& value);
#endif /* _RCLUTIL_H_INCLUDED_ */