diff --git a/src/internfile/mh_execm.cpp b/src/internfile/mh_execm.cpp index 397f759b..d15975b0 100644 --- a/src/internfile/mh_execm.cpp +++ b/src/internfile/mh_execm.cpp @@ -32,6 +32,7 @@ using namespace std; #include "rclconfig.h" #include "mimetype.h" #include "idfile.h" +#include "rclutil.h" #include #include "safesyswait.h" @@ -266,11 +267,7 @@ bool MimeHandlerExecMultiple::next_document() string nm = stringtolower((const string&)name); trimstring(nm, ":"); LOGDEB("MHExecMultiple: got [" << nm << "] -> [" << data << "]\n"); - auto it = m_metaData.find(nm); - if (it == m_metaData.end() || - it->second.find(data) == std::string::npos) { - m_metaData[nm] += data; - } + addmeta(m_metaData, nm, data); } if (loop == 200) { // ?? diff --git a/src/internfile/mh_html.cpp b/src/internfile/mh_html.cpp index c94d2d26..9e776ec3 100644 --- a/src/internfile/mh_html.cpp +++ b/src/internfile/mh_html.cpp @@ -73,7 +73,7 @@ bool MimeHandlerHtml::next_document() LOGDEB("MHHtml::next_doc.: default supposed input charset: [" << charset << "]\n"); // Override default input charset if someone took care to set one: - map::const_iterator it = m_metaData.find(cstr_dj_keycharset); + const auto it = m_metaData.find(cstr_dj_keycharset); if (it != m_metaData.end() && !it->second.empty()) { charset = it->second; LOGDEB("MHHtml: next_doc.: input charset from ext. 
metadata: [" << diff --git a/src/internfile/myhtmlparse.cpp b/src/internfile/myhtmlparse.cpp index 10f2ab13..db6a6edc 100644 --- a/src/internfile/myhtmlparse.cpp +++ b/src/internfile/myhtmlparse.cpp @@ -39,6 +39,7 @@ #include "cancelcheck.h" #include "log.h" #include "transcode.h" +#include "rclutil.h" static const string cstr_html_charset("charset"); static const string cstr_html_content("content"); @@ -193,7 +194,7 @@ MyHtmlParser::MyHtmlParser() void MyHtmlParser::decode_entities(string &s) { - LOGDEB2("MyHtmlParser::decode_entities\n" ); + LOGDEB2("MyHtmlParser::decode_entities\n"); // This has no meaning whatsoever if the character encoding is unknown, // so don't do it. If charset known, caller has converted text to utf-8, // and this is also how we translate entities @@ -261,7 +262,10 @@ void MyHtmlParser::decode_entities(string &s) void MyHtmlParser::process_text(const string &text) { - LOGDEB2("process_text: title " << (in_title_tag) << " script " << (in_script_tag) << " style " << (in_style_tag) << " pre " << (in_pre_tag) << " pending_space " << (pending_space) << " txt [" << (text) << "]\n" ); + LOGDEB2("process_text: title " << in_title_tag << " script " << + in_script_tag << " style " << in_style_tag << " pre " << + in_pre_tag << " pending_space " << pending_space << " txt [" << + text << "]\n"); CancelCheck::instance().checkCancel(); if (!in_script_tag && !in_style_tag) { @@ -300,7 +304,7 @@ MyHtmlParser::process_text(const string &text) bool MyHtmlParser::opening_tag(const string &tag) { - LOGDEB2("opening_tag: [" << (tag) << "]\n" ); + LOGDEB2("opening_tag: [" << tag << "]\n"); #if 0 cout << "TAG: " << tag << ": " << endl; map::const_iterator x; @@ -390,23 +394,12 @@ MyHtmlParser::opening_tag(const string &tag) } } decode_entities(content); - // Set metadata field, avoid appending - // multiple identical instances. 
- auto it = meta.find(name); - if (it == meta.end() || it->second.find(content) == - string::npos) { - if (it != meta.end()) { - it->second += ' '; - it->second += content; - } else { - meta[name] = content; - } - } if (ishtml && - meta[name].compare(0, cstr_fldhtm.size(), - cstr_fldhtm)) { - meta[name].insert(0, cstr_fldhtm); + content.compare(0, cstr_fldhtm.size(), + cstr_fldhtm)) { + content.insert(0, cstr_fldhtm); } + addmeta(meta, name, content); } } string hdr; @@ -437,7 +430,8 @@ MyHtmlParser::opening_tag(const string &tag) charset = newcharset; if (!charset.empty() && !samecharset(charset, fromcharset)) { - LOGDEB1("Doc html5 charset '" << (charset) << "' differs from dir deflt '" << (fromcharset) << "'\n" ); + LOGDEB1("Doc html5 charset '" << charset << + "' differs from dir deflt '"< #include "smallut.h" +#include "rclutil.h" namespace Rcl { @@ -191,17 +192,7 @@ public: // Create entry or append text to existing entry. bool addmeta(const std::string& nm, const std::string& value) { - auto mit = meta.find(nm); - if (mit == meta.end()) { - meta[nm] = value; - } else if (mit->second.empty()) { - mit->second = value; - } else { - // It may happen that the same attr exists several times - // in the internfile stack. Avoid duplicating values. - if (mit->second != value) - mit->second += std::string(" - ") + value; - } + ::addmeta(meta, nm, value); return true; } diff --git a/src/utils/rclutil.cpp b/src/utils/rclutil.cpp index a7d67666..00d5b247 100644 --- a/src/utils/rclutil.cpp +++ b/src/utils/rclutil.cpp @@ -66,6 +66,24 @@ template void map_ss_cp_noshr >( template void map_ss_cp_noshr >( unordered_map s, unordered_map*d); +// Add data to metadata field, store multiple values as CSV, avoid +// appending multiple identical instances. 
+template void addmeta( + T& store, const string& nm, const string& value) +{ + auto it = store.find(nm); + if (it == store.end() || it->second.empty()) { + store[nm] = value; + } else if (it->second.find(value) == string::npos) { + store[nm] += ','; + store[nm] += value; + } +} +template void addmeta>( + map&, const string&, const string&); +template void addmeta>( + unordered_map&, const string&, const string&); + #ifdef _WIN32 static bool path_hasdrive(const string& s) { diff --git a/src/utils/rclutil.h b/src/utils/rclutil.h index f1226575..a8236daa 100644 --- a/src/utils/rclutil.h +++ b/src/utils/rclutil.h @@ -115,5 +115,8 @@ extern bool thumbPathForUrl(const std::string& url, int size, // string data (to pass to other thread): template void map_ss_cp_noshr(T s, T *d); +// Set or extend metadata field. We store the data as CSV; a value already contained in the stored text is not added again. +template void addmeta(T& store, const std::string& nm, + const std::string& value); #endif /* _RCLUTIL_H_INCLUDED_ */