Use a common method when concatenating multiple values for a metadata element. Use a comma as the separator.
This commit is contained in:
parent
fd0cf698a1
commit
13333e6512
@ -32,6 +32,7 @@ using namespace std;
|
||||
#include "rclconfig.h"
|
||||
#include "mimetype.h"
|
||||
#include "idfile.h"
|
||||
#include "rclutil.h"
|
||||
|
||||
#include <sys/types.h>
|
||||
#include "safesyswait.h"
|
||||
@ -266,11 +267,7 @@ bool MimeHandlerExecMultiple::next_document()
|
||||
string nm = stringtolower((const string&)name);
|
||||
trimstring(nm, ":");
|
||||
LOGDEB("MHExecMultiple: got [" << nm << "] -> [" << data << "]\n");
|
||||
auto it = m_metaData.find(nm);
|
||||
if (it == m_metaData.end() ||
|
||||
it->second.find(data) == std::string::npos) {
|
||||
m_metaData[nm] += data;
|
||||
}
|
||||
addmeta(m_metaData, nm, data);
|
||||
}
|
||||
if (loop == 200) {
|
||||
// ??
|
||||
|
||||
@ -73,7 +73,7 @@ bool MimeHandlerHtml::next_document()
|
||||
LOGDEB("MHHtml::next_doc.: default supposed input charset: [" << charset
|
||||
<< "]\n");
|
||||
// Override default input charset if someone took care to set one:
|
||||
map<string,string>::const_iterator it = m_metaData.find(cstr_dj_keycharset);
|
||||
const auto it = m_metaData.find(cstr_dj_keycharset);
|
||||
if (it != m_metaData.end() && !it->second.empty()) {
|
||||
charset = it->second;
|
||||
LOGDEB("MHHtml: next_doc.: input charset from ext. metadata: [" <<
|
||||
|
||||
@ -39,6 +39,7 @@
|
||||
#include "cancelcheck.h"
|
||||
#include "log.h"
|
||||
#include "transcode.h"
|
||||
#include "rclutil.h"
|
||||
|
||||
static const string cstr_html_charset("charset");
|
||||
static const string cstr_html_content("content");
|
||||
@ -193,7 +194,7 @@ MyHtmlParser::MyHtmlParser()
|
||||
|
||||
void MyHtmlParser::decode_entities(string &s)
|
||||
{
|
||||
LOGDEB2("MyHtmlParser::decode_entities\n" );
|
||||
LOGDEB2("MyHtmlParser::decode_entities\n");
|
||||
// This has no meaning whatsoever if the character encoding is unknown,
|
||||
// so don't do it. If charset known, caller has converted text to utf-8,
|
||||
// and this is also how we translate entities
|
||||
@ -261,7 +262,10 @@ void MyHtmlParser::decode_entities(string &s)
|
||||
void
|
||||
MyHtmlParser::process_text(const string &text)
|
||||
{
|
||||
LOGDEB2("process_text: title " << (in_title_tag) << " script " << (in_script_tag) << " style " << (in_style_tag) << " pre " << (in_pre_tag) << " pending_space " << (pending_space) << " txt [" << (text) << "]\n" );
|
||||
LOGDEB2("process_text: title " << in_title_tag << " script " <<
|
||||
in_script_tag << " style " << in_style_tag << " pre " <<
|
||||
in_pre_tag << " pending_space " << pending_space << " txt [" <<
|
||||
text << "]\n");
|
||||
CancelCheck::instance().checkCancel();
|
||||
|
||||
if (!in_script_tag && !in_style_tag) {
|
||||
@ -300,7 +304,7 @@ MyHtmlParser::process_text(const string &text)
|
||||
bool
|
||||
MyHtmlParser::opening_tag(const string &tag)
|
||||
{
|
||||
LOGDEB2("opening_tag: [" << (tag) << "]\n" );
|
||||
LOGDEB2("opening_tag: [" << tag << "]\n");
|
||||
#if 0
|
||||
cout << "TAG: " << tag << ": " << endl;
|
||||
map<string, string>::const_iterator x;
|
||||
@ -390,23 +394,12 @@ MyHtmlParser::opening_tag(const string &tag)
|
||||
}
|
||||
}
|
||||
decode_entities(content);
|
||||
// Set metadata field, avoid appending
|
||||
// multiple identical instances.
|
||||
auto it = meta.find(name);
|
||||
if (it == meta.end() || it->second.find(content) ==
|
||||
string::npos) {
|
||||
if (it != meta.end()) {
|
||||
it->second += ' ';
|
||||
it->second += content;
|
||||
} else {
|
||||
meta[name] = content;
|
||||
}
|
||||
}
|
||||
if (ishtml &&
|
||||
meta[name].compare(0, cstr_fldhtm.size(),
|
||||
cstr_fldhtm)) {
|
||||
meta[name].insert(0, cstr_fldhtm);
|
||||
content.compare(0, cstr_fldhtm.size(),
|
||||
cstr_fldhtm)) {
|
||||
content.insert(0, cstr_fldhtm);
|
||||
}
|
||||
addmeta(meta, name, content);
|
||||
}
|
||||
}
|
||||
string hdr;
|
||||
@ -437,7 +430,8 @@ MyHtmlParser::opening_tag(const string &tag)
|
||||
charset = newcharset;
|
||||
if (!charset.empty() &&
|
||||
!samecharset(charset, fromcharset)) {
|
||||
LOGDEB1("Doc html5 charset '" << (charset) << "' differs from dir deflt '" << (fromcharset) << "'\n" );
|
||||
LOGDEB1("Doc html5 charset '" << charset <<
|
||||
"' differs from dir deflt '"<<fromcharset <<"'\n");
|
||||
throw false;
|
||||
}
|
||||
}
|
||||
@ -492,7 +486,7 @@ MyHtmlParser::opening_tag(const string &tag)
|
||||
bool
|
||||
MyHtmlParser::closing_tag(const string &tag)
|
||||
{
|
||||
LOGDEB2("closing_tag: [" << (tag) << "]\n" );
|
||||
LOGDEB2("closing_tag: [" << tag << "]\n");
|
||||
if (tag.empty()) return true;
|
||||
switch (tag[0]) {
|
||||
case 'a':
|
||||
|
||||
@ -22,6 +22,7 @@
|
||||
#include <vector>
|
||||
|
||||
#include "smallut.h"
|
||||
#include "rclutil.h"
|
||||
|
||||
namespace Rcl {
|
||||
|
||||
@ -191,17 +192,7 @@ public:
|
||||
|
||||
// Create entry or append text to existing entry.
|
||||
bool addmeta(const std::string& nm, const std::string& value) {
|
||||
auto mit = meta.find(nm);
|
||||
if (mit == meta.end()) {
|
||||
meta[nm] = value;
|
||||
} else if (mit->second.empty()) {
|
||||
mit->second = value;
|
||||
} else {
|
||||
// It may happen that the same attr exists several times
|
||||
// in the internfile stack. Avoid duplicating values.
|
||||
if (mit->second != value)
|
||||
mit->second += std::string(" - ") + value;
|
||||
}
|
||||
::addmeta(meta, nm, value);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@ -66,6 +66,24 @@ template void map_ss_cp_noshr<map<string, string> >(
|
||||
template void map_ss_cp_noshr<unordered_map<string, string> >(
|
||||
unordered_map<string,string> s, unordered_map<string,string>*d);
|
||||
|
||||
// Add a value to a metadata field. Multiple values for the same field are
// stored as a comma-separated list.
//
// @param store  string-to-string map holding the metadata (any map type
//               offering operator[] with std::string keys and values).
// @param nm     name of the metadata field.
// @param value  value to record for the field.
//
// If the field is missing or empty, it is set to value. Otherwise value is
// appended after a ',' unless it is already present as a complete
// comma-delimited run of the stored text. Matching on delimiter boundaries
// (rather than on any substring) keeps a new value such as "art" from being
// silently dropped just because "artist" is already stored. An empty value
// never extends a non-empty field.
template <class T> void addmeta(
    T& store, const std::string& nm, const std::string& value)
{
    // Single lookup: operator[] creates the (empty) entry when it is missing.
    std::string& current = store[nm];
    if (current.empty()) {
        current = value;
        return;
    }
    if (value.empty()) {
        // Nothing to add; avoid producing a dangling separator.
        return;
    }

    // Look for an occurrence of value which starts at the beginning of the
    // stored text or right after a ',', and ends at the end of the stored
    // text or right before a ','.
    const std::string::size_type len = value.size();
    for (std::string::size_type pos = current.find(value);
         pos != std::string::npos; pos = current.find(value, pos + 1)) {
        const bool atstart = (pos == 0) || (current[pos - 1] == ',');
        const std::string::size_type end = pos + len;
        const bool atend = (end == current.size()) || (current[end] == ',');
        if (atstart && atend) {
            // Already recorded: do not duplicate.
            return;
        }
    }

    current += ',';
    current += value;
}
|
||||
template void addmeta<map<string, string>>(
|
||||
map<string, string>&, const string&, const string&);
|
||||
template void addmeta<unordered_map<string, string>>(
|
||||
unordered_map<string, string>&, const string&, const string&);
|
||||
|
||||
#ifdef _WIN32
|
||||
static bool path_hasdrive(const string& s)
|
||||
{
|
||||
|
||||
@ -115,5 +115,8 @@ extern bool thumbPathForUrl(const std::string& url, int size,
|
||||
// string data (to pass to other thread):
|
||||
template <class T> void map_ss_cp_noshr(T s, T *d);
|
||||
|
||||
// Set or extend metadata field. We store the data as CSV
|
||||
template <class T> void addmeta(T& store, const std::string& nm,
|
||||
const std::string& value);
|
||||
|
||||
#endif /* _RCLUTIL_H_INCLUDED_ */
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user