Use a common method when concatenating multiple values for a metadata element. Use a comma as the separator.
This commit is contained in:
parent
fd0cf698a1
commit
13333e6512
@ -32,6 +32,7 @@ using namespace std;
|
|||||||
#include "rclconfig.h"
|
#include "rclconfig.h"
|
||||||
#include "mimetype.h"
|
#include "mimetype.h"
|
||||||
#include "idfile.h"
|
#include "idfile.h"
|
||||||
|
#include "rclutil.h"
|
||||||
|
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include "safesyswait.h"
|
#include "safesyswait.h"
|
||||||
@ -266,11 +267,7 @@ bool MimeHandlerExecMultiple::next_document()
|
|||||||
string nm = stringtolower((const string&)name);
|
string nm = stringtolower((const string&)name);
|
||||||
trimstring(nm, ":");
|
trimstring(nm, ":");
|
||||||
LOGDEB("MHExecMultiple: got [" << nm << "] -> [" << data << "]\n");
|
LOGDEB("MHExecMultiple: got [" << nm << "] -> [" << data << "]\n");
|
||||||
auto it = m_metaData.find(nm);
|
addmeta(m_metaData, nm, data);
|
||||||
if (it == m_metaData.end() ||
|
|
||||||
it->second.find(data) == std::string::npos) {
|
|
||||||
m_metaData[nm] += data;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if (loop == 200) {
|
if (loop == 200) {
|
||||||
// ??
|
// ??
|
||||||
|
|||||||
@ -73,7 +73,7 @@ bool MimeHandlerHtml::next_document()
|
|||||||
LOGDEB("MHHtml::next_doc.: default supposed input charset: [" << charset
|
LOGDEB("MHHtml::next_doc.: default supposed input charset: [" << charset
|
||||||
<< "]\n");
|
<< "]\n");
|
||||||
// Override default input charset if someone took care to set one:
|
// Override default input charset if someone took care to set one:
|
||||||
map<string,string>::const_iterator it = m_metaData.find(cstr_dj_keycharset);
|
const auto it = m_metaData.find(cstr_dj_keycharset);
|
||||||
if (it != m_metaData.end() && !it->second.empty()) {
|
if (it != m_metaData.end() && !it->second.empty()) {
|
||||||
charset = it->second;
|
charset = it->second;
|
||||||
LOGDEB("MHHtml: next_doc.: input charset from ext. metadata: [" <<
|
LOGDEB("MHHtml: next_doc.: input charset from ext. metadata: [" <<
|
||||||
|
|||||||
@ -39,6 +39,7 @@
|
|||||||
#include "cancelcheck.h"
|
#include "cancelcheck.h"
|
||||||
#include "log.h"
|
#include "log.h"
|
||||||
#include "transcode.h"
|
#include "transcode.h"
|
||||||
|
#include "rclutil.h"
|
||||||
|
|
||||||
static const string cstr_html_charset("charset");
|
static const string cstr_html_charset("charset");
|
||||||
static const string cstr_html_content("content");
|
static const string cstr_html_content("content");
|
||||||
@ -193,7 +194,7 @@ MyHtmlParser::MyHtmlParser()
|
|||||||
|
|
||||||
void MyHtmlParser::decode_entities(string &s)
|
void MyHtmlParser::decode_entities(string &s)
|
||||||
{
|
{
|
||||||
LOGDEB2("MyHtmlParser::decode_entities\n" );
|
LOGDEB2("MyHtmlParser::decode_entities\n");
|
||||||
// This has no meaning whatsoever if the character encoding is unknown,
|
// This has no meaning whatsoever if the character encoding is unknown,
|
||||||
// so don't do it. If charset known, caller has converted text to utf-8,
|
// so don't do it. If charset known, caller has converted text to utf-8,
|
||||||
// and this is also how we translate entities
|
// and this is also how we translate entities
|
||||||
@ -261,7 +262,10 @@ void MyHtmlParser::decode_entities(string &s)
|
|||||||
void
|
void
|
||||||
MyHtmlParser::process_text(const string &text)
|
MyHtmlParser::process_text(const string &text)
|
||||||
{
|
{
|
||||||
LOGDEB2("process_text: title " << (in_title_tag) << " script " << (in_script_tag) << " style " << (in_style_tag) << " pre " << (in_pre_tag) << " pending_space " << (pending_space) << " txt [" << (text) << "]\n" );
|
LOGDEB2("process_text: title " << in_title_tag << " script " <<
|
||||||
|
in_script_tag << " style " << in_style_tag << " pre " <<
|
||||||
|
in_pre_tag << " pending_space " << pending_space << " txt [" <<
|
||||||
|
text << "]\n");
|
||||||
CancelCheck::instance().checkCancel();
|
CancelCheck::instance().checkCancel();
|
||||||
|
|
||||||
if (!in_script_tag && !in_style_tag) {
|
if (!in_script_tag && !in_style_tag) {
|
||||||
@ -300,7 +304,7 @@ MyHtmlParser::process_text(const string &text)
|
|||||||
bool
|
bool
|
||||||
MyHtmlParser::opening_tag(const string &tag)
|
MyHtmlParser::opening_tag(const string &tag)
|
||||||
{
|
{
|
||||||
LOGDEB2("opening_tag: [" << (tag) << "]\n" );
|
LOGDEB2("opening_tag: [" << tag << "]\n");
|
||||||
#if 0
|
#if 0
|
||||||
cout << "TAG: " << tag << ": " << endl;
|
cout << "TAG: " << tag << ": " << endl;
|
||||||
map<string, string>::const_iterator x;
|
map<string, string>::const_iterator x;
|
||||||
@ -390,23 +394,12 @@ MyHtmlParser::opening_tag(const string &tag)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
decode_entities(content);
|
decode_entities(content);
|
||||||
// Set metadata field, avoid appending
|
|
||||||
// multiple identical instances.
|
|
||||||
auto it = meta.find(name);
|
|
||||||
if (it == meta.end() || it->second.find(content) ==
|
|
||||||
string::npos) {
|
|
||||||
if (it != meta.end()) {
|
|
||||||
it->second += ' ';
|
|
||||||
it->second += content;
|
|
||||||
} else {
|
|
||||||
meta[name] = content;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (ishtml &&
|
if (ishtml &&
|
||||||
meta[name].compare(0, cstr_fldhtm.size(),
|
content.compare(0, cstr_fldhtm.size(),
|
||||||
cstr_fldhtm)) {
|
cstr_fldhtm)) {
|
||||||
meta[name].insert(0, cstr_fldhtm);
|
content.insert(0, cstr_fldhtm);
|
||||||
}
|
}
|
||||||
|
addmeta(meta, name, content);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
string hdr;
|
string hdr;
|
||||||
@ -437,7 +430,8 @@ MyHtmlParser::opening_tag(const string &tag)
|
|||||||
charset = newcharset;
|
charset = newcharset;
|
||||||
if (!charset.empty() &&
|
if (!charset.empty() &&
|
||||||
!samecharset(charset, fromcharset)) {
|
!samecharset(charset, fromcharset)) {
|
||||||
LOGDEB1("Doc html5 charset '" << (charset) << "' differs from dir deflt '" << (fromcharset) << "'\n" );
|
LOGDEB1("Doc html5 charset '" << charset <<
|
||||||
|
"' differs from dir deflt '"<<fromcharset <<"'\n");
|
||||||
throw false;
|
throw false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -492,7 +486,7 @@ MyHtmlParser::opening_tag(const string &tag)
|
|||||||
bool
|
bool
|
||||||
MyHtmlParser::closing_tag(const string &tag)
|
MyHtmlParser::closing_tag(const string &tag)
|
||||||
{
|
{
|
||||||
LOGDEB2("closing_tag: [" << (tag) << "]\n" );
|
LOGDEB2("closing_tag: [" << tag << "]\n");
|
||||||
if (tag.empty()) return true;
|
if (tag.empty()) return true;
|
||||||
switch (tag[0]) {
|
switch (tag[0]) {
|
||||||
case 'a':
|
case 'a':
|
||||||
|
|||||||
@ -22,6 +22,7 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "smallut.h"
|
#include "smallut.h"
|
||||||
|
#include "rclutil.h"
|
||||||
|
|
||||||
namespace Rcl {
|
namespace Rcl {
|
||||||
|
|
||||||
@ -191,17 +192,7 @@ public:
|
|||||||
|
|
||||||
// Create entry or append text to existing entry.
|
// Create entry or append text to existing entry.
|
||||||
bool addmeta(const std::string& nm, const std::string& value) {
|
bool addmeta(const std::string& nm, const std::string& value) {
|
||||||
auto mit = meta.find(nm);
|
::addmeta(meta, nm, value);
|
||||||
if (mit == meta.end()) {
|
|
||||||
meta[nm] = value;
|
|
||||||
} else if (mit->second.empty()) {
|
|
||||||
mit->second = value;
|
|
||||||
} else {
|
|
||||||
// It may happen that the same attr exists several times
|
|
||||||
// in the internfile stack. Avoid duplicating values.
|
|
||||||
if (mit->second != value)
|
|
||||||
mit->second += std::string(" - ") + value;
|
|
||||||
}
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -66,6 +66,24 @@ template void map_ss_cp_noshr<map<string, string> >(
|
|||||||
template void map_ss_cp_noshr<unordered_map<string, string> >(
|
template void map_ss_cp_noshr<unordered_map<string, string> >(
|
||||||
unordered_map<string,string> s, unordered_map<string,string>*d);
|
unordered_map<string,string> s, unordered_map<string,string>*d);
|
||||||
|
|
||||||
|
// Add data to a metadata field. Multiple values for the same field are
// stored as a comma-separated list, and a value which is already present
// as a complete list element is not appended again.
//
// @param store associative string->string container holding the metadata
//     (explicitly instantiated below for std::map and std::unordered_map).
// @param nm    name of the metadata field.
// @param value value to set (missing or empty field) or to append.
template <class T> void addmeta(
    T& store, const std::string& nm, const std::string& value)
{
    auto it = store.find(nm);
    if (it == store.end()) {
        // No entry yet: create it, even if the value is empty.
        store[nm] = value;
        return;
    }

    std::string& current = it->second;
    if (current.empty()) {
        current = value;
        return;
    }
    if (value.empty()) {
        // Never append an empty element to an existing non-empty field.
        return;
    }

    // Look for the value as a whole element, i.e. an occurrence which is
    // delimited by the string boundaries or by separators. A plain
    // substring search would wrongly discard "art" if "party" was stored.
    // Scanning occurrences (instead of splitting on commas) also
    // recognises values which themselves contain a comma.
    for (std::string::size_type pos = current.find(value);
         pos != std::string::npos; pos = current.find(value, pos + 1)) {
        const std::string::size_type end = pos + value.size();
        const bool startok = (pos == 0) || (current[pos - 1] == ',');
        const bool endok = (end == current.size()) || (current[end] == ',');
        if (startok && endok) {
            return;
        }
    }

    current += ',';
    current += value;
}

// Explicit instantiations for the container types used by the callers.
template void addmeta<std::map<std::string, std::string>>(
    std::map<std::string, std::string>&, const std::string&,
    const std::string&);
template void addmeta<std::unordered_map<std::string, std::string>>(
    std::unordered_map<std::string, std::string>&, const std::string&,
    const std::string&);
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
static bool path_hasdrive(const string& s)
|
static bool path_hasdrive(const string& s)
|
||||||
{
|
{
|
||||||
|
|||||||
@ -115,5 +115,8 @@ extern bool thumbPathForUrl(const std::string& url, int size,
|
|||||||
// string data (to pass to other thread):
|
// string data (to pass to other thread):
|
||||||
template <class T> void map_ss_cp_noshr(T s, T *d);
|
template <class T> void map_ss_cp_noshr(T s, T *d);
|
||||||
|
|
||||||
|
// Set or extend a metadata field. Multiple values are stored as a comma-separated list.
|
||||||
|
template <class T> void addmeta(T& store, const std::string& nm,
|
||||||
|
const std::string& value);
|
||||||
|
|
||||||
#endif /* _RCLUTIL_H_INCLUDED_ */
|
#endif /* _RCLUTIL_H_INCLUDED_ */
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user