This commit is contained in:
Jean-Francois Dockes 2020-08-10 16:52:41 +02:00
parent d932d19562
commit fd0cf698a1
2 changed files with 53 additions and 53 deletions

View File

@ -25,24 +25,24 @@
* Convert html to utf-8 text and extract whatever metadata we can find. * Convert html to utf-8 text and extract whatever metadata we can find.
*/ */
class MimeHandlerHtml : public RecollFilter { class MimeHandlerHtml : public RecollFilter {
public: public:
// Construct the HTML handler. Both arguments are forwarded unchanged to the
// RecollFilter base constructor; the body does no further initialization.
MimeHandlerHtml(RclConfig *cnf, const std::string& id) MimeHandlerHtml(RclConfig *cnf, const std::string& id)
: RecollFilter(cnf, id) { : RecollFilter(cnf, id) {
} }
// Virtual destructor with an empty body.
virtual ~MimeHandlerHtml() {} virtual ~MimeHandlerHtml() {}
// Report which input kinds this handler accepts: returns true for
// DOCUMENT_FILE_NAME and DOCUMENT_STRING, false for any other value.
virtual bool is_data_input_ok(DataInput input) const override { virtual bool is_data_input_ok(DataInput input) const override {
if (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING) if (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING)
return true; return true;
return false; return false;
} }
// Override of the base-class next_document(). Only the declaration is
// visible here; the definition lives outside this header excerpt.
virtual bool next_document() override; virtual bool next_document() override;
// Return a const reference to the m_html member (no copy is made).
// The referenced string is emptied by clear_impl().
const std::string& get_html() { const std::string& get_html() {
return m_html; return m_html;
} }
// Handler-specific reset hook: empties m_filename and m_html.
virtual void clear_impl() override { virtual void clear_impl() override {
m_filename.erase(); m_filename.erase();
m_html.erase(); m_html.erase();
} }
protected: protected:
virtual bool set_document_file_impl(const std::string& mt, virtual bool set_document_file_impl(const std::string& mt,

View File

@ -31,63 +31,63 @@ class RclConfig;
class RecollFilter : public Dijon::Filter { class RecollFilter : public Dijon::Filter {
public: public:
// Construct a filter: stores the config pointer in m_config (the pointer
// itself, not a copy of the object) and copies id into m_id.
RecollFilter(RclConfig *config, const std::string& id) RecollFilter(RclConfig *config, const std::string& id)
: m_config(config), m_id(id) { : m_config(config), m_id(id) {
} }
// Virtual destructor with an empty body.
virtual ~RecollFilter() {} virtual ~RecollFilter() {}
// Replace the stored configuration pointer (m_config) with config.
virtual void setConfig(RclConfig *config) { virtual void setConfig(RclConfig *config) {
m_config = config; m_config = config;
} }
// Store the value v for property p:
//   DJF_UDI         -> m_udi = v
//   DEFAULT_CHARSET -> m_dfltInputCharset = v
//   OPERATING_MODE  -> m_forPreview is true only when v is non-empty and
//                      its first character is 'v', otherwise false
// Always returns true. The switch has no default case, so any other
// property value is silently ignored.
virtual bool set_property(Properties p, const std::string &v) { virtual bool set_property(Properties p, const std::string &v) {
switch (p) { switch (p) {
case DJF_UDI: case DJF_UDI:
m_udi = v; m_udi = v;
break; break;
case DEFAULT_CHARSET: case DEFAULT_CHARSET:
m_dfltInputCharset = v; m_dfltInputCharset = v;
break; break;
case OPERATING_MODE: case OPERATING_MODE:
if (!v.empty() && v[0] == 'v') if (!v.empty() && v[0] == 'v')
m_forPreview = true; m_forPreview = true;
else else
m_forPreview = false; m_forPreview = false;
break; break;
} }
return true; return true;
} }
// URI input: records mtype in m_mimeType, ignores the second (unnamed)
// argument, and always returns false.
// We don't use this for now // We don't use this for now
virtual bool set_document_uri(const std::string& mtype, virtual bool set_document_uri(const std::string& mtype,
const std::string &) { const std::string &) {
m_mimeType = mtype; m_mimeType = mtype;
return false; return false;
} }
// File input: records mtype in m_mimeType, then delegates to
// set_document_file_impl(mtype, file_path) and returns its result.
virtual bool set_document_file(const std::string& mtype, virtual bool set_document_file(const std::string& mtype,
const std::string &file_path) { const std::string &file_path) {
m_mimeType = mtype; m_mimeType = mtype;
return set_document_file_impl(mtype, file_path); return set_document_file_impl(mtype, file_path);
} }
// In-memory input: records mtype in m_mimeType, then delegates to
// set_document_string_impl(mtype, contents) and returns its result.
virtual bool set_document_string(const std::string& mtype, virtual bool set_document_string(const std::string& mtype,
const std::string &contents) { const std::string &contents) {
m_mimeType = mtype; m_mimeType = mtype;
return set_document_string_impl(mtype, contents); return set_document_string_impl(mtype, contents);
} }
// Raw-buffer input: builds a temporary std::string from the sz bytes
// starting at cp (this copies the data) and forwards it to
// set_document_string(), returning that call's result.
// NOTE(review): std::string(cp, sz) needs [cp, cp+sz) to be a valid range;
// presumably callers never pass a null cp with sz > 0 -- confirm.
virtual bool set_document_data(const std::string& mtype, virtual bool set_document_data(const std::string& mtype,
const char *cp, size_t sz) const char *cp, size_t sz)
{ {
return set_document_string(mtype, std::string(cp, sz)); return set_document_string(mtype, std::string(cp, sz));
} }
// Store size in m_docsize.
virtual void set_docsize(int64_t size) { virtual void set_docsize(int64_t size) {
m_docsize = size; m_docsize = size;
} }
// Return the value last stored in m_docsize.
virtual int64_t get_docsize() const { virtual int64_t get_docsize() const {
return m_docsize; return m_docsize;
} }
virtual bool has_documents() const { virtual bool has_documents() const {
@ -96,33 +96,33 @@ public:
// Default implementation: returns true only when s is empty, and false
// for any non-empty s.
// Most doc types are single-doc // Most doc types are single-doc
virtual bool skip_to_document(const std::string& s) { virtual bool skip_to_document(const std::string& s) {
if (s.empty()) if (s.empty())
return true; return true;
return false; return false;
} }
// Default input-kind check: returns true only for DOCUMENT_FILE_NAME and
// false for every other value.
virtual bool is_data_input_ok(DataInput input) const { virtual bool is_data_input_ok(DataInput input) const {
if (input == DOCUMENT_FILE_NAME) if (input == DOCUMENT_FILE_NAME)
return true; return true;
return false; return false;
} }
// Return the contents of m_reason. The return type is std::string by
// value, so the caller receives a copy.
virtual std::string get_error() const { virtual std::string get_error() const {
return m_reason; return m_reason;
} }
// Return a const reference to m_id (the id passed to the constructor).
virtual const std::string& get_id() const { virtual const std::string& get_id() const {
return m_id; return m_id;
} }
// Reset the filter. Declared final, so derived classes cannot override it.
// Order of operations: clear_impl() runs first, then Dijon::Filter::clear(),
// then m_forPreview and m_havedoc are set to false and m_dfltInputCharset
// and m_reason are emptied.
// Classes which need to do local work in clear() need // Classes which need to do local work in clear() need
// to implement clear_impl() // to implement clear_impl()
virtual void clear() final { virtual void clear() final {
clear_impl(); clear_impl();
Dijon::Filter::clear(); Dijon::Filter::clear();
m_forPreview = m_havedoc = false; m_forPreview = m_havedoc = false;
m_dfltInputCharset.clear(); m_dfltInputCharset.clear();
m_reason.clear(); m_reason.clear();
} }
// Reset hook invoked at the start of clear(); the default does nothing.
virtual void clear_impl() {} virtual void clear_impl() {}
@ -180,7 +180,7 @@ extern void returnMimeHandler(RecollFilter *);
extern void clearMimeHandlerCache(); extern void clearMimeHandlerCache();
namespace Rcl { namespace Rcl {
class Doc; class Doc;
} }
// NOTE(review): mimetype is declared as a const std::string by value, so
// each call copies the string. Switching to const std::string& would need
// the out-of-view definition changed in step -- confirm before touching.
/// Can this mime type be interned ? /// Can this mime type be interned ?
extern bool canIntern(const std::string mimetype, RclConfig *cfg); extern bool canIntern(const std::string mimetype, RclConfig *cfg);