indents + use range-based for loops in extrameta.cpp

2022-09-22 17:10:07 +02:00 · 2022-09-22 17:10:07 +02:00 · 20c3a7ed12
commit 20c3a7ed12
parent 9e0018034c
6 changed files with 315 additions and 333 deletions
--- a/src/internfile/extrameta.cpp
+++ b/src/internfile/extrameta.cpp
@ -30,20 +30,18 @@ using std::string;
 using std::map;
 static void docfieldfrommeta(RclConfig* cfg, const string& name, 
-                 const string &value, Rcl::Doc& doc)
+                             const string &value, Rcl::Doc& doc)
 {
    string fieldname = cfg->fieldCanon(name);
-    LOGDEB0("Internfile:: setting [" << fieldname <<
+    LOGDEB0("Internfile:: setting [" << fieldname << "] from cmd/xattr value [" << value << "]\n");
            "] from cmd/xattr value [" << value << "]\n");
    if (fieldname == cstr_dj_keymd) {
-    doc.dmtime = value;
+        doc.dmtime = value;
    } else {
-    doc.meta[fieldname] = value;
+        doc.meta[fieldname] = value;
    }
 }
-void reapXAttrs(const RclConfig* cfg, const string& path, 
+void reapXAttrs(const RclConfig* cfg, const string& path,  map<string, string>& xfields)
        map<string, string>& xfields)
 {
    LOGDEB2("reapXAttrs: [" << path << "]\n");
 #ifndef _WIN32
@ -51,39 +49,35 @@ void reapXAttrs(const RclConfig* cfg, const string& path,
    vector<string> xnames;
    if (!pxattr::list(path, &xnames)) {
        if (errno == ENOTSUP) {
-            LOGDEB("FileInterner::reapXattrs: pxattr::list: errno " <<
+            LOGDEB("FileInterner::reapXattrs: pxattr::list: errno " << errno << "\n");
                   errno << "\n");
        } else {
-            LOGERR("FileInterner::reapXattrs: pxattr::list: errno " <<
+            LOGSYSERR("FileInterner::reapXattrs", "pxattr::list", path);
                   errno << "\n");
        }
-    return;
+        return;
    }
    const map<string, string>& xtof = cfg->getXattrToField();
    // Record the xattrs: names found in the config are either skipped
    // or mapped depending if the translation is empty. Other names
    // are recorded as-is
-    for (vector<string>::const_iterator it = xnames.begin();
+    for (const auto& xkey : xnames) {
-     it != xnames.end(); it++) {
+        string key = xkey;
-    string key = *it;
+        auto mit = xtof.find(xkey);
-    map<string, string>::const_iterator mit = xtof.find(*it);
+        if (mit != xtof.end()) {
-    if (mit != xtof.end()) {
+            if (mit->second.empty()) {
-        if (mit->second.empty()) {
+                continue;
-        continue;
+            } else {
-        } else {
+                key = mit->second;
-        key = mit->second;
+            }
        }
-    }
+        string value;
-    string value;
+        if (!pxattr::get(path, xkey, &value, pxattr::PXATTR_NOFOLLOW)) {
-    if (!pxattr::get(path, *it, &value, pxattr::PXATTR_NOFOLLOW)) {
+            LOGSYSERR("FileInterner::reapXattrs", "pxattr::get", path + " : " + xkey);
-        LOGERR("FileInterner::reapXattrs: pxattr::get failed for " << *it
+            continue;
-                   << ", errno " << errno << "\n");
+        }
-        continue;
+        // Encode should we ?
-    }
+        xfields[key] = value;
-    // Encode should we ?
+        LOGDEB2("reapXAttrs: [" << key << "] -> [" << value << "]\n");
    xfields[key] = value;
    LOGDEB2("reapXAttrs: [" << key << "] -> [" << value << "]\n");
    }
 #else
    PRETEND_USE(cfg);
@ -92,35 +86,30 @@ void reapXAttrs(const RclConfig* cfg, const string& path,
 #endif
 }
-void docFieldsFromXattrs(RclConfig *cfg, const map<string, string>& xfields, 
+void docFieldsFromXattrs(RclConfig *cfg, const map<string, string>& xfields, Rcl::Doc& doc)
             Rcl::Doc& doc)
 {
-    for (map<string,string>::const_iterator it = xfields.begin(); 
+    for (const auto& fld : xfields) {
-     it != xfields.end(); it++) {
+        docfieldfrommeta(cfg, fld.first, fld.second, doc);
    docfieldfrommeta(cfg, it->first, it->second, doc);
    }
 }
-void reapMetaCmds(RclConfig* cfg, const string& path, 
+void reapMetaCmds(RclConfig* cfg, const string& path, map<string, string>& cfields)
          map<string, string>& cfields)
 {
-    const vector<MDReaper>& reapers = cfg->getMDReapers();
+    const auto& reapers = cfg->getMDReapers();
    if (reapers.empty())
-    return;
+        return;
    map<char,string> smap = {{'f', path}};
-    for (vector<MDReaper>::const_iterator rp = reapers.begin();
+    for (const auto& reaper : reapers) {
-     rp != reapers.end(); rp++) {
+        vector<string> cmd;
-    vector<string> cmd;
+        for (const auto& arg : reaper.cmdv) {
-    for (vector<string>::const_iterator it = rp->cmdv.begin();
+            string s;
-         it != rp->cmdv.end(); it++) {
+            pcSubst(arg, s, smap);
-        string s;
+            cmd.push_back(s);
-        pcSubst(*it, s, smap);
+        }
-        cmd.push_back(s);
+        string output;
-    }
+        if (ExecCmd::backtick(cmd, output)) {
-    string output;
+            cfields[reaper.fieldname] =  output;
-    if (ExecCmd::backtick(cmd, output)) {
+        }
        cfields[rp->fieldname] =  output;
    }
    }
 }
@ -132,26 +121,23 @@ void reapMetaCmds(RclConfig* cfg, const string& path,
 // "modificationdate" will set mtime instead of an ordinary field,
 // and the output from anything beginning with "rclmulti" will be
 // interpreted as multiple fields in configuration file format...
-void docFieldsFromMetaCmds(RclConfig *cfg, const map<string, string>& cfields, 
+void docFieldsFromMetaCmds(RclConfig *cfg, const map<string, string>& cfields, Rcl::Doc& doc)
               Rcl::Doc& doc)
 {
-    for (map<string,string>::const_iterator it = cfields.begin(); 
+    for (const auto& cfld : cfields) {
-     it != cfields.end(); it++) {
+        if (!cfld.first.compare(0, 8, "rclmulti")) {
-    if (!it->first.compare(0, 8, "rclmulti")) {
+            ConfSimple simple(cfld.second);
-        ConfSimple simple(it->second);
+            if (simple.ok()) {
-        if (simple.ok()) {
+                auto names = simple.getNames("");
-        vector<string> names = simple.getNames("");
+                for (const auto& nm : names) {
-        for (vector<string>::const_iterator nm = names.begin(); 
+                    string value;
-             nm != names.end(); nm++) {
+                    if (simple.get(nm, value)) {
-            string value;
+                        docfieldfrommeta(cfg, nm, value, doc);
-            if (simple.get(*nm, value)) {
+                    }
-            docfieldfrommeta(cfg, *nm, value, doc);
+                }
            }
        } else {
            docfieldfrommeta(cfg, cfld.first, cfld.second, doc);
        }
        }
    } else {
        docfieldfrommeta(cfg, it->first, it->second, doc);
    }
    }
 }
--- a/src/internfile/htmlparse.cpp
+++ b/src/internfile/htmlparse.cpp
@ -34,7 +34,7 @@ inline void
 lowercase_string(string &str)
 {
    for (string::iterator i = str.begin(); i != str.end(); ++i) {
-    *i = tolower(static_cast<unsigned char>(*i));
+        *i = tolower(static_cast<unsigned char>(*i));
    }
 }
@ -68,7 +68,7 @@ inline static bool
 p_nottag(char c)
 {
    return !isalnum(static_cast<unsigned char>(c)) &&
-    c != '.' && c != '-' && c != ':'; // ':' for XML namespaces.
+        c != '.' && c != '-' && c != ':'; // ':' for XML namespaces.
 }
 inline static bool
@ -99,14 +99,14 @@ HtmlParser::HtmlParser()
 #if 0
    static const struct ent { const char *n; unsigned int v; } ents[] = {
 #include "namedentities.h"
-    { NULL, 0 }
+        { NULL, 0 }
    };
    if (named_ents.empty()) {
-    const struct ent *i = ents;
+        const struct ent *i = ents;
-    while (i->n) {
+        while (i->n) {
-        named_ents[string(i->n)] = i->v;
+            named_ents[string(i->n)] = i->v;
-        ++i;
+            ++i;
-    }
+        }
    }
 #endif
 }
@ -123,45 +123,45 @@ HtmlParser::decode_entities(string &)
    // find() and find_if() templates don't work...
    string::const_iterator amp = s.begin(), s_end = s.end();
    while ((amp = find(amp, s_end, '&')) != s_end) {
-    unsigned int val = 0;
+        unsigned int val = 0;
-    string::const_iterator end, p = amp + 1;
+        string::const_iterator end, p = amp + 1;
-    if (p != s_end && *p == '#') {
+        if (p != s_end && *p == '#') {
-        p++;
+            p++;
-        if (p != s_end && (*p == 'x' || *p == 'X')) {
+            if (p != s_end && (*p == 'x' || *p == 'X')) {
-        // hex
+                // hex
-        p++;
+                p++;
-        end = find_if(p, s_end, p_notxdigit);
+                end = find_if(p, s_end, p_notxdigit);
-        sscanf(s.substr(p - s.begin(), end - p).c_str(), "%x", &val);
+                sscanf(s.substr(p - s.begin(), end - p).c_str(), "%x", &val);
            } else {
                // number
                end = find_if(p, s_end, p_notdigit);
                val = atoi(s.substr(p - s.begin(), end - p).c_str());
            }
        } else {
-        // number
+            end = find_if(p, s_end, p_notalnum);
-        end = find_if(p, s_end, p_notdigit);
+            string code = s.substr(p - s.begin(), end - p);
-        val = atoi(s.substr(p - s.begin(), end - p).c_str());
+            map<string, unsigned int>::const_iterator i;
            i = named_ents.find(code);
            if (i != named_ents.end()) val = i->second;
        }
-    } else {
+        if (end < s_end && *end == ';') end++;
-        end = find_if(p, s_end, p_notalnum);
+        if (val) {
-        string code = s.substr(p - s.begin(), end - p);
+            string::size_type amp_pos = amp - s.begin();
-        map<string, unsigned int>::const_iterator i;
+            if (val < 0x80) {
-        i = named_ents.find(code);
+                s.replace(amp_pos, end - amp, 1u, char(val));
-        if (i != named_ents.end()) val = i->second;
+            } else {
-    }
+                // Convert unicode value val to UTF-8.
-    if (end < s_end && *end == ';') end++;
+                char seq[4];
-    if (val) {
+                unsigned len = Xapian::Unicode::nonascii_to_utf8(val, seq);
-        string::size_type amp_pos = amp - s.begin();
+                s.replace(amp_pos, end - amp, seq, len);
-        if (val < 0x80) {
+            }
-        s.replace(amp_pos, end - amp, 1u, char(val));
+            s_end = s.end();
            // We've modified the string, so the iterators are no longer
            // valid...
            amp = s.begin() + amp_pos + 1;
        } else {
-        // Convert unicode value val to UTF-8.
+            amp = end;
        char seq[4];
        unsigned len = Xapian::Unicode::nonascii_to_utf8(val, seq);
        s.replace(amp_pos, end - amp, seq, len);
        }
        s_end = s.end();
        // We've modified the string, so the iterators are no longer
        // valid...
        amp = s.begin() + amp_pos + 1;
    } else {
        amp = end;
    }
    }
 #endif
 }
@ -175,222 +175,222 @@ HtmlParser::parse_html(const string &body)
    string::const_iterator start = body.begin();
    while (true) {
-    // Skip through until we find an HTML tag, a comment, or the end of
+        // Skip through until we find an HTML tag, a comment, or the end of
-    // document.  Ignore isolated occurrences of `<' which don't start
+        // document.  Ignore isolated occurrences of `<' which don't start
-    // a tag or comment.    
+        // a tag or comment.    
-    string::const_iterator p = start;
+        string::const_iterator p = start;
-    while (true) {
+        while (true) {
-        p = find(p, body.end(), '<');
+            p = find(p, body.end(), '<');
-        if (p == body.end()) break;
+            if (p == body.end()) break;
-        unsigned char ch = *(p + 1);
+            unsigned char ch = *(p + 1);
-        // Tag, closing tag, or comment (or SGML declaration).
+            // Tag, closing tag, or comment (or SGML declaration).
-        if ((!in_script && isalpha(ch)) || ch == '/' || ch == '!') break;
+            if ((!in_script && isalpha(ch)) || ch == '/' || ch == '!') break;
-        if (ch == '?') {
+            if (ch == '?') {
-        // PHP code or XML declaration.
+                // PHP code or XML declaration.
-        // XML declaration is only valid at the start of the first line.
+                // XML declaration is only valid at the start of the first line.
-        // FIXME: need to deal with BOMs...
+                // FIXME: need to deal with BOMs...
-        if (p != body.begin() || body.size() < 20) break;
+                if (p != body.begin() || body.size() < 20) break;
-        // XML declaration looks something like this:
+                // XML declaration looks something like this:
-        // <?xml version="1.0" encoding="UTF-8"?>
+                // <?xml version="1.0" encoding="UTF-8"?>
-        if (p[2] != 'x' || p[3] != 'm' || p[4] != 'l') break;
+                if (p[2] != 'x' || p[3] != 'm' || p[4] != 'l') break;
-        if (strchr(" \t\r\n", p[5]) == NULL) break;
+                if (strchr(" \t\r\n", p[5]) == NULL) break;
-        string::const_iterator decl_end = find(p + 6, body.end(), '?');
+                string::const_iterator decl_end = find(p + 6, body.end(), '?');
-        if (decl_end == body.end()) break;
+                if (decl_end == body.end()) break;
-        // Default charset for XML is UTF-8.
+                // Default charset for XML is UTF-8.
-        charset = "utf-8";
+                charset = "utf-8";
-        string decl(p + 6, decl_end);
+                string decl(p + 6, decl_end);
-        size_t enc = decl.find("encoding");
+                size_t enc = decl.find("encoding");
-        if (enc == string::npos) break;
+                if (enc == string::npos) break;
-        enc = decl.find_first_not_of(" \t\r\n", enc + 8);
+                enc = decl.find_first_not_of(" \t\r\n", enc + 8);
-        if (enc == string::npos || enc == decl.size()) break;
+                if (enc == string::npos || enc == decl.size()) break;
-        if (decl[enc] != '=') break;
+                if (decl[enc] != '=') break;
-        enc = decl.find_first_not_of(" \t\r\n", enc + 1);
+                enc = decl.find_first_not_of(" \t\r\n", enc + 1);
-        if (enc == string::npos || enc == decl.size()) break;
+                if (enc == string::npos || enc == decl.size()) break;
-        if (decl[enc] != '"' && decl[enc] != '\'') break;
+                if (decl[enc] != '"' && decl[enc] != '\'') break;
-        char quote = decl[enc++];
+                char quote = decl[enc++];
-        size_t enc_end = decl.find(quote, enc);
+                size_t enc_end = decl.find(quote, enc);
-        if (enc != string::npos)
+                if (enc != string::npos)
-            charset = decl.substr(enc, enc_end - enc);
+                    charset = decl.substr(enc, enc_end - enc);
        break;
        }
        p++;
    }
    // Process text up to start of tag.
    if (p > start || p == body.end()) {
        string text = body.substr(start - body.begin(), p - start);
        decode_entities(text);
        process_text(text);
    }
    if (p == body.end()) {
        do_eof();
        break;
    }
    start = p + 1;
    if (start == body.end()) break;
    if (*start == '!') {
        if (++start == body.end()) break;
        if (++start == body.end()) break;
        // comment or SGML declaration
        if (*(start - 1) == '-' && *start == '-') {
        ++start;
        string::const_iterator close = find(start, body.end(), '>');
        // An unterminated comment swallows rest of document
        // (like Netscape, but unlike MSIE IIRC)
        if (close == body.end()) break;
        p = close;
        // look for -->
        while (p != body.end() && (*(p - 1) != '-' || *(p - 2) != '-'))
            p = find(p + 1, body.end(), '>');
        if (p != body.end()) {
            // Check for htdig's "ignore this bit" comments.
            if (p - start == 15 && string(start, p - 2) == "htdig_noindex") {
            string::size_type i;
            i = body.find("<!--/htdig_noindex-->", p + 1 - body.begin());
            if (i == string::npos) break;
            start = body.begin() + i + 21;
            continue;
            }
            // If we found --> skip to there.
            start = p;
        } else {
            // Otherwise skip to the first > we found (as Netscape does).
            start = close;
        }
        } else {
        // just an SGML declaration, perhaps giving the DTD - ignore it
        start = find(start - 1, body.end(), '>');
        if (start == body.end()) break;
        }
        ++start;
    } else if (*start == '?') {
        if (++start == body.end()) break;
        // PHP - swallow until ?> or EOF
        start = find(start + 1, body.end(), '>');
        // look for ?>
        while (start != body.end() && *(start - 1) != '?')
        start = find(start + 1, body.end(), '>');
        // unterminated PHP swallows rest of document (rather arbitrarily
        // but it avoids polluting the database when things go wrong)
        if (start != body.end()) ++start;
    } else {
        // opening or closing tag
        int closing = 0;
        if (*start == '/') {
        closing = 1;
        start = find_if(start + 1, body.end(), p_notwhitespace);
        }
        p = start;
        start = find_if(start, body.end(), p_nottag);
        string tag = body.substr(p - body.begin(), start - p);
        // convert tagname to lowercase
        lowercase_string(tag);
        if (closing) {
        if (!closing_tag(tag))
            return;
        if (in_script && tag == "script") in_script = false;
        /* ignore any bogus parameters on closing tags */
        p = find(start, body.end(), '>');
        if (p == body.end()) break;
        start = p + 1;
        } else {
        bool empty_element = false;
        // FIXME: parse parameters lazily.
        while (start < body.end() && *start != '>') {
            string name, value;
            p = find_if(start, body.end(), p_whitespaceeqgt);
            size_t name_len = p - start;
            if (name_len == 1) {
            if (*start == '/' && p < body.end() && *p == '>') {
                // E.g. <tag foo="bar" />
                start = p;
                empty_element = true;
                break;
            }
-            }
+            p++;
            name.assign(body, start - body.begin(), name_len);
            p = find_if(p, body.end(), p_notwhitespace);
            start = p;
            if (start != body.end() && *start == '=') {
            start = find_if(start + 1, body.end(), p_notwhitespace);
            p = body.end();
            int quote = *start;
            if (quote == '"' || quote == '\'') {
                start++;
                p = find(start, body.end(), quote);
            }
            if (p == body.end()) {
                // unquoted or no closing quote
                p = find_if(start, body.end(), p_whitespacegt);
            }
            value.assign(body, start - body.begin(), p - start);
            start = find_if(p, body.end(), p_notwhitespace);
            if (!name.empty()) {
                // convert parameter name to lowercase
                lowercase_string(name);
                // in case of multiple entries, use the first
                // (as Netscape does)
                parameters.insert(make_pair(name, value));
            }
            }
        }
        // Process text up to start of tag.
        if (p > start || p == body.end()) {
            string text = body.substr(start - body.begin(), p - start);
            decode_entities(text);
            process_text(text);
        }
        if (p == body.end()) {
            do_eof();
            break;
        }
        start = p + 1;
        if (start == body.end()) break;
        if (*start == '!') {
            if (++start == body.end()) break;
            if (++start == body.end()) break;
            // comment or SGML declaration
            if (*(start - 1) == '-' && *start == '-') {
                ++start;
                string::const_iterator close = find(start, body.end(), '>');
                // An unterminated comment swallows rest of document
                // (like Netscape, but unlike MSIE IIRC)
                if (close == body.end()) break;
                p = close;
                // look for -->
                while (p != body.end() && (*(p - 1) != '-' || *(p - 2) != '-'))
                    p = find(p + 1, body.end(), '>');
                if (p != body.end()) {
                    // Check for htdig's "ignore this bit" comments.
                    if (p - start == 15 && string(start, p - 2) == "htdig_noindex") {
                        string::size_type i;
                        i = body.find("<!--/htdig_noindex-->", p + 1 - body.begin());
                        if (i == string::npos) break;
                        start = body.begin() + i + 21;
                        continue;
                    }
                    // If we found --> skip to there.
                    start = p;
                } else {
                    // Otherwise skip to the first > we found (as Netscape does).
                    start = close;
                }
            } else {
                // just an SGML declaration, perhaps giving the DTD - ignore it
                start = find(start - 1, body.end(), '>');
                if (start == body.end()) break;
            }
            ++start;
        } else if (*start == '?') {
            if (++start == body.end()) break;
            // PHP - swallow until ?> or EOF
            start = find(start + 1, body.end(), '>');
            // look for ?>
            while (start != body.end() && *(start - 1) != '?')
                start = find(start + 1, body.end(), '>');
            // unterminated PHP swallows rest of document (rather arbitrarily
            // but it avoids polluting the database when things go wrong)
            if (start != body.end()) ++start;
        } else {
            // opening or closing tag
            int closing = 0;
            if (*start == '/') {
                closing = 1;
                start = find_if(start + 1, body.end(), p_notwhitespace);
            }
            p = start;
            start = find_if(start, body.end(), p_nottag);
            string tag = body.substr(p - body.begin(), start - p);
            // convert tagname to lowercase
            lowercase_string(tag);
            if (closing) {
                if (!closing_tag(tag))
                    return;
                if (in_script && tag == "script") in_script = false;
                /* ignore any bogus parameters on closing tags */
                p = find(start, body.end(), '>');
                if (p == body.end()) break;
                start = p + 1;
            } else {
                bool empty_element = false;
                // FIXME: parse parameters lazily.
                while (start < body.end() && *start != '>') {
                    string name, value;
                    p = find_if(start, body.end(), p_whitespaceeqgt);
                    size_t name_len = p - start;
                    if (name_len == 1) {
                        if (*start == '/' && p < body.end() && *p == '>') {
                            // E.g. <tag foo="bar" />
                            start = p;
                            empty_element = true;
                            break;
                        }
                    }
                    name.assign(body, start - body.begin(), name_len);
                    p = find_if(p, body.end(), p_notwhitespace);
                    start = p;
                    if (start != body.end() && *start == '=') {
                        start = find_if(start + 1, body.end(), p_notwhitespace);
                        p = body.end();
                        int quote = *start;
                        if (quote == '"' || quote == '\'') {
                            start++;
                            p = find(start, body.end(), quote);
                        }
                        if (p == body.end()) {
                            // unquoted or no closing quote
                            p = find_if(start, body.end(), p_whitespacegt);
                        }
                        value.assign(body, start - body.begin(), p - start);
                        start = find_if(p, body.end(), p_notwhitespace);
                        if (!name.empty()) {
                            // convert parameter name to lowercase
                            lowercase_string(name);
                            // in case of multiple entries, use the first
                            // (as Netscape does)
                            parameters.insert(make_pair(name, value));
                        }
                    }
                }
 #if 0
-        cout << "<" << tag;
+                cout << "<" << tag;
-        map<string, string>::const_iterator x;
+                map<string, string>::const_iterator x;
-        for (x = parameters.begin(); x != parameters.end(); x++) {
+                for (x = parameters.begin(); x != parameters.end(); x++) {
-            cout << " " << x->first << "=\"" << x->second << "\"";
+                    cout << " " << x->first << "=\"" << x->second << "\"";
-        }
+                }
-        cout << ">\n";
+                cout << ">\n";
 #endif
-        if (!opening_tag(tag))
+                if (!opening_tag(tag))
-            return;
+                    return;
-        parameters.clear();
+                parameters.clear();
-        if (empty_element) {
+                if (empty_element) {
-            if (!closing_tag(tag))
+                    if (!closing_tag(tag))
-            return;
+                        return;
                }
                // In <script> tags we ignore opening tags to avoid problems
                // with "a<b".
                if (tag == "script") in_script = true;
                if (start != body.end() && *start == '>') ++start;
            }
        }
        // In <script> tags we ignore opening tags to avoid problems
        // with "a<b".
        if (tag == "script") in_script = true;
        if (start != body.end() && *start == '>') ++start;
        }
    }
    }
 }
--- a/src/internfile/htmlparse.h
+++ b/src/internfile/htmlparse.h
@ -32,17 +32,17 @@ using std::map;
 class HtmlParser {
    map<string, string> parameters;
-    protected:
+protected:
-        virtual void decode_entities(string &s);
+    virtual void decode_entities(string &s);
-        bool in_script;
+    bool in_script;
-        string charset;
+    string charset;
    static map<string, unsigned int> named_ents;
    bool get_parameter(const string & param, string & value) const;
-    public:
+public:
    virtual void process_text(const string &/*text*/) { }
    virtual bool opening_tag(const string &/*tag*/) { return true; }
-        virtual bool closing_tag(const string &/*tag*/) { return true; }
+    virtual bool closing_tag(const string &/*tag*/) { return true; }
    virtual void parse_html(const string &text);
    virtual void do_eof() {}
    HtmlParser();
--- a/src/internfile/mh_mbox.h
+++ b/src/internfile/mh_mbox.h
@ -39,8 +39,7 @@ public:
    virtual void clear_impl() override;
 protected:
-    virtual bool set_document_file_impl(const std::string&,
+    virtual bool set_document_file_impl(const std::string&, const std::string&) override;
                                        const std::string&) override;
    class Internal;
 private:
--- a/src/internfile/mh_null.h
+++ b/src/internfile/mh_null.h
@ -33,11 +33,10 @@
 /// Associated to application/x-zerosize, so use the following in mimeconf:
 ///    <mimetype> = internal application/x-zerosize
 class MimeHandlerNull : public RecollFilter {
- public:
+public:
    MimeHandlerNull(RclConfig *cnf, const std::string& id) 
-    : RecollFilter(cnf, id) {
+        : RecollFilter(cnf, id) {}
-    }
+    virtual ~MimeHandlerNull() = default;
    virtual ~MimeHandlerNull() {}
    MimeHandlerNull(const MimeHandlerNull&) = delete;
    MimeHandlerNull& operator=(const MimeHandlerNull&) = delete;
@ -45,14 +44,13 @@ class MimeHandlerNull : public RecollFilter {
        return true;
    }
-    virtual bool next_document() 
+    virtual bool next_document() {
-    {
+        if (m_havedoc == false)
-    if (m_havedoc == false)
+            return false;
-        return false;
+        m_havedoc = false; 
-    m_havedoc = false; 
+        m_metaData[cstr_dj_keycontent] = cstr_null;
-    m_metaData[cstr_dj_keycontent] = cstr_null;
+        m_metaData[cstr_dj_keymt] = cstr_textplain;
-    m_metaData[cstr_dj_keymt] = cstr_textplain;
+        return true;
    return true;
    }
 };
--- a/src/internfile/mh_symlink.h
+++ b/src/internfile/mh_symlink.h
@ -36,9 +36,8 @@
 class MimeHandlerSymlink : public RecollFilter {
 public:
    MimeHandlerSymlink(RclConfig *cnf, const std::string& id) 
-        : RecollFilter(cnf, id) {
+        : RecollFilter(cnf, id) {}
-    }
+    virtual ~MimeHandlerSymlink() = default;
    virtual ~MimeHandlerSymlink() {}
    MimeHandlerSymlink(const MimeHandlerSymlink&) = delete;
    MimeHandlerSymlink& operator=(const MimeHandlerSymlink&) = delete;