indents + use range-based for loops in extrameta.cpp

2022-09-22 17:10:07 +02:00 · 2022-09-22 17:10:07 +02:00 · 20c3a7ed12
commit 20c3a7ed12
parent 9e0018034c
6 changed files with 315 additions and 333 deletions
--- a/src/internfile/extrameta.cpp
+++ b/src/internfile/extrameta.cpp
@ -30,20 +30,18 @@ using std::string;
 using std::map;

 static void docfieldfrommeta(RclConfig* cfg, const string& name, 
-                 const string &value, Rcl::Doc& doc)
+                             const string &value, Rcl::Doc& doc)
 {
    string fieldname = cfg->fieldCanon(name);
-    LOGDEB0("Internfile:: setting [" << fieldname <<
-            "] from cmd/xattr value [" << value << "]\n");
+    LOGDEB0("Internfile:: setting [" << fieldname << "] from cmd/xattr value [" << value << "]\n");
    if (fieldname == cstr_dj_keymd) {
-    doc.dmtime = value;
+        doc.dmtime = value;
    } else {
-    doc.meta[fieldname] = value;
+        doc.meta[fieldname] = value;
    }
 }

-void reapXAttrs(const RclConfig* cfg, const string& path, 
-        map<string, string>& xfields)
+void reapXAttrs(const RclConfig* cfg, const string& path,  map<string, string>& xfields)
 {
    LOGDEB2("reapXAttrs: [" << path << "]\n");
 #ifndef _WIN32
@ -51,39 +49,35 @@ void reapXAttrs(const RclConfig* cfg, const string& path,
    vector<string> xnames;
    if (!pxattr::list(path, &xnames)) {
        if (errno == ENOTSUP) {
-            LOGDEB("FileInterner::reapXattrs: pxattr::list: errno " <<
-                   errno << "\n");
+            LOGDEB("FileInterner::reapXattrs: pxattr::list: errno " << errno << "\n");
        } else {
-            LOGERR("FileInterner::reapXattrs: pxattr::list: errno " <<
-                   errno << "\n");
+            LOGSYSERR("FileInterner::reapXattrs", "pxattr::list", path);
        }
-    return;
+        return;
    }
    const map<string, string>& xtof = cfg->getXattrToField();

    // Record the xattrs: names found in the config are either skipped
    // or mapped depending if the translation is empty. Other names
    // are recorded as-is
-    for (vector<string>::const_iterator it = xnames.begin();
-     it != xnames.end(); it++) {
-    string key = *it;
-    map<string, string>::const_iterator mit = xtof.find(*it);
-    if (mit != xtof.end()) {
-        if (mit->second.empty()) {
-        continue;
-        } else {
-        key = mit->second;
+    for (const auto& xkey : xnames) {
+        string key = xkey;
+        auto mit = xtof.find(xkey);
+        if (mit != xtof.end()) {
+            if (mit->second.empty()) {
+                continue;
+            } else {
+                key = mit->second;
+            }
        }
-    }
-    string value;
-    if (!pxattr::get(path, *it, &value, pxattr::PXATTR_NOFOLLOW)) {
-        LOGERR("FileInterner::reapXattrs: pxattr::get failed for " << *it
-                   << ", errno " << errno << "\n");
-        continue;
-    }
-    // Encode should we ?
-    xfields[key] = value;
-    LOGDEB2("reapXAttrs: [" << key << "] -> [" << value << "]\n");
+        string value;
+        if (!pxattr::get(path, xkey, &value, pxattr::PXATTR_NOFOLLOW)) {
+            LOGSYSERR("FileInterner::reapXattrs", "pxattr::get", path + " : " + xkey);
+            continue;
+        }
+        // Encode should we ?
+        xfields[key] = value;
+        LOGDEB2("reapXAttrs: [" << key << "] -> [" << value << "]\n");
    }
 #else
    PRETEND_USE(cfg);
@ -92,35 +86,30 @@ void reapXAttrs(const RclConfig* cfg, const string& path,
 #endif
 }

-void docFieldsFromXattrs(RclConfig *cfg, const map<string, string>& xfields, 
-             Rcl::Doc& doc)
+void docFieldsFromXattrs(RclConfig *cfg, const map<string, string>& xfields, Rcl::Doc& doc)
 {
-    for (map<string,string>::const_iterator it = xfields.begin(); 
-     it != xfields.end(); it++) {
-    docfieldfrommeta(cfg, it->first, it->second, doc);
+    for (const auto& fld : xfields) {
+        docfieldfrommeta(cfg, fld.first, fld.second, doc);
    }
 }

-void reapMetaCmds(RclConfig* cfg, const string& path, 
-          map<string, string>& cfields)
+void reapMetaCmds(RclConfig* cfg, const string& path, map<string, string>& cfields)
 {
-    const vector<MDReaper>& reapers = cfg->getMDReapers();
+    const auto& reapers = cfg->getMDReapers();
    if (reapers.empty())
-    return;
+        return;
    map<char,string> smap = {{'f', path}};
-    for (vector<MDReaper>::const_iterator rp = reapers.begin();
-     rp != reapers.end(); rp++) {
-    vector<string> cmd;
-    for (vector<string>::const_iterator it = rp->cmdv.begin();
-         it != rp->cmdv.end(); it++) {
-        string s;
-        pcSubst(*it, s, smap);
-        cmd.push_back(s);
-    }
-    string output;
-    if (ExecCmd::backtick(cmd, output)) {
-        cfields[rp->fieldname] =  output;
-    }
+    for (const auto& reaper : reapers) {
+        vector<string> cmd;
+        for (const auto& arg : reaper.cmdv) {
+            string s;
+            pcSubst(arg, s, smap);
+            cmd.push_back(s);
+        }
+        string output;
+        if (ExecCmd::backtick(cmd, output)) {
+            cfields[reaper.fieldname] =  output;
+        }
    }
 }

@ -132,26 +121,23 @@ void reapMetaCmds(RclConfig* cfg, const string& path,
 // "modificationdate" will set mtime instead of an ordinary field,
 // and the output from anything beginning with "rclmulti" will be
 // interpreted as multiple fields in configuration file format...
-void docFieldsFromMetaCmds(RclConfig *cfg, const map<string, string>& cfields, 
-               Rcl::Doc& doc)
+void docFieldsFromMetaCmds(RclConfig *cfg, const map<string, string>& cfields, Rcl::Doc& doc)
 {
-    for (map<string,string>::const_iterator it = cfields.begin(); 
-     it != cfields.end(); it++) {
-    if (!it->first.compare(0, 8, "rclmulti")) {
-        ConfSimple simple(it->second);
-        if (simple.ok()) {
-        vector<string> names = simple.getNames("");
-        for (vector<string>::const_iterator nm = names.begin(); 
-             nm != names.end(); nm++) {
-            string value;
-            if (simple.get(*nm, value)) {
-            docfieldfrommeta(cfg, *nm, value, doc);
+    for (const auto& cfld : cfields) {
+        if (!cfld.first.compare(0, 8, "rclmulti")) {
+            ConfSimple simple(cfld.second);
+            if (simple.ok()) {
+                auto names = simple.getNames("");
+                for (const auto& nm : names) {
+                    string value;
+                    if (simple.get(nm, value)) {
+                        docfieldfrommeta(cfg, nm, value, doc);
+                    }
+                }
            }
+        } else {
+            docfieldfrommeta(cfg, cfld.first, cfld.second, doc);
        }
-        }
-    } else {
-        docfieldfrommeta(cfg, it->first, it->second, doc);
-    }
    }
 }

--- a/src/internfile/htmlparse.cpp
+++ b/src/internfile/htmlparse.cpp
@ -34,7 +34,7 @@ inline void
 lowercase_string(string &str)
 {
    for (string::iterator i = str.begin(); i != str.end(); ++i) {
-    *i = tolower(static_cast<unsigned char>(*i));
+        *i = tolower(static_cast<unsigned char>(*i));
    }
 }

@ -68,7 +68,7 @@ inline static bool
 p_nottag(char c)
 {
    return !isalnum(static_cast<unsigned char>(c)) &&
-    c != '.' && c != '-' && c != ':'; // ':' for XML namespaces.
+        c != '.' && c != '-' && c != ':'; // ':' for XML namespaces.
 }

 inline static bool
@ -99,14 +99,14 @@ HtmlParser::HtmlParser()
 #if 0
    static const struct ent { const char *n; unsigned int v; } ents[] = {
 #include "namedentities.h"
-    { NULL, 0 }
+        { NULL, 0 }
    };
    if (named_ents.empty()) {
-    const struct ent *i = ents;
-    while (i->n) {
-        named_ents[string(i->n)] = i->v;
-        ++i;
-    }
+        const struct ent *i = ents;
+        while (i->n) {
+            named_ents[string(i->n)] = i->v;
+            ++i;
+        }
    }
 #endif
 }
@ -123,45 +123,45 @@ HtmlParser::decode_entities(string &)
    // find() and find_if() templates don't work...
    string::const_iterator amp = s.begin(), s_end = s.end();
    while ((amp = find(amp, s_end, '&')) != s_end) {
-    unsigned int val = 0;
-    string::const_iterator end, p = amp + 1;
-    if (p != s_end && *p == '#') {
-        p++;
-        if (p != s_end && (*p == 'x' || *p == 'X')) {
-        // hex
-        p++;
-        end = find_if(p, s_end, p_notxdigit);
-        sscanf(s.substr(p - s.begin(), end - p).c_str(), "%x", &val);
+        unsigned int val = 0;
+        string::const_iterator end, p = amp + 1;
+        if (p != s_end && *p == '#') {
+            p++;
+            if (p != s_end && (*p == 'x' || *p == 'X')) {
+                // hex
+                p++;
+                end = find_if(p, s_end, p_notxdigit);
+                sscanf(s.substr(p - s.begin(), end - p).c_str(), "%x", &val);
+            } else {
+                // number
+                end = find_if(p, s_end, p_notdigit);
+                val = atoi(s.substr(p - s.begin(), end - p).c_str());
+            }
        } else {
-        // number
-        end = find_if(p, s_end, p_notdigit);
-        val = atoi(s.substr(p - s.begin(), end - p).c_str());
+            end = find_if(p, s_end, p_notalnum);
+            string code = s.substr(p - s.begin(), end - p);
+            map<string, unsigned int>::const_iterator i;
+            i = named_ents.find(code);
+            if (i != named_ents.end()) val = i->second;
        }
-    } else {
-        end = find_if(p, s_end, p_notalnum);
-        string code = s.substr(p - s.begin(), end - p);
-        map<string, unsigned int>::const_iterator i;
-        i = named_ents.find(code);
-        if (i != named_ents.end()) val = i->second;
-    }
-    if (end < s_end && *end == ';') end++;
-    if (val) {
-        string::size_type amp_pos = amp - s.begin();
-        if (val < 0x80) {
-        s.replace(amp_pos, end - amp, 1u, char(val));
+        if (end < s_end && *end == ';') end++;
+        if (val) {
+            string::size_type amp_pos = amp - s.begin();
+            if (val < 0x80) {
+                s.replace(amp_pos, end - amp, 1u, char(val));
+            } else {
+                // Convert unicode value val to UTF-8.
+                char seq[4];
+                unsigned len = Xapian::Unicode::nonascii_to_utf8(val, seq);
+                s.replace(amp_pos, end - amp, seq, len);
+            }
+            s_end = s.end();
+            // We've modified the string, so the iterators are no longer
+            // valid...
+            amp = s.begin() + amp_pos + 1;
        } else {
-        // Convert unicode value val to UTF-8.
-        char seq[4];
-        unsigned len = Xapian::Unicode::nonascii_to_utf8(val, seq);
-        s.replace(amp_pos, end - amp, seq, len);
+            amp = end;
        }
-        s_end = s.end();
-        // We've modified the string, so the iterators are no longer
-        // valid...
-        amp = s.begin() + amp_pos + 1;
-    } else {
-        amp = end;
-    }
    }
 #endif
 }
@ -175,222 +175,222 @@ HtmlParser::parse_html(const string &body)
    string::const_iterator start = body.begin();

    while (true) {
-    // Skip through until we find an HTML tag, a comment, or the end of
-    // document.  Ignore isolated occurrences of `<' which don't start
-    // a tag or comment.    
-    string::const_iterator p = start;
-    while (true) {
-        p = find(p, body.end(), '<');
-        if (p == body.end()) break;
-        unsigned char ch = *(p + 1);
+        // Skip through until we find an HTML tag, a comment, or the end of
+        // document.  Ignore isolated occurrences of `<' which don't start
+        // a tag or comment.    
+        string::const_iterator p = start;
+        while (true) {
+            p = find(p, body.end(), '<');
+            if (p == body.end()) break;
+            unsigned char ch = *(p + 1);

-        // Tag, closing tag, or comment (or SGML declaration).
-        if ((!in_script && isalpha(ch)) || ch == '/' || ch == '!') break;
+            // Tag, closing tag, or comment (or SGML declaration).
+            if ((!in_script && isalpha(ch)) || ch == '/' || ch == '!') break;

-        if (ch == '?') {
-        // PHP code or XML declaration.
-        // XML declaration is only valid at the start of the first line.
-        // FIXME: need to deal with BOMs...
-        if (p != body.begin() || body.size() < 20) break;
+            if (ch == '?') {
+                // PHP code or XML declaration.
+                // XML declaration is only valid at the start of the first line.
+                // FIXME: need to deal with BOMs...
+                if (p != body.begin() || body.size() < 20) break;

-        // XML declaration looks something like this:
-        // <?xml version="1.0" encoding="UTF-8"?>
-        if (p[2] != 'x' || p[3] != 'm' || p[4] != 'l') break;
-        if (strchr(" \t\r\n", p[5]) == NULL) break;
+                // XML declaration looks something like this:
+                // <?xml version="1.0" encoding="UTF-8"?>
+                if (p[2] != 'x' || p[3] != 'm' || p[4] != 'l') break;
+                if (strchr(" \t\r\n", p[5]) == NULL) break;

-        string::const_iterator decl_end = find(p + 6, body.end(), '?');
-        if (decl_end == body.end()) break;
+                string::const_iterator decl_end = find(p + 6, body.end(), '?');
+                if (decl_end == body.end()) break;

-        // Default charset for XML is UTF-8.
-        charset = "utf-8";
+                // Default charset for XML is UTF-8.
+                charset = "utf-8";

-        string decl(p + 6, decl_end);
-        size_t enc = decl.find("encoding");
-        if (enc == string::npos) break;
+                string decl(p + 6, decl_end);
+                size_t enc = decl.find("encoding");
+                if (enc == string::npos) break;

-        enc = decl.find_first_not_of(" \t\r\n", enc + 8);
-        if (enc == string::npos || enc == decl.size()) break;
+                enc = decl.find_first_not_of(" \t\r\n", enc + 8);
+                if (enc == string::npos || enc == decl.size()) break;

-        if (decl[enc] != '=') break;
+                if (decl[enc] != '=') break;
        
-        enc = decl.find_first_not_of(" \t\r\n", enc + 1);
-        if (enc == string::npos || enc == decl.size()) break;
+                enc = decl.find_first_not_of(" \t\r\n", enc + 1);
+                if (enc == string::npos || enc == decl.size()) break;

-        if (decl[enc] != '"' && decl[enc] != '\'') break;
+                if (decl[enc] != '"' && decl[enc] != '\'') break;

-        char quote = decl[enc++];
-        size_t enc_end = decl.find(quote, enc);
+                char quote = decl[enc++];
+                size_t enc_end = decl.find(quote, enc);

-        if (enc != string::npos)
-            charset = decl.substr(enc, enc_end - enc);
+                if (enc != string::npos)
+                    charset = decl.substr(enc, enc_end - enc);

-        break;
-        }
-        p++;
-    }
-
-    // Process text up to start of tag.
-    if (p > start || p == body.end()) {
-        string text = body.substr(start - body.begin(), p - start);
-        decode_entities(text);
-        process_text(text);
-    }
-
-    if (p == body.end()) {
-        do_eof();
-        break;
-    }
-
-    start = p + 1;
-   
-    if (start == body.end()) break;
-
-    if (*start == '!') {
-        if (++start == body.end()) break;
-        if (++start == body.end()) break;
-        // comment or SGML declaration
-        if (*(start - 1) == '-' && *start == '-') {
-        ++start;
-        string::const_iterator close = find(start, body.end(), '>');
-        // An unterminated comment swallows rest of document
-        // (like Netscape, but unlike MSIE IIRC)
-        if (close == body.end()) break;
-
-        p = close;
-        // look for -->
-        while (p != body.end() && (*(p - 1) != '-' || *(p - 2) != '-'))
-            p = find(p + 1, body.end(), '>');
-
-        if (p != body.end()) {
-            // Check for htdig's "ignore this bit" comments.
-            if (p - start == 15 && string(start, p - 2) == "htdig_noindex") {
-            string::size_type i;
-            i = body.find("<!--/htdig_noindex-->", p + 1 - body.begin());
-            if (i == string::npos) break;
-            start = body.begin() + i + 21;
-            continue;
-            }
-            // If we found --> skip to there.
-            start = p;
-        } else {
-            // Otherwise skip to the first > we found (as Netscape does).
-            start = close;
-        }
-        } else {
-        // just an SGML declaration, perhaps giving the DTD - ignore it
-        start = find(start - 1, body.end(), '>');
-        if (start == body.end()) break;
-        }
-        ++start;
-    } else if (*start == '?') {
-        if (++start == body.end()) break;
-        // PHP - swallow until ?> or EOF
-        start = find(start + 1, body.end(), '>');
-
-        // look for ?>
-        while (start != body.end() && *(start - 1) != '?')
-        start = find(start + 1, body.end(), '>');
-
-        // unterminated PHP swallows rest of document (rather arbitrarily
-        // but it avoids polluting the database when things go wrong)
-        if (start != body.end()) ++start;
-    } else {
-        // opening or closing tag
-        int closing = 0;
-
-        if (*start == '/') {
-        closing = 1;
-        start = find_if(start + 1, body.end(), p_notwhitespace);
-        }
-          
-        p = start;
-        start = find_if(start, body.end(), p_nottag);
-        string tag = body.substr(p - body.begin(), start - p);
-        // convert tagname to lowercase
-        lowercase_string(tag);
-
-        if (closing) {
-        if (!closing_tag(tag))
-            return;
-        if (in_script && tag == "script") in_script = false;
-
-        /* ignore any bogus parameters on closing tags */
-        p = find(start, body.end(), '>');
-        if (p == body.end()) break;
-        start = p + 1;
-        } else {
-        bool empty_element = false;
-        // FIXME: parse parameters lazily.
-        while (start < body.end() && *start != '>') {
-            string name, value;
-
-            p = find_if(start, body.end(), p_whitespaceeqgt);
-
-            size_t name_len = p - start;
-            if (name_len == 1) {
-            if (*start == '/' && p < body.end() && *p == '>') {
-                // E.g. <tag foo="bar" />
-                start = p;
-                empty_element = true;
                break;
            }
-            }
-
-            name.assign(body, start - body.begin(), name_len);
-
-            p = find_if(p, body.end(), p_notwhitespace);
-
-            start = p;
-            if (start != body.end() && *start == '=') {
-            start = find_if(start + 1, body.end(), p_notwhitespace);
-
-            p = body.end();
-
-            int quote = *start;
-            if (quote == '"' || quote == '\'') {
-                start++;
-                p = find(start, body.end(), quote);
-            }
-
-            if (p == body.end()) {
-                // unquoted or no closing quote
-                p = find_if(start, body.end(), p_whitespacegt);
-            }
-            value.assign(body, start - body.begin(), p - start);
-            start = find_if(p, body.end(), p_notwhitespace);
-
-            if (!name.empty()) {
-                // convert parameter name to lowercase
-                lowercase_string(name);
-                // in case of multiple entries, use the first
-                // (as Netscape does)
-                parameters.insert(make_pair(name, value));
-            }
-            }
+            p++;
        }
+
+        // Process text up to start of tag.
+        if (p > start || p == body.end()) {
+            string text = body.substr(start - body.begin(), p - start);
+            decode_entities(text);
+            process_text(text);
+        }
+
+        if (p == body.end()) {
+            do_eof();
+            break;
+        }
+
+        start = p + 1;
+   
+        if (start == body.end()) break;
+
+        if (*start == '!') {
+            if (++start == body.end()) break;
+            if (++start == body.end()) break;
+            // comment or SGML declaration
+            if (*(start - 1) == '-' && *start == '-') {
+                ++start;
+                string::const_iterator close = find(start, body.end(), '>');
+                // An unterminated comment swallows rest of document
+                // (like Netscape, but unlike MSIE IIRC)
+                if (close == body.end()) break;
+
+                p = close;
+                // look for -->
+                while (p != body.end() && (*(p - 1) != '-' || *(p - 2) != '-'))
+                    p = find(p + 1, body.end(), '>');
+
+                if (p != body.end()) {
+                    // Check for htdig's "ignore this bit" comments.
+                    if (p - start == 15 && string(start, p - 2) == "htdig_noindex") {
+                        string::size_type i;
+                        i = body.find("<!--/htdig_noindex-->", p + 1 - body.begin());
+                        if (i == string::npos) break;
+                        start = body.begin() + i + 21;
+                        continue;
+                    }
+                    // If we found --> skip to there.
+                    start = p;
+                } else {
+                    // Otherwise skip to the first > we found (as Netscape does).
+                    start = close;
+                }
+            } else {
+                // just an SGML declaration, perhaps giving the DTD - ignore it
+                start = find(start - 1, body.end(), '>');
+                if (start == body.end()) break;
+            }
+            ++start;
+        } else if (*start == '?') {
+            if (++start == body.end()) break;
+            // PHP - swallow until ?> or EOF
+            start = find(start + 1, body.end(), '>');
+
+            // look for ?>
+            while (start != body.end() && *(start - 1) != '?')
+                start = find(start + 1, body.end(), '>');
+
+            // unterminated PHP swallows rest of document (rather arbitrarily
+            // but it avoids polluting the database when things go wrong)
+            if (start != body.end()) ++start;
+        } else {
+            // opening or closing tag
+            int closing = 0;
+
+            if (*start == '/') {
+                closing = 1;
+                start = find_if(start + 1, body.end(), p_notwhitespace);
+            }
+          
+            p = start;
+            start = find_if(start, body.end(), p_nottag);
+            string tag = body.substr(p - body.begin(), start - p);
+            // convert tagname to lowercase
+            lowercase_string(tag);
+
+            if (closing) {
+                if (!closing_tag(tag))
+                    return;
+                if (in_script && tag == "script") in_script = false;
+
+                /* ignore any bogus parameters on closing tags */
+                p = find(start, body.end(), '>');
+                if (p == body.end()) break;
+                start = p + 1;
+            } else {
+                bool empty_element = false;
+                // FIXME: parse parameters lazily.
+                while (start < body.end() && *start != '>') {
+                    string name, value;
+
+                    p = find_if(start, body.end(), p_whitespaceeqgt);
+
+                    size_t name_len = p - start;
+                    if (name_len == 1) {
+                        if (*start == '/' && p < body.end() && *p == '>') {
+                            // E.g. <tag foo="bar" />
+                            start = p;
+                            empty_element = true;
+                            break;
+                        }
+                    }
+
+                    name.assign(body, start - body.begin(), name_len);
+
+                    p = find_if(p, body.end(), p_notwhitespace);
+
+                    start = p;
+                    if (start != body.end() && *start == '=') {
+                        start = find_if(start + 1, body.end(), p_notwhitespace);
+
+                        p = body.end();
+
+                        int quote = *start;
+                        if (quote == '"' || quote == '\'') {
+                            start++;
+                            p = find(start, body.end(), quote);
+                        }
+
+                        if (p == body.end()) {
+                            // unquoted or no closing quote
+                            p = find_if(start, body.end(), p_whitespacegt);
+                        }
+                        value.assign(body, start - body.begin(), p - start);
+                        start = find_if(p, body.end(), p_notwhitespace);
+
+                        if (!name.empty()) {
+                            // convert parameter name to lowercase
+                            lowercase_string(name);
+                            // in case of multiple entries, use the first
+                            // (as Netscape does)
+                            parameters.insert(make_pair(name, value));
+                        }
+                    }
+                }
 #if 0
-        cout << "<" << tag;
-        map<string, string>::const_iterator x;
-        for (x = parameters.begin(); x != parameters.end(); x++) {
-            cout << " " << x->first << "=\"" << x->second << "\"";
-        }
-        cout << ">\n";
+                cout << "<" << tag;
+                map<string, string>::const_iterator x;
+                for (x = parameters.begin(); x != parameters.end(); x++) {
+                    cout << " " << x->first << "=\"" << x->second << "\"";
+                }
+                cout << ">\n";
 #endif
-        if (!opening_tag(tag))
-            return;
-        parameters.clear();
+                if (!opening_tag(tag))
+                    return;
+                parameters.clear();

-        if (empty_element) {
-            if (!closing_tag(tag))
-            return;
+                if (empty_element) {
+                    if (!closing_tag(tag))
+                        return;
+                }
+
+                // In <script> tags we ignore opening tags to avoid problems
+                // with "a<b".
+                if (tag == "script") in_script = true;
+
+                if (start != body.end() && *start == '>') ++start;
+            }
        }
-
-        // In <script> tags we ignore opening tags to avoid problems
-        // with "a<b".
-        if (tag == "script") in_script = true;
-
-        if (start != body.end() && *start == '>') ++start;
-        }
-    }
    }
 }
--- a/src/internfile/htmlparse.h
+++ b/src/internfile/htmlparse.h
@ -32,17 +32,17 @@ using std::map;

 class HtmlParser {
    map<string, string> parameters;
-    protected:
-        virtual void decode_entities(string &s);
-        bool in_script;
-        string charset;
+protected:
+    virtual void decode_entities(string &s);
+    bool in_script;
+    string charset;
    static map<string, unsigned int> named_ents;

    bool get_parameter(const string & param, string & value) const;
-    public:
+public:
    virtual void process_text(const string &/*text*/) { }
    virtual bool opening_tag(const string &/*tag*/) { return true; }
-        virtual bool closing_tag(const string &/*tag*/) { return true; }
+    virtual bool closing_tag(const string &/*tag*/) { return true; }
    virtual void parse_html(const string &text);
    virtual void do_eof() {}
    HtmlParser();
--- a/src/internfile/mh_mbox.h
+++ b/src/internfile/mh_mbox.h
@ -39,8 +39,7 @@ public:
    virtual void clear_impl() override;

 protected:
-    virtual bool set_document_file_impl(const std::string&,
-                                        const std::string&) override;
+    virtual bool set_document_file_impl(const std::string&, const std::string&) override;

    class Internal;
 private:
--- a/src/internfile/mh_null.h
+++ b/src/internfile/mh_null.h
@ -33,11 +33,10 @@
 /// Associated to application/x-zerosize, so use the following in mimeconf:
 ///    <mimetype> = internal application/x-zerosize
 class MimeHandlerNull : public RecollFilter {
- public:
+public:
    MimeHandlerNull(RclConfig *cnf, const std::string& id) 
-    : RecollFilter(cnf, id) {
-    }
-    virtual ~MimeHandlerNull() {}
+        : RecollFilter(cnf, id) {}
+    virtual ~MimeHandlerNull() = default;
    MimeHandlerNull(const MimeHandlerNull&) = delete;
    MimeHandlerNull& operator=(const MimeHandlerNull&) = delete;

@ -45,14 +44,13 @@ class MimeHandlerNull : public RecollFilter {
        return true;
    }
    
-    virtual bool next_document() 
-    {
-    if (m_havedoc == false)
-        return false;
-    m_havedoc = false; 
-    m_metaData[cstr_dj_keycontent] = cstr_null;
-    m_metaData[cstr_dj_keymt] = cstr_textplain;
-    return true;
+    virtual bool next_document() {
+        if (m_havedoc == false)
+            return false;
+        m_havedoc = false; 
+        m_metaData[cstr_dj_keycontent] = cstr_null;
+        m_metaData[cstr_dj_keymt] = cstr_textplain;
+        return true;
    }
 };

--- a/src/internfile/mh_symlink.h
+++ b/src/internfile/mh_symlink.h
@ -36,9 +36,8 @@
 class MimeHandlerSymlink : public RecollFilter {
 public:
    MimeHandlerSymlink(RclConfig *cnf, const std::string& id) 
-        : RecollFilter(cnf, id) {
-    }
-    virtual ~MimeHandlerSymlink() {}
+        : RecollFilter(cnf, id) {}
+    virtual ~MimeHandlerSymlink() = default;
    MimeHandlerSymlink(const MimeHandlerSymlink&) = delete;
    MimeHandlerSymlink& operator=(const MimeHandlerSymlink&) = delete;