indents and readability

2021-11-02 12:05:04 +01:00 · 2021-11-02 12:05:04 +01:00 · a24fc7bacc
commit a24fc7bacc
parent 4cc0bc90b6
5 changed files with 715 additions and 749 deletions
--- a/src/common/textsplit.h
+++ b/src/common/textsplit.h
@@ -163,6 +163,9 @@ private:
    // Current span. Might be jf.dockes@wanadoo.f
    std::string        m_span; 
+    // Words in span: byte positions of start and end of words in m_span. For example:
+    // 0   4    9
+    // bill@some.com -> (0,4) (5,9) (10,13)
    std::vector <std::pair<int, int> > m_words_in_span;
    // Current word: no punctuation at all in there. Byte offset
--- a/src/common/unacpp.cpp
+++ b/src/common/unacpp.cpp
@@ -1,4 +1,4 @@
-/* Copyright (C) 2004-2019 J.F.Dockes
+/* Copyright (C) 2004-2021 J.F.Dockes
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
@@ -37,16 +37,13 @@ bool unacmaybefold(const string &in, string &out,
    switch (what) {
    case UNACOP_UNAC:
-        status = unac_string(encoding, in.c_str(), in.length(), 
+        status = unac_string(encoding, in.c_str(), in.length(), &cout, &out_len);
-                             &cout, &out_len);
        break;
    case UNACOP_UNACFOLD:
-        status = unacfold_string(encoding, in.c_str(), in.length(), 
+        status = unacfold_string(encoding, in.c_str(), in.length(), &cout, &out_len);
-                                 &cout, &out_len);
        break;
    case UNACOP_FOLD:
-        status = fold_string(encoding, in.c_str(), in.length(), 
+        status = fold_string(encoding, in.c_str(), in.length(), &cout, &out_len);
-                             &cout, &out_len);
        break;
    }
--- a/src/query/plaintorich.cpp
+++ b/src/query/plaintorich.cpp
@@ -1,4 +1,4 @@
-/* Copyright (C) 2005 J.F.Dockes
+/* Copyright (C) 2005-2021 J.F.Dockes
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
@@ -78,8 +78,7 @@ public:
        string dumb = term;
        if (o_index_stripchars) {
            if (!unacmaybefold(term, dumb, "UTF-8", UNACOP_UNACFOLD)) {
-                LOGINFO("PlainToRich::takeword: unac failed for [" << term <<
+                LOGINFO("PlainToRich::takeword: unac failed for [" << term << "]\n");
-                        "]\n");
                return true;
            }
        }
@@ -173,30 +172,25 @@ static string activate_urls(const string& in)
 }
 #endif
-// Fix result text for display inside the gui text window.
+// Enrich result text for display inside the gui text window.
 //
-// We call overridden functions to output header data, beginnings and ends of
+// We call overridden functions to output header data, beginnings and ends of matches etc.
-// matches etc.
 //
-// If the input is text, we output the result in chunks, arranging not
+// If the input is text, we output the result in chunks, arranging not to cut in the middle of a
-// to cut in the middle of a tag, which would confuse qtextedit. If
+// tag, which would confuse qtextedit. If the input is html, the body is always a single output
-// the input is html, the body is always a single output chunk.
+// chunk.
-bool PlainToRich::plaintorich(const string& in, 
+bool PlainToRich::plaintorich(
-                              list<string>& out, // Output chunk list
+    const string& in, list<string>& out, const HighlightData& hdata, int chunksize)
-                              const HighlightData& hdata,
-                              int chunksize)
 {
    Chrono chron;
    bool ret = true;
    LOGDEB1("plaintorichich: in: [" << in << "]\n");
    m_hdata = &hdata;
-    // Compute the positions for the query terms.  We use the text
+    // Compute the positions for the query terms.  We use the text splitter to break the text into
-    // splitter to break the text into words, and compare the words to
+    // words, and compare the words to the search terms,
-    // the search terms,
    TextSplitPTR splitter(hdata);
-    // Note: the splitter returns the term locations in byte, not
+    // Note: the splitter returns the term locations in byte, not character, offsets.
-    // character, offsets.
    splitter.text_to_words(in);
    LOGDEB2("plaintorich: split done " << chron.millis() << " mS\n");
    // Compute the positions for NEAR and PHRASE groups.
@@ -205,7 +199,7 @@ bool PlainToRich::plaintorich(const string& in,
    out.clear();
    out.push_back("");
-    list<string>::iterator olit = out.begin();
+    auto olit = out.begin();
    // Rich text output
    *olit = header();
@@ -225,9 +219,10 @@ bool PlainToRich::plaintorich(const string& in,
    vector<GroupMatchEntry>::iterator tPosEnd = splitter.m_tboffs.end();
 #if 0
-    for (vector<pair<int, int> >::const_iterator it = splitter.m_tboffs.begin();
+    for (const auto& region : splitter.m_tboffs) {
-         it != splitter.m_tboffs.end(); it++) {
+        auto st = region.offs.first;
-        LOGDEB2("plaintorich: region: " << it->first << " "<<it->second<< "\n");
+        auto nd = region.offs.second;
+        LOGDEB0("plaintorich: region: " << st << " " << nd << "\n");
    }
 #endif
@@ -276,8 +271,7 @@ bool PlainToRich::plaintorich(const string& in,
                }
                // Skip all highlight areas that would overlap this one
                int crend = tPosIt->offs.second;
-                while (tPosIt != splitter.m_tboffs.end() && 
+                while (tPosIt != splitter.m_tboffs.end() && tPosIt->offs.first < crend)
-                       tPosIt->offs.first < crend)
                    tPosIt++;
                inrcltag = 0;
            }
--- a/src/unac/unac.c
+++ b/src/unac/unac.c
@@ -14290,23 +14290,17 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length,
    return 0;
 }
-int unac_string_utf16(const char* in, size_t in_length,
+int unac_string_utf16(const char* in, size_t in_length, char** outp, size_t* out_lengthp)
-              char** outp, size_t* out_lengthp)
 {
-    return unacmaybefold_string_utf16(in, in_length,
+    return unacmaybefold_string_utf16(in, in_length, outp, out_lengthp, UNAC_UNAC);
-                      outp, out_lengthp, UNAC_UNAC);
 }
-int unacfold_string_utf16(const char* in, size_t in_length,
+int unacfold_string_utf16(const char* in, size_t in_length, char** outp, size_t* out_lengthp)
-              char** outp, size_t* out_lengthp)
 {
-    return unacmaybefold_string_utf16(in, in_length,
+    return unacmaybefold_string_utf16(in, in_length, outp, out_lengthp, UNAC_UNACFOLD);
-                      outp, out_lengthp, UNAC_UNACFOLD);
 }
-int fold_string_utf16(const char* in, size_t in_length,
+int fold_string_utf16(const char* in, size_t in_length, char** outp, size_t* out_lengthp)
-              char** outp, size_t* out_lengthp)
 {
-    return unacmaybefold_string_utf16(in, in_length,
+    return unacmaybefold_string_utf16(in, in_length, outp, out_lengthp, UNAC_FOLD);
-                      outp, out_lengthp, UNAC_FOLD);
 }
 static const char *utf16be = "UTF-16BE";
@@ -14421,7 +14415,8 @@ static int convert(const char* from, const char* to,
                if (from_utf16) {
                    const char* tmp = space;
                    size_t tmp_length = 2;
-      if(iconv(cd, (ICONV_CONST char **) &tmp, &tmp_length, &out, &out_remain) == (size_t)-1) {
+                    if (iconv(cd, (ICONV_CONST char **)&tmp, &tmp_length, &out, &out_remain) ==
+                        (size_t)-1) {
                        if(errno == E2BIG) {
                            /* fall thru to the E2BIG case below */;
                        } else {
@@ -14481,8 +14476,7 @@ out:
    return ret;
 }
-int unacmaybefold_string(const char* charset,
+int unacmaybefold_string(const char* charset, const char* in, size_t in_length,
-             const char* in, size_t in_length,
                         char** outp, size_t* out_lengthp, int what)
 {
    /*
@@ -14520,26 +14514,20 @@ int unacmaybefold_string(const char* charset,
    return 0;
 }
-int unac_string(const char* charset,
+int unac_string(
-        const char* in, size_t in_length,
+    const char* charset, const char* in, size_t in_length, char** outp, size_t* out_lengthp)
-        char** outp, size_t* out_lengthp)
 {
-    return unacmaybefold_string(charset, in, in_length,
+    return unacmaybefold_string(charset, in, in_length, outp, out_lengthp, UNAC_UNAC);
-                outp, out_lengthp, UNAC_UNAC);
 }
-int unacfold_string(const char* charset,
+int unacfold_string(
-            const char* in, size_t in_length,
+    const char* charset, const char* in, size_t in_length, char** outp, size_t* out_lengthp)
-            char** outp, size_t* out_lengthp)
 {
-    return unacmaybefold_string(charset, in, in_length,
+    return unacmaybefold_string(charset, in, in_length, outp, out_lengthp, UNAC_UNACFOLD);
-                outp, out_lengthp, UNAC_UNACFOLD);
 }
-int fold_string(const char* charset,
+int fold_string(
-            const char* in, size_t in_length,
+    const char* charset, const char* in, size_t in_length, char** outp, size_t* out_lengthp)
-            char** outp, size_t* out_lengthp)
 {
-    return unacmaybefold_string(charset, in, in_length,
+    return unacmaybefold_string(charset, in, in_length, outp, out_lengthp, UNAC_FOLD);
-                outp, out_lengthp, UNAC_FOLD);
 }
 const char* unac_version(void)
@@ -14577,15 +14565,13 @@ void unac_set_except_translations(const char *spectrans)
    vector<string> vtrans;
    stringToStrings(spectrans, vtrans);
-    for (vector<string>::iterator it = vtrans.begin();
+    for (const auto& trans : vtrans) {
-     it != vtrans.end(); it++) {
        /* Convert the whole thing to utf-16be/le according to endianness */
        char *out = 0;
        size_t outsize;
-    if (convert("UTF-8", machinecoding,
+        if (convert("UTF-8", machinecoding, trans.c_str(), trans.size(), &out, &outsize) != 0 ||
-            it->c_str(), it->size(),
+            outsize < 2)
-            &out, &outsize) != 0 || outsize < 2)
            continue;
        /* The source char must be utf-16be as this is what we convert the
--- a/unac/unac.c
+++ b/unac/unac.c
@@ -13,7 +13,7 @@
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
 #ifdef BUILDING_RECOLL
@@ -14290,23 +14290,17 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length,
    return 0;
 }
-int unac_string_utf16(const char* in, size_t in_length,
+int unac_string_utf16(const char* in, size_t in_length, char** outp, size_t* out_lengthp)
-              char** outp, size_t* out_lengthp)
 {
-    return unacmaybefold_string_utf16(in, in_length,
+    return unacmaybefold_string_utf16(in, in_length, outp, out_lengthp, UNAC_UNAC);
-                      outp, out_lengthp, UNAC_UNAC);
 }
-int unacfold_string_utf16(const char* in, size_t in_length,
+int unacfold_string_utf16(const char* in, size_t in_length, char** outp, size_t* out_lengthp)
-              char** outp, size_t* out_lengthp)
 {
-    return unacmaybefold_string_utf16(in, in_length,
+    return unacmaybefold_string_utf16(in, in_length, outp, out_lengthp, UNAC_UNACFOLD);
-                      outp, out_lengthp, UNAC_UNACFOLD);
 }
-int fold_string_utf16(const char* in, size_t in_length,
+int fold_string_utf16(const char* in, size_t in_length, char** outp, size_t* out_lengthp)
-              char** outp, size_t* out_lengthp)
 {
-    return unacmaybefold_string_utf16(in, in_length,
+    return unacmaybefold_string_utf16(in, in_length, outp, out_lengthp, UNAC_FOLD);
-                      outp, out_lengthp, UNAC_FOLD);
 }
 static const char *utf16be = "UTF-16BE";
@@ -14421,7 +14415,8 @@ static int convert(const char* from, const char* to,
                if (from_utf16) {
                    const char* tmp = space;
                    size_t tmp_length = 2;
-      if(iconv(cd, (ICONV_CONST char **) &tmp, &tmp_length, &out, &out_remain) == (size_t)-1) {
+                    if (iconv(cd, (ICONV_CONST char **)&tmp, &tmp_length, &out, &out_remain) ==
+                        (size_t)-1) {
                        if(errno == E2BIG) {
                            /* fall thru to the E2BIG case below */;
                        } else {
@@ -14481,8 +14476,7 @@ out:
    return ret;
 }
-int unacmaybefold_string(const char* charset,
+int unacmaybefold_string(const char* charset, const char* in, size_t in_length,
-             const char* in, size_t in_length,
                         char** outp, size_t* out_lengthp, int what)
 {
    /*
@@ -14520,26 +14514,20 @@ int unacmaybefold_string(const char* charset,
    return 0;
 }
-int unac_string(const char* charset,
+int unac_string(
-        const char* in, size_t in_length,
+    const char* charset, const char* in, size_t in_length, char** outp, size_t* out_lengthp)
-        char** outp, size_t* out_lengthp)
 {
-    return unacmaybefold_string(charset, in, in_length,
+    return unacmaybefold_string(charset, in, in_length, outp, out_lengthp, UNAC_UNAC);
-                outp, out_lengthp, UNAC_UNAC);
 }
-int unacfold_string(const char* charset,
+int unacfold_string(
-            const char* in, size_t in_length,
+    const char* charset, const char* in, size_t in_length, char** outp, size_t* out_lengthp)
-            char** outp, size_t* out_lengthp)
 {
-    return unacmaybefold_string(charset, in, in_length,
+    return unacmaybefold_string(charset, in, in_length, outp, out_lengthp, UNAC_UNACFOLD);
-                outp, out_lengthp, UNAC_UNACFOLD);
 }
-int fold_string(const char* charset,
+int fold_string(
-            const char* in, size_t in_length,
+    const char* charset, const char* in, size_t in_length, char** outp, size_t* out_lengthp)
-            char** outp, size_t* out_lengthp)
 {
-    return unacmaybefold_string(charset, in, in_length,
+    return unacmaybefold_string(charset, in, in_length, outp, out_lengthp, UNAC_FOLD);
-                outp, out_lengthp, UNAC_FOLD);
 }
 const char* unac_version(void)
@@ -14577,15 +14565,13 @@ void unac_set_except_translations(const char *spectrans)
    vector<string> vtrans;
    stringToStrings(spectrans, vtrans);
-    for (vector<string>::iterator it = vtrans.begin();
+    for (const auto& trans : vtrans) {
-     it != vtrans.end(); it++) {
        /* Convert the whole thing to utf-16be/le according to endianness */
        char *out = 0;
        size_t outsize;
-    if (convert("UTF-8", machinecoding,
+        if (convert("UTF-8", machinecoding, trans.c_str(), trans.size(), &out, &outsize) != 0 ||
-            it->c_str(), it->size(),
+            outsize < 2)
-            &out, &outsize) != 0 || outsize < 2)
            continue;
        /* The source char must be utf-16be as this is what we convert the