indents and readability

2021-11-02 12:05:04 +01:00 · 2021-11-02 12:05:04 +01:00 · a24fc7bacc
commit a24fc7bacc
parent 4cc0bc90b6
5 changed files with 715 additions and 749 deletions
--- a/src/common/textsplit.h
+++ b/src/common/textsplit.h
@ -163,6 +163,9 @@ private:
    // Current span. Might be jf.dockes@wanadoo.f
    std::string        m_span; 

+    // Words in span: byte positions of start and end of words in m_span. For example:
+    // 0   4    9
+    // bill@some.com -> (0,4) (5,9) (10,13)
    std::vector <std::pair<int, int> > m_words_in_span;

    // Current word: no punctuation at all in there. Byte offset
--- a/src/common/unacpp.cpp
+++ b/src/common/unacpp.cpp
@ -1,4 +1,4 @@
-/* Copyright (C) 2004-2019 J.F.Dockes
+/* Copyright (C) 2004-2021 J.F.Dockes
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
@ -29,7 +29,7 @@
 using namespace std;

 bool unacmaybefold(const string &in, string &out,
-           const char *encoding, UnacOp what)
+                   const char *encoding, UnacOp what)
 {
    char *cout = 0;
    size_t out_len;
@ -37,16 +37,13 @@ bool unacmaybefold(const string &in, string &out,

    switch (what) {
    case UNACOP_UNAC:
-        status = unac_string(encoding, in.c_str(), in.length(), 
-                             &cout, &out_len);
+        status = unac_string(encoding, in.c_str(), in.length(), &cout, &out_len);
        break;
    case UNACOP_UNACFOLD:
-        status = unacfold_string(encoding, in.c_str(), in.length(), 
-                                 &cout, &out_len);
+        status = unacfold_string(encoding, in.c_str(), in.length(), &cout, &out_len);
        break;
    case UNACOP_FOLD:
-        status = fold_string(encoding, in.c_str(), in.length(), 
-                             &cout, &out_len);
+        status = fold_string(encoding, in.c_str(), in.length(), &cout, &out_len);
        break;
    }

--- a/src/query/plaintorich.cpp
+++ b/src/query/plaintorich.cpp
@ -1,4 +1,4 @@
-/* Copyright (C) 2005 J.F.Dockes
+/* Copyright (C) 2005-2021 J.F.Dockes
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
@ -78,8 +78,7 @@ public:
        string dumb = term;
        if (o_index_stripchars) {
            if (!unacmaybefold(term, dumb, "UTF-8", UNACOP_UNACFOLD)) {
-                LOGINFO("PlainToRich::takeword: unac failed for [" << term <<
-                        "]\n");
+                LOGINFO("PlainToRich::takeword: unac failed for [" << term << "]\n");
                return true;
            }
        }
@ -173,30 +172,25 @@ static string activate_urls(const string& in)
 }
 #endif

-// Fix result text for display inside the gui text window.
+// Enrich result text for display inside the gui text window.
 //
-// We call overridden functions to output header data, beginnings and ends of
-// matches etc.
+// We call overridden functions to output header data, beginnings and ends of matches etc.
 //
-// If the input is text, we output the result in chunks, arranging not
-// to cut in the middle of a tag, which would confuse qtextedit. If
-// the input is html, the body is always a single output chunk.
-bool PlainToRich::plaintorich(const string& in, 
-                              list<string>& out, // Output chunk list
-                              const HighlightData& hdata,
-                              int chunksize)
+// If the input is text, we output the result in chunks, arranging not to cut in the middle of a
+// tag, which would confuse qtextedit. If the input is html, the body is always a single output
+// chunk.
+bool PlainToRich::plaintorich(
+    const string& in, list<string>& out, const HighlightData& hdata, int chunksize)
 {
    Chrono chron;
    bool ret = true;
    LOGDEB1("plaintorichich: in: [" << in << "]\n");

    m_hdata = &hdata;
-    // Compute the positions for the query terms.  We use the text
-    // splitter to break the text into words, and compare the words to
-    // the search terms,
+    // Compute the positions for the query terms.  We use the text splitter to break the text into
+    // words, and compare the words to the search terms.
    TextSplitPTR splitter(hdata);
-    // Note: the splitter returns the term locations in byte, not
-    // character, offsets.
+    // Note: the splitter returns the term locations in byte, not character, offsets.
    splitter.text_to_words(in);
    LOGDEB2("plaintorich: split done " << chron.millis() << " mS\n");
    // Compute the positions for NEAR and PHRASE groups.
@ -205,7 +199,7 @@ bool PlainToRich::plaintorich(const string& in,

    out.clear();
    out.push_back("");
-    list<string>::iterator olit = out.begin();
+    auto olit = out.begin();

    // Rich text output
    *olit = header();
@ -225,9 +219,10 @@ bool PlainToRich::plaintorich(const string& in,
    vector<GroupMatchEntry>::iterator tPosEnd = splitter.m_tboffs.end();

 #if 0
-    for (vector<pair<int, int> >::const_iterator it = splitter.m_tboffs.begin();
-         it != splitter.m_tboffs.end(); it++) {
-        LOGDEB2("plaintorich: region: " << it->first << " "<<it->second<< "\n");
+    for (const auto& region : splitter.m_tboffs) {
+        auto st = region.offs.first;
+        auto nd = region.offs.second;
+        LOGDEB0("plaintorich: region: " << st << " " << nd << "\n");
    }
 #endif

@ -276,8 +271,7 @@ bool PlainToRich::plaintorich(const string& in,
                }
                // Skip all highlight areas that would overlap this one
                int crend = tPosIt->offs.second;
-                while (tPosIt != splitter.m_tboffs.end() && 
-                       tPosIt->offs.first < crend)
+                while (tPosIt != splitter.m_tboffs.end() && tPosIt->offs.first < crend)
                    tPosIt++;
                inrcltag = 0;
            }
--- a/src/unac/unac.c
+++ b/src/unac/unac.c
@ -14109,7 +14109,7 @@ static int debug_level = UNAC_DEBUG_LOW;
 */
 static void debug_doprint_default(const char* message, void* data)
 {
-  fprintf(stderr, "%s", message);
+    fprintf(stderr, "%s", message);
 }

 /*
@ -14130,30 +14130,30 @@ static void* debug_appdata = (void*)0;
 static void debug_print(const char* message, ...)
 {
 #define UNAC_MAXIMUM_MESSAGE_SIZE 512
-  /*
-   * UNAC_MAXIMUM_MESSAGE_SIZE is supposedly enough but I
-   * do trust some vsnprintf implementations to be bugous.
-   */
-  char unac_message_buffer[UNAC_MAXIMUM_MESSAGE_SIZE+1] = { '\0' };
-  va_list args;
-  va_start(args, message);
-  if(vsnprintf(unac_message_buffer, UNAC_MAXIMUM_MESSAGE_SIZE, message, args) < 0) {
-    char tmp[UNAC_MAXIMUM_MESSAGE_SIZE];
-    sprintf(tmp, "[message larger than %d, truncated]", UNAC_MAXIMUM_MESSAGE_SIZE);
-    debug_doprint(tmp, debug_appdata);
-  }
-  va_end(args);
-  unac_message_buffer[UNAC_MAXIMUM_MESSAGE_SIZE] = '\0';
+    /*
+     * UNAC_MAXIMUM_MESSAGE_SIZE is supposedly enough but I
+     * do trust some vsnprintf implementations to be buggy.
+     */
+    char unac_message_buffer[UNAC_MAXIMUM_MESSAGE_SIZE+1] = { '\0' };
+    va_list args;
+    va_start(args, message);
+    if(vsnprintf(unac_message_buffer, UNAC_MAXIMUM_MESSAGE_SIZE, message, args) < 0) {
+        char tmp[UNAC_MAXIMUM_MESSAGE_SIZE];
+        sprintf(tmp, "[message larger than %d, truncated]", UNAC_MAXIMUM_MESSAGE_SIZE);
+        debug_doprint(tmp, debug_appdata);
+    }
+    va_end(args);
+    unac_message_buffer[UNAC_MAXIMUM_MESSAGE_SIZE] = '\0';

-  debug_doprint(unac_message_buffer, debug_appdata);
+    debug_doprint(unac_message_buffer, debug_appdata);
 }

 void unac_debug_callback(int level, unac_debug_print_t function, void* data)
 {
-  debug_level = level;
-  if(function)
-    debug_doprint = function;
-  debug_appdata = data;
+    debug_level = level;
+    if(function)
+        debug_doprint = function;
+    debug_appdata = data;
 }

 #else /* UNAC_DEBUG_AVAILABLE */
@ -14167,146 +14167,140 @@ void unac_debug_callback(int level, unac_debug_print_t function, void* data)
 #define UNAC_FOLD 2

 int unacmaybefold_string_utf16(const char* in, size_t in_length,
-                   char** outp, size_t* out_lengthp, int what)
+                               char** outp, size_t* out_lengthp, int what)
 {
-  char* out;
-  size_t out_size;
-  size_t out_length;
-  size_t i;
-
-  out_size = in_length > 0 ? in_length : 1024;
-
-  out = *outp;
-  out = (char*)realloc(out, out_size + 1);
-  if(out == 0) {
-      if(debug_level >= UNAC_DEBUG_LOW)
-      DEBUG("realloc %d bytes failed\n", out_size+1);
-      /* *outp is still valid. Let the caller free it */
-      return -1;
-  }
-
-  out_length = 0;
-
-  for(i = 0; i < in_length; i += 2) {
-    unsigned short c;
-    unsigned short* p;
-    size_t l;
-    size_t k;
-    c = (in[i] << 8) | (in[i + 1] & 0xff);
-    /*
-     * Lookup the tables for decomposition information
-     */
-#ifdef BUILDING_RECOLL
-    // Exception unac/fold values set by user. There should be 3 arrays for
-    // unac/fold/unac+fold. For now there is only one array, which used to
-    // be set for unac+fold, and is mostly or only used to prevent diacritics
-    // removal for some chars and languages where it should not be done.
-    // In conformance with current usage, but incorrectly, we do the following
-    // things for the special chars depending on the operation requested:
-    //   - unaccenting: do nothing (copy original char)
-    //   - unac+fold: use table
-    //   - fold: use the unicode data.
-    string trans;
-    if (what != UNAC_FOLD && except_trans.size() != 0 && 
-    is_except_char(c, trans)) {
-    if (what == UNAC_UNAC) {
-        // Unaccent only. Do nothing
-        p = 0;
-        l = 0;
-    } else {
-        // Has to be UNAC_UNACFOLD: use table
-        p = (unsigned short *)trans.c_str();
-        l = trans.size() / 2;
-    }
-    } else {
-#endif /* BUILDING_RECOLL */
-    unac_uf_char_utf16_(c, p, l, what)
-#ifdef BUILDING_RECOLL
-    }
-#endif /* BUILDING_RECOLL */
-
-    /*
-     * Explain what's done in great detail
-     */
-    if(debug_level == UNAC_DEBUG_HIGH) {
-      unsigned short index = unac_indexes[(c) >> UNAC_BLOCK_SHIFT];
-      unsigned char position = (c) & UNAC_BLOCK_MASK;
-      DEBUG("unac_data%d[%d] & unac_positions[%d][%d]: ", index, unac_positions[index][position], index, position+1);
-      DEBUG_APPEND("0x%04x => ", (c));
-      if(l == 0) {
-    DEBUG_APPEND("untouched\n");
-      } else {
+    char* out;
+    size_t out_size;
+    size_t out_length;
    size_t i;
-    for(i = 0; i < l; i++)
-      DEBUG_APPEND("0x%04x ", p[i]);
-    DEBUG_APPEND("\n");
-      }
+
+    out_size = in_length > 0 ? in_length : 1024;
+
+    out = *outp;
+    out = (char*)realloc(out, out_size + 1);
+    if(out == 0) {
+        if(debug_level >= UNAC_DEBUG_LOW)
+            DEBUG("realloc %d bytes failed\n", out_size+1);
+        /* *outp is still valid. Let the caller free it */
+        return -1;
    }

-    /*
-     * Make sure there is enough space to hold the decomposition
-     * Note: a previous realloc may have succeeded, which means that *outp 
-     * is not valid any more. We have to do the freeing and zero out *outp
-     */
-    if(out_length + ((l + 1) * 2) > out_size) {
-      char *saved;
-      out_size += ((l + 1) * 2) + 1024;
-      saved = out;
-      out = (char *)realloc(out, out_size);
-      if(out == 0) {
-    if(debug_level >= UNAC_DEBUG_LOW)
-      DEBUG("realloc %d bytes failed\n", out_size);
-        free(saved);
-    *outp = 0;
-    return -1;
-      }
-    }
-    if(l > 0) {
-    /* l == 1 && *p == 0 is the special case generated for
-       mark characters (which may be found if the input is
-       already in decomposed form. Output nothing */
-    if (l != 1 || *p != 0) {
+    out_length = 0;
+
+    for(i = 0; i < in_length; i += 2) {
+        unsigned short c;
+        unsigned short* p;
+        size_t l;
+        size_t k;
+        c = (in[i] << 8) | (in[i + 1] & 0xff);
        /*
-         * If there is a decomposition, insert it in the output 
-         * string.
+         * Lookup the tables for decomposition information
         */
-        for(k = 0; k < l; k++) {
-        out[out_length++] = (p[k] >> 8) & 0xff;
-        out[out_length++] = (p[k] & 0xff);
+#ifdef BUILDING_RECOLL
+        // Exception unac/fold values set by user. There should be 3 arrays for
+        // unac/fold/unac+fold. For now there is only one array, which used to
+        // be set for unac+fold, and is mostly or only used to prevent diacritics
+        // removal for some chars and languages where it should not be done.
+        // In conformance with current usage, but incorrectly, we do the following
+        // things for the special chars depending on the operation requested:
+        //   - unaccenting: do nothing (copy original char)
+        //   - unac+fold: use table
+        //   - fold: use the unicode data.
+        string trans;
+        if (what != UNAC_FOLD && except_trans.size() != 0 && 
+            is_except_char(c, trans)) {
+            if (what == UNAC_UNAC) {
+                // Unaccent only. Do nothing
+                p = 0;
+                l = 0;
+            } else {
+                // Has to be UNAC_UNACFOLD: use table
+                p = (unsigned short *)trans.c_str();
+                l = trans.size() / 2;
+            }
+        } else {
+#endif /* BUILDING_RECOLL */
+            unac_uf_char_utf16_(c, p, l, what)
+#ifdef BUILDING_RECOLL
+                }
+#endif /* BUILDING_RECOLL */
+
+        /*
+         * Explain what's done in great detail
+         */
+        if(debug_level == UNAC_DEBUG_HIGH) {
+            unsigned short index = unac_indexes[(c) >> UNAC_BLOCK_SHIFT];
+            unsigned char position = (c) & UNAC_BLOCK_MASK;
+            DEBUG("unac_data%d[%d] & unac_positions[%d][%d]: ", index, unac_positions[index][position], index, position+1);
+            DEBUG_APPEND("0x%04x => ", (c));
+            if(l == 0) {
+                DEBUG_APPEND("untouched\n");
+            } else {
+                size_t i;
+                for(i = 0; i < l; i++)
+                    DEBUG_APPEND("0x%04x ", p[i]);
+                DEBUG_APPEND("\n");
+            }
+        }
+
+        /*
+         * Make sure there is enough space to hold the decomposition
+         * Note: a previous realloc may have succeeded, which means that *outp 
+         * is not valid any more. We have to do the freeing and zero out *outp
+         */
+        if(out_length + ((l + 1) * 2) > out_size) {
+            char *saved;
+            out_size += ((l + 1) * 2) + 1024;
+            saved = out;
+            out = (char *)realloc(out, out_size);
+            if(out == 0) {
+                if(debug_level >= UNAC_DEBUG_LOW)
+                    DEBUG("realloc %d bytes failed\n", out_size);
+                free(saved);
+                *outp = 0;
+                return -1;
+            }
+        }
+        if(l > 0) {
+            /* l == 1 && *p == 0 is the special case generated for
+               mark characters (which may be found if the input is
+               already in decomposed form). Output nothing */
+            if (l != 1 || *p != 0) {
+                /*
+                 * If there is a decomposition, insert it in the output 
+                 * string.
+                 */
+                for(k = 0; k < l; k++) {
+                    out[out_length++] = (p[k] >> 8) & 0xff;
+                    out[out_length++] = (p[k] & 0xff);
+                }
+            }
+        } else {
+            /*
+             * If there is no decomposition leave it unchanged
+             */
+            out[out_length++] = in[i];
+            out[out_length++] = in[i + 1];
        }
    }
-    } else {
-      /*
-       * If there is no decomposition leave it unchanged
-       */
-      out[out_length++] = in[i];
-      out[out_length++] = in[i + 1];
-    }
-  }

-  *outp = out;
-  *out_lengthp = out_length;
-  (*outp)[*out_lengthp] = '\0';
+    *outp = out;
+    *out_lengthp = out_length;
+    (*outp)[*out_lengthp] = '\0';

-  return 0;
+    return 0;
 }
-int unac_string_utf16(const char* in, size_t in_length,
-              char** outp, size_t* out_lengthp)
+int unac_string_utf16(const char* in, size_t in_length, char** outp, size_t* out_lengthp)
 {
-    return unacmaybefold_string_utf16(in, in_length,
-                      outp, out_lengthp, UNAC_UNAC);
+    return unacmaybefold_string_utf16(in, in_length, outp, out_lengthp, UNAC_UNAC);
 }
-int unacfold_string_utf16(const char* in, size_t in_length,
-              char** outp, size_t* out_lengthp)
+int unacfold_string_utf16(const char* in, size_t in_length, char** outp, size_t* out_lengthp)
 {
-    return unacmaybefold_string_utf16(in, in_length,
-                      outp, out_lengthp, UNAC_UNACFOLD);
+    return unacmaybefold_string_utf16(in, in_length, outp, out_lengthp, UNAC_UNACFOLD);
 }
-int fold_string_utf16(const char* in, size_t in_length,
-              char** outp, size_t* out_lengthp)
+int fold_string_utf16(const char* in, size_t in_length, char** outp, size_t* out_lengthp)
 {
-    return unacmaybefold_string_utf16(in, in_length,
-                      outp, out_lengthp, UNAC_FOLD);
+    return unacmaybefold_string_utf16(in, in_length, outp, out_lengthp, UNAC_FOLD);
 }

 static const char *utf16be = "UTF-16BE";
@ -14322,229 +14316,223 @@ static std::mutex o_unac_mutex;
 * The out string is always null terminated.
 */
 static int convert(const char* from, const char* to,
-           const char* in, size_t in_length,
-           char** outp, size_t* out_lengthp)
+                   const char* in, size_t in_length,
+                   char** outp, size_t* out_lengthp)
 {
-  int ret = -1;
-  iconv_t cd;
-  char* out;
-  size_t out_remain;
-  size_t out_size;
-  char* out_base;
-  int from_utf16, from_utf8, to_utf16, to_utf8, u8tou16, u16tou8;
-  const char space[] = { 0x00, 0x20 };
+    int ret = -1;
+    iconv_t cd;
+    char* out;
+    size_t out_remain;
+    size_t out_size;
+    char* out_base;
+    int from_utf16, from_utf8, to_utf16, to_utf8, u8tou16, u16tou8;
+    const char space[] = { 0x00, 0x20 };

-  std::unique_lock<std::mutex> lock(o_unac_mutex);
+    std::unique_lock<std::mutex> lock(o_unac_mutex);

-  if (!strcmp(utf16be, from)) {
-      from_utf8 = 0;
-      from_utf16 = 1;
-  } else if (!strcasecmp("UTF-8", from)) {
-      from_utf8 = 1;
-      from_utf16 = 0;
-  } else {
-      from_utf8 = from_utf16 = 0;
-  }
-  if (!strcmp(utf16be, to)) {
-      to_utf8 = 0;
-      to_utf16 = 1;
-  } else if (!strcasecmp("UTF-8", to)) {
-      to_utf8 = 1;
-      to_utf16 = 0;
-  } else {
-      to_utf8 = to_utf16 = 0;
-  }
-  u16tou8 = from_utf16 && to_utf8;
-  u8tou16 = from_utf8 && to_utf16;
-
-  out_size = in_length > 0 ? in_length : 1024;
-
-  out = *outp;
-  out = (char *)realloc(out, out_size + 1);
-  if(out == 0) {
-      /* *outp still valid, no freeing */
-      if(debug_level >= UNAC_DEBUG_LOW)
-      DEBUG("realloc %d bytes failed\n", out_size+1);
-      goto out;
-  }
-
-  out_remain = out_size;
-  out_base = out;
-
-  if (u8tou16) {
-      if (u8tou16_cd == (iconv_t)-1) {
-      if((u8tou16_cd = iconv_open(to, from)) == (iconv_t)-1) {
-          goto out;
-      }
-      } else {
-      iconv(u8tou16_cd, 0, 0, 0, 0);
-      }
-      cd = u8tou16_cd;
-  } else if (u16tou8) {
-      if (u16tou8_cd == (iconv_t)-1) {
-      if((u16tou8_cd = iconv_open(to, from)) == (iconv_t)-1) {
-          goto out;
-      }
-      } else {
-      iconv(u16tou8_cd, 0, 0, 0, 0);
-      }
-      cd = u16tou8_cd;
-  } else {
-      if((cd = iconv_open(to, from)) == (iconv_t)-1) {
-      goto out;
-      }
-  }
-
-  do {
-    if(iconv(cd, (ICONV_CONST char **) &in, &in_length, &out, &out_remain) == (size_t)-1) {
-      switch(errno) {
-      case EILSEQ:
-    /*
-     * If an illegal sequence is found in the context of unac_string
-     * it means the unaccented version of a character contains
-     * a sequence that cannot be mapped back to the original charset.
-     * For instance, the 1/4 character in ISO-8859-1 is decomposed
-     * in three characters including the FRACTION SLASH (2044) which
-     * have no equivalent in the ISO-8859-1 map. One can argue that
-     * the conversions tables should map it to the regular / character
-     * or that a <compat> entry should be associated with it. 
-     *
-     * To cope with this situation, convert silently transform all
-     * illegal sequences (EILSEQ) into a SPACE character 0x0020.
-     *
-     * In the general conversion case this behaviour is not desirable.
-     * However, it is not the responsibility of this program to cope
-     * with inconsistencies of the Unicode description and a bug report
-     * should be submited to Unicode so that they can fix the problem.
-     * 
-     */
-    if(from_utf16) {
-      const char* tmp = space;
-      size_t tmp_length = 2;
-      if(iconv(cd, (ICONV_CONST char **) &tmp, &tmp_length, &out, &out_remain) == (size_t)-1) {
-              if(errno == E2BIG) {
-          /* fall thru to the E2BIG case below */;
-              } else {
-                  goto out;
-              }
-      } else {
-        /* The offending character was replaced by a SPACE, skip it. */
-        in += 2;
-        in_length -= 2;
-        /* And continue conversion. */
-        break;
-      }
+    if (!strcmp(utf16be, from)) {
+        from_utf8 = 0;
+        from_utf16 = 1;
+    } else if (!strcasecmp("UTF-8", from)) {
+        from_utf8 = 1;
+        from_utf16 = 0;
    } else {
-      goto out;
+        from_utf8 = from_utf16 = 0;
    }
-      case E2BIG:
-    {
-      /*
-       * The output does not fit in the current out buffer, enlarge it.
-       */
-      size_t length = out - out_base;
-      out_size *= 2;
-      {
-          char *saved = out_base;
-          /* +1 for null */
-          out_base = (char *)realloc(out_base, out_size + 1);
-          if (out_base == 0) {
-          /* *outp potentially not valid any more. Free here,
-           * and zero out */
-          if(debug_level >= UNAC_DEBUG_LOW)
-              DEBUG("realloc %d bytes failed\n", out_size+1);
-          free(saved);
-          *outp = 0;
-          goto out;
-          }
-      }
-      out = out_base + length;
-      out_remain = out_size - length;
+    if (!strcmp(utf16be, to)) {
+        to_utf8 = 0;
+        to_utf16 = 1;
+    } else if (!strcasecmp("UTF-8", to)) {
+        to_utf8 = 1;
+        to_utf16 = 0;
+    } else {
+        to_utf8 = to_utf16 = 0;
    }
-    break;
-      default:
-    goto out;
-    break;
-      }
+    u16tou8 = from_utf16 && to_utf8;
+    u8tou16 = from_utf8 && to_utf16;
+
+    out_size = in_length > 0 ? in_length : 1024;
+
+    out = *outp;
+    out = (char *)realloc(out, out_size + 1);
+    if(out == 0) {
+        /* *outp still valid, no freeing */
+        if(debug_level >= UNAC_DEBUG_LOW)
+            DEBUG("realloc %d bytes failed\n", out_size+1);
+        goto out;
    }
-  } while(in_length > 0);

-  if (!u8tou16 && !u16tou8)
-      iconv_close(cd);
+    out_remain = out_size;
+    out_base = out;

-  *outp = out_base;
-  *out_lengthp = out - out_base;
-  (*outp)[*out_lengthp] = '\0';
+    if (u8tou16) {
+        if (u8tou16_cd == (iconv_t)-1) {
+            if((u8tou16_cd = iconv_open(to, from)) == (iconv_t)-1) {
+                goto out;
+            }
+        } else {
+            iconv(u8tou16_cd, 0, 0, 0, 0);
+        }
+        cd = u8tou16_cd;
+    } else if (u16tou8) {
+        if (u16tou8_cd == (iconv_t)-1) {
+            if((u16tou8_cd = iconv_open(to, from)) == (iconv_t)-1) {
+                goto out;
+            }
+        } else {
+            iconv(u16tou8_cd, 0, 0, 0, 0);
+        }
+        cd = u16tou8_cd;
+    } else {
+        if((cd = iconv_open(to, from)) == (iconv_t)-1) {
+            goto out;
+        }
+    }

-  ret = 0;
+    do {
+        if(iconv(cd, (ICONV_CONST char **) &in, &in_length, &out, &out_remain) == (size_t)-1) {
+            switch(errno) {
+            case EILSEQ:
+                /*
+                 * If an illegal sequence is found in the context of unac_string
+                 * it means the unaccented version of a character contains
+                 * a sequence that cannot be mapped back to the original charset.
+                 * For instance, the 1/4 character in ISO-8859-1 is decomposed
+                 * in three characters including the FRACTION SLASH (2044) which
+                 * have no equivalent in the ISO-8859-1 map. One can argue that
+                 * the conversions tables should map it to the regular / character
+                 * or that a <compat> entry should be associated with it. 
+                 *
+                 * To cope with this situation, convert() silently transforms all
+                 * illegal sequences (EILSEQ) into a SPACE character 0x0020.
+                 *
+                 * In the general conversion case this behaviour is not desirable.
+                 * However, it is not the responsibility of this program to cope
+                 * with inconsistencies of the Unicode description and a bug report
+                 * should be submitted to Unicode so that they can fix the problem.
+                 * 
+                 */
+                if (from_utf16) {
+                    const char* tmp = space;
+                    size_t tmp_length = 2;
+                    if (iconv(cd, (ICONV_CONST char **)&tmp, &tmp_length, &out, &out_remain) ==
+                        (size_t)-1) {
+                        if(errno == E2BIG) {
+                            /* fall thru to the E2BIG case below */;
+                        } else {
+                            goto out;
+                        }
+                    } else {
+                        /* The offending character was replaced by a SPACE, skip it. */
+                        in += 2;
+                        in_length -= 2;
+                        /* And continue conversion. */
+                        break;
+                    }
+                } else {
+                    goto out;
+                }
+            case E2BIG:
+            {
+                /*
+                 * The output does not fit in the current out buffer, enlarge it.
+                 */
+                size_t length = out - out_base;
+                out_size *= 2;
+                {
+                    char *saved = out_base;
+                    /* +1 for null */
+                    out_base = (char *)realloc(out_base, out_size + 1);
+                    if (out_base == 0) {
+                        /* *outp potentially not valid any more. Free here,
+                         * and zero out */
+                        if(debug_level >= UNAC_DEBUG_LOW)
+                            DEBUG("realloc %d bytes failed\n", out_size+1);
+                        free(saved);
+                        *outp = 0;
+                        goto out;
+                    }
+                }
+                out = out_base + length;
+                out_remain = out_size - length;
+            }
+            break;
+            default:
+                goto out;
+                break;
+            }
+        }
+    } while(in_length > 0);
+
+    if (!u8tou16 && !u16tou8)
+        iconv_close(cd);
+
+    *outp = out_base;
+    *out_lengthp = out - out_base;
+    (*outp)[*out_lengthp] = '\0';
+
+    ret = 0;
 out:
-  return ret;
+    return ret;
 }

-int unacmaybefold_string(const char* charset,
-             const char* in, size_t in_length,
-             char** outp, size_t* out_lengthp, int what)
+int unacmaybefold_string(const char* charset, const char* in, size_t in_length,
+                         char** outp, size_t* out_lengthp, int what)
 {
    /*
     * When converting an empty string, skip everything but alloc the
     * buffer if NULL pointer.
     */
    if (in_length <= 0) {
-    if(!*outp) {
-        if ((*outp = (char*)malloc(32)) == 0)
-        return -1;
-    }
-    (*outp)[0] = '\0';
-    *out_lengthp = 0;
+        if(!*outp) {
+            if ((*outp = (char*)malloc(32)) == 0)
+                return -1;
+        }
+        (*outp)[0] = '\0';
+        *out_lengthp = 0;
    } else {
-    char* utf16 = 0;
-    size_t utf16_length = 0;
-    char* utf16_unaccented = 0;
-    size_t utf16_unaccented_length = 0;
+        char* utf16 = 0;
+        size_t utf16_length = 0;
+        char* utf16_unaccented = 0;
+        size_t utf16_unaccented_length = 0;
  
-    if(convert(charset, utf16be, in, in_length, &utf16, &utf16_length) < 0) {
-        return -1;
-    }
+        if(convert(charset, utf16be, in, in_length, &utf16, &utf16_length) < 0) {
+            return -1;
+        }

-    unacmaybefold_string_utf16(utf16, utf16_length, &utf16_unaccented, 
-                   &utf16_unaccented_length, what);
-    free(utf16);
+        unacmaybefold_string_utf16(utf16, utf16_length, &utf16_unaccented, 
+                                   &utf16_unaccented_length, what);
+        free(utf16);

-    if(convert(utf16be, charset, utf16_unaccented, utf16_unaccented_length, 
-           outp, out_lengthp) < 0) {
-        return -1;
-    }
-    free(utf16_unaccented);
+        if(convert(utf16be, charset, utf16_unaccented, utf16_unaccented_length, 
+                   outp, out_lengthp) < 0) {
+            return -1;
+        }
+        free(utf16_unaccented);
    }

    return 0;
 }

-int unac_string(const char* charset,
-        const char* in, size_t in_length,
-        char** outp, size_t* out_lengthp)
+int unac_string(
+    const char* charset, const char* in, size_t in_length, char** outp, size_t* out_lengthp)
 {
-    return unacmaybefold_string(charset, in, in_length,
-                outp, out_lengthp, UNAC_UNAC);
+    return unacmaybefold_string(charset, in, in_length, outp, out_lengthp, UNAC_UNAC);
 }
-int unacfold_string(const char* charset,
-            const char* in, size_t in_length,
-            char** outp, size_t* out_lengthp)
+int unacfold_string(
+    const char* charset, const char* in, size_t in_length, char** outp, size_t* out_lengthp)
 {
-    return unacmaybefold_string(charset, in, in_length,
-                outp, out_lengthp, UNAC_UNACFOLD);
+    return unacmaybefold_string(charset, in, in_length, outp, out_lengthp, UNAC_UNACFOLD);
 }
-int fold_string(const char* charset,
-            const char* in, size_t in_length,
-            char** outp, size_t* out_lengthp)
+int fold_string(
+    const char* charset, const char* in, size_t in_length, char** outp, size_t* out_lengthp)
 {
-    return unacmaybefold_string(charset, in, in_length,
-                outp, out_lengthp, UNAC_FOLD);
+    return unacmaybefold_string(charset, in, in_length, outp, out_lengthp, UNAC_FOLD);
 }

 const char* unac_version(void)
 {
-  return UNAC_VERSION;
+    return UNAC_VERSION;
 }

 #ifdef BUILDING_RECOLL
@ -14552,7 +14540,7 @@ void unac_set_except_translations(const char *spectrans)
 {
    except_trans.clear();
    if (!spectrans || !spectrans[0])
-    return;
+        return;

    // The translation tables out of Unicode are in machine byte order (we
    // just let the compiler read the values). 
@ -14563,41 +14551,39 @@ void unac_set_except_translations(const char *spectrans)
    static const char *machinecoding = 0;
    bool littleendian = true;
    if (machinecoding == 0) {
-    const char*  charshort = "\001\002";
-    short *ip = (short *)charshort;
-    if (*ip == 0x0102) {
-        littleendian = false;
-        machinecoding = "UTF-16BE";
-    } else {
-        littleendian = true;
-        machinecoding = "UTF-16LE";
-    }
+        const char*  charshort = "\001\002";
+        short *ip = (short *)charshort;
+        if (*ip == 0x0102) {
+            littleendian = false;
+            machinecoding = "UTF-16BE";
+        } else {
+            littleendian = true;
+            machinecoding = "UTF-16LE";
+        }
    }

    vector<string> vtrans;
    stringToStrings(spectrans, vtrans);

-    for (vector<string>::iterator it = vtrans.begin();
-     it != vtrans.end(); it++) {
+    for (const auto& trans : vtrans) {

-    /* Convert the whole thing to utf-16be/le according to endianness */
-    char *out = 0;
-    size_t outsize;
-    if (convert("UTF-8", machinecoding,
-            it->c_str(), it->size(),
-            &out, &outsize) != 0 || outsize < 2)
-        continue;
+        /* Convert the whole thing to utf-16be/le according to endianness */
+        char *out = 0;
+        size_t outsize;
+        if (convert("UTF-8", machinecoding, trans.c_str(), trans.size(), &out, &outsize) != 0 ||
+            outsize < 2)
+            continue;

-    /* The source char must be utf-16be as this is what we convert the
-       input text to for internal processing */
-    unsigned short ch;
-    if (littleendian)
-        ch = (out[1] << 8) | (out[0] & 0xff);
-    else
-        ch = (out[0] << 8) | (out[1] & 0xff);
+        /* The source char must be utf-16be as this is what we convert the
+           input text to for internal processing */
+        unsigned short ch;
+        if (littleendian)
+            ch = (out[1] << 8) | (out[0] & 0xff);
+        else
+            ch = (out[0] << 8) | (out[1] & 0xff);

-    except_trans[ch] = string((const char *)(out + 2), outsize-2);
-    free(out);
+        except_trans[ch] = string((const char *)(out + 2), outsize-2);
+        free(out);
    }
 }
 #endif /* BUILDING_RECOLL */
--- a/unac/unac.c
+++ b/unac/unac.c
@ -13,7 +13,7 @@
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

 #ifdef BUILDING_RECOLL
@ -14109,7 +14109,7 @@ static int debug_level = UNAC_DEBUG_LOW;
 */
 static void debug_doprint_default(const char* message, void* data)
 {
-  fprintf(stderr, "%s", message);
+    fprintf(stderr, "%s", message);
 }

 /*
@ -14130,30 +14130,30 @@ static void* debug_appdata = (void*)0;
 static void debug_print(const char* message, ...)
 {
 #define UNAC_MAXIMUM_MESSAGE_SIZE 512
-  /*
-   * UNAC_MAXIMUM_MESSAGE_SIZE is supposedly enough but I
-   * do trust some vsnprintf implementations to be bugous.
-   */
-  char unac_message_buffer[UNAC_MAXIMUM_MESSAGE_SIZE+1] = { '\0' };
-  va_list args;
-  va_start(args, message);
-  if(vsnprintf(unac_message_buffer, UNAC_MAXIMUM_MESSAGE_SIZE, message, args) < 0) {
-    char tmp[UNAC_MAXIMUM_MESSAGE_SIZE];
-    sprintf(tmp, "[message larger than %d, truncated]", UNAC_MAXIMUM_MESSAGE_SIZE);
-    debug_doprint(tmp, debug_appdata);
-  }
-  va_end(args);
-  unac_message_buffer[UNAC_MAXIMUM_MESSAGE_SIZE] = '\0';
+    /*
+     * UNAC_MAXIMUM_MESSAGE_SIZE is supposedly enough but I
+     * do not trust some vsnprintf implementations, which may be buggy.
+     */
+    char unac_message_buffer[UNAC_MAXIMUM_MESSAGE_SIZE+1] = { '\0' };
+    va_list args;
+    va_start(args, message);
+    if(vsnprintf(unac_message_buffer, UNAC_MAXIMUM_MESSAGE_SIZE, message, args) < 0) {
+        char tmp[UNAC_MAXIMUM_MESSAGE_SIZE];
+        sprintf(tmp, "[message larger than %d, truncated]", UNAC_MAXIMUM_MESSAGE_SIZE);
+        debug_doprint(tmp, debug_appdata);
+    }
+    va_end(args);
+    unac_message_buffer[UNAC_MAXIMUM_MESSAGE_SIZE] = '\0';

-  debug_doprint(unac_message_buffer, debug_appdata);
+    debug_doprint(unac_message_buffer, debug_appdata);
 }

 void unac_debug_callback(int level, unac_debug_print_t function, void* data)
 {
-  debug_level = level;
-  if(function)
-    debug_doprint = function;
-  debug_appdata = data;
+    debug_level = level;
+    if(function)
+        debug_doprint = function;
+    debug_appdata = data;
 }

 #else /* UNAC_DEBUG_AVAILABLE */
@ -14167,146 +14167,140 @@ void unac_debug_callback(int level, unac_debug_print_t function, void* data)
 #define UNAC_FOLD 2

 int unacmaybefold_string_utf16(const char* in, size_t in_length,
-                   char** outp, size_t* out_lengthp, int what)
+                               char** outp, size_t* out_lengthp, int what)
 {
-  char* out;
-  size_t out_size;
-  size_t out_length;
-  size_t i;
-
-  out_size = in_length > 0 ? in_length : 1024;
-
-  out = *outp;
-  out = (char*)realloc(out, out_size + 1);
-  if(out == 0) {
-      if(debug_level >= UNAC_DEBUG_LOW)
-      DEBUG("realloc %d bytes failed\n", out_size+1);
-      /* *outp is still valid. Let the caller free it */
-      return -1;
-  }
-
-  out_length = 0;
-
-  for(i = 0; i < in_length; i += 2) {
-    unsigned short c;
-    unsigned short* p;
-    size_t l;
-    size_t k;
-    c = (in[i] << 8) | (in[i + 1] & 0xff);
-    /*
-     * Lookup the tables for decomposition information
-     */
-#ifdef BUILDING_RECOLL
-    // Exception unac/fold values set by user. There should be 3 arrays for
-    // unac/fold/unac+fold. For now there is only one array, which used to
-    // be set for unac+fold, and is mostly or only used to prevent diacritics
-    // removal for some chars and languages where it should not be done.
-    // In conformance with current usage, but incorrectly, we do the following
-    // things for the special chars depending on the operation requested:
-    //   - unaccenting: do nothing (copy original char)
-    //   - unac+fold: use table
-    //   - fold: use the unicode data.
-    string trans;
-    if (what != UNAC_FOLD && except_trans.size() != 0 && 
-    is_except_char(c, trans)) {
-    if (what == UNAC_UNAC) {
-        // Unaccent only. Do nothing
-        p = 0;
-        l = 0;
-    } else {
-        // Has to be UNAC_UNACFOLD: use table
-        p = (unsigned short *)trans.c_str();
-        l = trans.size() / 2;
-    }
-    } else {
-#endif /* BUILDING_RECOLL */
-    unac_uf_char_utf16_(c, p, l, what)
-#ifdef BUILDING_RECOLL
-    }
-#endif /* BUILDING_RECOLL */
-
-    /*
-     * Explain what's done in great detail
-     */
-    if(debug_level == UNAC_DEBUG_HIGH) {
-      unsigned short index = unac_indexes[(c) >> UNAC_BLOCK_SHIFT];
-      unsigned char position = (c) & UNAC_BLOCK_MASK;
-      DEBUG("unac_data%d[%d] & unac_positions[%d][%d]: ", index, unac_positions[index][position], index, position+1);
-      DEBUG_APPEND("0x%04x => ", (c));
-      if(l == 0) {
-    DEBUG_APPEND("untouched\n");
-      } else {
+    char* out;
+    size_t out_size;
+    size_t out_length;
    size_t i;
-    for(i = 0; i < l; i++)
-      DEBUG_APPEND("0x%04x ", p[i]);
-    DEBUG_APPEND("\n");
-      }
+
+    out_size = in_length > 0 ? in_length : 1024;
+
+    out = *outp;
+    out = (char*)realloc(out, out_size + 1);
+    if(out == 0) {
+        if(debug_level >= UNAC_DEBUG_LOW)
+            DEBUG("realloc %d bytes failed\n", out_size+1);
+        /* *outp is still valid. Let the caller free it */
+        return -1;
    }

-    /*
-     * Make sure there is enough space to hold the decomposition
-     * Note: a previous realloc may have succeeded, which means that *outp 
-     * is not valid any more. We have to do the freeing and zero out *outp
-     */
-    if(out_length + ((l + 1) * 2) > out_size) {
-      char *saved;
-      out_size += ((l + 1) * 2) + 1024;
-      saved = out;
-      out = (char *)realloc(out, out_size);
-      if(out == 0) {
-    if(debug_level >= UNAC_DEBUG_LOW)
-      DEBUG("realloc %d bytes failed\n", out_size);
-        free(saved);
-    *outp = 0;
-    return -1;
-      }
-    }
-    if(l > 0) {
-    /* l == 1 && *p == 0 is the special case generated for
-       mark characters (which may be found if the input is
-       already in decomposed form. Output nothing */
-    if (l != 1 || *p != 0) {
+    out_length = 0;
+
+    for(i = 0; i < in_length; i += 2) {
+        unsigned short c;
+        unsigned short* p;
+        size_t l;
+        size_t k;
+        c = (in[i] << 8) | (in[i + 1] & 0xff);
        /*
-         * If there is a decomposition, insert it in the output 
-         * string.
+         * Lookup the tables for decomposition information
         */
-        for(k = 0; k < l; k++) {
-        out[out_length++] = (p[k] >> 8) & 0xff;
-        out[out_length++] = (p[k] & 0xff);
+#ifdef BUILDING_RECOLL
+        // Exception unac/fold values set by user. There should be 3 arrays for
+        // unac/fold/unac+fold. For now there is only one array, which used to
+        // be set for unac+fold, and is mostly or only used to prevent diacritics
+        // removal for some chars and languages where it should not be done.
+        // In conformance with current usage, but incorrectly, we do the following
+        // things for the special chars depending on the operation requested:
+        //   - unaccenting: do nothing (copy original char)
+        //   - unac+fold: use table
+        //   - fold: use the unicode data.
+        string trans;
+        if (what != UNAC_FOLD && except_trans.size() != 0 && 
+            is_except_char(c, trans)) {
+            if (what == UNAC_UNAC) {
+                // Unaccent only. Do nothing
+                p = 0;
+                l = 0;
+            } else {
+                // Has to be UNAC_UNACFOLD: use table
+                p = (unsigned short *)trans.c_str();
+                l = trans.size() / 2;
+            }
+        } else {
+#endif /* BUILDING_RECOLL */
+            unac_uf_char_utf16_(c, p, l, what)
+#ifdef BUILDING_RECOLL
+                }
+#endif /* BUILDING_RECOLL */
+
+        /*
+         * Explain what's done in great detail
+         */
+        if(debug_level == UNAC_DEBUG_HIGH) {
+            unsigned short index = unac_indexes[(c) >> UNAC_BLOCK_SHIFT];
+            unsigned char position = (c) & UNAC_BLOCK_MASK;
+            DEBUG("unac_data%d[%d] & unac_positions[%d][%d]: ", index, unac_positions[index][position], index, position+1);
+            DEBUG_APPEND("0x%04x => ", (c));
+            if(l == 0) {
+                DEBUG_APPEND("untouched\n");
+            } else {
+                size_t i;
+                for(i = 0; i < l; i++)
+                    DEBUG_APPEND("0x%04x ", p[i]);
+                DEBUG_APPEND("\n");
+            }
+        }
+
+        /*
+         * Make sure there is enough space to hold the decomposition
+         * Note: a previous realloc may have succeeded, which means that *outp 
+         * is not valid any more. We have to do the freeing and zero out *outp
+         */
+        if(out_length + ((l + 1) * 2) > out_size) {
+            char *saved;
+            out_size += ((l + 1) * 2) + 1024;
+            saved = out;
+            out = (char *)realloc(out, out_size);
+            if(out == 0) {
+                if(debug_level >= UNAC_DEBUG_LOW)
+                    DEBUG("realloc %d bytes failed\n", out_size);
+                free(saved);
+                *outp = 0;
+                return -1;
+            }
+        }
+        if(l > 0) {
+            /* l == 1 && *p == 0 is the special case generated for
+               mark characters (which may be found if the input is
+               already in decomposed form). Output nothing */
+            if (l != 1 || *p != 0) {
+                /*
+                 * If there is a decomposition, insert it in the output 
+                 * string.
+                 */
+                for(k = 0; k < l; k++) {
+                    out[out_length++] = (p[k] >> 8) & 0xff;
+                    out[out_length++] = (p[k] & 0xff);
+                }
+            }
+        } else {
+            /*
+             * If there is no decomposition leave it unchanged
+             */
+            out[out_length++] = in[i];
+            out[out_length++] = in[i + 1];
        }
    }
-    } else {
-      /*
-       * If there is no decomposition leave it unchanged
-       */
-      out[out_length++] = in[i];
-      out[out_length++] = in[i + 1];
-    }
-  }

-  *outp = out;
-  *out_lengthp = out_length;
-  (*outp)[*out_lengthp] = '\0';
+    *outp = out;
+    *out_lengthp = out_length;
+    (*outp)[*out_lengthp] = '\0';

-  return 0;
+    return 0;
 }
-int unac_string_utf16(const char* in, size_t in_length,
-              char** outp, size_t* out_lengthp)
+int unac_string_utf16(const char* in, size_t in_length, char** outp, size_t* out_lengthp)
 {
-    return unacmaybefold_string_utf16(in, in_length,
-                      outp, out_lengthp, UNAC_UNAC);
+    return unacmaybefold_string_utf16(in, in_length, outp, out_lengthp, UNAC_UNAC);
 }
-int unacfold_string_utf16(const char* in, size_t in_length,
-              char** outp, size_t* out_lengthp)
+int unacfold_string_utf16(const char* in, size_t in_length, char** outp, size_t* out_lengthp)
 {
-    return unacmaybefold_string_utf16(in, in_length,
-                      outp, out_lengthp, UNAC_UNACFOLD);
+    return unacmaybefold_string_utf16(in, in_length, outp, out_lengthp, UNAC_UNACFOLD);
 }
-int fold_string_utf16(const char* in, size_t in_length,
-              char** outp, size_t* out_lengthp)
+int fold_string_utf16(const char* in, size_t in_length, char** outp, size_t* out_lengthp)
 {
-    return unacmaybefold_string_utf16(in, in_length,
-                      outp, out_lengthp, UNAC_FOLD);
+    return unacmaybefold_string_utf16(in, in_length, outp, out_lengthp, UNAC_FOLD);
 }

 static const char *utf16be = "UTF-16BE";
@ -14322,229 +14316,223 @@ static std::mutex o_unac_mutex;
 * The out string is always null terminated.
 */
 static int convert(const char* from, const char* to,
-           const char* in, size_t in_length,
-           char** outp, size_t* out_lengthp)
+                   const char* in, size_t in_length,
+                   char** outp, size_t* out_lengthp)
 {
-  int ret = -1;
-  iconv_t cd;
-  char* out;
-  size_t out_remain;
-  size_t out_size;
-  char* out_base;
-  int from_utf16, from_utf8, to_utf16, to_utf8, u8tou16, u16tou8;
-  const char space[] = { 0x00, 0x20 };
+    int ret = -1;
+    iconv_t cd;
+    char* out;
+    size_t out_remain;
+    size_t out_size;
+    char* out_base;
+    int from_utf16, from_utf8, to_utf16, to_utf8, u8tou16, u16tou8;
+    const char space[] = { 0x00, 0x20 };

-  std::unique_lock<std::mutex> lock(o_unac_mutex);
+    std::unique_lock<std::mutex> lock(o_unac_mutex);

-  if (!strcmp(utf16be, from)) {
-      from_utf8 = 0;
-      from_utf16 = 1;
-  } else if (!strcasecmp("UTF-8", from)) {
-      from_utf8 = 1;
-      from_utf16 = 0;
-  } else {
-      from_utf8 = from_utf16 = 0;
-  }
-  if (!strcmp(utf16be, to)) {
-      to_utf8 = 0;
-      to_utf16 = 1;
-  } else if (!strcasecmp("UTF-8", to)) {
-      to_utf8 = 1;
-      to_utf16 = 0;
-  } else {
-      to_utf8 = to_utf16 = 0;
-  }
-  u16tou8 = from_utf16 && to_utf8;
-  u8tou16 = from_utf8 && to_utf16;
-
-  out_size = in_length > 0 ? in_length : 1024;
-
-  out = *outp;
-  out = (char *)realloc(out, out_size + 1);
-  if(out == 0) {
-      /* *outp still valid, no freeing */
-      if(debug_level >= UNAC_DEBUG_LOW)
-      DEBUG("realloc %d bytes failed\n", out_size+1);
-      goto out;
-  }
-
-  out_remain = out_size;
-  out_base = out;
-
-  if (u8tou16) {
-      if (u8tou16_cd == (iconv_t)-1) {
-      if((u8tou16_cd = iconv_open(to, from)) == (iconv_t)-1) {
-          goto out;
-      }
-      } else {
-      iconv(u8tou16_cd, 0, 0, 0, 0);
-      }
-      cd = u8tou16_cd;
-  } else if (u16tou8) {
-      if (u16tou8_cd == (iconv_t)-1) {
-      if((u16tou8_cd = iconv_open(to, from)) == (iconv_t)-1) {
-          goto out;
-      }
-      } else {
-      iconv(u16tou8_cd, 0, 0, 0, 0);
-      }
-      cd = u16tou8_cd;
-  } else {
-      if((cd = iconv_open(to, from)) == (iconv_t)-1) {
-      goto out;
-      }
-  }
-
-  do {
-    if(iconv(cd, (ICONV_CONST char **) &in, &in_length, &out, &out_remain) == (size_t)-1) {
-      switch(errno) {
-      case EILSEQ:
-    /*
-     * If an illegal sequence is found in the context of unac_string
-     * it means the unaccented version of a character contains
-     * a sequence that cannot be mapped back to the original charset.
-     * For instance, the 1/4 character in ISO-8859-1 is decomposed
-     * in three characters including the FRACTION SLASH (2044) which
-     * have no equivalent in the ISO-8859-1 map. One can argue that
-     * the conversions tables should map it to the regular / character
-     * or that a <compat> entry should be associated with it. 
-     *
-     * To cope with this situation, convert silently transform all
-     * illegal sequences (EILSEQ) into a SPACE character 0x0020.
-     *
-     * In the general conversion case this behaviour is not desirable.
-     * However, it is not the responsibility of this program to cope
-     * with inconsistencies of the Unicode description and a bug report
-     * should be submited to Unicode so that they can fix the problem.
-     * 
-     */
-    if(from_utf16) {
-      const char* tmp = space;
-      size_t tmp_length = 2;
-      if(iconv(cd, (ICONV_CONST char **) &tmp, &tmp_length, &out, &out_remain) == (size_t)-1) {
-              if(errno == E2BIG) {
-          /* fall thru to the E2BIG case below */;
-              } else {
-                  goto out;
-              }
-      } else {
-        /* The offending character was replaced by a SPACE, skip it. */
-        in += 2;
-        in_length -= 2;
-        /* And continue conversion. */
-        break;
-      }
+    if (!strcmp(utf16be, from)) {
+        from_utf8 = 0;
+        from_utf16 = 1;
+    } else if (!strcasecmp("UTF-8", from)) {
+        from_utf8 = 1;
+        from_utf16 = 0;
    } else {
-      goto out;
+        from_utf8 = from_utf16 = 0;
    }
-      case E2BIG:
-    {
-      /*
-       * The output does not fit in the current out buffer, enlarge it.
-       */
-      size_t length = out - out_base;
-      out_size *= 2;
-      {
-          char *saved = out_base;
-          /* +1 for null */
-          out_base = (char *)realloc(out_base, out_size + 1);
-          if (out_base == 0) {
-          /* *outp potentially not valid any more. Free here,
-           * and zero out */
-          if(debug_level >= UNAC_DEBUG_LOW)
-              DEBUG("realloc %d bytes failed\n", out_size+1);
-          free(saved);
-          *outp = 0;
-          goto out;
-          }
-      }
-      out = out_base + length;
-      out_remain = out_size - length;
+    if (!strcmp(utf16be, to)) {
+        to_utf8 = 0;
+        to_utf16 = 1;
+    } else if (!strcasecmp("UTF-8", to)) {
+        to_utf8 = 1;
+        to_utf16 = 0;
+    } else {
+        to_utf8 = to_utf16 = 0;
    }
-    break;
-      default:
-    goto out;
-    break;
-      }
+    u16tou8 = from_utf16 && to_utf8;
+    u8tou16 = from_utf8 && to_utf16;
+
+    out_size = in_length > 0 ? in_length : 1024;
+
+    out = *outp;
+    out = (char *)realloc(out, out_size + 1);
+    if(out == 0) {
+        /* *outp still valid, no freeing */
+        if(debug_level >= UNAC_DEBUG_LOW)
+            DEBUG("realloc %d bytes failed\n", out_size+1);
+        goto out;
    }
-  } while(in_length > 0);

-  if (!u8tou16 && !u16tou8)
-      iconv_close(cd);
+    out_remain = out_size;
+    out_base = out;

-  *outp = out_base;
-  *out_lengthp = out - out_base;
-  (*outp)[*out_lengthp] = '\0';
+    if (u8tou16) {
+        if (u8tou16_cd == (iconv_t)-1) {
+            if((u8tou16_cd = iconv_open(to, from)) == (iconv_t)-1) {
+                goto out;
+            }
+        } else {
+            iconv(u8tou16_cd, 0, 0, 0, 0);
+        }
+        cd = u8tou16_cd;
+    } else if (u16tou8) {
+        if (u16tou8_cd == (iconv_t)-1) {
+            if((u16tou8_cd = iconv_open(to, from)) == (iconv_t)-1) {
+                goto out;
+            }
+        } else {
+            iconv(u16tou8_cd, 0, 0, 0, 0);
+        }
+        cd = u16tou8_cd;
+    } else {
+        if((cd = iconv_open(to, from)) == (iconv_t)-1) {
+            goto out;
+        }
+    }

-  ret = 0;
+    do {
+        if(iconv(cd, (ICONV_CONST char **) &in, &in_length, &out, &out_remain) == (size_t)-1) {
+            switch(errno) {
+            case EILSEQ:
+                /*
+                 * If an illegal sequence is found in the context of unac_string
+                 * it means the unaccented version of a character contains
+                 * a sequence that cannot be mapped back to the original charset.
+                 * For instance, the 1/4 character in ISO-8859-1 is decomposed
+                 * in three characters including the FRACTION SLASH (2044) which
+                 * have no equivalent in the ISO-8859-1 map. One can argue that
+                 * the conversions tables should map it to the regular / character
+                 * or that a <compat> entry should be associated with it. 
+                 *
+                 * To cope with this situation, convert() silently transforms all
+                 * illegal sequences (EILSEQ) into a SPACE character 0x0020.
+                 *
+                 * In the general conversion case this behaviour is not desirable.
+                 * However, it is not the responsibility of this program to cope
+                 * with inconsistencies of the Unicode description and a bug report
+                 * should be submitted to Unicode so that they can fix the problem.
+                 * 
+                 */
+                if (from_utf16) {
+                    const char* tmp = space;
+                    size_t tmp_length = 2;
+                    if (iconv(cd, (ICONV_CONST char **)&tmp, &tmp_length, &out, &out_remain) ==
+                        (size_t)-1) {
+                        if(errno == E2BIG) {
+                            /* fall thru to the E2BIG case below */;
+                        } else {
+                            goto out;
+                        }
+                    } else {
+                        /* The offending character was replaced by a SPACE, skip it. */
+                        in += 2;
+                        in_length -= 2;
+                        /* And continue conversion. */
+                        break;
+                    }
+                } else {
+                    goto out;
+                }
+            case E2BIG:
+            {
+                /*
+                 * The output does not fit in the current out buffer, enlarge it.
+                 */
+                size_t length = out - out_base;
+                out_size *= 2;
+                {
+                    char *saved = out_base;
+                    /* +1 for null */
+                    out_base = (char *)realloc(out_base, out_size + 1);
+                    if (out_base == 0) {
+                        /* *outp potentially not valid any more. Free here,
+                         * and zero out */
+                        if(debug_level >= UNAC_DEBUG_LOW)
+                            DEBUG("realloc %d bytes failed\n", out_size+1);
+                        free(saved);
+                        *outp = 0;
+                        goto out;
+                    }
+                }
+                out = out_base + length;
+                out_remain = out_size - length;
+            }
+            break;
+            default:
+                goto out;
+                break;
+            }
+        }
+    } while(in_length > 0);
+
+    if (!u8tou16 && !u16tou8)
+        iconv_close(cd);
+
+    *outp = out_base;
+    *out_lengthp = out - out_base;
+    (*outp)[*out_lengthp] = '\0';
+
+    ret = 0;
 out:
-  return ret;
+    return ret;
 }

-int unacmaybefold_string(const char* charset,
-             const char* in, size_t in_length,
-             char** outp, size_t* out_lengthp, int what)
+int unacmaybefold_string(const char* charset, const char* in, size_t in_length,
+                         char** outp, size_t* out_lengthp, int what)
 {
    /*
     * When converting an empty string, skip everything but alloc the
     * buffer if NULL pointer.
     */
    if (in_length <= 0) {
-    if(!*outp) {
-        if ((*outp = (char*)malloc(32)) == 0)
-        return -1;
-    }
-    (*outp)[0] = '\0';
-    *out_lengthp = 0;
+        if(!*outp) {
+            if ((*outp = (char*)malloc(32)) == 0)
+                return -1;
+        }
+        (*outp)[0] = '\0';
+        *out_lengthp = 0;
    } else {
-    char* utf16 = 0;
-    size_t utf16_length = 0;
-    char* utf16_unaccented = 0;
-    size_t utf16_unaccented_length = 0;
+        char* utf16 = 0;
+        size_t utf16_length = 0;
+        char* utf16_unaccented = 0;
+        size_t utf16_unaccented_length = 0;
  
-    if(convert(charset, utf16be, in, in_length, &utf16, &utf16_length) < 0) {
-        return -1;
-    }
+        if(convert(charset, utf16be, in, in_length, &utf16, &utf16_length) < 0) {
+            return -1;
+        }

-    unacmaybefold_string_utf16(utf16, utf16_length, &utf16_unaccented, 
-                   &utf16_unaccented_length, what);
-    free(utf16);
+        unacmaybefold_string_utf16(utf16, utf16_length, &utf16_unaccented, 
+                                   &utf16_unaccented_length, what);
+        free(utf16);

-    if(convert(utf16be, charset, utf16_unaccented, utf16_unaccented_length, 
-           outp, out_lengthp) < 0) {
-        return -1;
-    }
-    free(utf16_unaccented);
+        if(convert(utf16be, charset, utf16_unaccented, utf16_unaccented_length, 
+                   outp, out_lengthp) < 0) {
+            return -1;
+        }
+        free(utf16_unaccented);
    }

    return 0;
 }

-int unac_string(const char* charset,
-        const char* in, size_t in_length,
-        char** outp, size_t* out_lengthp)
+int unac_string(
+    const char* charset, const char* in, size_t in_length, char** outp, size_t* out_lengthp)
 {
-    return unacmaybefold_string(charset, in, in_length,
-                outp, out_lengthp, UNAC_UNAC);
+    return unacmaybefold_string(charset, in, in_length, outp, out_lengthp, UNAC_UNAC);
 }
-int unacfold_string(const char* charset,
-            const char* in, size_t in_length,
-            char** outp, size_t* out_lengthp)
+int unacfold_string(
+    const char* charset, const char* in, size_t in_length, char** outp, size_t* out_lengthp)
 {
-    return unacmaybefold_string(charset, in, in_length,
-                outp, out_lengthp, UNAC_UNACFOLD);
+    return unacmaybefold_string(charset, in, in_length, outp, out_lengthp, UNAC_UNACFOLD);
 }
-int fold_string(const char* charset,
-            const char* in, size_t in_length,
-            char** outp, size_t* out_lengthp)
+int fold_string(
+    const char* charset, const char* in, size_t in_length, char** outp, size_t* out_lengthp)
 {
-    return unacmaybefold_string(charset, in, in_length,
-                outp, out_lengthp, UNAC_FOLD);
+    return unacmaybefold_string(charset, in, in_length, outp, out_lengthp, UNAC_FOLD);
 }

 const char* unac_version(void)
 {
-  return UNAC_VERSION;
+    return UNAC_VERSION;
 }

 #ifdef BUILDING_RECOLL
@ -14552,7 +14540,7 @@ void unac_set_except_translations(const char *spectrans)
 {
    except_trans.clear();
    if (!spectrans || !spectrans[0])
-    return;
+        return;

    // The translation tables out of Unicode are in machine byte order (we
    // just let the compiler read the values). 
@ -14563,41 +14551,39 @@ void unac_set_except_translations(const char *spectrans)
    static const char *machinecoding = 0;
    bool littleendian = true;
    if (machinecoding == 0) {
-    const char*  charshort = "\001\002";
-    short *ip = (short *)charshort;
-    if (*ip == 0x0102) {
-        littleendian = false;
-        machinecoding = "UTF-16BE";
-    } else {
-        littleendian = true;
-        machinecoding = "UTF-16LE";
-    }
+        const char*  charshort = "\001\002";
+        short *ip = (short *)charshort;
+        if (*ip == 0x0102) {
+            littleendian = false;
+            machinecoding = "UTF-16BE";
+        } else {
+            littleendian = true;
+            machinecoding = "UTF-16LE";
+        }
    }

    vector<string> vtrans;
    stringToStrings(spectrans, vtrans);

-    for (vector<string>::iterator it = vtrans.begin();
-     it != vtrans.end(); it++) {
+    for (const auto& trans : vtrans) {

-    /* Convert the whole thing to utf-16be/le according to endianness */
-    char *out = 0;
-    size_t outsize;
-    if (convert("UTF-8", machinecoding,
-            it->c_str(), it->size(),
-            &out, &outsize) != 0 || outsize < 2)
-        continue;
+        /* Convert the whole thing to utf-16be/le according to endianness */
+        char *out = 0;
+        size_t outsize;
+        if (convert("UTF-8", machinecoding, trans.c_str(), trans.size(), &out, &outsize) != 0 ||
+            outsize < 2)
+            continue;

-    /* The source char must be utf-16be as this is what we convert the
-       input text to for internal processing */
-    unsigned short ch;
-    if (littleendian)
-        ch = (out[1] << 8) | (out[0] & 0xff);
-    else
-        ch = (out[0] << 8) | (out[1] & 0xff);
+        /* The source char must be utf-16be as this is what we convert the
+           input text to for internal processing */
+        unsigned short ch;
+        if (littleendian)
+            ch = (out[1] << 8) | (out[0] & 0xff);
+        else
+            ch = (out[0] << 8) | (out[1] & 0xff);

-    except_trans[ch] = string((const char *)(out + 2), outsize-2);
-    free(out);
+        except_trans[ch] = string((const char *)(out + 2), outsize-2);
+        free(out);
    }
 }
 #endif /* BUILDING_RECOLL */