indents and readability

This commit is contained in:
Jean-Francois Dockes 2021-11-02 12:05:04 +01:00
parent 4cc0bc90b6
commit a24fc7bacc
5 changed files with 715 additions and 749 deletions

View File

@ -163,6 +163,9 @@ private:
// Current span. Might be jf.dockes@wanadoo.f // Current span. Might be jf.dockes@wanadoo.f
std::string m_span; std::string m_span;
// Words in span: byte positions of start and end of words in m_span. For example:
// 0 4 9
// bill@some.com -> (0,4) (5,9) (10,13)
std::vector <std::pair<int, int> > m_words_in_span; std::vector <std::pair<int, int> > m_words_in_span;
// Current word: no punctuation at all in there. Byte offset // Current word: no punctuation at all in there. Byte offset

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2004-2019 J.F.Dockes /* Copyright (C) 2004-2021 J.F.Dockes
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or * the Free Software Foundation; either version 2 of the License, or
@ -37,16 +37,13 @@ bool unacmaybefold(const string &in, string &out,
switch (what) { switch (what) {
case UNACOP_UNAC: case UNACOP_UNAC:
status = unac_string(encoding, in.c_str(), in.length(), status = unac_string(encoding, in.c_str(), in.length(), &cout, &out_len);
&cout, &out_len);
break; break;
case UNACOP_UNACFOLD: case UNACOP_UNACFOLD:
status = unacfold_string(encoding, in.c_str(), in.length(), status = unacfold_string(encoding, in.c_str(), in.length(), &cout, &out_len);
&cout, &out_len);
break; break;
case UNACOP_FOLD: case UNACOP_FOLD:
status = fold_string(encoding, in.c_str(), in.length(), status = fold_string(encoding, in.c_str(), in.length(), &cout, &out_len);
&cout, &out_len);
break; break;
} }

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2005 J.F.Dockes /* Copyright (C) 2005-2021 J.F.Dockes
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or * the Free Software Foundation; either version 2 of the License, or
@ -78,8 +78,7 @@ public:
string dumb = term; string dumb = term;
if (o_index_stripchars) { if (o_index_stripchars) {
if (!unacmaybefold(term, dumb, "UTF-8", UNACOP_UNACFOLD)) { if (!unacmaybefold(term, dumb, "UTF-8", UNACOP_UNACFOLD)) {
LOGINFO("PlainToRich::takeword: unac failed for [" << term << LOGINFO("PlainToRich::takeword: unac failed for [" << term << "]\n");
"]\n");
return true; return true;
} }
} }
@ -173,30 +172,25 @@ static string activate_urls(const string& in)
} }
#endif #endif
// Fix result text for display inside the gui text window. // Enrich result text for display inside the gui text window.
// //
// We call overridden functions to output header data, beginnings and ends of // We call overridden functions to output header data, beginnings and ends of matches etc.
// matches etc.
// //
// If the input is text, we output the result in chunks, arranging not // If the input is text, we output the result in chunks, arranging not to cut in the middle of a
// to cut in the middle of a tag, which would confuse qtextedit. If // tag, which would confuse qtextedit. If the input is html, the body is always a single output
// the input is html, the body is always a single output chunk. // chunk.
bool PlainToRich::plaintorich(const string& in, bool PlainToRich::plaintorich(
list<string>& out, // Output chunk list const string& in, list<string>& out, const HighlightData& hdata, int chunksize)
const HighlightData& hdata,
int chunksize)
{ {
Chrono chron; Chrono chron;
bool ret = true; bool ret = true;
LOGDEB1("plaintorichich: in: [" << in << "]\n"); LOGDEB1("plaintorichich: in: [" << in << "]\n");
m_hdata = &hdata; m_hdata = &hdata;
// Compute the positions for the query terms. We use the text // Compute the positions for the query terms. We use the text splitter to break the text into
// splitter to break the text into words, and compare the words to // words, and compare the words to the search terms,
// the search terms,
TextSplitPTR splitter(hdata); TextSplitPTR splitter(hdata);
// Note: the splitter returns the term locations in byte, not // Note: the splitter returns the term locations in byte, not character, offsets.
// character, offsets.
splitter.text_to_words(in); splitter.text_to_words(in);
LOGDEB2("plaintorich: split done " << chron.millis() << " mS\n"); LOGDEB2("plaintorich: split done " << chron.millis() << " mS\n");
// Compute the positions for NEAR and PHRASE groups. // Compute the positions for NEAR and PHRASE groups.
@ -205,7 +199,7 @@ bool PlainToRich::plaintorich(const string& in,
out.clear(); out.clear();
out.push_back(""); out.push_back("");
list<string>::iterator olit = out.begin(); auto olit = out.begin();
// Rich text output // Rich text output
*olit = header(); *olit = header();
@ -225,9 +219,10 @@ bool PlainToRich::plaintorich(const string& in,
vector<GroupMatchEntry>::iterator tPosEnd = splitter.m_tboffs.end(); vector<GroupMatchEntry>::iterator tPosEnd = splitter.m_tboffs.end();
#if 0 #if 0
for (vector<pair<int, int> >::const_iterator it = splitter.m_tboffs.begin(); for (const auto& region : splitter.m_tboffs) {
it != splitter.m_tboffs.end(); it++) { auto st = region.offs.first;
LOGDEB2("plaintorich: region: " << it->first << " "<<it->second<< "\n"); auto nd = region.offs.second;
LOGDEB0("plaintorich: region: " << st << " " << nd << "\n");
} }
#endif #endif
@ -276,8 +271,7 @@ bool PlainToRich::plaintorich(const string& in,
} }
// Skip all highlight areas that would overlap this one // Skip all highlight areas that would overlap this one
int crend = tPosIt->offs.second; int crend = tPosIt->offs.second;
while (tPosIt != splitter.m_tboffs.end() && while (tPosIt != splitter.m_tboffs.end() && tPosIt->offs.first < crend)
tPosIt->offs.first < crend)
tPosIt++; tPosIt++;
inrcltag = 0; inrcltag = 0;
} }

View File

@ -14290,23 +14290,17 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length,
return 0; return 0;
} }
int unac_string_utf16(const char* in, size_t in_length, int unac_string_utf16(const char* in, size_t in_length, char** outp, size_t* out_lengthp)
char** outp, size_t* out_lengthp)
{ {
return unacmaybefold_string_utf16(in, in_length, return unacmaybefold_string_utf16(in, in_length, outp, out_lengthp, UNAC_UNAC);
outp, out_lengthp, UNAC_UNAC);
} }
int unacfold_string_utf16(const char* in, size_t in_length, int unacfold_string_utf16(const char* in, size_t in_length, char** outp, size_t* out_lengthp)
char** outp, size_t* out_lengthp)
{ {
return unacmaybefold_string_utf16(in, in_length, return unacmaybefold_string_utf16(in, in_length, outp, out_lengthp, UNAC_UNACFOLD);
outp, out_lengthp, UNAC_UNACFOLD);
} }
int fold_string_utf16(const char* in, size_t in_length, int fold_string_utf16(const char* in, size_t in_length, char** outp, size_t* out_lengthp)
char** outp, size_t* out_lengthp)
{ {
return unacmaybefold_string_utf16(in, in_length, return unacmaybefold_string_utf16(in, in_length, outp, out_lengthp, UNAC_FOLD);
outp, out_lengthp, UNAC_FOLD);
} }
static const char *utf16be = "UTF-16BE"; static const char *utf16be = "UTF-16BE";
@ -14421,7 +14415,8 @@ static int convert(const char* from, const char* to,
if (from_utf16) { if (from_utf16) {
const char* tmp = space; const char* tmp = space;
size_t tmp_length = 2; size_t tmp_length = 2;
if(iconv(cd, (ICONV_CONST char **) &tmp, &tmp_length, &out, &out_remain) == (size_t)-1) { if (iconv(cd, (ICONV_CONST char **)&tmp, &tmp_length, &out, &out_remain) ==
(size_t)-1) {
if(errno == E2BIG) { if(errno == E2BIG) {
/* fall thru to the E2BIG case below */; /* fall thru to the E2BIG case below */;
} else { } else {
@ -14481,8 +14476,7 @@ out:
return ret; return ret;
} }
int unacmaybefold_string(const char* charset, int unacmaybefold_string(const char* charset, const char* in, size_t in_length,
const char* in, size_t in_length,
char** outp, size_t* out_lengthp, int what) char** outp, size_t* out_lengthp, int what)
{ {
/* /*
@ -14520,26 +14514,20 @@ int unacmaybefold_string(const char* charset,
return 0; return 0;
} }
int unac_string(const char* charset, int unac_string(
const char* in, size_t in_length, const char* charset, const char* in, size_t in_length, char** outp, size_t* out_lengthp)
char** outp, size_t* out_lengthp)
{ {
return unacmaybefold_string(charset, in, in_length, return unacmaybefold_string(charset, in, in_length, outp, out_lengthp, UNAC_UNAC);
outp, out_lengthp, UNAC_UNAC);
} }
int unacfold_string(const char* charset, int unacfold_string(
const char* in, size_t in_length, const char* charset, const char* in, size_t in_length, char** outp, size_t* out_lengthp)
char** outp, size_t* out_lengthp)
{ {
return unacmaybefold_string(charset, in, in_length, return unacmaybefold_string(charset, in, in_length, outp, out_lengthp, UNAC_UNACFOLD);
outp, out_lengthp, UNAC_UNACFOLD);
} }
int fold_string(const char* charset, int fold_string(
const char* in, size_t in_length, const char* charset, const char* in, size_t in_length, char** outp, size_t* out_lengthp)
char** outp, size_t* out_lengthp)
{ {
return unacmaybefold_string(charset, in, in_length, return unacmaybefold_string(charset, in, in_length, outp, out_lengthp, UNAC_FOLD);
outp, out_lengthp, UNAC_FOLD);
} }
const char* unac_version(void) const char* unac_version(void)
@ -14577,15 +14565,13 @@ void unac_set_except_translations(const char *spectrans)
vector<string> vtrans; vector<string> vtrans;
stringToStrings(spectrans, vtrans); stringToStrings(spectrans, vtrans);
for (vector<string>::iterator it = vtrans.begin(); for (const auto& trans : vtrans) {
it != vtrans.end(); it++) {
/* Convert the whole thing to utf-16be/le according to endianness */ /* Convert the whole thing to utf-16be/le according to endianness */
char *out = 0; char *out = 0;
size_t outsize; size_t outsize;
if (convert("UTF-8", machinecoding, if (convert("UTF-8", machinecoding, trans.c_str(), trans.size(), &out, &outsize) != 0 ||
it->c_str(), it->size(), outsize < 2)
&out, &outsize) != 0 || outsize < 2)
continue; continue;
/* The source char must be utf-16be as this is what we convert the /* The source char must be utf-16be as this is what we convert the

View File

@ -13,7 +13,7 @@
* *
* You should have received a copy of the GNU General Public License * You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software * along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/ */
#ifdef BUILDING_RECOLL #ifdef BUILDING_RECOLL
@ -14290,23 +14290,17 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length,
return 0; return 0;
} }
int unac_string_utf16(const char* in, size_t in_length, int unac_string_utf16(const char* in, size_t in_length, char** outp, size_t* out_lengthp)
char** outp, size_t* out_lengthp)
{ {
return unacmaybefold_string_utf16(in, in_length, return unacmaybefold_string_utf16(in, in_length, outp, out_lengthp, UNAC_UNAC);
outp, out_lengthp, UNAC_UNAC);
} }
int unacfold_string_utf16(const char* in, size_t in_length, int unacfold_string_utf16(const char* in, size_t in_length, char** outp, size_t* out_lengthp)
char** outp, size_t* out_lengthp)
{ {
return unacmaybefold_string_utf16(in, in_length, return unacmaybefold_string_utf16(in, in_length, outp, out_lengthp, UNAC_UNACFOLD);
outp, out_lengthp, UNAC_UNACFOLD);
} }
int fold_string_utf16(const char* in, size_t in_length, int fold_string_utf16(const char* in, size_t in_length, char** outp, size_t* out_lengthp)
char** outp, size_t* out_lengthp)
{ {
return unacmaybefold_string_utf16(in, in_length, return unacmaybefold_string_utf16(in, in_length, outp, out_lengthp, UNAC_FOLD);
outp, out_lengthp, UNAC_FOLD);
} }
static const char *utf16be = "UTF-16BE"; static const char *utf16be = "UTF-16BE";
@ -14421,7 +14415,8 @@ static int convert(const char* from, const char* to,
if (from_utf16) { if (from_utf16) {
const char* tmp = space; const char* tmp = space;
size_t tmp_length = 2; size_t tmp_length = 2;
if(iconv(cd, (ICONV_CONST char **) &tmp, &tmp_length, &out, &out_remain) == (size_t)-1) { if (iconv(cd, (ICONV_CONST char **)&tmp, &tmp_length, &out, &out_remain) ==
(size_t)-1) {
if(errno == E2BIG) { if(errno == E2BIG) {
/* fall thru to the E2BIG case below */; /* fall thru to the E2BIG case below */;
} else { } else {
@ -14481,8 +14476,7 @@ out:
return ret; return ret;
} }
int unacmaybefold_string(const char* charset, int unacmaybefold_string(const char* charset, const char* in, size_t in_length,
const char* in, size_t in_length,
char** outp, size_t* out_lengthp, int what) char** outp, size_t* out_lengthp, int what)
{ {
/* /*
@ -14520,26 +14514,20 @@ int unacmaybefold_string(const char* charset,
return 0; return 0;
} }
int unac_string(const char* charset, int unac_string(
const char* in, size_t in_length, const char* charset, const char* in, size_t in_length, char** outp, size_t* out_lengthp)
char** outp, size_t* out_lengthp)
{ {
return unacmaybefold_string(charset, in, in_length, return unacmaybefold_string(charset, in, in_length, outp, out_lengthp, UNAC_UNAC);
outp, out_lengthp, UNAC_UNAC);
} }
int unacfold_string(const char* charset, int unacfold_string(
const char* in, size_t in_length, const char* charset, const char* in, size_t in_length, char** outp, size_t* out_lengthp)
char** outp, size_t* out_lengthp)
{ {
return unacmaybefold_string(charset, in, in_length, return unacmaybefold_string(charset, in, in_length, outp, out_lengthp, UNAC_UNACFOLD);
outp, out_lengthp, UNAC_UNACFOLD);
} }
int fold_string(const char* charset, int fold_string(
const char* in, size_t in_length, const char* charset, const char* in, size_t in_length, char** outp, size_t* out_lengthp)
char** outp, size_t* out_lengthp)
{ {
return unacmaybefold_string(charset, in, in_length, return unacmaybefold_string(charset, in, in_length, outp, out_lengthp, UNAC_FOLD);
outp, out_lengthp, UNAC_FOLD);
} }
const char* unac_version(void) const char* unac_version(void)
@ -14577,15 +14565,13 @@ void unac_set_except_translations(const char *spectrans)
vector<string> vtrans; vector<string> vtrans;
stringToStrings(spectrans, vtrans); stringToStrings(spectrans, vtrans);
for (vector<string>::iterator it = vtrans.begin(); for (const auto& trans : vtrans) {
it != vtrans.end(); it++) {
/* Convert the whole thing to utf-16be/le according to endianness */ /* Convert the whole thing to utf-16be/le according to endianness */
char *out = 0; char *out = 0;
size_t outsize; size_t outsize;
if (convert("UTF-8", machinecoding, if (convert("UTF-8", machinecoding, trans.c_str(), trans.size(), &out, &outsize) != 0 ||
it->c_str(), it->size(), outsize < 2)
&out, &outsize) != 0 || outsize < 2)
continue; continue;
/* The source char must be utf-16be as this is what we convert the /* The source char must be utf-16be as this is what we convert the