indents and readability
This commit is contained in:
parent
4cc0bc90b6
commit
a24fc7bacc
@ -163,6 +163,9 @@ private:
|
||||
// Current span. Might be jf.dockes@wanadoo.f
|
||||
std::string m_span;
|
||||
|
||||
// Words in span: byte positions of start and end of words in m_span. For example:
|
||||
//  0   4    9
|
||||
// bill@some.com -> (0,4) (5,9) (10,13)
|
||||
std::vector <std::pair<int, int> > m_words_in_span;
|
||||
|
||||
// Current word: no punctuation at all in there. Byte offset
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2004-2019 J.F.Dockes
|
||||
/* Copyright (C) 2004-2021 J.F.Dockes
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
@ -37,16 +37,13 @@ bool unacmaybefold(const string &in, string &out,
|
||||
|
||||
switch (what) {
|
||||
case UNACOP_UNAC:
|
||||
status = unac_string(encoding, in.c_str(), in.length(),
|
||||
&cout, &out_len);
|
||||
status = unac_string(encoding, in.c_str(), in.length(), &cout, &out_len);
|
||||
break;
|
||||
case UNACOP_UNACFOLD:
|
||||
status = unacfold_string(encoding, in.c_str(), in.length(),
|
||||
&cout, &out_len);
|
||||
status = unacfold_string(encoding, in.c_str(), in.length(), &cout, &out_len);
|
||||
break;
|
||||
case UNACOP_FOLD:
|
||||
status = fold_string(encoding, in.c_str(), in.length(),
|
||||
&cout, &out_len);
|
||||
status = fold_string(encoding, in.c_str(), in.length(), &cout, &out_len);
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2005 J.F.Dockes
|
||||
/* Copyright (C) 2005-2021 J.F.Dockes
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
@ -78,8 +78,7 @@ public:
|
||||
string dumb = term;
|
||||
if (o_index_stripchars) {
|
||||
if (!unacmaybefold(term, dumb, "UTF-8", UNACOP_UNACFOLD)) {
|
||||
LOGINFO("PlainToRich::takeword: unac failed for [" << term <<
|
||||
"]\n");
|
||||
LOGINFO("PlainToRich::takeword: unac failed for [" << term << "]\n");
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@ -173,30 +172,25 @@ static string activate_urls(const string& in)
|
||||
}
|
||||
#endif
|
||||
|
||||
// Fix result text for display inside the gui text window.
|
||||
// Enrich result text for display inside the gui text window.
|
||||
//
|
||||
// We call overridden functions to output header data, beginnings and ends of
|
||||
// matches etc.
|
||||
// We call overridden functions to output header data, beginnings and ends of matches etc.
|
||||
//
|
||||
// If the input is text, we output the result in chunks, arranging not
|
||||
// to cut in the middle of a tag, which would confuse qtextedit. If
|
||||
// the input is html, the body is always a single output chunk.
|
||||
bool PlainToRich::plaintorich(const string& in,
|
||||
list<string>& out, // Output chunk list
|
||||
const HighlightData& hdata,
|
||||
int chunksize)
|
||||
// If the input is text, we output the result in chunks, arranging not to cut in the middle of a
|
||||
// tag, which would confuse qtextedit. If the input is html, the body is always a single output
|
||||
// chunk.
|
||||
bool PlainToRich::plaintorich(
|
||||
const string& in, list<string>& out, const HighlightData& hdata, int chunksize)
|
||||
{
|
||||
Chrono chron;
|
||||
bool ret = true;
|
||||
LOGDEB1("plaintorichich: in: [" << in << "]\n");
|
||||
|
||||
m_hdata = &hdata;
|
||||
// Compute the positions for the query terms. We use the text
|
||||
// splitter to break the text into words, and compare the words to
|
||||
// the search terms,
|
||||
// Compute the positions for the query terms. We use the text splitter to break the text into
|
||||
// words, and compare the words to the search terms.
|
||||
TextSplitPTR splitter(hdata);
|
||||
// Note: the splitter returns the term locations in byte, not
|
||||
// character, offsets.
|
||||
// Note: the splitter returns the term locations in byte, not character, offsets.
|
||||
splitter.text_to_words(in);
|
||||
LOGDEB2("plaintorich: split done " << chron.millis() << " mS\n");
|
||||
// Compute the positions for NEAR and PHRASE groups.
|
||||
@ -205,7 +199,7 @@ bool PlainToRich::plaintorich(const string& in,
|
||||
|
||||
out.clear();
|
||||
out.push_back("");
|
||||
list<string>::iterator olit = out.begin();
|
||||
auto olit = out.begin();
|
||||
|
||||
// Rich text output
|
||||
*olit = header();
|
||||
@ -225,9 +219,10 @@ bool PlainToRich::plaintorich(const string& in,
|
||||
vector<GroupMatchEntry>::iterator tPosEnd = splitter.m_tboffs.end();
|
||||
|
||||
#if 0
|
||||
for (vector<pair<int, int> >::const_iterator it = splitter.m_tboffs.begin();
|
||||
it != splitter.m_tboffs.end(); it++) {
|
||||
LOGDEB2("plaintorich: region: " << it->first << " "<<it->second<< "\n");
|
||||
for (const auto& region : splitter.m_tboffs) {
|
||||
auto st = region.offs.first;
|
||||
auto nd = region.offs.second;
|
||||
LOGDEB0("plaintorich: region: " << st << " " << nd << "\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -276,8 +271,7 @@ bool PlainToRich::plaintorich(const string& in,
|
||||
}
|
||||
// Skip all highlight areas that would overlap this one
|
||||
int crend = tPosIt->offs.second;
|
||||
while (tPosIt != splitter.m_tboffs.end() &&
|
||||
tPosIt->offs.first < crend)
|
||||
while (tPosIt != splitter.m_tboffs.end() && tPosIt->offs.first < crend)
|
||||
tPosIt++;
|
||||
inrcltag = 0;
|
||||
}
|
||||
|
||||
@ -14290,23 +14290,17 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length,
|
||||
|
||||
return 0;
|
||||
}
|
||||
int unac_string_utf16(const char* in, size_t in_length,
|
||||
char** outp, size_t* out_lengthp)
|
||||
int unac_string_utf16(const char* in, size_t in_length, char** outp, size_t* out_lengthp)
|
||||
{
|
||||
return unacmaybefold_string_utf16(in, in_length,
|
||||
outp, out_lengthp, UNAC_UNAC);
|
||||
return unacmaybefold_string_utf16(in, in_length, outp, out_lengthp, UNAC_UNAC);
|
||||
}
|
||||
int unacfold_string_utf16(const char* in, size_t in_length,
|
||||
char** outp, size_t* out_lengthp)
|
||||
int unacfold_string_utf16(const char* in, size_t in_length, char** outp, size_t* out_lengthp)
|
||||
{
|
||||
return unacmaybefold_string_utf16(in, in_length,
|
||||
outp, out_lengthp, UNAC_UNACFOLD);
|
||||
return unacmaybefold_string_utf16(in, in_length, outp, out_lengthp, UNAC_UNACFOLD);
|
||||
}
|
||||
int fold_string_utf16(const char* in, size_t in_length,
|
||||
char** outp, size_t* out_lengthp)
|
||||
int fold_string_utf16(const char* in, size_t in_length, char** outp, size_t* out_lengthp)
|
||||
{
|
||||
return unacmaybefold_string_utf16(in, in_length,
|
||||
outp, out_lengthp, UNAC_FOLD);
|
||||
return unacmaybefold_string_utf16(in, in_length, outp, out_lengthp, UNAC_FOLD);
|
||||
}
|
||||
|
||||
static const char *utf16be = "UTF-16BE";
|
||||
@ -14418,10 +14412,11 @@ static int convert(const char* from, const char* to,
|
||||
* should be submitted to Unicode so that they can fix the problem.
|
||||
*
|
||||
*/
|
||||
if(from_utf16) {
|
||||
if (from_utf16) {
|
||||
const char* tmp = space;
|
||||
size_t tmp_length = 2;
|
||||
if(iconv(cd, (ICONV_CONST char **) &tmp, &tmp_length, &out, &out_remain) == (size_t)-1) {
|
||||
if (iconv(cd, (ICONV_CONST char **)&tmp, &tmp_length, &out, &out_remain) ==
|
||||
(size_t)-1) {
|
||||
if(errno == E2BIG) {
|
||||
/* fall thru to the E2BIG case below */;
|
||||
} else {
|
||||
@ -14481,8 +14476,7 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int unacmaybefold_string(const char* charset,
|
||||
const char* in, size_t in_length,
|
||||
int unacmaybefold_string(const char* charset, const char* in, size_t in_length,
|
||||
char** outp, size_t* out_lengthp, int what)
|
||||
{
|
||||
/*
|
||||
@ -14520,26 +14514,20 @@ int unacmaybefold_string(const char* charset,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int unac_string(const char* charset,
|
||||
const char* in, size_t in_length,
|
||||
char** outp, size_t* out_lengthp)
|
||||
int unac_string(
|
||||
const char* charset, const char* in, size_t in_length, char** outp, size_t* out_lengthp)
|
||||
{
|
||||
return unacmaybefold_string(charset, in, in_length,
|
||||
outp, out_lengthp, UNAC_UNAC);
|
||||
return unacmaybefold_string(charset, in, in_length, outp, out_lengthp, UNAC_UNAC);
|
||||
}
|
||||
int unacfold_string(const char* charset,
|
||||
const char* in, size_t in_length,
|
||||
char** outp, size_t* out_lengthp)
|
||||
int unacfold_string(
|
||||
const char* charset, const char* in, size_t in_length, char** outp, size_t* out_lengthp)
|
||||
{
|
||||
return unacmaybefold_string(charset, in, in_length,
|
||||
outp, out_lengthp, UNAC_UNACFOLD);
|
||||
return unacmaybefold_string(charset, in, in_length, outp, out_lengthp, UNAC_UNACFOLD);
|
||||
}
|
||||
int fold_string(const char* charset,
|
||||
const char* in, size_t in_length,
|
||||
char** outp, size_t* out_lengthp)
|
||||
int fold_string(
|
||||
const char* charset, const char* in, size_t in_length, char** outp, size_t* out_lengthp)
|
||||
{
|
||||
return unacmaybefold_string(charset, in, in_length,
|
||||
outp, out_lengthp, UNAC_FOLD);
|
||||
return unacmaybefold_string(charset, in, in_length, outp, out_lengthp, UNAC_FOLD);
|
||||
}
|
||||
|
||||
const char* unac_version(void)
|
||||
@ -14577,15 +14565,13 @@ void unac_set_except_translations(const char *spectrans)
|
||||
vector<string> vtrans;
|
||||
stringToStrings(spectrans, vtrans);
|
||||
|
||||
for (vector<string>::iterator it = vtrans.begin();
|
||||
it != vtrans.end(); it++) {
|
||||
for (const auto& trans : vtrans) {
|
||||
|
||||
/* Convert the whole thing to utf-16be/le according to endianness */
|
||||
char *out = 0;
|
||||
size_t outsize;
|
||||
if (convert("UTF-8", machinecoding,
|
||||
it->c_str(), it->size(),
|
||||
&out, &outsize) != 0 || outsize < 2)
|
||||
if (convert("UTF-8", machinecoding, trans.c_str(), trans.size(), &out, &outsize) != 0 ||
|
||||
outsize < 2)
|
||||
continue;
|
||||
|
||||
/* The source char must be utf-16be as this is what we convert the
|
||||
|
||||
60
unac/unac.c
60
unac/unac.c
@ -13,7 +13,7 @@
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#ifdef BUILDING_RECOLL
|
||||
@ -14290,23 +14290,17 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length,
|
||||
|
||||
return 0;
|
||||
}
|
||||
int unac_string_utf16(const char* in, size_t in_length,
|
||||
char** outp, size_t* out_lengthp)
|
||||
int unac_string_utf16(const char* in, size_t in_length, char** outp, size_t* out_lengthp)
|
||||
{
|
||||
return unacmaybefold_string_utf16(in, in_length,
|
||||
outp, out_lengthp, UNAC_UNAC);
|
||||
return unacmaybefold_string_utf16(in, in_length, outp, out_lengthp, UNAC_UNAC);
|
||||
}
|
||||
int unacfold_string_utf16(const char* in, size_t in_length,
|
||||
char** outp, size_t* out_lengthp)
|
||||
int unacfold_string_utf16(const char* in, size_t in_length, char** outp, size_t* out_lengthp)
|
||||
{
|
||||
return unacmaybefold_string_utf16(in, in_length,
|
||||
outp, out_lengthp, UNAC_UNACFOLD);
|
||||
return unacmaybefold_string_utf16(in, in_length, outp, out_lengthp, UNAC_UNACFOLD);
|
||||
}
|
||||
int fold_string_utf16(const char* in, size_t in_length,
|
||||
char** outp, size_t* out_lengthp)
|
||||
int fold_string_utf16(const char* in, size_t in_length, char** outp, size_t* out_lengthp)
|
||||
{
|
||||
return unacmaybefold_string_utf16(in, in_length,
|
||||
outp, out_lengthp, UNAC_FOLD);
|
||||
return unacmaybefold_string_utf16(in, in_length, outp, out_lengthp, UNAC_FOLD);
|
||||
}
|
||||
|
||||
static const char *utf16be = "UTF-16BE";
|
||||
@ -14418,10 +14412,11 @@ static int convert(const char* from, const char* to,
|
||||
* should be submitted to Unicode so that they can fix the problem.
|
||||
*
|
||||
*/
|
||||
if(from_utf16) {
|
||||
if (from_utf16) {
|
||||
const char* tmp = space;
|
||||
size_t tmp_length = 2;
|
||||
if(iconv(cd, (ICONV_CONST char **) &tmp, &tmp_length, &out, &out_remain) == (size_t)-1) {
|
||||
if (iconv(cd, (ICONV_CONST char **)&tmp, &tmp_length, &out, &out_remain) ==
|
||||
(size_t)-1) {
|
||||
if(errno == E2BIG) {
|
||||
/* fall thru to the E2BIG case below */;
|
||||
} else {
|
||||
@ -14481,8 +14476,7 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int unacmaybefold_string(const char* charset,
|
||||
const char* in, size_t in_length,
|
||||
int unacmaybefold_string(const char* charset, const char* in, size_t in_length,
|
||||
char** outp, size_t* out_lengthp, int what)
|
||||
{
|
||||
/*
|
||||
@ -14520,26 +14514,20 @@ int unacmaybefold_string(const char* charset,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int unac_string(const char* charset,
|
||||
const char* in, size_t in_length,
|
||||
char** outp, size_t* out_lengthp)
|
||||
int unac_string(
|
||||
const char* charset, const char* in, size_t in_length, char** outp, size_t* out_lengthp)
|
||||
{
|
||||
return unacmaybefold_string(charset, in, in_length,
|
||||
outp, out_lengthp, UNAC_UNAC);
|
||||
return unacmaybefold_string(charset, in, in_length, outp, out_lengthp, UNAC_UNAC);
|
||||
}
|
||||
int unacfold_string(const char* charset,
|
||||
const char* in, size_t in_length,
|
||||
char** outp, size_t* out_lengthp)
|
||||
int unacfold_string(
|
||||
const char* charset, const char* in, size_t in_length, char** outp, size_t* out_lengthp)
|
||||
{
|
||||
return unacmaybefold_string(charset, in, in_length,
|
||||
outp, out_lengthp, UNAC_UNACFOLD);
|
||||
return unacmaybefold_string(charset, in, in_length, outp, out_lengthp, UNAC_UNACFOLD);
|
||||
}
|
||||
int fold_string(const char* charset,
|
||||
const char* in, size_t in_length,
|
||||
char** outp, size_t* out_lengthp)
|
||||
int fold_string(
|
||||
const char* charset, const char* in, size_t in_length, char** outp, size_t* out_lengthp)
|
||||
{
|
||||
return unacmaybefold_string(charset, in, in_length,
|
||||
outp, out_lengthp, UNAC_FOLD);
|
||||
return unacmaybefold_string(charset, in, in_length, outp, out_lengthp, UNAC_FOLD);
|
||||
}
|
||||
|
||||
const char* unac_version(void)
|
||||
@ -14577,15 +14565,13 @@ void unac_set_except_translations(const char *spectrans)
|
||||
vector<string> vtrans;
|
||||
stringToStrings(spectrans, vtrans);
|
||||
|
||||
for (vector<string>::iterator it = vtrans.begin();
|
||||
it != vtrans.end(); it++) {
|
||||
for (const auto& trans : vtrans) {
|
||||
|
||||
/* Convert the whole thing to utf-16be/le according to endianness */
|
||||
char *out = 0;
|
||||
size_t outsize;
|
||||
if (convert("UTF-8", machinecoding,
|
||||
it->c_str(), it->size(),
|
||||
&out, &outsize) != 0 || outsize < 2)
|
||||
if (convert("UTF-8", machinecoding, trans.c_str(), trans.size(), &out, &outsize) != 0 ||
|
||||
outsize < 2)
|
||||
continue;
|
||||
|
||||
/* The source char must be utf-16be as this is what we convert the
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user