indents and readability
This commit is contained in:
parent
4cc0bc90b6
commit
a24fc7bacc
@ -163,6 +163,9 @@ private:
|
|||||||
// Current span. Might be jf.dockes@wanadoo.f
|
// Current span. Might be jf.dockes@wanadoo.f
|
||||||
std::string m_span;
|
std::string m_span;
|
||||||
|
|
||||||
|
// Words in span: byte positions of start and end of words in m_span. For example:
|
||||||
|
// 0 4 9
|
||||||
|
// bill@some.com -> (0,4) (5,9) (10,13)
|
||||||
std::vector <std::pair<int, int> > m_words_in_span;
|
std::vector <std::pair<int, int> > m_words_in_span;
|
||||||
|
|
||||||
// Current word: no punctuation at all in there. Byte offset
|
// Current word: no punctuation at all in there. Byte offset
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
/* Copyright (C) 2004-2019 J.F.Dockes
|
/* Copyright (C) 2004-2021 J.F.Dockes
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
* the Free Software Foundation; either version 2 of the License, or
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
@ -37,16 +37,13 @@ bool unacmaybefold(const string &in, string &out,
|
|||||||
|
|
||||||
switch (what) {
|
switch (what) {
|
||||||
case UNACOP_UNAC:
|
case UNACOP_UNAC:
|
||||||
status = unac_string(encoding, in.c_str(), in.length(),
|
status = unac_string(encoding, in.c_str(), in.length(), &cout, &out_len);
|
||||||
&cout, &out_len);
|
|
||||||
break;
|
break;
|
||||||
case UNACOP_UNACFOLD:
|
case UNACOP_UNACFOLD:
|
||||||
status = unacfold_string(encoding, in.c_str(), in.length(),
|
status = unacfold_string(encoding, in.c_str(), in.length(), &cout, &out_len);
|
||||||
&cout, &out_len);
|
|
||||||
break;
|
break;
|
||||||
case UNACOP_FOLD:
|
case UNACOP_FOLD:
|
||||||
status = fold_string(encoding, in.c_str(), in.length(),
|
status = fold_string(encoding, in.c_str(), in.length(), &cout, &out_len);
|
||||||
&cout, &out_len);
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
/* Copyright (C) 2005 J.F.Dockes
|
/* Copyright (C) 2005-2021 J.F.Dockes
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
* the Free Software Foundation; either version 2 of the License, or
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
@ -78,8 +78,7 @@ public:
|
|||||||
string dumb = term;
|
string dumb = term;
|
||||||
if (o_index_stripchars) {
|
if (o_index_stripchars) {
|
||||||
if (!unacmaybefold(term, dumb, "UTF-8", UNACOP_UNACFOLD)) {
|
if (!unacmaybefold(term, dumb, "UTF-8", UNACOP_UNACFOLD)) {
|
||||||
LOGINFO("PlainToRich::takeword: unac failed for [" << term <<
|
LOGINFO("PlainToRich::takeword: unac failed for [" << term << "]\n");
|
||||||
"]\n");
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -173,30 +172,25 @@ static string activate_urls(const string& in)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Fix result text for display inside the gui text window.
|
// Enrich result text for display inside the gui text window.
|
||||||
//
|
//
|
||||||
// We call overridden functions to output header data, beginnings and ends of
|
// We call overridden functions to output header data, beginnings and ends of matches etc.
|
||||||
// matches etc.
|
|
||||||
//
|
//
|
||||||
// If the input is text, we output the result in chunks, arranging not
|
// If the input is text, we output the result in chunks, arranging not to cut in the middle of a
|
||||||
// to cut in the middle of a tag, which would confuse qtextedit. If
|
// tag, which would confuse qtextedit. If the input is html, the body is always a single output
|
||||||
// the input is html, the body is always a single output chunk.
|
// chunk.
|
||||||
bool PlainToRich::plaintorich(const string& in,
|
bool PlainToRich::plaintorich(
|
||||||
list<string>& out, // Output chunk list
|
const string& in, list<string>& out, const HighlightData& hdata, int chunksize)
|
||||||
const HighlightData& hdata,
|
|
||||||
int chunksize)
|
|
||||||
{
|
{
|
||||||
Chrono chron;
|
Chrono chron;
|
||||||
bool ret = true;
|
bool ret = true;
|
||||||
LOGDEB1("plaintorichich: in: [" << in << "]\n");
|
LOGDEB1("plaintorichich: in: [" << in << "]\n");
|
||||||
|
|
||||||
m_hdata = &hdata;
|
m_hdata = &hdata;
|
||||||
// Compute the positions for the query terms. We use the text
|
// Compute the positions for the query terms. We use the text splitter to break the text into
|
||||||
// splitter to break the text into words, and compare the words to
|
// words, and compare the words to the search terms,
|
||||||
// the search terms,
|
|
||||||
TextSplitPTR splitter(hdata);
|
TextSplitPTR splitter(hdata);
|
||||||
// Note: the splitter returns the term locations in byte, not
|
// Note: the splitter returns the term locations in byte, not character, offsets.
|
||||||
// character, offsets.
|
|
||||||
splitter.text_to_words(in);
|
splitter.text_to_words(in);
|
||||||
LOGDEB2("plaintorich: split done " << chron.millis() << " mS\n");
|
LOGDEB2("plaintorich: split done " << chron.millis() << " mS\n");
|
||||||
// Compute the positions for NEAR and PHRASE groups.
|
// Compute the positions for NEAR and PHRASE groups.
|
||||||
@ -205,7 +199,7 @@ bool PlainToRich::plaintorich(const string& in,
|
|||||||
|
|
||||||
out.clear();
|
out.clear();
|
||||||
out.push_back("");
|
out.push_back("");
|
||||||
list<string>::iterator olit = out.begin();
|
auto olit = out.begin();
|
||||||
|
|
||||||
// Rich text output
|
// Rich text output
|
||||||
*olit = header();
|
*olit = header();
|
||||||
@ -225,9 +219,10 @@ bool PlainToRich::plaintorich(const string& in,
|
|||||||
vector<GroupMatchEntry>::iterator tPosEnd = splitter.m_tboffs.end();
|
vector<GroupMatchEntry>::iterator tPosEnd = splitter.m_tboffs.end();
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
for (vector<pair<int, int> >::const_iterator it = splitter.m_tboffs.begin();
|
for (const auto& region : splitter.m_tboffs) {
|
||||||
it != splitter.m_tboffs.end(); it++) {
|
auto st = region.offs.first;
|
||||||
LOGDEB2("plaintorich: region: " << it->first << " "<<it->second<< "\n");
|
auto nd = region.offs.second;
|
||||||
|
LOGDEB0("plaintorich: region: " << st << " " << nd << "\n");
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -276,8 +271,7 @@ bool PlainToRich::plaintorich(const string& in,
|
|||||||
}
|
}
|
||||||
// Skip all highlight areas that would overlap this one
|
// Skip all highlight areas that would overlap this one
|
||||||
int crend = tPosIt->offs.second;
|
int crend = tPosIt->offs.second;
|
||||||
while (tPosIt != splitter.m_tboffs.end() &&
|
while (tPosIt != splitter.m_tboffs.end() && tPosIt->offs.first < crend)
|
||||||
tPosIt->offs.first < crend)
|
|
||||||
tPosIt++;
|
tPosIt++;
|
||||||
inrcltag = 0;
|
inrcltag = 0;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -14290,23 +14290,17 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length,
|
|||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
int unac_string_utf16(const char* in, size_t in_length,
|
int unac_string_utf16(const char* in, size_t in_length, char** outp, size_t* out_lengthp)
|
||||||
char** outp, size_t* out_lengthp)
|
|
||||||
{
|
{
|
||||||
return unacmaybefold_string_utf16(in, in_length,
|
return unacmaybefold_string_utf16(in, in_length, outp, out_lengthp, UNAC_UNAC);
|
||||||
outp, out_lengthp, UNAC_UNAC);
|
|
||||||
}
|
}
|
||||||
int unacfold_string_utf16(const char* in, size_t in_length,
|
int unacfold_string_utf16(const char* in, size_t in_length, char** outp, size_t* out_lengthp)
|
||||||
char** outp, size_t* out_lengthp)
|
|
||||||
{
|
{
|
||||||
return unacmaybefold_string_utf16(in, in_length,
|
return unacmaybefold_string_utf16(in, in_length, outp, out_lengthp, UNAC_UNACFOLD);
|
||||||
outp, out_lengthp, UNAC_UNACFOLD);
|
|
||||||
}
|
}
|
||||||
int fold_string_utf16(const char* in, size_t in_length,
|
int fold_string_utf16(const char* in, size_t in_length, char** outp, size_t* out_lengthp)
|
||||||
char** outp, size_t* out_lengthp)
|
|
||||||
{
|
{
|
||||||
return unacmaybefold_string_utf16(in, in_length,
|
return unacmaybefold_string_utf16(in, in_length, outp, out_lengthp, UNAC_FOLD);
|
||||||
outp, out_lengthp, UNAC_FOLD);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static const char *utf16be = "UTF-16BE";
|
static const char *utf16be = "UTF-16BE";
|
||||||
@ -14418,10 +14412,11 @@ static int convert(const char* from, const char* to,
|
|||||||
* should be submited to Unicode so that they can fix the problem.
|
* should be submited to Unicode so that they can fix the problem.
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
if(from_utf16) {
|
if (from_utf16) {
|
||||||
const char* tmp = space;
|
const char* tmp = space;
|
||||||
size_t tmp_length = 2;
|
size_t tmp_length = 2;
|
||||||
if(iconv(cd, (ICONV_CONST char **) &tmp, &tmp_length, &out, &out_remain) == (size_t)-1) {
|
if (iconv(cd, (ICONV_CONST char **)&tmp, &tmp_length, &out, &out_remain) ==
|
||||||
|
(size_t)-1) {
|
||||||
if(errno == E2BIG) {
|
if(errno == E2BIG) {
|
||||||
/* fall thru to the E2BIG case below */;
|
/* fall thru to the E2BIG case below */;
|
||||||
} else {
|
} else {
|
||||||
@ -14481,8 +14476,7 @@ out:
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
int unacmaybefold_string(const char* charset,
|
int unacmaybefold_string(const char* charset, const char* in, size_t in_length,
|
||||||
const char* in, size_t in_length,
|
|
||||||
char** outp, size_t* out_lengthp, int what)
|
char** outp, size_t* out_lengthp, int what)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
@ -14520,26 +14514,20 @@ int unacmaybefold_string(const char* charset,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int unac_string(const char* charset,
|
int unac_string(
|
||||||
const char* in, size_t in_length,
|
const char* charset, const char* in, size_t in_length, char** outp, size_t* out_lengthp)
|
||||||
char** outp, size_t* out_lengthp)
|
|
||||||
{
|
{
|
||||||
return unacmaybefold_string(charset, in, in_length,
|
return unacmaybefold_string(charset, in, in_length, outp, out_lengthp, UNAC_UNAC);
|
||||||
outp, out_lengthp, UNAC_UNAC);
|
|
||||||
}
|
}
|
||||||
int unacfold_string(const char* charset,
|
int unacfold_string(
|
||||||
const char* in, size_t in_length,
|
const char* charset, const char* in, size_t in_length, char** outp, size_t* out_lengthp)
|
||||||
char** outp, size_t* out_lengthp)
|
|
||||||
{
|
{
|
||||||
return unacmaybefold_string(charset, in, in_length,
|
return unacmaybefold_string(charset, in, in_length, outp, out_lengthp, UNAC_UNACFOLD);
|
||||||
outp, out_lengthp, UNAC_UNACFOLD);
|
|
||||||
}
|
}
|
||||||
int fold_string(const char* charset,
|
int fold_string(
|
||||||
const char* in, size_t in_length,
|
const char* charset, const char* in, size_t in_length, char** outp, size_t* out_lengthp)
|
||||||
char** outp, size_t* out_lengthp)
|
|
||||||
{
|
{
|
||||||
return unacmaybefold_string(charset, in, in_length,
|
return unacmaybefold_string(charset, in, in_length, outp, out_lengthp, UNAC_FOLD);
|
||||||
outp, out_lengthp, UNAC_FOLD);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const char* unac_version(void)
|
const char* unac_version(void)
|
||||||
@ -14577,15 +14565,13 @@ void unac_set_except_translations(const char *spectrans)
|
|||||||
vector<string> vtrans;
|
vector<string> vtrans;
|
||||||
stringToStrings(spectrans, vtrans);
|
stringToStrings(spectrans, vtrans);
|
||||||
|
|
||||||
for (vector<string>::iterator it = vtrans.begin();
|
for (const auto& trans : vtrans) {
|
||||||
it != vtrans.end(); it++) {
|
|
||||||
|
|
||||||
/* Convert the whole thing to utf-16be/le according to endianness */
|
/* Convert the whole thing to utf-16be/le according to endianness */
|
||||||
char *out = 0;
|
char *out = 0;
|
||||||
size_t outsize;
|
size_t outsize;
|
||||||
if (convert("UTF-8", machinecoding,
|
if (convert("UTF-8", machinecoding, trans.c_str(), trans.size(), &out, &outsize) != 0 ||
|
||||||
it->c_str(), it->size(),
|
outsize < 2)
|
||||||
&out, &outsize) != 0 || outsize < 2)
|
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
/* The source char must be utf-16be as this is what we convert the
|
/* The source char must be utf-16be as this is what we convert the
|
||||||
|
|||||||
60
unac/unac.c
60
unac/unac.c
@ -13,7 +13,7 @@
|
|||||||
*
|
*
|
||||||
* You should have received a copy of the GNU General Public License
|
* You should have received a copy of the GNU General Public License
|
||||||
* along with this program; if not, write to the Free Software
|
* along with this program; if not, write to the Free Software
|
||||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifdef BUILDING_RECOLL
|
#ifdef BUILDING_RECOLL
|
||||||
@ -14290,23 +14290,17 @@ int unacmaybefold_string_utf16(const char* in, size_t in_length,
|
|||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
int unac_string_utf16(const char* in, size_t in_length,
|
int unac_string_utf16(const char* in, size_t in_length, char** outp, size_t* out_lengthp)
|
||||||
char** outp, size_t* out_lengthp)
|
|
||||||
{
|
{
|
||||||
return unacmaybefold_string_utf16(in, in_length,
|
return unacmaybefold_string_utf16(in, in_length, outp, out_lengthp, UNAC_UNAC);
|
||||||
outp, out_lengthp, UNAC_UNAC);
|
|
||||||
}
|
}
|
||||||
int unacfold_string_utf16(const char* in, size_t in_length,
|
int unacfold_string_utf16(const char* in, size_t in_length, char** outp, size_t* out_lengthp)
|
||||||
char** outp, size_t* out_lengthp)
|
|
||||||
{
|
{
|
||||||
return unacmaybefold_string_utf16(in, in_length,
|
return unacmaybefold_string_utf16(in, in_length, outp, out_lengthp, UNAC_UNACFOLD);
|
||||||
outp, out_lengthp, UNAC_UNACFOLD);
|
|
||||||
}
|
}
|
||||||
int fold_string_utf16(const char* in, size_t in_length,
|
int fold_string_utf16(const char* in, size_t in_length, char** outp, size_t* out_lengthp)
|
||||||
char** outp, size_t* out_lengthp)
|
|
||||||
{
|
{
|
||||||
return unacmaybefold_string_utf16(in, in_length,
|
return unacmaybefold_string_utf16(in, in_length, outp, out_lengthp, UNAC_FOLD);
|
||||||
outp, out_lengthp, UNAC_FOLD);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static const char *utf16be = "UTF-16BE";
|
static const char *utf16be = "UTF-16BE";
|
||||||
@ -14418,10 +14412,11 @@ static int convert(const char* from, const char* to,
|
|||||||
* should be submited to Unicode so that they can fix the problem.
|
* should be submited to Unicode so that they can fix the problem.
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
if(from_utf16) {
|
if (from_utf16) {
|
||||||
const char* tmp = space;
|
const char* tmp = space;
|
||||||
size_t tmp_length = 2;
|
size_t tmp_length = 2;
|
||||||
if(iconv(cd, (ICONV_CONST char **) &tmp, &tmp_length, &out, &out_remain) == (size_t)-1) {
|
if (iconv(cd, (ICONV_CONST char **)&tmp, &tmp_length, &out, &out_remain) ==
|
||||||
|
(size_t)-1) {
|
||||||
if(errno == E2BIG) {
|
if(errno == E2BIG) {
|
||||||
/* fall thru to the E2BIG case below */;
|
/* fall thru to the E2BIG case below */;
|
||||||
} else {
|
} else {
|
||||||
@ -14481,8 +14476,7 @@ out:
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
int unacmaybefold_string(const char* charset,
|
int unacmaybefold_string(const char* charset, const char* in, size_t in_length,
|
||||||
const char* in, size_t in_length,
|
|
||||||
char** outp, size_t* out_lengthp, int what)
|
char** outp, size_t* out_lengthp, int what)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
@ -14520,26 +14514,20 @@ int unacmaybefold_string(const char* charset,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int unac_string(const char* charset,
|
int unac_string(
|
||||||
const char* in, size_t in_length,
|
const char* charset, const char* in, size_t in_length, char** outp, size_t* out_lengthp)
|
||||||
char** outp, size_t* out_lengthp)
|
|
||||||
{
|
{
|
||||||
return unacmaybefold_string(charset, in, in_length,
|
return unacmaybefold_string(charset, in, in_length, outp, out_lengthp, UNAC_UNAC);
|
||||||
outp, out_lengthp, UNAC_UNAC);
|
|
||||||
}
|
}
|
||||||
int unacfold_string(const char* charset,
|
int unacfold_string(
|
||||||
const char* in, size_t in_length,
|
const char* charset, const char* in, size_t in_length, char** outp, size_t* out_lengthp)
|
||||||
char** outp, size_t* out_lengthp)
|
|
||||||
{
|
{
|
||||||
return unacmaybefold_string(charset, in, in_length,
|
return unacmaybefold_string(charset, in, in_length, outp, out_lengthp, UNAC_UNACFOLD);
|
||||||
outp, out_lengthp, UNAC_UNACFOLD);
|
|
||||||
}
|
}
|
||||||
int fold_string(const char* charset,
|
int fold_string(
|
||||||
const char* in, size_t in_length,
|
const char* charset, const char* in, size_t in_length, char** outp, size_t* out_lengthp)
|
||||||
char** outp, size_t* out_lengthp)
|
|
||||||
{
|
{
|
||||||
return unacmaybefold_string(charset, in, in_length,
|
return unacmaybefold_string(charset, in, in_length, outp, out_lengthp, UNAC_FOLD);
|
||||||
outp, out_lengthp, UNAC_FOLD);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const char* unac_version(void)
|
const char* unac_version(void)
|
||||||
@ -14577,15 +14565,13 @@ void unac_set_except_translations(const char *spectrans)
|
|||||||
vector<string> vtrans;
|
vector<string> vtrans;
|
||||||
stringToStrings(spectrans, vtrans);
|
stringToStrings(spectrans, vtrans);
|
||||||
|
|
||||||
for (vector<string>::iterator it = vtrans.begin();
|
for (const auto& trans : vtrans) {
|
||||||
it != vtrans.end(); it++) {
|
|
||||||
|
|
||||||
/* Convert the whole thing to utf-16be/le according to endianness */
|
/* Convert the whole thing to utf-16be/le according to endianness */
|
||||||
char *out = 0;
|
char *out = 0;
|
||||||
size_t outsize;
|
size_t outsize;
|
||||||
if (convert("UTF-8", machinecoding,
|
if (convert("UTF-8", machinecoding, trans.c_str(), trans.size(), &out, &outsize) != 0 ||
|
||||||
it->c_str(), it->size(),
|
outsize < 2)
|
||||||
&out, &outsize) != 0 || outsize < 2)
|
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
/* The source char must be utf-16be as this is what we convert the
|
/* The source char must be utf-16be as this is what we convert the
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user