plaintorich: indent and log lines
This commit is contained in:
parent
51189fa49e
commit
3f8f31732d
@ -47,22 +47,20 @@ struct MatchEntry {
|
|||||||
// match to the original user input.
|
// match to the original user input.
|
||||||
size_t grpidx;
|
size_t grpidx;
|
||||||
MatchEntry(int sta, int sto, size_t idx)
|
MatchEntry(int sta, int sto, size_t idx)
|
||||||
: offs(sta, sto), grpidx(idx)
|
: offs(sta, sto), grpidx(idx) {
|
||||||
{
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Text splitter used to take note of the position of query terms
|
// Text splitter used to take note of the position of query terms
|
||||||
// inside the result text. This is then used to insert highlight tags.
|
// inside the result text. This is then used to insert highlight tags.
|
||||||
class TextSplitPTR : public TextSplit {
|
class TextSplitPTR : public TextSplit {
|
||||||
public:
|
public:
|
||||||
|
|
||||||
// Out: begin and end byte positions of query terms/groups in text
|
// Out: begin and end byte positions of query terms/groups in text
|
||||||
vector<MatchEntry> tboffs;
|
vector<MatchEntry> tboffs;
|
||||||
|
|
||||||
TextSplitPTR(const HighlightData& hdata)
|
TextSplitPTR(const HighlightData& hdata)
|
||||||
: m_wcount(0), m_hdata(hdata)
|
: m_wcount(0), m_hdata(hdata) {
|
||||||
{
|
|
||||||
// We separate single terms and groups and extract the group
|
// We separate single terms and groups and extract the group
|
||||||
// terms for computing positions list before looking for group
|
// terms for computing positions list before looking for group
|
||||||
// matches
|
// matches
|
||||||
@ -86,17 +84,19 @@ class TextSplitPTR : public TextSplit {
|
|||||||
string dumb = term;
|
string dumb = term;
|
||||||
if (o_index_stripchars) {
|
if (o_index_stripchars) {
|
||||||
if (!unacmaybefold(term, dumb, "UTF-8", UNACOP_UNACFOLD)) {
|
if (!unacmaybefold(term, dumb, "UTF-8", UNACOP_UNACFOLD)) {
|
||||||
LOGINFO("PlainToRich::takeword: unac failed for [" << (term) << "]\n" );
|
LOGINFO("PlainToRich::takeword: unac failed for [" << term <<
|
||||||
|
"]\n");
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//LOGDEB2("Input dumbbed term: '" << (dumb) << "' " << (// pos) << " " << (bts) << " " << (bte) << "\n" );
|
LOGDEB2("Input dumbbed term: '" << dumb << "' " << pos << " " << bts
|
||||||
|
<< " " << bte << "\n");
|
||||||
|
|
||||||
// If this word is a search term, remember its byte-offset span.
|
// If this word is a search term, remember its byte-offset span.
|
||||||
map<string, size_t>::const_iterator it = m_terms.find(dumb);
|
map<string, size_t>::const_iterator it = m_terms.find(dumb);
|
||||||
if (it != m_terms.end()) {
|
if (it != m_terms.end()) {
|
||||||
tboffs.push_back(MatchEntry(bts, bte, (*it).second));
|
tboffs.push_back(MatchEntry(bts, bte, it->second));
|
||||||
}
|
}
|
||||||
|
|
||||||
// If word is part of a search group, update its positions list
|
// If word is part of a search group, update its positions list
|
||||||
@ -104,7 +104,8 @@ class TextSplitPTR : public TextSplit {
|
|||||||
// Term group (phrase/near) handling
|
// Term group (phrase/near) handling
|
||||||
m_plists[dumb].push_back(pos);
|
m_plists[dumb].push_back(pos);
|
||||||
m_gpostobytes[pos] = pair<int,int>(bts, bte);
|
m_gpostobytes[pos] = pair<int,int>(bts, bte);
|
||||||
//LOGDEB2("Recorded bpos for " << (pos) << ": " << (bts) << " " << (bte) << "\n" );
|
LOGDEB2("Recorded bpos for " << pos << ": " << bts << " " <<
|
||||||
|
bte << "\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check for cancellation request
|
// Check for cancellation request
|
||||||
@ -139,9 +140,8 @@ private:
|
|||||||
|
|
||||||
/** Comparison class used to sort position lists, shortest first */
|
/** Comparison class used to sort position lists, shortest first */
|
||||||
class VecIntCmpShorter {
|
class VecIntCmpShorter {
|
||||||
public:
|
public:
|
||||||
/** Return true if and only if a is strictly shorter than b.
|
/** Return true if and only if a is strictly shorter than b. */
|
||||||
*/
|
|
||||||
bool operator()(const vector<int> *a, const vector<int> *b) {
|
bool operator()(const vector<int> *a, const vector<int> *b) {
|
||||||
return a->size() < b->size();
|
return a->size() < b->size();
|
||||||
}
|
}
|
||||||
@ -167,7 +167,8 @@ static bool do_proximity_test(int window, vector<vector<int>* >& plists,
|
|||||||
unsigned int i, int min, int max,
|
unsigned int i, int min, int max,
|
||||||
int *sp, int *ep, int minpos)
|
int *sp, int *ep, int minpos)
|
||||||
{
|
{
|
||||||
LOGDEB1("do_prox_test: win " << (window) << " i " << (i) << " min " << (min) << " max " << (max) << " minpos " << (minpos) << "\n" );
|
LOGDEB1("do_prox_test: win " << window << " i " << i << " min " <<
|
||||||
|
min << " max " << max << " minpos " << minpos << "\n");
|
||||||
int tmp = max + 1 - window;
|
int tmp = max + 1 - window;
|
||||||
if (tmp < minpos)
|
if (tmp < minpos)
|
||||||
tmp = minpos;
|
tmp = minpos;
|
||||||
@ -204,7 +205,8 @@ bool TextSplitPTR::matchGroup(unsigned int grpidx)
|
|||||||
const vector<string>& terms = m_hdata.groups[grpidx];
|
const vector<string>& terms = m_hdata.groups[grpidx];
|
||||||
int window = int(m_hdata.groups[grpidx].size() + m_hdata.slacks[grpidx]);
|
int window = int(m_hdata.groups[grpidx].size() + m_hdata.slacks[grpidx]);
|
||||||
|
|
||||||
LOGDEB1("TextSplitPTR::matchGroup:d " << (window) << ": " << (vecStringToString(terms)) << "\n" );
|
LOGDEB1("TextSplitPTR::matchGroup:d " << window << ": " <<
|
||||||
|
stringsToString(terms) << "\n");
|
||||||
|
|
||||||
// The position lists we are going to work with. We extract them from the
|
// The position lists we are going to work with. We extract them from the
|
||||||
// (string->plist) map
|
// (string->plist) map
|
||||||
@ -220,7 +222,8 @@ bool TextSplitPTR::matchGroup(unsigned int grpidx)
|
|||||||
it != terms.end(); it++) {
|
it != terms.end(); it++) {
|
||||||
map<string, vector<int> >::iterator pl = m_plists.find(*it);
|
map<string, vector<int> >::iterator pl = m_plists.find(*it);
|
||||||
if (pl == m_plists.end()) {
|
if (pl == m_plists.end()) {
|
||||||
LOGDEB1("TextSplitPTR::matchGroup: [" << ((*it)) << "] not found in m_plists\n" );
|
LOGDEB1("TextSplitPTR::matchGroup: [" << *it <<
|
||||||
|
"] not found in m_plists\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
plists.push_back(&(pl->second));
|
plists.push_back(&(pl->second));
|
||||||
@ -229,7 +232,7 @@ bool TextSplitPTR::matchGroup(unsigned int grpidx)
|
|||||||
// I think this can't actually happen any more: the check was useful when
|
// I think this can't actually happen any more: the check was useful when
|
||||||
// we used to prune the groups. It does no harm, so it is kept.
|
// we used to prune the groups. It does no harm, so it is kept.
|
||||||
if (plists.size() < 2) {
|
if (plists.size() < 2) {
|
||||||
LOGDEB1("TextSplitPTR::matchGroup: no actual groups found\n" );
|
LOGDEB1("TextSplitPTR::matchGroup: no actual groups found\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// Sort the positions lists so that the shorter is first
|
// Sort the positions lists so that the shorter is first
|
||||||
@ -240,10 +243,11 @@ bool TextSplitPTR::matchGroup(unsigned int grpidx)
|
|||||||
it = plistToTerm.find(plists[0]);
|
it = plistToTerm.find(plists[0]);
|
||||||
if (it == plistToTerm.end()) {
|
if (it == plistToTerm.end()) {
|
||||||
// SuperWeird
|
// SuperWeird
|
||||||
LOGERR("matchGroup: term for first list not found !?!\n" );
|
LOGERR("matchGroup: term for first list not found !?!\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
LOGDEB1("matchGroup: walking the shortest plist. Term [" << (it->second) << "], len " << (plists[0]->size()) << "\n" );
|
LOGDEB1("matchGroup: walking the shortest plist. Term [" <<
|
||||||
|
it->second << "], len " << plists[0]->size() << "\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Minpos is the highest end of a found match. While looking for
|
// Minpos is the highest end of a found match. While looking for
|
||||||
@ -256,9 +260,10 @@ bool TextSplitPTR::matchGroup(unsigned int grpidx)
|
|||||||
it != plists[0]->end(); it++) {
|
it != plists[0]->end(); it++) {
|
||||||
int pos = *it;
|
int pos = *it;
|
||||||
int sta = INT_MAX, sto = 0;
|
int sta = INT_MAX, sto = 0;
|
||||||
LOGDEB2("MatchGroup: Testing at pos " << (pos) << "\n" );
|
LOGDEB2("MatchGroup: Testing at pos " << pos << "\n");
|
||||||
if (do_proximity_test(window,plists, 1, pos, pos, &sta, &sto, minpos)) {
|
if (do_proximity_test(window,plists, 1, pos, pos, &sta, &sto, minpos)) {
|
||||||
LOGDEB1("TextSplitPTR::matchGroup: MATCH termpos [" << (sta) << "," << (sto) << "]\n" );
|
LOGDEB1("TextSplitPTR::matchGroup: MATCH termpos [" << sta <<
|
||||||
|
"," << sto << "]\n");
|
||||||
// Maybe extend the window by 1st term position, this was not
|
// Maybe extend the window by 1st term position, this was not
|
||||||
// done by do_prox..
|
// done by do_prox..
|
||||||
SETMINMAX(pos, sta, sto);
|
SETMINMAX(pos, sta, sto);
|
||||||
@ -267,14 +272,16 @@ bool TextSplitPTR::matchGroup(unsigned int grpidx)
|
|||||||
map<int, pair<int, int> >::iterator i1 = m_gpostobytes.find(sta);
|
map<int, pair<int, int> >::iterator i1 = m_gpostobytes.find(sta);
|
||||||
map<int, pair<int, int> >::iterator i2 = m_gpostobytes.find(sto);
|
map<int, pair<int, int> >::iterator i2 = m_gpostobytes.find(sto);
|
||||||
if (i1 != m_gpostobytes.end() && i2 != m_gpostobytes.end()) {
|
if (i1 != m_gpostobytes.end() && i2 != m_gpostobytes.end()) {
|
||||||
LOGDEB2("TextSplitPTR::matchGroup: pushing bpos " << (i1->second.first) << " " << (i2->second.second) << "\n" );
|
LOGDEB2("TextSplitPTR::matchGroup: pushing bpos " <<
|
||||||
|
i1->second.first << " " << i2->second.second << "\n");
|
||||||
tboffs.push_back(MatchEntry(i1->second.first,
|
tboffs.push_back(MatchEntry(i1->second.first,
|
||||||
i2->second.second, grpidx));
|
i2->second.second, grpidx));
|
||||||
} else {
|
} else {
|
||||||
LOGDEB0("matchGroup: no bpos found for " << (sta) << " or " << (sto) << "\n" );
|
LOGDEB0("matchGroup: no bpos found for " << sta << " or "
|
||||||
|
<< sto << "\n");
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
LOGDEB1("matchGroup: no group match found at this position\n" );
|
LOGDEB1("matchGroup: no group match found at this position\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -325,7 +332,7 @@ bool PlainToRich::plaintorich(const string& in,
|
|||||||
{
|
{
|
||||||
Chrono chron;
|
Chrono chron;
|
||||||
bool ret = true;
|
bool ret = true;
|
||||||
LOGDEB1("plaintorichich: in: [" << (in) << "]\n" );
|
LOGDEB1("plaintorichich: in: [" << in << "]\n");
|
||||||
|
|
||||||
m_hdata = &hdata;
|
m_hdata = &hdata;
|
||||||
// Compute the positions for the query terms. We use the text
|
// Compute the positions for the query terms. We use the text
|
||||||
@ -335,10 +342,10 @@ bool PlainToRich::plaintorich(const string& in,
|
|||||||
// Note: the splitter returns the term locations in byte, not
|
// Note: the splitter returns the term locations in byte, not
|
||||||
// character, offsets.
|
// character, offsets.
|
||||||
splitter.text_to_words(in);
|
splitter.text_to_words(in);
|
||||||
LOGDEB2("plaintorich: split done " << (chron.millis()) << " mS\n" );
|
LOGDEB2("plaintorich: split done " << chron.millis() << " mS\n");
|
||||||
// Compute the positions for NEAR and PHRASE groups.
|
// Compute the positions for NEAR and PHRASE groups.
|
||||||
splitter.matchGroups();
|
splitter.matchGroups();
|
||||||
LOGDEB2("plaintorich: group match done " << (chron.millis()) << " mS\n" );
|
LOGDEB2("plaintorich: group match done " << chron.millis() << " mS\n");
|
||||||
|
|
||||||
out.clear();
|
out.clear();
|
||||||
out.push_back("");
|
out.push_back("");
|
||||||
@ -351,7 +358,7 @@ bool PlainToRich::plaintorich(const string& in,
|
|||||||
// a term match when we are actually looking for a group match
|
// a term match when we are actually looking for a group match
|
||||||
// (the snippet generator does this...).
|
// (the snippet generator does this...).
|
||||||
if (splitter.tboffs.empty()) {
|
if (splitter.tboffs.empty()) {
|
||||||
LOGDEB1("plaintorich: no term matches\n" );
|
LOGDEB1("plaintorich: no term matches\n");
|
||||||
ret = false;
|
ret = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -364,7 +371,7 @@ bool PlainToRich::plaintorich(const string& in,
|
|||||||
#if 0
|
#if 0
|
||||||
for (vector<pair<int, int> >::const_iterator it = splitter.tboffs.begin();
|
for (vector<pair<int, int> >::const_iterator it = splitter.tboffs.begin();
|
||||||
it != splitter.tboffs.end(); it++) {
|
it != splitter.tboffs.end(); it++) {
|
||||||
LOGDEB2("plaintorich: region: " << (it->first) << " " << (it->second) << "\n" );
|
LOGDEB2("plaintorich: region: " << it->first << " "<<it->second<< "\n");
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -524,7 +531,6 @@ bool PlainToRich::plaintorich(const string& in,
|
|||||||
fclose(fp);
|
fclose(fp);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
LOGDEB2("plaintorich: done " << (chron.millis()) << " mS\n" );
|
LOGDEB2("plaintorich: done " << chron.millis() << " mS\n");
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -32,16 +32,12 @@
|
|||||||
class PlainToRich {
|
class PlainToRich {
|
||||||
public:
|
public:
|
||||||
PlainToRich()
|
PlainToRich()
|
||||||
: m_inputhtml(false), m_eolbr(false), m_hdata(0)
|
: m_inputhtml(false), m_eolbr(false), m_hdata(0) {
|
||||||
{
|
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual ~PlainToRich()
|
virtual ~PlainToRich() {}
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
void set_inputhtml(bool v)
|
void set_inputhtml(bool v) {
|
||||||
{
|
|
||||||
m_inputhtml = v;
|
m_inputhtml = v;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -73,26 +69,22 @@ public:
|
|||||||
|
|
||||||
/* Overridable output methods for headers, highlighting and marking tags */
|
/* Overridable output methods for headers, highlighting and marking tags */
|
||||||
|
|
||||||
virtual std::string header()
|
virtual std::string header() {
|
||||||
{
|
|
||||||
return cstr_null;
|
return cstr_null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Return match prefix (e.g.: <div class="match">).
|
/** Return match prefix (e.g.: <div class="match">).
|
||||||
@param groupidx the index into hdata.groups */
|
@param groupidx the index into hdata.groups */
|
||||||
virtual std::string startMatch(unsigned int)
|
virtual std::string startMatch(unsigned int) {
|
||||||
{
|
|
||||||
return cstr_null;
|
return cstr_null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Return data for end of match area (e.g.: </div>). */
|
/** Return data for end of match area (e.g.: </div>). */
|
||||||
virtual std::string endMatch()
|
virtual std::string endMatch() {
|
||||||
{
|
|
||||||
return cstr_null;
|
return cstr_null;
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual std::string startChunk()
|
virtual std::string startChunk() {
|
||||||
{
|
|
||||||
return cstr_null;
|
return cstr_null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user