plaintorich: indent and log lines
This commit is contained in:
parent
51189fa49e
commit
3f8f31732d
@ -47,71 +47,72 @@ struct MatchEntry {
|
||||
// match to the original user input.
|
||||
size_t grpidx;
|
||||
MatchEntry(int sta, int sto, size_t idx)
|
||||
: offs(sta, sto), grpidx(idx)
|
||||
{
|
||||
: offs(sta, sto), grpidx(idx) {
|
||||
}
|
||||
};
|
||||
|
||||
// Text splitter used to take note of the position of query terms
|
||||
// inside the result text. This is then used to insert highlight tags.
|
||||
class TextSplitPTR : public TextSplit {
|
||||
public:
|
||||
public:
|
||||
|
||||
// Out: begin and end byte positions of query terms/groups in text
|
||||
vector<MatchEntry> tboffs;
|
||||
|
||||
TextSplitPTR(const HighlightData& hdata)
|
||||
: m_wcount(0), m_hdata(hdata)
|
||||
{
|
||||
// We separate single terms and groups and extract the group
|
||||
// terms for computing positions list before looking for group
|
||||
// matches
|
||||
for (vector<vector<string> >::const_iterator vit = hdata.groups.begin();
|
||||
vit != hdata.groups.end(); vit++) {
|
||||
if (vit->size() == 1) {
|
||||
m_terms[vit->front()] = vit - hdata.groups.begin();
|
||||
} else if (vit->size() > 1) {
|
||||
for (vector<string>::const_iterator it = vit->begin();
|
||||
it != vit->end(); it++) {
|
||||
m_gterms.insert(*it);
|
||||
}
|
||||
}
|
||||
}
|
||||
: m_wcount(0), m_hdata(hdata) {
|
||||
// We separate single terms and groups and extract the group
|
||||
// terms for computing positions list before looking for group
|
||||
// matches
|
||||
for (vector<vector<string> >::const_iterator vit = hdata.groups.begin();
|
||||
vit != hdata.groups.end(); vit++) {
|
||||
if (vit->size() == 1) {
|
||||
m_terms[vit->front()] = vit - hdata.groups.begin();
|
||||
} else if (vit->size() > 1) {
|
||||
for (vector<string>::const_iterator it = vit->begin();
|
||||
it != vit->end(); it++) {
|
||||
m_gterms.insert(*it);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Accept word and its position. If word is search term, add
|
||||
// highlight zone definition. If word is part of search group
|
||||
// (phrase or near), update positions list.
|
||||
virtual bool takeword(const std::string& term, int pos, int bts, int bte) {
|
||||
string dumb = term;
|
||||
if (o_index_stripchars) {
|
||||
if (!unacmaybefold(term, dumb, "UTF-8", UNACOP_UNACFOLD)) {
|
||||
LOGINFO("PlainToRich::takeword: unac failed for [" << (term) << "]\n" );
|
||||
return true;
|
||||
}
|
||||
}
|
||||
string dumb = term;
|
||||
if (o_index_stripchars) {
|
||||
if (!unacmaybefold(term, dumb, "UTF-8", UNACOP_UNACFOLD)) {
|
||||
LOGINFO("PlainToRich::takeword: unac failed for [" << term <<
|
||||
"]\n");
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
//LOGDEB2("Input dumbbed term: '" << (dumb) << "' " << (// pos) << " " << (bts) << " " << (bte) << "\n" );
|
||||
LOGDEB2("Input dumbbed term: '" << dumb << "' " << pos << " " << bts
|
||||
<< " " << bte << "\n");
|
||||
|
||||
// If this word is a search term, remember its byte-offset span.
|
||||
map<string, size_t>::const_iterator it = m_terms.find(dumb);
|
||||
if (it != m_terms.end()) {
|
||||
tboffs.push_back(MatchEntry(bts, bte, (*it).second));
|
||||
}
|
||||
|
||||
// If word is part of a search group, update its positions list
|
||||
if (m_gterms.find(dumb) != m_gterms.end()) {
|
||||
// Term group (phrase/near) handling
|
||||
m_plists[dumb].push_back(pos);
|
||||
m_gpostobytes[pos] = pair<int,int>(bts, bte);
|
||||
//LOGDEB2("Recorded bpos for " << (pos) << ": " << (bts) << " " << (bte) << "\n" );
|
||||
}
|
||||
// If this word is a search term, remember its byte-offset span.
|
||||
map<string, size_t>::const_iterator it = m_terms.find(dumb);
|
||||
if (it != m_terms.end()) {
|
||||
tboffs.push_back(MatchEntry(bts, bte, it->second));
|
||||
}
|
||||
|
||||
// If word is part of a search group, update its positions list
|
||||
if (m_gterms.find(dumb) != m_gterms.end()) {
|
||||
// Term group (phrase/near) handling
|
||||
m_plists[dumb].push_back(pos);
|
||||
m_gpostobytes[pos] = pair<int,int>(bts, bte);
|
||||
LOGDEB2("Recorded bpos for " << pos << ": " << bts << " " <<
|
||||
bte << "\n");
|
||||
}
|
||||
|
||||
// Check for cancellation request
|
||||
if ((m_wcount++ & 0xfff) == 0)
|
||||
CancelCheck::instance().checkCancel();
|
||||
// Check for cancellation request
|
||||
if ((m_wcount++ & 0xfff) == 0)
|
||||
CancelCheck::instance().checkCancel();
|
||||
|
||||
return true;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Must be called after the split to find the phrase/near match positions
|
||||
@ -139,16 +140,15 @@ private:
|
||||
|
||||
/** Sort by shorter comparison class */
|
||||
class VecIntCmpShorter {
|
||||
public:
|
||||
/** Return true if and only if a is strictly shorter than b.
|
||||
*/
|
||||
bool operator()(const vector<int> *a, const vector<int> *b) {
|
||||
return a->size() < b->size();
|
||||
}
|
||||
public:
|
||||
/** Return true if and only if a is strictly shorter than b. */
|
||||
bool operator()(const vector<int> *a, const vector<int> *b) {
|
||||
return a->size() < b->size();
|
||||
}
|
||||
};
|
||||
|
||||
#define SETMINMAX(POS, STA, STO) {if ((POS) < (STA)) (STA) = (POS); \
|
||||
if ((POS) > (STO)) (STO) = (POS);}
|
||||
#define SETMINMAX(POS, STA, STO) {if ((POS) < (STA)) (STA) = (POS); \
|
||||
if ((POS) > (STO)) (STO) = (POS);}
|
||||
|
||||
// Check that at least an entry from the first position list is inside
|
||||
// the window and recurse on next list. The window is readjusted as
|
||||
@ -164,36 +164,37 @@ class VecIntCmpShorter {
|
||||
// any previous match. We don't look below this as overlapping matches
|
||||
// make no sense for highlighting.
|
||||
static bool do_proximity_test(int window, vector<vector<int>* >& plists,
|
||||
unsigned int i, int min, int max,
|
||||
int *sp, int *ep, int minpos)
|
||||
unsigned int i, int min, int max,
|
||||
int *sp, int *ep, int minpos)
|
||||
{
|
||||
LOGDEB1("do_prox_test: win " << (window) << " i " << (i) << " min " << (min) << " max " << (max) << " minpos " << (minpos) << "\n" );
|
||||
LOGDEB1("do_prox_test: win " << window << " i " << i << " min " <<
|
||||
min << " max " << max << " minpos " << minpos << "\n");
|
||||
int tmp = max + 1 - window;
|
||||
if (tmp < minpos)
|
||||
tmp = minpos;
|
||||
tmp = minpos;
|
||||
|
||||
// Find 1st position bigger than window start
|
||||
vector<int>::iterator it = plists[i]->begin();
|
||||
while (it != plists[i]->end() && *it < tmp)
|
||||
it++;
|
||||
it++;
|
||||
|
||||
// Look for position inside window. If not found, no match. If
|
||||
// found: if this is the last list we're done, else recurse on
|
||||
// next list after adjusting the window
|
||||
while (it != plists[i]->end()) {
|
||||
int pos = *it;
|
||||
if (pos > min + window - 1)
|
||||
return false;
|
||||
if (i + 1 == plists.size()) {
|
||||
SETMINMAX(pos, *sp, *ep);
|
||||
return true;
|
||||
}
|
||||
SETMINMAX(pos, min, max);
|
||||
if (do_proximity_test(window,plists, i + 1, min, max, sp, ep, minpos)) {
|
||||
SETMINMAX(pos, *sp, *ep);
|
||||
return true;
|
||||
}
|
||||
it++;
|
||||
int pos = *it;
|
||||
if (pos > min + window - 1)
|
||||
return false;
|
||||
if (i + 1 == plists.size()) {
|
||||
SETMINMAX(pos, *sp, *ep);
|
||||
return true;
|
||||
}
|
||||
SETMINMAX(pos, min, max);
|
||||
if (do_proximity_test(window,plists, i + 1, min, max, sp, ep, minpos)) {
|
||||
SETMINMAX(pos, *sp, *ep);
|
||||
return true;
|
||||
}
|
||||
it++;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
@ -204,7 +205,8 @@ bool TextSplitPTR::matchGroup(unsigned int grpidx)
|
||||
const vector<string>& terms = m_hdata.groups[grpidx];
|
||||
int window = int(m_hdata.groups[grpidx].size() + m_hdata.slacks[grpidx]);
|
||||
|
||||
LOGDEB1("TextSplitPTR::matchGroup:d " << (window) << ": " << (vecStringToString(terms)) << "\n" );
|
||||
LOGDEB1("TextSplitPTR::matchGroup:d " << window << ": " <<
|
||||
stringsToString(terms) << "\n");
|
||||
|
||||
// The position lists we are going to work with. We extract them from the
|
||||
// (string->plist) map
|
||||
@ -217,33 +219,35 @@ bool TextSplitPTR::matchGroup(unsigned int grpidx)
|
||||
// possible that this particular group was not actually matched by
|
||||
// the search, so that some terms are not found.
|
||||
for (vector<string>::const_iterator it = terms.begin();
|
||||
it != terms.end(); it++) {
|
||||
map<string, vector<int> >::iterator pl = m_plists.find(*it);
|
||||
if (pl == m_plists.end()) {
|
||||
LOGDEB1("TextSplitPTR::matchGroup: [" << ((*it)) << "] not found in m_plists\n" );
|
||||
return false;
|
||||
}
|
||||
plists.push_back(&(pl->second));
|
||||
plistToTerm[&(pl->second)] = *it;
|
||||
it != terms.end(); it++) {
|
||||
map<string, vector<int> >::iterator pl = m_plists.find(*it);
|
||||
if (pl == m_plists.end()) {
|
||||
LOGDEB1("TextSplitPTR::matchGroup: [" << *it <<
|
||||
"] not found in m_plists\n");
|
||||
return false;
|
||||
}
|
||||
plists.push_back(&(pl->second));
|
||||
plistToTerm[&(pl->second)] = *it;
|
||||
}
|
||||
// I think this can't actually happen, was useful when we used to
|
||||
// prune the groups, but doesn't hurt.
|
||||
if (plists.size() < 2) {
|
||||
LOGDEB1("TextSplitPTR::matchGroup: no actual groups found\n" );
|
||||
return false;
|
||||
LOGDEB1("TextSplitPTR::matchGroup: no actual groups found\n");
|
||||
return false;
|
||||
}
|
||||
// Sort the positions lists so that the shorter is first
|
||||
std::sort(plists.begin(), plists.end(), VecIntCmpShorter());
|
||||
|
||||
{ // Debug
|
||||
map<vector<int>*, string>::iterator it;
|
||||
it = plistToTerm.find(plists[0]);
|
||||
if (it == plistToTerm.end()) {
|
||||
// SuperWeird
|
||||
LOGERR("matchGroup: term for first list not found !?!\n" );
|
||||
return false;
|
||||
}
|
||||
LOGDEB1("matchGroup: walking the shortest plist. Term [" << (it->second) << "], len " << (plists[0]->size()) << "\n" );
|
||||
map<vector<int>*, string>::iterator it;
|
||||
it = plistToTerm.find(plists[0]);
|
||||
if (it == plistToTerm.end()) {
|
||||
// SuperWeird
|
||||
LOGERR("matchGroup: term for first list not found !?!\n");
|
||||
return false;
|
||||
}
|
||||
LOGDEB1("matchGroup: walking the shortest plist. Term [" <<
|
||||
it->second << "], len " << plists[0]->size() << "\n");
|
||||
}
|
||||
|
||||
// Minpos is the highest end of a found match. While looking for
|
||||
@ -253,29 +257,32 @@ bool TextSplitPTR::matchGroup(unsigned int grpidx)
|
||||
int minpos = 0;
|
||||
// Walk the shortest plist and look for matches
|
||||
for (vector<int>::iterator it = plists[0]->begin();
|
||||
it != plists[0]->end(); it++) {
|
||||
int pos = *it;
|
||||
int sta = INT_MAX, sto = 0;
|
||||
LOGDEB2("MatchGroup: Testing at pos " << (pos) << "\n" );
|
||||
if (do_proximity_test(window,plists, 1, pos, pos, &sta, &sto, minpos)) {
|
||||
LOGDEB1("TextSplitPTR::matchGroup: MATCH termpos [" << (sta) << "," << (sto) << "]\n" );
|
||||
// Maybe extend the window by 1st term position, this was not
|
||||
// done by do_prox..
|
||||
SETMINMAX(pos, sta, sto);
|
||||
minpos = sto+1;
|
||||
// Translate the position window into a byte offset window
|
||||
map<int, pair<int, int> >::iterator i1 = m_gpostobytes.find(sta);
|
||||
map<int, pair<int, int> >::iterator i2 = m_gpostobytes.find(sto);
|
||||
if (i1 != m_gpostobytes.end() && i2 != m_gpostobytes.end()) {
|
||||
LOGDEB2("TextSplitPTR::matchGroup: pushing bpos " << (i1->second.first) << " " << (i2->second.second) << "\n" );
|
||||
tboffs.push_back(MatchEntry(i1->second.first,
|
||||
i2->second.second, grpidx));
|
||||
} else {
|
||||
LOGDEB0("matchGroup: no bpos found for " << (sta) << " or " << (sto) << "\n" );
|
||||
}
|
||||
} else {
|
||||
LOGDEB1("matchGroup: no group match found at this position\n" );
|
||||
}
|
||||
it != plists[0]->end(); it++) {
|
||||
int pos = *it;
|
||||
int sta = INT_MAX, sto = 0;
|
||||
LOGDEB2("MatchGroup: Testing at pos " << pos << "\n");
|
||||
if (do_proximity_test(window,plists, 1, pos, pos, &sta, &sto, minpos)) {
|
||||
LOGDEB1("TextSplitPTR::matchGroup: MATCH termpos [" << sta <<
|
||||
"," << sto << "]\n");
|
||||
// Maybe extend the window by 1st term position, this was not
|
||||
// done by do_prox..
|
||||
SETMINMAX(pos, sta, sto);
|
||||
minpos = sto+1;
|
||||
// Translate the position window into a byte offset window
|
||||
map<int, pair<int, int> >::iterator i1 = m_gpostobytes.find(sta);
|
||||
map<int, pair<int, int> >::iterator i2 = m_gpostobytes.find(sto);
|
||||
if (i1 != m_gpostobytes.end() && i2 != m_gpostobytes.end()) {
|
||||
LOGDEB2("TextSplitPTR::matchGroup: pushing bpos " <<
|
||||
i1->second.first << " " << i2->second.second << "\n");
|
||||
tboffs.push_back(MatchEntry(i1->second.first,
|
||||
i2->second.second, grpidx));
|
||||
} else {
|
||||
LOGDEB0("matchGroup: no bpos found for " << sta << " or "
|
||||
<< sto << "\n");
|
||||
}
|
||||
} else {
|
||||
LOGDEB1("matchGroup: no group match found at this position\n");
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
@ -285,9 +292,9 @@ bool TextSplitPTR::matchGroup(unsigned int grpidx)
|
||||
class PairIntCmpFirst {
|
||||
public:
|
||||
bool operator()(const MatchEntry& a, const MatchEntry& b) {
|
||||
if (a.offs.first != b.offs.first)
|
||||
return a.offs.first < b.offs.first;
|
||||
return a.offs.second > b.offs.second;
|
||||
if (a.offs.first != b.offs.first)
|
||||
return a.offs.first < b.offs.first;
|
||||
return a.offs.second > b.offs.second;
|
||||
}
|
||||
};
|
||||
|
||||
@ -298,9 +305,9 @@ public:
|
||||
bool TextSplitPTR::matchGroups()
|
||||
{
|
||||
for (unsigned int i = 0; i < m_hdata.groups.size(); i++) {
|
||||
if (m_hdata.groups[i].size() <= 1)
|
||||
continue;
|
||||
matchGroup(i);
|
||||
if (m_hdata.groups[i].size() <= 1)
|
||||
continue;
|
||||
matchGroup(i);
|
||||
}
|
||||
|
||||
// Sort regions by increasing start and decreasing width.
|
||||
@ -319,13 +326,13 @@ bool TextSplitPTR::matchGroups()
|
||||
// to cut in the middle of a tag, which would confuse qtextedit. If
|
||||
// the input is html, the body is always a single output chunk.
|
||||
bool PlainToRich::plaintorich(const string& in,
|
||||
list<string>& out, // Output chunk list
|
||||
const HighlightData& hdata,
|
||||
int chunksize)
|
||||
list<string>& out, // Output chunk list
|
||||
const HighlightData& hdata,
|
||||
int chunksize)
|
||||
{
|
||||
Chrono chron;
|
||||
bool ret = true;
|
||||
LOGDEB1("plaintorichich: in: [" << (in) << "]\n" );
|
||||
LOGDEB1("plaintorichich: in: [" << in << "]\n");
|
||||
|
||||
m_hdata = &hdata;
|
||||
// Compute the positions for the query terms. We use the text
|
||||
@ -335,10 +342,10 @@ bool PlainToRich::plaintorich(const string& in,
|
||||
// Note: the splitter returns the term locations in byte, not
|
||||
// character, offsets.
|
||||
splitter.text_to_words(in);
|
||||
LOGDEB2("plaintorich: split done " << (chron.millis()) << " mS\n" );
|
||||
LOGDEB2("plaintorich: split done " << chron.millis() << " mS\n");
|
||||
// Compute the positions for NEAR and PHRASE groups.
|
||||
splitter.matchGroups();
|
||||
LOGDEB2("plaintorich: group match done " << (chron.millis()) << " mS\n" );
|
||||
LOGDEB2("plaintorich: group match done " << chron.millis() << " mS\n");
|
||||
|
||||
out.clear();
|
||||
out.push_back("");
|
||||
@ -351,8 +358,8 @@ bool PlainToRich::plaintorich(const string& in,
|
||||
// a term match when we are actually looking for a group match
|
||||
// (the snippet generator does this...).
|
||||
if (splitter.tboffs.empty()) {
|
||||
LOGDEB1("plaintorich: no term matches\n" );
|
||||
ret = false;
|
||||
LOGDEB1("plaintorich: no term matches\n");
|
||||
ret = false;
|
||||
}
|
||||
|
||||
// Iterator for the list of input term positions. We use it to
|
||||
@ -363,8 +370,8 @@ bool PlainToRich::plaintorich(const string& in,
|
||||
|
||||
#if 0
|
||||
for (vector<pair<int, int> >::const_iterator it = splitter.tboffs.begin();
|
||||
it != splitter.tboffs.end(); it++) {
|
||||
LOGDEB2("plaintorich: region: " << (it->first) << " " << (it->second) << "\n" );
|
||||
it != splitter.tboffs.end(); it++) {
|
||||
LOGDEB2("plaintorich: region: " << it->first << " "<<it->second<< "\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -384,41 +391,41 @@ bool PlainToRich::plaintorich(const string& in,
|
||||
|
||||
string::size_type headend = 0;
|
||||
if (m_inputhtml) {
|
||||
headend = in.find("</head>");
|
||||
if (headend == string::npos)
|
||||
headend = in.find("</HEAD>");
|
||||
if (headend != string::npos)
|
||||
headend += 7;
|
||||
headend = in.find("</head>");
|
||||
if (headend == string::npos)
|
||||
headend = in.find("</HEAD>");
|
||||
if (headend != string::npos)
|
||||
headend += 7;
|
||||
}
|
||||
|
||||
for (string::size_type pos = 0; pos != string::npos; pos = chariter++) {
|
||||
// Check from time to time if we need to stop
|
||||
if ((pos & 0xfff) == 0) {
|
||||
CancelCheck::instance().checkCancel();
|
||||
}
|
||||
// Check from time to time if we need to stop
|
||||
if ((pos & 0xfff) == 0) {
|
||||
CancelCheck::instance().checkCancel();
|
||||
}
|
||||
|
||||
// If we still have terms positions, check (byte) position. If
|
||||
// we are at or after a term match, mark.
|
||||
if (tPosIt != tPosEnd) {
|
||||
int ibyteidx = int(chariter.getBpos());
|
||||
if (ibyteidx == tPosIt->offs.first) {
|
||||
if (!intag && ibyteidx >= (int)headend) {
|
||||
*olit += startMatch((unsigned int)(tPosIt->grpidx));
|
||||
}
|
||||
// If we still have terms positions, check (byte) position. If
|
||||
// we are at or after a term match, mark.
|
||||
if (tPosIt != tPosEnd) {
|
||||
int ibyteidx = int(chariter.getBpos());
|
||||
if (ibyteidx == tPosIt->offs.first) {
|
||||
if (!intag && ibyteidx >= (int)headend) {
|
||||
*olit += startMatch((unsigned int)(tPosIt->grpidx));
|
||||
}
|
||||
inrcltag = 1;
|
||||
} else if (ibyteidx == tPosIt->offs.second) {
|
||||
// Output end of match region tags
|
||||
if (!intag && ibyteidx > (int)headend) {
|
||||
*olit += endMatch();
|
||||
}
|
||||
// Skip all highlight areas that would overlap this one
|
||||
int crend = tPosIt->offs.second;
|
||||
while (tPosIt != splitter.tboffs.end() &&
|
||||
tPosIt->offs.first < crend)
|
||||
tPosIt++;
|
||||
} else if (ibyteidx == tPosIt->offs.second) {
|
||||
// Output end of match region tags
|
||||
if (!intag && ibyteidx > (int)headend) {
|
||||
*olit += endMatch();
|
||||
}
|
||||
// Skip all highlight areas that would overlap this one
|
||||
int crend = tPosIt->offs.second;
|
||||
while (tPosIt != splitter.tboffs.end() &&
|
||||
tPosIt->offs.first < crend)
|
||||
tPosIt++;
|
||||
inrcltag = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsigned int car = *chariter;
|
||||
|
||||
@ -433,13 +440,13 @@ bool PlainToRich::plaintorich(const string& in,
|
||||
continue;
|
||||
} else if (eol) {
|
||||
// Got non eol char in line break state. Do line break;
|
||||
inindent = 1;
|
||||
inindent = 1;
|
||||
hadcr = 0;
|
||||
if (eol > 2)
|
||||
eol = 2;
|
||||
while (eol) {
|
||||
if (!m_inputhtml && m_eolbr)
|
||||
*olit += "<br>";
|
||||
if (!m_inputhtml && m_eolbr)
|
||||
*olit += "<br>";
|
||||
*olit += "\n";
|
||||
eol--;
|
||||
}
|
||||
@ -455,7 +462,7 @@ bool PlainToRich::plaintorich(const string& in,
|
||||
|
||||
switch (car) {
|
||||
case '<':
|
||||
inindent = 0;
|
||||
inindent = 0;
|
||||
if (m_inputhtml) {
|
||||
if (!inparamvalue)
|
||||
intag = true;
|
||||
@ -465,7 +472,7 @@ bool PlainToRich::plaintorich(const string& in,
|
||||
}
|
||||
break;
|
||||
case '>':
|
||||
inindent = 0;
|
||||
inindent = 0;
|
||||
if (m_inputhtml) {
|
||||
if (!inparamvalue)
|
||||
intag = false;
|
||||
@ -473,7 +480,7 @@ bool PlainToRich::plaintorich(const string& in,
|
||||
chariter.appendchartostring(*olit);
|
||||
break;
|
||||
case '&':
|
||||
inindent = 0;
|
||||
inindent = 0;
|
||||
if (m_inputhtml) {
|
||||
chariter.appendchartostring(*olit);
|
||||
} else {
|
||||
@ -481,30 +488,30 @@ bool PlainToRich::plaintorich(const string& in,
|
||||
}
|
||||
break;
|
||||
case '"':
|
||||
inindent = 0;
|
||||
inindent = 0;
|
||||
if (m_inputhtml && intag) {
|
||||
inparamvalue = !inparamvalue;
|
||||
}
|
||||
chariter.appendchartostring(*olit);
|
||||
break;
|
||||
|
||||
case ' ':
|
||||
if (m_eolbr && inindent) {
|
||||
*olit += " ";
|
||||
} else {
|
||||
chariter.appendchartostring(*olit);
|
||||
}
|
||||
break;
|
||||
case '\t':
|
||||
if (m_eolbr && inindent) {
|
||||
*olit += " ";
|
||||
} else {
|
||||
chariter.appendchartostring(*olit);
|
||||
}
|
||||
break;
|
||||
case ' ':
|
||||
if (m_eolbr && inindent) {
|
||||
*olit += " ";
|
||||
} else {
|
||||
chariter.appendchartostring(*olit);
|
||||
}
|
||||
break;
|
||||
case '\t':
|
||||
if (m_eolbr && inindent) {
|
||||
*olit += " ";
|
||||
} else {
|
||||
chariter.appendchartostring(*olit);
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
inindent = 0;
|
||||
inindent = 0;
|
||||
chariter.appendchartostring(*olit);
|
||||
}
|
||||
|
||||
@ -512,19 +519,18 @@ bool PlainToRich::plaintorich(const string& in,
|
||||
|
||||
#if 0
|
||||
{
|
||||
FILE *fp = fopen("/tmp/debugplaintorich", "a");
|
||||
fprintf(fp, "BEGINOFPLAINTORICHOUTPUT\n");
|
||||
for (list<string>::iterator it = out.begin();
|
||||
it != out.end(); it++) {
|
||||
fprintf(fp, "BEGINOFPLAINTORICHCHUNK\n");
|
||||
fprintf(fp, "%s", it->c_str());
|
||||
fprintf(fp, "ENDOFPLAINTORICHCHUNK\n");
|
||||
}
|
||||
fprintf(fp, "ENDOFPLAINTORICHOUTPUT\n");
|
||||
fclose(fp);
|
||||
FILE *fp = fopen("/tmp/debugplaintorich", "a");
|
||||
fprintf(fp, "BEGINOFPLAINTORICHOUTPUT\n");
|
||||
for (list<string>::iterator it = out.begin();
|
||||
it != out.end(); it++) {
|
||||
fprintf(fp, "BEGINOFPLAINTORICHCHUNK\n");
|
||||
fprintf(fp, "%s", it->c_str());
|
||||
fprintf(fp, "ENDOFPLAINTORICHCHUNK\n");
|
||||
}
|
||||
fprintf(fp, "ENDOFPLAINTORICHOUTPUT\n");
|
||||
fclose(fp);
|
||||
}
|
||||
#endif
|
||||
LOGDEB2("plaintorich: done " << (chron.millis()) << " mS\n" );
|
||||
LOGDEB2("plaintorich: done " << chron.millis() << " mS\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
@ -32,17 +32,13 @@
|
||||
class PlainToRich {
|
||||
public:
|
||||
PlainToRich()
|
||||
: m_inputhtml(false), m_eolbr(false), m_hdata(0)
|
||||
{
|
||||
: m_inputhtml(false), m_eolbr(false), m_hdata(0) {
|
||||
}
|
||||
|
||||
virtual ~PlainToRich()
|
||||
{
|
||||
}
|
||||
virtual ~PlainToRich() {}
|
||||
|
||||
void set_inputhtml(bool v)
|
||||
{
|
||||
m_inputhtml = v;
|
||||
void set_inputhtml(bool v) {
|
||||
m_inputhtml = v;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -67,33 +63,29 @@ public:
|
||||
* @param chunksize max size of chunks in output list
|
||||
*/
|
||||
virtual bool plaintorich(const std::string &in, std::list<std::string> &out,
|
||||
const HighlightData& hdata,
|
||||
int chunksize = 50000
|
||||
);
|
||||
const HighlightData& hdata,
|
||||
int chunksize = 50000
|
||||
);
|
||||
|
||||
/* Overridable output methods for headers, highlighting and marking tags */
|
||||
|
||||
virtual std::string header()
|
||||
{
|
||||
return cstr_null;
|
||||
virtual std::string header() {
|
||||
return cstr_null;
|
||||
}
|
||||
|
||||
/** Return match prefix (e.g.: <div class="match">).
|
||||
@param groupidx the index into hdata.groups */
|
||||
virtual std::string startMatch(unsigned int)
|
||||
{
|
||||
return cstr_null;
|
||||
@param groupidx the index into hdata.groups */
|
||||
virtual std::string startMatch(unsigned int) {
|
||||
return cstr_null;
|
||||
}
|
||||
|
||||
/** Return data for end of match area (e.g.: </div>). */
|
||||
virtual std::string endMatch()
|
||||
{
|
||||
return cstr_null;
|
||||
virtual std::string endMatch() {
|
||||
return cstr_null;
|
||||
}
|
||||
|
||||
virtual std::string startChunk()
|
||||
{
|
||||
return cstr_null;
|
||||
virtual std::string startChunk() {
|
||||
return cstr_null;
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user