Defined a data structure to pass around the search term description used for highlighting and other…

This commit is contained in:
Jean-Francois Dockes 2012-08-17 10:45:00 +02:00
parent ebdd6faaf5
commit dc7b3420a0
16 changed files with 409 additions and 351 deletions

View File

@ -686,12 +686,12 @@ class LoadThread : public QThread {
/* A thread to convert to rich text (mark search terms) */ /* A thread to convert to rich text (mark search terms) */
class ToRichThread : public QThread { class ToRichThread : public QThread {
string ∈ string ∈
const HiliteData &hdata; const HighlightData &hdata;
list<string> &out; list<string> &out;
int loglevel; int loglevel;
PlainToRichQtPreview *ptr; PlainToRichQtPreview *ptr;
public: public:
ToRichThread(string &i, const HiliteData& hd, list<string> &o, ToRichThread(string &i, const HighlightData& hd, list<string> &o,
PlainToRichQtPreview *_ptr) PlainToRichQtPreview *_ptr)
: in(i), hdata(hd), out(o), ptr(_ptr) : in(i), hdata(hd), out(o), ptr(_ptr)
{ {

View File

@ -91,7 +91,7 @@ class Preview : public QWidget {
public: public:
Preview(int sid, // Search Id Preview(int sid, // Search Id
const HiliteData& hdata) // Search terms etc. for highlighting const HighlightData& hdata) // Search terms etc. for highlighting
: QWidget(0), m_searchId(sid), m_hData(hdata) : QWidget(0), m_searchId(sid), m_hData(hdata)
{ {
init(); init();
@ -141,7 +141,7 @@ private:
bool m_canBeep; bool m_canBeep;
bool m_loading; bool m_loading;
QWidget *m_currentW; QWidget *m_currentW;
HiliteData m_hData; HighlightData m_hData;
bool m_justCreated; // First tab create is different bool m_justCreated; // First tab create is different
bool m_haveAnchors; // Search terms are marked in text bool m_haveAnchors; // Search terms are marked in text
int m_lastAnchor; // Number of last anchor. Then rewind to 1 int m_lastAnchor; // Number of last anchor. Then rewind to 1

View File

@ -1220,8 +1220,8 @@ void RclMain::startPreview(int docnum, Rcl::Doc doc, int mod)
curPreview = 0; curPreview = 0;
} }
if (curPreview == 0) { if (curPreview == 0) {
HiliteData hdata; HighlightData hdata;
m_source->getTerms(hdata.terms, hdata.groups, hdata.gslks); m_source->getTerms(hdata);
curPreview = new Preview(reslist->listId(), hdata); curPreview = new Preview(reslist->listId(), hdata);
if (curPreview == 0) { if (curPreview == 0) {
@ -1284,7 +1284,7 @@ void RclMain::updateIdxForDocs(vector<Rcl::Doc>& docs)
*/ */
void RclMain::startPreview(Rcl::Doc doc) void RclMain::startPreview(Rcl::Doc doc)
{ {
Preview *preview = new Preview(0, HiliteData()); Preview *preview = new Preview(0, HighlightData());
if (preview == 0) { if (preview == 0) {
QMessageBox::warning(0, tr("Warning"), QMessageBox::warning(0, tr("Warning"),
tr("Can't create preview window"), tr("Can't create preview window"),

View File

@ -299,8 +299,8 @@ void RecollModel::setDocSource(RefCntr<DocSequence> nsource)
m_source = RefCntr<DocSequence>(); m_source = RefCntr<DocSequence>();
} else { } else {
m_source = RefCntr<DocSequence>(new DocSource(theconfig, nsource)); m_source = RefCntr<DocSequence>(new DocSource(theconfig, nsource));
m_hdata.reset(); m_hdata.clear();
m_source->getTerms(m_hdata.terms, m_hdata.groups, m_hdata.gslks); m_source->getTerms(m_hdata);
} }
} }

View File

@ -19,6 +19,10 @@
#include <Qt> #include <Qt>
#include <string>
#include <map>
#include <vector>
#include "ui_restable.h" #include "ui_restable.h"
#include "refcntr.h" #include "refcntr.h"
#include "docseq.h" #include "docseq.h"
@ -26,7 +30,7 @@
class ResTable; class ResTable;
typedef string (FieldGetter)(const string& fldname, const Rcl::Doc& doc); typedef std::string (FieldGetter)(const std::string& fldname, const Rcl::Doc& doc);
class RecollModel : public QAbstractTableModel { class RecollModel : public QAbstractTableModel {
@ -49,15 +53,15 @@ public:
virtual void setDocSource(RefCntr<DocSequence> nsource); virtual void setDocSource(RefCntr<DocSequence> nsource);
virtual RefCntr<DocSequence> getDocSource() {return m_source;} virtual RefCntr<DocSequence> getDocSource() {return m_source;}
virtual void deleteColumn(int); virtual void deleteColumn(int);
virtual const vector<string>& getFields() {return m_fields;} virtual const std::vector<std::string>& getFields() {return m_fields;}
virtual const map<string, QString>& getAllFields() virtual const std::map<std::string, QString>& getAllFields()
{ {
return o_displayableFields; return o_displayableFields;
} }
virtual void addColumn(int, const string&); virtual void addColumn(int, const std::string&);
// Some column name are aliases/translator for base document field // Some column name are aliases/translator for base document field
// (ie: date, datetime->mtime). Help deal with this: // (ie: date, datetime->mtime). Help deal with this:
virtual string baseField(const string&); virtual std::string baseField(const std::string&);
// Ignore sort() call because // Ignore sort() call because
virtual void setIgnoreSort(bool onoff) {m_ignoreSort = onoff;} virtual void setIgnoreSort(bool onoff) {m_ignoreSort = onoff;}
@ -69,12 +73,12 @@ signals:
private: private:
mutable RefCntr<DocSequence> m_source; mutable RefCntr<DocSequence> m_source;
vector<string> m_fields; std::vector<std::string> m_fields;
vector<FieldGetter*> m_getters; std::vector<FieldGetter*> m_getters;
static map<string, QString> o_displayableFields; static std::map<std::string, QString> o_displayableFields;
bool m_ignoreSort; bool m_ignoreSort;
FieldGetter* chooseGetter(const string&); FieldGetter* chooseGetter(const std::string&);
HiliteData m_hdata; HighlightData m_hdata;
}; };
class ResTable; class ResTable;

View File

@ -19,19 +19,16 @@
#include <string> #include <string>
#include <list> #include <list>
#include <vector> #include <vector>
#ifndef NO_NAMESPACES
using std::string;
using std::list;
using std::vector;
#endif
#include "rcldoc.h" #include "rcldoc.h"
#include "refcntr.h" #include "refcntr.h"
#include "hldata.h"
// A result list entry. // A result list entry.
struct ResListEntry { struct ResListEntry {
Rcl::Doc doc; Rcl::Doc doc;
string subHeader; std::string subHeader;
}; };
/** Sort specification. */ /** Sort specification. */
@ -40,7 +37,7 @@ class DocSeqSortSpec {
DocSeqSortSpec() : desc(false) {} DocSeqSortSpec() : desc(false) {}
bool isNotNull() const {return !field.empty();} bool isNotNull() const {return !field.empty();}
void reset() {field.erase();} void reset() {field.erase();}
string field; std::string field;
bool desc; bool desc;
}; };
@ -50,12 +47,12 @@ class DocSeqFiltSpec {
public: public:
DocSeqFiltSpec() {} DocSeqFiltSpec() {}
enum Crit {DSFS_MIMETYPE, DSFS_QLANG, DSFS_PASSALL}; enum Crit {DSFS_MIMETYPE, DSFS_QLANG, DSFS_PASSALL};
void orCrit(Crit crit, const string& value) { void orCrit(Crit crit, const std::string& value) {
crits.push_back(crit); crits.push_back(crit);
values.push_back(value); values.push_back(value);
} }
std::vector<Crit> crits; std::vector<Crit> crits;
std::vector<string> values; std::vector<std::string> values;
void reset() {crits.clear(); values.clear();} void reset() {crits.clear(); values.clear();}
bool isNotNull() const {return crits.size() != 0;} bool isNotNull() const {return crits.size() != 0;}
}; };
@ -73,7 +70,7 @@ class DocSeqFiltSpec {
*/ */
class DocSequence { class DocSequence {
public: public:
DocSequence(const string &t) : m_title(t) {} DocSequence(const std::string &t) : m_title(t) {}
virtual ~DocSequence() {} virtual ~DocSequence() {}
/** Get document at given rank. /** Get document at given rank.
@ -84,16 +81,17 @@ class DocSequence {
* inside history) * inside history)
* @return true if ok, false for error or end of data * @return true if ok, false for error or end of data
*/ */
virtual bool getDoc(int num, Rcl::Doc &doc, string *sh = 0) = 0; virtual bool getDoc(int num, Rcl::Doc &doc, std::string *sh = 0) = 0;
/** Get next page of documents. This accumulates entries into the result /** Get next page of documents. This accumulates entries into the result
* list parameter (doesn't reset it). */ * list parameter (doesn't reset it). */
virtual int getSeqSlice(int offs, int cnt, vector<ResListEntry>& result); virtual int getSeqSlice(int offs, int cnt,
std::vector<ResListEntry>& result);
/** Get abstract for document. This is special because it may take time. /** Get abstract for document. This is special because it may take time.
* The default is to return the input doc's abstract fields, but some * The default is to return the input doc's abstract fields, but some
* sequences can compute a better value (ie: docseqdb) */ * sequences can compute a better value (ie: docseqdb) */
virtual bool getAbstract(Rcl::Doc& doc, vector<string>& abs) { virtual bool getAbstract(Rcl::Doc& doc, std::vector<std::string>& abs) {
abs.push_back(doc.meta[Rcl::Doc::keyabs]); abs.push_back(doc.meta[Rcl::Doc::keyabs]);
return true; return true;
} }
@ -103,25 +101,21 @@ class DocSequence {
virtual int getResCnt() = 0; virtual int getResCnt() = 0;
/** Get title for result list */ /** Get title for result list */
virtual string title() {return m_title;} virtual std::string title() {return m_title;}
/** Get description for underlying query */ /** Get description for underlying query */
virtual string getDescription() = 0; virtual std::string getDescription() = 0;
/** Get search terms (for highlighting abstracts). Some sequences /** Get search terms (for highlighting abstracts). Some sequences
* may have no associated search terms. Implement this for them. */ * may have no associated search terms. Implement this for them. */
virtual bool getTerms(vector<string>& terms, virtual void getTerms(HighlightData& hld)
vector<vector<string> >& groups,
vector<int>& gslks)
{ {
terms.clear(); groups.clear(); gslks.clear(); return true; hld.clear();
} }
/** Get user-input terms (before stemming etc.) */ virtual std::list<std::string> expand(Rcl::Doc &)
virtual void getUTerms(vector<string>& terms)
{ {
terms.clear(); return std::list<std::string>();
} }
virtual list<string> expand(Rcl::Doc &) {return list<string>();}
/** Optional functionality. */ /** Optional functionality. */
virtual bool canFilter() {return false;} virtual bool canFilter() {return false;}
@ -130,16 +124,16 @@ class DocSequence {
virtual bool setSortSpec(const DocSeqSortSpec &) {return false;} virtual bool setSortSpec(const DocSeqSortSpec &) {return false;}
virtual RefCntr<DocSequence> getSourceSeq() {return RefCntr<DocSequence>();} virtual RefCntr<DocSequence> getSourceSeq() {return RefCntr<DocSequence>();}
static void set_translations(const string& sort, const string& filt) static void set_translations(const std::string& sort, const std::string& filt)
{ {
o_sort_trans = sort; o_sort_trans = sort;
o_filt_trans = filt; o_filt_trans = filt;
} }
protected: protected:
static string o_sort_trans; static std::string o_sort_trans;
static string o_filt_trans; static std::string o_filt_trans;
private: private:
string m_title; std::string m_title;
}; };
/** A modifier has a child sequence which does the real work and does /** A modifier has a child sequence which does the real work and does
@ -152,25 +146,23 @@ public:
{} {}
virtual ~DocSeqModifier() {} virtual ~DocSeqModifier() {}
virtual bool getAbstract(Rcl::Doc& doc, vector<string>& abs) virtual bool getAbstract(Rcl::Doc& doc, std::vector<std::string>& abs)
{ {
if (m_seq.isNull()) if (m_seq.isNull())
return false; return false;
return m_seq->getAbstract(doc, abs); return m_seq->getAbstract(doc, abs);
} }
virtual string getDescription() virtual std::string getDescription()
{ {
if (m_seq.isNull()) if (m_seq.isNull())
return ""; return "";
return m_seq->getDescription(); return m_seq->getDescription();
} }
virtual bool getTerms(vector<string>& terms, virtual void getTerms(HighlightData& hld)
vector<vector<string> >& groups,
vector<int>& gslks)
{ {
if (m_seq.isNull()) if (m_seq.isNull())
return false; return;
return m_seq->getTerms(terms, groups, gslks); m_seq->getTerms(hld);
} }
virtual bool getEnclosing(Rcl::Doc& doc, Rcl::Doc& pdoc) virtual bool getEnclosing(Rcl::Doc& doc, Rcl::Doc& pdoc)
{ {
@ -178,13 +170,7 @@ public:
return false; return false;
return m_seq->getEnclosing(doc, pdoc); return m_seq->getEnclosing(doc, pdoc);
} }
virtual void getUTerms(vector<string>& terms) virtual std::string title() {return m_seq->title();}
{
if (m_seq.isNull())
return;
m_seq->getUTerms(terms);
}
virtual string title() {return m_seq->title();}
virtual RefCntr<DocSequence> getSourceSeq() {return m_seq;} virtual RefCntr<DocSequence> getSourceSeq() {return m_seq;}
protected: protected:
@ -203,7 +189,7 @@ public:
virtual bool canSort() {return true;} virtual bool canSort() {return true;}
virtual bool setFiltSpec(const DocSeqFiltSpec &); virtual bool setFiltSpec(const DocSeqFiltSpec &);
virtual bool setSortSpec(const DocSeqSortSpec &); virtual bool setSortSpec(const DocSeqSortSpec &);
virtual bool getDoc(int num, Rcl::Doc &doc, string *sh = 0) virtual bool getDoc(int num, Rcl::Doc &doc, std::string *sh = 0)
{ {
if (m_seq.isNull()) if (m_seq.isNull())
return false; return false;
@ -215,7 +201,7 @@ public:
return 0; return 0;
return m_seq->getResCnt(); return m_seq->getResCnt();
} }
virtual string title(); virtual std::string title();
private: private:
bool buildStack(); bool buildStack();
void stripStack(); void stripStack();

View File

@ -39,16 +39,9 @@ DocSequenceDb::~DocSequenceDb()
{ {
} }
bool DocSequenceDb::getTerms(vector<string>& terms, void DocSequenceDb::getTerms(HighlightData& hld)
vector<vector<string> >& groups,
vector<int>& gslks)
{ {
return m_fsdata->getTerms(terms, groups, gslks); m_fsdata->getTerms(hld);
}
void DocSequenceDb::getUTerms(vector<string>& terms)
{
m_sdata->getUTerms(terms);
} }
string DocSequenceDb::getDescription() string DocSequenceDb::getDescription()
@ -180,5 +173,13 @@ bool DocSequenceDb::setQuery()
return true; return true;
m_rescnt = -1; m_rescnt = -1;
m_needSetQuery = !m_q->setQuery(m_fsdata); m_needSetQuery = !m_q->setQuery(m_fsdata);
if (0) {
HighlightData hld;
m_fsdata->getTerms(hld);
string str;
hld.toString(str);
fprintf(stderr, "DocSequenceDb::setQuery: terms: %s\n", str.c_str());
}
return !m_needSetQuery; return !m_needSetQuery;
} }

View File

@ -30,10 +30,7 @@ class DocSequenceDb : public DocSequence {
virtual ~DocSequenceDb(); virtual ~DocSequenceDb();
virtual bool getDoc(int num, Rcl::Doc &doc, string * = 0); virtual bool getDoc(int num, Rcl::Doc &doc, string * = 0);
virtual int getResCnt(); virtual int getResCnt();
virtual bool getTerms(vector<string>& terms, virtual void getTerms(HighlightData& hld);
vector<vector<string> >& groups,
vector<int>& gslks);
virtual void getUTerms(vector<string>& terms);
virtual bool getAbstract(Rcl::Doc &doc, vector<string>&); virtual bool getAbstract(Rcl::Doc &doc, vector<string>&);
virtual bool getEnclosing(Rcl::Doc& doc, Rcl::Doc& pdoc); virtual bool getEnclosing(Rcl::Doc& doc, Rcl::Doc& pdoc);
virtual string getDescription(); virtual string getDescription();

View File

@ -24,12 +24,10 @@
#include <map> #include <map>
#include <algorithm> #include <algorithm>
#ifndef NO_NAMESPACES
using std::vector; using std::vector;
using std::list; using std::list;
using std::pair; using std::pair;
using std::set; using std::set;
#endif /* NO_NAMESPACES */
#include "rcldb.h" #include "rcldb.h"
#include "rclconfig.h" #include "rclconfig.h"
@ -51,7 +49,7 @@ static string vecStringToString(const vector<string>& t)
return sterms; return sterms;
} }
// Text splitter callback used to take note of the position of query terms // Text splitter used to take note of the position of query terms
// inside the result text. This is then used to insert highlight tags. // inside the result text. This is then used to insert highlight tags.
class TextSplitPTR : public TextSplit { class TextSplitPTR : public TextSplit {
public: public:
@ -59,23 +57,25 @@ class TextSplitPTR : public TextSplit {
// Out: begin and end byte positions of query terms/groups in text // Out: begin and end byte positions of query terms/groups in text
vector<pair<int, int> > tboffs; vector<pair<int, int> > tboffs;
TextSplitPTR(const vector<string>& its, TextSplitPTR(const HighlightData& hdata)
const vector<vector<string> >&groups, : m_wcount(0), m_hdata(hdata)
const vector<int>& slacks)
: m_wcount(0), m_groups(groups), m_slacks(slacks)
{ {
for (vector<string>::const_iterator it = its.begin(); // We separate single terms and groups and extract the group
it != its.end(); it++) { // terms for computing positions list before looking for group
m_terms.insert(*it); // matches
}
for (vector<vector<string> >::const_iterator vit = m_groups.begin(); for (vector<vector<string> >::const_iterator vit = hdata.groups.begin();
vit != m_groups.end(); vit++) { vit != hdata.groups.end(); vit++) {
for (vector<string>::const_iterator it = (*vit).begin(); if (vit->size() == 1) {
it != (*vit).end(); it++) { m_terms.insert(vit->front());
} else if (vit->size() > 1) {
for (vector<string>::const_iterator it = vit->begin();
it != vit->end(); it++) {
m_gterms.insert(*it); m_gterms.insert(*it);
} }
} }
} }
}
// Accept word and its position. If word is search term, add // Accept word and its position. If word is search term, add
// highlight zone definition. If word is part of search group // highlight zone definition. If word is part of search group
@ -116,16 +116,17 @@ class TextSplitPTR : public TextSplit {
private: private:
virtual bool matchGroup(const vector<string>& terms, int dist); virtual bool matchGroup(const vector<string>& terms, int dist);
// Word count. Used to call checkCancel from time to time.
int m_wcount; int m_wcount;
// In: user query terms // In: user query terms
set<string> m_terms; set<string> m_terms;
// In: user query groups, for near/phrase searches. // m_gterms holds all the terms in m_groups, as a set for quick lookup
const vector<vector<string> >& m_groups;
const vector<int>& m_slacks;
set<string> m_gterms; set<string> m_gterms;
const HighlightData& m_hdata;
// group/near terms word positions. // group/near terms word positions.
map<string, vector<int> > m_plists; map<string, vector<int> > m_plists;
map<int, pair<int, int> > m_gpostobytes; map<int, pair<int, int> > m_gpostobytes;
@ -294,10 +295,11 @@ public:
// handle all groups as NEAR (ignore order). // handle all groups as NEAR (ignore order).
bool TextSplitPTR::matchGroups() bool TextSplitPTR::matchGroups()
{ {
vector<vector<string> >::const_iterator vit = m_groups.begin(); for (unsigned int i = 0; i < m_hdata.groups.size(); i++) {
vector<int>::const_iterator sit = m_slacks.begin(); if (m_hdata.groups[i].size() <= 1)
for (; vit != m_groups.end() && sit != m_slacks.end(); vit++, sit++) { continue;
matchGroup(*vit, *sit + (*vit).size()); matchGroup(m_hdata.groups[i],
m_hdata.groups[i].size() + m_hdata.slacks[i]);
} }
// Sort regions by increasing start and decreasing width. // Sort regions by increasing start and decreasing width.
@ -317,39 +319,22 @@ bool TextSplitPTR::matchGroups()
// the input is html, the body is always a single output chunk. // the input is html, the body is always a single output chunk.
bool PlainToRich::plaintorich(const string& in, bool PlainToRich::plaintorich(const string& in,
list<string>& out, // Output chunk list list<string>& out, // Output chunk list
const HiliteData& hdata, const HighlightData& hdata,
int chunksize) int chunksize)
{ {
Chrono chron; Chrono chron;
const vector<string>& terms(hdata.terms);
const vector<vector<string> >& groups(hdata.groups);
const vector<int>& slacks(hdata.gslks);
if (0 && DebugLog::getdbl()->getlevel() >= DEBDEB0) {
string sterms = vecStringToString(terms);
LOGDEB0(("plaintorich: terms: %s\n", sterms.c_str()));
sterms.clear();
for (vector<vector<string> >::const_iterator vit = groups.begin();
vit != groups.end(); vit++) {
sterms += "GROUP: ";
sterms += vecStringToString(*vit);
sterms += "\n";
}
LOGDEB0(("plaintorich: groups:\n %s", sterms.c_str()));
LOGDEB2((" TEXT:[%s]\n", in.c_str()));
}
// Compute the positions for the query terms. We use the text // Compute the positions for the query terms. We use the text
// splitter to break the text into words, and compare the words to // splitter to break the text into words, and compare the words to
// the search terms, // the search terms,
TextSplitPTR splitter(terms, groups, slacks); TextSplitPTR splitter(hdata);
// Note: the splitter returns the term locations in byte, not // Note: the splitter returns the term locations in byte, not
// character, offsets. // character, offsets.
splitter.text_to_words(in); splitter.text_to_words(in);
LOGDEB2(("plaintorich: split done %d mS\n", chron.millis())); LOGDEB2(("plaintorich: split done %d mS\n", chron.millis()));
// Compute the positions for NEAR and PHRASE groups. // Compute the positions for NEAR and PHRASE groups.
splitter.matchGroups(); splitter.matchGroups();
LOGDEB2(("plaintorich: group match done %d mS\n", chron.millis()));
out.clear(); out.clear();
out.push_back(""); out.push_back("");

View File

@ -19,26 +19,8 @@
#include <string> #include <string>
#include <list> #include <list>
using std::list;
using std::string;
/// Holder for plaintorich() input data: words and groups of words to #include "hldata.h"
/// be highlighted
struct HiliteData {
// Single terms
vector<string> terms;
// NEAR and PHRASE elements
vector<vector<string> > groups;
// Group slacks (number of permitted non-matched words).
// Parallel vector to the above 'groups'
vector<int> gslks;
void reset()
{
terms.clear();
groups.clear();
gslks.clear();
}
};
/** /**
* A class for highlighting search results. Overridable methods allow * A class for highlighting search results. Overridable methods allow
@ -83,21 +65,21 @@ public:
* lowercase and unaccented. * lowercase and unaccented.
* @param chunksize max size of chunks in output list * @param chunksize max size of chunks in output list
*/ */
virtual bool plaintorich(const string &in, list<string> &out, virtual bool plaintorich(const std::string &in, std::list<std::string> &out,
const HiliteData& hdata, const HighlightData& hdata,
int chunksize = 50000 int chunksize = 50000
); );
/* Overridable output methods for headers, highlighting and marking tags */ /* Overridable output methods for headers, highlighting and marking tags */
virtual string header() {return snull;} virtual std::string header() {return snull;}
virtual string startMatch() {return snull;} virtual std::string startMatch() {return snull;}
virtual string endMatch() {return snull;} virtual std::string endMatch() {return snull;}
virtual string startAnchor(int) {return snull;} virtual std::string startAnchor(int) {return snull;}
virtual string endAnchor() {return snull;} virtual std::string endAnchor() {return snull;}
virtual string startChunk() {return snull;} virtual std::string startChunk() {return snull;}
protected: protected:
const string snull; const std::string snull;
bool m_inputhtml; bool m_inputhtml;
// Use <br> to break plain text lines (else caller has used a <pre> tag) // Use <br> to break plain text lines (else caller has used a <pre> tag)
bool m_eolbr; bool m_eolbr;

View File

@ -132,9 +132,8 @@ void ResListPager::resultPageFor(int docnum)
m_respage = npage; m_respage = npage;
} }
void ResListPager::displayDoc(RclConfig *config, void ResListPager::displayDoc(RclConfig *config, int i, Rcl::Doc& doc,
int i, Rcl::Doc& doc, const HiliteData& hdata, const HighlightData& hdata, const string& sh)
const string& sh)
{ {
ostringstream chunk; ostringstream chunk;
int percent; int percent;
@ -309,8 +308,9 @@ void ResListPager::displayPage(RclConfig *config)
if (pageEmpty()) { if (pageEmpty()) {
chunk << trans("<p><b>No results found</b><br>"); chunk << trans("<p><b>No results found</b><br>");
vector<string>uterms; HighlightData hldata;
m_docSource->getUTerms(uterms); m_docSource->getTerms(hldata);
vector<string> uterms(hldata.uterms.begin(), hldata.uterms.end());
if (!uterms.empty()) { if (!uterms.empty()) {
map<string, vector<string> > spellings; map<string, vector<string> > spellings;
suggest(uterms, spellings); suggest(uterms, spellings);
@ -366,8 +366,8 @@ void ResListPager::displayPage(RclConfig *config)
if (pageEmpty()) if (pageEmpty())
return; return;
HiliteData hdata; HighlightData hdata;
m_docSource->getTerms(hdata.terms, hdata.groups, hdata.gslks); m_docSource->getTerms(hdata);
// Emit data for result entry paragraph. Do it in chunks that make sense // Emit data for result entry paragraph. Do it in chunks that make sense
// html-wise, else our client may get confused // html-wise, else our client may get confused

View File

@ -19,14 +19,13 @@
#define _reslistpager_h_included_ #define _reslistpager_h_included_
#include <vector> #include <vector>
using std::vector;
#include "refcntr.h" #include "refcntr.h"
#include "docseq.h" #include "docseq.h"
#include "hldata.h"
class RclConfig; class RclConfig;
class PlainToRich; class PlainToRich;
struct HiliteData;
/** /**
* Manage a paged HTML result list. * Manage a paged HTML result list.
@ -85,7 +84,7 @@ public:
void resultPageFor(int docnum); void resultPageFor(int docnum);
void displayPage(RclConfig *); void displayPage(RclConfig *);
void displayDoc(RclConfig *, int idx, Rcl::Doc& doc, void displayDoc(RclConfig *, int idx, Rcl::Doc& doc,
const HiliteData& hdata, const string& sh = ""); const HighlightData& hdata, const string& sh = "");
bool pageEmpty() {return m_respage.size() == 0;} bool pageEmpty() {return m_respage.size() == 0;}
string queryDescription() {return m_docSource.isNull() ? "" : string queryDescription() {return m_docSource.isNull() ? "" :
@ -112,8 +111,9 @@ public:
virtual string pageTop() {return string();} virtual string pageTop() {return string();}
virtual string headerContent() {return string();} virtual string headerContent() {return string();}
virtual string iconUrl(RclConfig *, Rcl::Doc& doc); virtual string iconUrl(RclConfig *, Rcl::Doc& doc);
virtual void suggest(const vector<string>, virtual void suggest(const std::vector<std::string>,
map<string, vector<string> >& sugg) { std::map<std::string, std::vector<std::string> >& sugg)
{
sugg.clear(); sugg.clear();
} }
virtual string absSep() {return "&hellip;";} virtual string absSep() {return "&hellip;";}
@ -126,7 +126,7 @@ private:
bool m_hasNext; bool m_hasNext;
PlainToRich *m_hiliter; PlainToRich *m_hiliter;
RefCntr<DocSequence> m_docSource; RefCntr<DocSequence> m_docSource;
vector<ResListEntry> m_respage; std::vector<ResListEntry> m_respage;
}; };
#endif /* _reslistpager_h_included_ */ #endif /* _reslistpager_h_included_ */

View File

@ -498,23 +498,12 @@ bool SearchData::fileNameOnly()
return true; return true;
} }
// Extract all terms and term groups // Extract all term data
bool SearchData::getTerms(vector<string>& terms, void SearchData::getTerms(HighlightData &hld) const
vector<vector<string> >& groups,
vector<int>& gslks) const
{ {
for (qlist_cit_t it = m_query.begin(); it != m_query.end(); it++) for (qlist_cit_t it = m_query.begin(); it != m_query.end(); it++)
(*it)->getTerms(terms, groups, gslks); (*it)->getTerms(hld);
return true; return;
}
// Extract user terms
void SearchData::getUTerms(vector<string>& terms) const
{
for (qlist_cit_t it = m_query.begin(); it != m_query.end(); it++)
(*it)->getUTerms(terms);
sort(terms.begin(), terms.end());
vector<string>::iterator it = unique(terms.begin(), terms.end());
terms.erase(it, terms.end());
} }
// Splitter callback for breaking a user string into simple terms and // Splitter callback for breaking a user string into simple terms and
@ -590,10 +579,10 @@ private:
// translating. // translating.
class StringToXapianQ { class StringToXapianQ {
public: public:
StringToXapianQ(Db& db, const string& field, StringToXapianQ(Db& db, HighlightData& hld, const string& field,
const string &stmlng, bool boostUser) const string &stmlng, bool boostUser)
: m_db(db), m_field(field), m_stemlang(stmlng), : m_db(db), m_field(field), m_stemlang(stmlng),
m_doBoostUserTerms(boostUser) m_doBoostUserTerms(boostUser), m_hld(hld)
{ } { }
bool processUserString(const string &iq, bool processUserString(const string &iq,
@ -601,20 +590,6 @@ public:
vector<Xapian::Query> &pqueries, vector<Xapian::Query> &pqueries,
const StopList &stops, const StopList &stops,
int slack = 0, bool useNear = false); int slack = 0, bool useNear = false);
// After processing the string: return search terms and term
// groups (ie: for highlighting)
bool getTerms(vector<string>& terms, vector<vector<string> >& groups)
{
terms.insert(terms.end(), m_terms.begin(), m_terms.end());
groups.insert(groups.end(), m_groups.begin(), m_groups.end());
return true;
}
bool getUTerms(vector<string>& terms)
{
terms.insert(terms.end(), m_uterms.begin(), m_uterms.end());
return true;
}
private: private:
void expandTerm(bool dont, const string& term, vector<string>& exp, void expandTerm(bool dont, const string& term, vector<string>& exp,
string& sterm, const string& prefix); string& sterm, const string& prefix);
@ -630,10 +605,7 @@ private:
const string& m_field; const string& m_field;
const string& m_stemlang; const string& m_stemlang;
bool m_doBoostUserTerms; bool m_doBoostUserTerms;
// Single terms and phrases resulting from breaking up text; HighlightData& m_hld;
vector<string> m_uterms;
vector<string> m_terms;
vector<vector<string> > m_groups;
}; };
#if 1 #if 1
@ -647,7 +619,7 @@ static void listVector(const string& what, const vector<string>&l)
} }
#endif #endif
/** Expand stem and wildcards /** Take simple term and expand stem and wildcards
* *
* @param nostemexp don't perform stem expansion. This is mainly used to * @param nostemexp don't perform stem expansion. This is mainly used to
* prevent stem expansion inside phrases (because the user probably * prevent stem expansion inside phrases (because the user probably
@ -680,9 +652,11 @@ void StringToXapianQ::expandTerm(bool nostemexp,
nostemexp = true; nostemexp = true;
} }
if (!haswild)
m_hld.uterms.insert(term);
if (nostemexp && !haswild) { if (nostemexp && !haswild) {
sterm = term; sterm = term;
m_uterms.push_back(sterm);
exp.resize(1); exp.resize(1);
exp[0] = prefix + term; exp[0] = prefix + term;
} else { } else {
@ -692,7 +666,6 @@ void StringToXapianQ::expandTerm(bool nostemexp,
m_field); m_field);
} else { } else {
sterm = term; sterm = term;
m_uterms.push_back(sterm);
m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, res, -1, m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, res, -1,
m_field); m_field);
} }
@ -701,7 +674,6 @@ void StringToXapianQ::expandTerm(bool nostemexp,
exp.push_back(it->term); exp.push_back(it->term);
} }
} }
//listVector("ExpandTerm:uterms now: ", m_uterms);
} }
// Do distribution of string vectors: a,b c,d -> a,c a,d b,c b,d // Do distribution of string vectors: a,b c,d -> a,c a,d b,c b,d
@ -754,11 +726,14 @@ void StringToXapianQ::processSimpleSpan(const string& span, bool nostemexp,
expandTerm(nostemexp, span, exp, sterm, prefix); expandTerm(nostemexp, span, exp, sterm, prefix);
// m_terms is used for highlighting, we don't want prefixes in there. // Set up the highlight data. No prefix should go in there
for (vector<string>::const_iterator it = exp.begin(); for (vector<string>::const_iterator it = exp.begin();
it != exp.end(); it++) { it != exp.end(); it++) {
m_terms.push_back(it->substr(prefix.size())); m_hld.groups.push_back(vector<string>(1, it->substr(prefix.size())));
m_hld.slacks.push_back(0);
m_hld.grpsugidx.push_back(m_hld.ugroups.size() - 1);
} }
// Push either term or OR of stem-expanded set // Push either term or OR of stem-expanded set
Xapian::Query xq(Xapian::Query::OP_OR, exp.begin(), exp.end()); Xapian::Query xq(Xapian::Query::OP_OR, exp.begin(), exp.end());
@ -786,7 +761,9 @@ void StringToXapianQ::processPhraseOrNear(TextSplitQ *splitData,
Xapian::Query::op op = useNear ? Xapian::Query::OP_NEAR : Xapian::Query::op op = useNear ? Xapian::Query::OP_NEAR :
Xapian::Query::OP_PHRASE; Xapian::Query::OP_PHRASE;
vector<Xapian::Query> orqueries; vector<Xapian::Query> orqueries;
#ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF
bool hadmultiple = false; bool hadmultiple = false;
#endif
vector<vector<string> >groups; vector<vector<string> >groups;
string prefix; string prefix;
@ -805,15 +782,19 @@ void StringToXapianQ::processPhraseOrNear(TextSplitQ *splitData,
for (vector<string>::iterator it = splitData->terms.begin(); for (vector<string>::iterator it = splitData->terms.begin();
it != splitData->terms.end(); it++, nxit++) { it != splitData->terms.end(); it++, nxit++) {
LOGDEB0(("ProcessPhrase: processing [%s]\n", it->c_str())); LOGDEB0(("ProcessPhrase: processing [%s]\n", it->c_str()));
// Adjust when we do stem expansion. Not inside phrases, and // Adjust when we do stem expansion. Not if disabled by
// some versions of xapian will accept only one OR clause // caller, not inside phrases, and some versions of xapian
// inside NEAR, all others must be leafs. // will accept only one OR clause inside NEAR.
bool nostemexp = *nxit || (op == Xapian::Query::OP_PHRASE) || hadmultiple; bool nostemexp = *nxit || (op == Xapian::Query::OP_PHRASE)
#ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF
|| hadmultiple
#endif // single OR inside NEAR
;
string sterm; string sterm;
vector<string> exp; vector<string> exp;
expandTerm(nostemexp, *it, exp, sterm, prefix); expandTerm(nostemexp, *it, exp, sterm, prefix);
LOGDEB0(("ProcessPhrase: exp size %d\n", exp.size())); LOGDEB0(("ProcessPhraseOrNear: exp size %d\n", exp.size()));
listVector("", exp); listVector("", exp);
// groups is used for highlighting, we don't want prefixes in there. // groups is used for highlighting, we don't want prefixes in there.
vector<string> noprefs; vector<string> noprefs;
@ -850,7 +831,13 @@ void StringToXapianQ::processPhraseOrNear(TextSplitQ *splitData,
vector<vector<string> > allcombs; vector<vector<string> > allcombs;
vector<string> comb; vector<string> comb;
multiply_groups(groups.begin(), groups.end(), comb, allcombs); multiply_groups(groups.begin(), groups.end(), comb, allcombs);
m_groups.insert(m_groups.end(), allcombs.begin(), allcombs.end());
// Insert the search groups and slacks in the highlight data, with
// a reference to the user entry that generated them:
m_hld.groups.insert(m_hld.groups.end(), allcombs.begin(), allcombs.end());
m_hld.slacks.insert(m_hld.slacks.end(), allcombs.size(), slack);
m_hld.grpsugidx.insert(m_hld.grpsugidx.end(), allcombs.size(),
m_hld.ugroups.size() - 1);
} }
// Trim string beginning with ^ or ending with $ and convert to flags // Trim string beginning with ^ or ending with $ and convert to flags
@ -875,7 +862,16 @@ static int stringToMods(string& s)
* We just separate words and phrases, and do wildcard and stem expansion, * We just separate words and phrases, and do wildcard and stem expansion,
* *
* This is used to process data entered into an OR/AND/NEAR/PHRASE field of * This is used to process data entered into an OR/AND/NEAR/PHRASE field of
* the GUI. * the GUI (in the case of NEAR/PHRASE, clausedist adds dquotes to the user
* entry).
*
* This appears awful, and it would seem that the split into
* terms/phrases should be performed in the upper layer so that we
* only receive pure term or near/phrase pure elements here, but in
* fact there are things that would appear like terms to naive code,
* and which may actually be turned into phrases (ie: tom:jerry),
* in a manner which intimately depends on the index implementation,
* so that it makes sense to process this here.
* *
* The final list contains one query for each term or phrase * The final list contains one query for each term or phrase
* - Elements corresponding to a stem-expanded part are an OP_OR * - Elements corresponding to a stem-expanded part are an OP_OR
@ -895,9 +891,6 @@ bool StringToXapianQ::processUserString(const string &iq,
{ {
LOGDEB(("StringToXapianQ:: query string: [%s], slack %d, near %d\n", iq.c_str(), slack, useNear)); LOGDEB(("StringToXapianQ:: query string: [%s], slack %d, near %d\n", iq.c_str(), slack, useNear));
ermsg.erase(); ermsg.erase();
m_uterms.clear();
m_terms.clear();
m_groups.clear();
// Simple whitespace-split input into user-level words and // Simple whitespace-split input into user-level words and
// double-quoted phrases: word1 word2 "this is a phrase". // double-quoted phrases: word1 word2 "this is a phrase".
@ -952,10 +945,12 @@ bool StringToXapianQ::processUserString(const string &iq,
case 0: case 0:
continue;// ?? continue;// ??
case 1: case 1:
m_hld.ugroups.push_back(vector<string>(1, *it));
processSimpleSpan(splitter.terms.front(), processSimpleSpan(splitter.terms.front(),
splitter.nostemexps.front(), pqueries); splitter.nostemexps.front(), pqueries);
break; break;
default: default:
m_hld.ugroups.push_back(vector<string>(1, *it));
processPhraseOrNear(&splitter, pqueries, useNear, slack, mods); processPhraseOrNear(&splitter, pqueries, useNear, slack, mods);
} }
} }
@ -984,8 +979,6 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p,
LOGDEB2(("SearchDataClauseSimple::toNativeQuery: stemlang [%s]\n", LOGDEB2(("SearchDataClauseSimple::toNativeQuery: stemlang [%s]\n",
stemlang.c_str())); stemlang.c_str()));
m_terms.clear();
m_groups.clear();
Xapian::Query *qp = (Xapian::Query *)p; Xapian::Query *qp = (Xapian::Query *)p;
*qp = Xapian::Query(); *qp = Xapian::Query();
@ -1007,16 +1000,14 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p,
(m_parentSearch && !m_parentSearch->haveWildCards()) || (m_parentSearch && !m_parentSearch->haveWildCards()) ||
(m_parentSearch == 0 && !m_haveWildCards); (m_parentSearch == 0 && !m_haveWildCards);
StringToXapianQ tr(db, m_field, l_stemlang, doBoostUserTerm); StringToXapianQ tr(db, m_hldata, m_field, l_stemlang, doBoostUserTerm);
if (!tr.processUserString(m_text, m_reason, pqueries, db.getStopList())) if (!tr.processUserString(m_text, m_reason, pqueries, db.getStopList()))
return false; return false;
if (pqueries.empty()) { if (pqueries.empty()) {
LOGERR(("SearchDataClauseSimple: resolved to null query\n")); LOGERR(("SearchDataClauseSimple: resolved to null query\n"));
return true; return true;
} }
tr.getTerms(m_terms, m_groups);
tr.getUTerms(m_uterms);
//listVector("SearchDataClauseSimple: Uterms: ", m_uterms);
*qp = Xapian::Query(op, pqueries.begin(), pqueries.end()); *qp = Xapian::Query(op, pqueries.begin(), pqueries.end());
if (m_weight != 1.0) { if (m_weight != 1.0) {
*qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight); *qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
@ -1056,8 +1047,6 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
const string& l_stemlang = (m_modifiers&SDCM_NOSTEMMING)? cstr_null: const string& l_stemlang = (m_modifiers&SDCM_NOSTEMMING)? cstr_null:
stemlang; stemlang;
LOGDEB(("SearchDataClauseDist::toNativeQuery\n")); LOGDEB(("SearchDataClauseDist::toNativeQuery\n"));
m_terms.clear();
m_groups.clear();
Xapian::Query *qp = (Xapian::Query *)p; Xapian::Query *qp = (Xapian::Query *)p;
*qp = Xapian::Query(); *qp = Xapian::Query();
@ -1080,7 +1069,7 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
} }
string s = cstr_dquote + m_text + cstr_dquote; string s = cstr_dquote + m_text + cstr_dquote;
bool useNear = (m_tp == SCLT_NEAR); bool useNear = (m_tp == SCLT_NEAR);
StringToXapianQ tr(db, m_field, l_stemlang, doBoostUserTerm); StringToXapianQ tr(db, m_hldata, m_field, l_stemlang, doBoostUserTerm);
if (!tr.processUserString(s, m_reason, pqueries, db.getStopList(), if (!tr.processUserString(s, m_reason, pqueries, db.getStopList(),
m_slack, useNear)) m_slack, useNear))
return false; return false;
@ -1088,8 +1077,7 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
LOGERR(("SearchDataClauseDist: resolved to null query\n")); LOGERR(("SearchDataClauseDist: resolved to null query\n"));
return true; return true;
} }
tr.getTerms(m_terms, m_groups);
tr.getUTerms(m_uterms);
*qp = *pqueries.begin(); *qp = *pqueries.begin();
if (m_weight != 1.0) { if (m_weight != 1.0) {
*qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight); *qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
@ -1097,21 +1085,4 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
return true; return true;
} }
// Translate subquery
bool SearchDataClauseSub::toNativeQuery(Rcl::Db &db, void *p, const string&)
{
return m_sub->toNativeQuery(db, p);
}
bool SearchDataClauseSub::getTerms(vector<string>& terms,
vector<vector<string> >& groups,
vector<int>& gslks) const
{
return m_sub.getconstptr()->getTerms(terms, groups, gslks);
}
void SearchDataClauseSub::getUTerms(vector<string>& terms) const
{
m_sub.getconstptr()->getUTerms(terms);
}
} // Namespace Rcl } // Namespace Rcl

View File

@ -31,14 +31,11 @@
#include "refcntr.h" #include "refcntr.h"
#include "smallut.h" #include "smallut.h"
#include "cstr.h" #include "cstr.h"
#include "hldata.h"
class RclConfig; class RclConfig;
#ifndef NO_NAMESPACES
using std::vector;
using std::string;
namespace Rcl { namespace Rcl {
#endif // NO_NAMESPACES
/** Search clause types */ /** Search clause types */
enum SClType { enum SClType {
@ -70,7 +67,7 @@ class SearchDataClause;
A phrase clause could be added either explicitly or using double quotes: A phrase clause could be added either explicitly or using double quotes:
{SCLT_PHRASE, [this is a phrase]} or as {SCLT_XXX, ["this is a phrase"]} {SCLT_PHRASE, [this is a phrase]} or as {SCLT_XXX, ["this is a phrase"]}
*/ */
class SearchData { class SearchData {
public: public:
SearchData(SClType tp) SearchData(SClType tp)
@ -108,7 +105,7 @@ public:
bool maybeAddAutoPhrase(Rcl::Db &db, double threshold); bool maybeAddAutoPhrase(Rcl::Db &db, double threshold);
/** Set/get top subdirectory for filtering results */ /** Set/get top subdirectory for filtering results */
void setTopdir(const string& t, bool excl = false, float w = 1.0) void setTopdir(const std::string& t, bool excl = false, float w = 1.0)
{ {
m_topdir = t; m_topdir = t;
m_topdirexcl = excl; m_topdirexcl = excl;
@ -122,38 +119,37 @@ public:
void setDateSpan(DateInterval *dip) {m_dates = *dip; m_haveDates = true;} void setDateSpan(DateInterval *dip) {m_dates = *dip; m_haveDates = true;}
/** Add file type for filtering results */ /** Add file type for filtering results */
void addFiletype(const string& ft) {m_filetypes.push_back(ft);} void addFiletype(const std::string& ft) {m_filetypes.push_back(ft);}
/** Add file type to not wanted list */ /** Add file type to not wanted list */
void remFiletype(const string& ft) {m_nfiletypes.push_back(ft);} void remFiletype(const std::string& ft) {m_nfiletypes.push_back(ft);}
void setStemlang(const string& lang = "english") {m_stemlang = lang;} void setStemlang(const std::string& lang = "english") {m_stemlang = lang;}
/** Retrieve error description */ /** Retrieve error description */
string getReason() {return m_reason;} std::string getReason() {return m_reason;}
/** Get terms and phrase/near groups. Used in the GUI for highlighting /** Return term expansion data. Mostly used by caller for highlighting
* The groups and gslks vectors are parallel and hold the phrases/near
* string groups and their associated slacks (distance in excess of group
* size)
*/ */
bool getTerms(vector<string>& terms, void getTerms(HighlightData& hldata) const;
vector<vector<string> >& groups, vector<int>& gslks) const;
/** Get user-input terms (before expansion etc.) */
void getUTerms(vector<string>& terms) const;
/** /**
* Get/set the description field which is retrieved from xapian after * Get/set the description field which is retrieved from xapian after
* initializing the query. It is stored here for usage in the GUI. * initializing the query. It is stored here for usage in the GUI.
*/ */
string getDescription() {return m_description;} std::string getDescription() {return m_description;}
void setDescription(const string& d) {m_description = d;} void setDescription(const std::string& d) {m_description = d;}
private: private:
SClType m_tp; // Only SCLT_AND or SCLT_OR here // Combine type. Only SCLT_AND or SCLT_OR here
vector<SearchDataClause*> m_query; SClType m_tp;
vector<string> m_filetypes; // Restrict to filetypes if set. // Complex query descriptor
vector<string> m_nfiletypes; // Unwanted file types std::vector<SearchDataClause*> m_query;
string m_topdir; // Restrict to subtree. // Restricted set of filetypes if not empty.
std::vector<std::string> m_filetypes;
// Excluded set of file types if not empty
std::vector<std::string> m_nfiletypes;
// Restrict to subtree.
std::string m_topdir;
bool m_topdirexcl; // Invert meaning bool m_topdirexcl; // Invert meaning
float m_topdirweight; // affect weight instead of filter float m_topdirweight; // affect weight instead of filter
bool m_haveDates; bool m_haveDates;
@ -162,11 +158,11 @@ private:
size_t m_minSize; size_t m_minSize;
// Printable expanded version of the complete query, retrieved/set // Printable expanded version of the complete query, retrieved/set
// from rcldb after the Xapian::setQuery() call // from rcldb after the Xapian::setQuery() call
string m_description; std::string m_description;
string m_reason; std::string m_reason;
bool m_haveWildCards; bool m_haveWildCards;
string m_stemlang; std::string m_stemlang;
bool expandFileTypes(RclConfig *cfg, vector<string>& exptps); bool expandFileTypes(RclConfig *cfg, std::vector<std::string>& exptps);
/* Copyconst and assignment private and forbidden */ /* Copyconst and assignment private and forbidden */
SearchData(const SearchData &) {} SearchData(const SearchData &) {}
SearchData& operator=(const SearchData&) {return *this;}; SearchData& operator=(const SearchData&) {return *this;};
@ -182,35 +178,52 @@ public:
m_modifiers(SDCM_NONE), m_weight(1.0) m_modifiers(SDCM_NONE), m_weight(1.0)
{} {}
virtual ~SearchDataClause() {} virtual ~SearchDataClause() {}
virtual bool toNativeQuery(Rcl::Db &db, void *, const string&) = 0; virtual bool toNativeQuery(Rcl::Db &db, void *, const std::string&) = 0;
bool isFileName() const {return m_tp == SCLT_FILENAME ? true: false;} bool isFileName() const {return m_tp == SCLT_FILENAME ? true: false;}
virtual string getReason() const {return m_reason;} virtual std::string getReason() const {return m_reason;}
virtual bool getTerms(vector<string>&, vector<vector<string> >&, virtual void getTerms(HighlightData & hldata) const = 0;
vector<int>&) const = 0;
virtual void getUTerms(vector<string>&) const = 0;
SClType getTp() {return m_tp;} SClType getTp()
void setParent(SearchData *p) {m_parentSearch = p;} {
virtual void setModifiers(Modifier mod) {m_modifiers = mod;} return m_tp;
virtual int getModifiers() {return m_modifiers;} }
virtual void addModifier(Modifier mod) { void setParent(SearchData *p)
{
m_parentSearch = p;
}
virtual void setModifiers(Modifier mod)
{
m_modifiers = mod;
}
virtual int getModifiers()
{
return m_modifiers;
}
virtual void addModifier(Modifier mod)
{
int imod = getModifiers(); int imod = getModifiers();
imod |= mod; imod |= mod;
setModifiers(Modifier(imod)); setModifiers(Modifier(imod));
} }
virtual void setWeight(float w) {m_weight = w;} virtual void setWeight(float w)
{
m_weight = w;
}
friend class SearchData; friend class SearchData;
protected: protected:
string m_reason; std::string m_reason;
SClType m_tp; SClType m_tp;
SearchData *m_parentSearch; SearchData *m_parentSearch;
bool m_haveWildCards; bool m_haveWildCards;
Modifier m_modifiers; Modifier m_modifiers;
float m_weight; float m_weight;
private: private:
SearchDataClause(const SearchDataClause&) {} SearchDataClause(const SearchDataClause&)
SearchDataClause& operator=(const SearchDataClause&) { {
}
SearchDataClause& operator=(const SearchDataClause&)
{
return *this; return *this;
} }
}; };
@ -221,45 +234,37 @@ private:
*/ */
class SearchDataClauseSimple : public SearchDataClause { class SearchDataClauseSimple : public SearchDataClause {
public: public:
SearchDataClauseSimple(SClType tp, const string& txt, SearchDataClauseSimple(SClType tp, const std::string& txt,
const string& fld = string()) const std::string& fld = std::string())
: SearchDataClause(tp), m_text(txt), m_field(fld), m_slack(0) { : SearchDataClause(tp), m_text(txt), m_field(fld)
m_haveWildCards = (txt.find_first_of(cstr_minwilds) != string::npos); {
m_haveWildCards =
(txt.find_first_of(cstr_minwilds) != std::string::npos);
} }
virtual ~SearchDataClauseSimple() {} virtual ~SearchDataClauseSimple()
{
}
/** Translate to Xapian query */ /** Translate to Xapian query */
virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang); virtual bool toNativeQuery(Rcl::Db &, void *, const std::string& stemlang);
/** Retrieve query terms and term groups. This is used for highlighting */ virtual void getTerms(HighlightData& hldata) const
virtual bool getTerms(vector<string>& terms, /* Single terms */
vector<vector<string> >& groups, /* Prox grps */
vector<int>& gslks) const /* Prox slacks */
{ {
terms.insert(terms.end(), m_terms.begin(), m_terms.end()); hldata.append(m_hldata);
groups.insert(groups.end(), m_groups.begin(), m_groups.end());
gslks.insert(gslks.end(), m_groups.size(), m_slack);
return true;
} }
virtual void getUTerms(vector<string>& terms) const virtual const std::string& gettext()
{ {
terms.insert(terms.end(), m_uterms.begin(), m_uterms.end()); return m_text;
}
virtual const std::string& getfield()
{
return m_field;
} }
virtual const string& gettext() {return m_text;}
virtual const string& getfield() {return m_field;}
protected: protected:
string m_text; // Raw user entry text. std::string m_text; // Raw user entry text.
string m_field; // Field specification if any std::string m_field; // Field specification if any
// Single terms and phrases resulting from breaking up m_text; HighlightData m_hldata;
// valid after toNativeQuery() call
vector<string> m_terms;
vector<vector<string> > m_groups;
// User terms before expansion
vector<string> m_uterms;
// Declare m_slack here. Always 0, but allows getTerms to work for
// SearchDataClauseDist
int m_slack;
}; };
/** /**
@ -272,29 +277,39 @@ protected:
*/ */
class SearchDataClauseFilename : public SearchDataClauseSimple { class SearchDataClauseFilename : public SearchDataClauseSimple {
public: public:
SearchDataClauseFilename(const string& txt) SearchDataClauseFilename(const std::string& txt)
: SearchDataClauseSimple(SCLT_FILENAME, txt) { : SearchDataClauseSimple(SCLT_FILENAME, txt)
{
// File name searches don't count when looking for wild cards. // File name searches don't count when looking for wild cards.
m_haveWildCards = false; m_haveWildCards = false;
} }
virtual ~SearchDataClauseFilename() {}
virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang); virtual ~SearchDataClauseFilename()
{
}
virtual bool toNativeQuery(Rcl::Db &, void *, const std::string& stemlang);
}; };
/** /**
* A clause coming from a NEAR or PHRASE entry field. There is only one * A clause coming from a NEAR or PHRASE entry field. There is only one
* string group, and a specified distance, which applies to it. * std::string group, and a specified distance, which applies to it.
*/ */
class SearchDataClauseDist : public SearchDataClauseSimple { class SearchDataClauseDist : public SearchDataClauseSimple {
public: public:
SearchDataClauseDist(SClType tp, const string& txt, int slack, SearchDataClauseDist(SClType tp, const std::string& txt, int slack,
const string& fld = string()) const std::string& fld = std::string())
: SearchDataClauseSimple(tp, txt, fld) {m_slack = slack;} : SearchDataClauseSimple(tp, txt, fld), m_slack(slack)
virtual ~SearchDataClauseDist() {} {
}
virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang); virtual ~SearchDataClauseDist()
{
}
// m_slack is declared in SearchDataClauseSimple virtual bool toNativeQuery(Rcl::Db &, void *, const std::string& stemlang);
private:
int m_slack;
}; };
/** Subquery */ /** Subquery */
@ -302,15 +317,28 @@ class SearchDataClauseSub : public SearchDataClause {
public: public:
// We take charge of the SearchData * and will delete it. // We take charge of the SearchData * and will delete it.
SearchDataClauseSub(SClType tp, RefCntr<SearchData> sub) SearchDataClauseSub(SClType tp, RefCntr<SearchData> sub)
: SearchDataClause(tp), m_sub(sub) {} : SearchDataClause(tp), m_sub(sub)
virtual ~SearchDataClauseSub() {} {
virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang); }
virtual bool getTerms(vector<string>&, vector<vector<string> >&,
vector<int>&) const; virtual ~SearchDataClauseSub()
virtual void getUTerms(vector<string>&) const; {
}
virtual bool toNativeQuery(Rcl::Db &db, void *p, const std::string&)
{
return m_sub->toNativeQuery(db, p);
}
virtual void getTerms(HighlightData& hldata) const
{
m_sub.getconstptr()->getTerms(hldata);
}
protected: protected:
RefCntr<SearchData> m_sub; RefCntr<SearchData> m_sub;
}; };
} // Namespace Rcl } // Namespace Rcl
#endif /* _SEARCHDATA_H_INCLUDED_ */ #endif /* _SEARCHDATA_H_INCLUDED_ */

54
src/utils/hldata.h Normal file
View File

@ -0,0 +1,54 @@
#ifndef _hldata_h_included_
#define _hldata_h_included_
#include <vector>
#include <string>
#include <set>
/**
 * Data about the user's search terms and what they were expanded to.
 * Mostly consumed when highlighting result text and when walking from
 * one match to the next.
 */
struct HighlightData {
    /**
     * User terms, leaving out the ones which contain wildcards.
     * The list is meant for orthographic suggestions. The terms are
     * nevertheless unaccented and lowercased, because they get compared
     * to the dictionary built from the index term list (which is
     * unaccented).
     */
    std::set<std::string> uterms;

    /**
     * Terms or groups exactly as the user entered them (case and
     * diacritics preserved). Used to display what matched, e.g. in
     * relation with highlighting or with skipping to the next match.
     */
    std::vector<std::vector<std::string> > ugroups;

    /**
     * Processed/expanded terms and groups: this is what is searched for
     * to find the regions to highlight. A single term is stored as a
     * group with one entry. All entries are unaccented, and the list may
     * hold values produced by stem or wildcard expansion of the original
     * terms.
     */
    std::vector<std::vector<std::string> > groups;

    /** Slack for each group. Parallel to groups. */
    std::vector<int> slacks;

    /**
     * For each group, the index in ugroups of the user entry it came
     * from. Parallel to groups. One user term or group may yield many
     * processed/expanded terms or groups: this is the link between them.
     */
    std::vector<unsigned int> grpsugidx;

    /** Empty all the containers. */
    void clear()
    {
        grpsugidx.clear();
        slacks.clear();
        groups.clear();
        ugroups.clear();
        uterms.clear();
    }

    /** Add the contents of another HighlightData to this one. */
    void append(const HighlightData&);

    /** Append a printable dump of the contents to out (debug). */
    void toString(std::string& out);
};
#endif /* _hldata_h_included_ */

View File

@ -36,6 +36,7 @@
#include "smallut.h" #include "smallut.h"
#include "utf8iter.h" #include "utf8iter.h"
#include "hldata.h"
#ifndef NO_NAMESPACES #ifndef NO_NAMESPACES
using namespace std; using namespace std;
@ -1038,8 +1039,57 @@ void catstrerror(string *reason, const char *what, int _errno)
#endif #endif
} }
/**
 * Append a printable dump of the highlight data to out (debugging aid).
 *
 * What gets appended, in order:
 *  - the user terms, each one between brackets;
 *  - the sizes of groups, grpsugidx and ugroups;
 *  - each expanded group between braces, followed by its slack. Every
 *    time the originating user group changes, a new line is started with
 *    that user group between parentheses, followed by "->".
 *
 * The parallel vectors are only indexed after a bounds check, so that an
 * inconsistent object (e.g. a grpsugidx entry which does not designate a
 * valid ugroups element) yields a partial dump instead of an out of range
 * access.
 *
 * @param out string to append to. Existing contents are preserved.
 */
void HighlightData::toString(std::string& out)
{
    out.append("\nUser terms (orthograph): ");
    for (std::set<std::string>::const_iterator it = uterms.begin();
         it != uterms.end(); ++it) {
        out.append(" [").append(*it).append("]");
    }

    out.append("\nGroups: ");
    char cbuf[200];
    snprintf(cbuf, sizeof(cbuf),
             "Groups size %d grpsugidx size %d ugroups size %d",
             int(groups.size()), int(grpsugidx.size()), int(ugroups.size()));
    out.append(cbuf);

    // Index of the user group currently being listed. Starts with a value
    // which is not a valid index, so that the first group prints a header.
    unsigned int ugidx = (unsigned int)-1;
    for (unsigned int i = 0; i < groups.size(); i++) {
        if (i < grpsugidx.size() && ugidx != grpsugidx[i]) {
            // Switching to another user entry: print it once.
            ugidx = grpsugidx[i];
            out.append("\n(");
            if (ugidx < ugroups.size()) {
                for (unsigned int j = 0; j < ugroups[ugidx].size(); j++) {
                    out.append("[").append(ugroups[ugidx][j]).append("] ");
                }
            }
            out.append(") ->");
        }
        out.append(" {");
        for (unsigned int j = 0; j < groups[i].size(); j++) {
            out.append("[").append(groups[i][j]).append("]");
        }
        out.append("}");
        if (i < slacks.size()) {
            snprintf(cbuf, sizeof(cbuf), "%d", slacks[i]);
            out.append(cbuf);
        }
    }
    out.append("\n");
}
void HighlightData::append(const HighlightData& hl)
{
uterms.insert(hl.uterms.begin(), hl.uterms.end());
size_t ugsz0 = ugroups.size();
ugroups.insert(ugroups.end(), hl.ugroups.begin(), hl.ugroups.end());
groups.insert(groups.end(), hl.groups.begin(), hl.groups.end());
slacks.insert(slacks.end(), hl.slacks.begin(), hl.slacks.end());
for (std::vector<unsigned int>::const_iterator it = hl.grpsugidx.begin();
it != hl.grpsugidx.end(); it++) {
grpsugidx.push_back(*it + ugsz0);
}
}
#else // TEST_SMALLUT
#include <string> #include <string>
using namespace std; using namespace std;