defined data structure to pass around the search term description used for highlighting and other

This commit is contained in:
"Jean-Francois Dockes ext:(%22) 2012-08-17 10:45:00 +02:00
parent ebdd6faaf5
commit dc7b3420a0
16 changed files with 409 additions and 351 deletions

View File

@ -686,12 +686,12 @@ class LoadThread : public QThread {
/* A thread to convert to rich text (mark search terms) */ /* A thread to convert to rich text (mark search terms) */
class ToRichThread : public QThread { class ToRichThread : public QThread {
string &in; string &in;
const HiliteData &hdata; const HighlightData &hdata;
list<string> &out; list<string> &out;
int loglevel; int loglevel;
PlainToRichQtPreview *ptr; PlainToRichQtPreview *ptr;
public: public:
ToRichThread(string &i, const HiliteData& hd, list<string> &o, ToRichThread(string &i, const HighlightData& hd, list<string> &o,
PlainToRichQtPreview *_ptr) PlainToRichQtPreview *_ptr)
: in(i), hdata(hd), out(o), ptr(_ptr) : in(i), hdata(hd), out(o), ptr(_ptr)
{ {

View File

@ -91,7 +91,7 @@ class Preview : public QWidget {
public: public:
Preview(int sid, // Search Id Preview(int sid, // Search Id
const HiliteData& hdata) // Search terms etc. for highlighting const HighlightData& hdata) // Search terms etc. for highlighting
: QWidget(0), m_searchId(sid), m_hData(hdata) : QWidget(0), m_searchId(sid), m_hData(hdata)
{ {
init(); init();
@ -141,7 +141,7 @@ private:
bool m_canBeep; bool m_canBeep;
bool m_loading; bool m_loading;
QWidget *m_currentW; QWidget *m_currentW;
HiliteData m_hData; HighlightData m_hData;
bool m_justCreated; // First tab create is different bool m_justCreated; // First tab create is different
bool m_haveAnchors; // Search terms are marked in text bool m_haveAnchors; // Search terms are marked in text
int m_lastAnchor; // Number of last anchor. Then rewind to 1 int m_lastAnchor; // Number of last anchor. Then rewind to 1

View File

@ -1220,8 +1220,8 @@ void RclMain::startPreview(int docnum, Rcl::Doc doc, int mod)
curPreview = 0; curPreview = 0;
} }
if (curPreview == 0) { if (curPreview == 0) {
HiliteData hdata; HighlightData hdata;
m_source->getTerms(hdata.terms, hdata.groups, hdata.gslks); m_source->getTerms(hdata);
curPreview = new Preview(reslist->listId(), hdata); curPreview = new Preview(reslist->listId(), hdata);
if (curPreview == 0) { if (curPreview == 0) {
@ -1284,7 +1284,7 @@ void RclMain::updateIdxForDocs(vector<Rcl::Doc>& docs)
*/ */
void RclMain::startPreview(Rcl::Doc doc) void RclMain::startPreview(Rcl::Doc doc)
{ {
Preview *preview = new Preview(0, HiliteData()); Preview *preview = new Preview(0, HighlightData());
if (preview == 0) { if (preview == 0) {
QMessageBox::warning(0, tr("Warning"), QMessageBox::warning(0, tr("Warning"),
tr("Can't create preview window"), tr("Can't create preview window"),

View File

@ -299,8 +299,8 @@ void RecollModel::setDocSource(RefCntr<DocSequence> nsource)
m_source = RefCntr<DocSequence>(); m_source = RefCntr<DocSequence>();
} else { } else {
m_source = RefCntr<DocSequence>(new DocSource(theconfig, nsource)); m_source = RefCntr<DocSequence>(new DocSource(theconfig, nsource));
m_hdata.reset(); m_hdata.clear();
m_source->getTerms(m_hdata.terms, m_hdata.groups, m_hdata.gslks); m_source->getTerms(m_hdata);
} }
} }

View File

@ -19,6 +19,10 @@
#include <Qt> #include <Qt>
#include <string>
#include <map>
#include <vector>
#include "ui_restable.h" #include "ui_restable.h"
#include "refcntr.h" #include "refcntr.h"
#include "docseq.h" #include "docseq.h"
@ -26,7 +30,7 @@
class ResTable; class ResTable;
typedef string (FieldGetter)(const string& fldname, const Rcl::Doc& doc); typedef std::string (FieldGetter)(const std::string& fldname, const Rcl::Doc& doc);
class RecollModel : public QAbstractTableModel { class RecollModel : public QAbstractTableModel {
@ -49,15 +53,15 @@ public:
virtual void setDocSource(RefCntr<DocSequence> nsource); virtual void setDocSource(RefCntr<DocSequence> nsource);
virtual RefCntr<DocSequence> getDocSource() {return m_source;} virtual RefCntr<DocSequence> getDocSource() {return m_source;}
virtual void deleteColumn(int); virtual void deleteColumn(int);
virtual const vector<string>& getFields() {return m_fields;} virtual const std::vector<std::string>& getFields() {return m_fields;}
virtual const map<string, QString>& getAllFields() virtual const std::map<std::string, QString>& getAllFields()
{ {
return o_displayableFields; return o_displayableFields;
} }
virtual void addColumn(int, const string&); virtual void addColumn(int, const std::string&);
// Some column name are aliases/translator for base document field // Some column name are aliases/translator for base document field
// (ie: date, datetime->mtime). Help deal with this: // (ie: date, datetime->mtime). Help deal with this:
virtual string baseField(const string&); virtual std::string baseField(const std::string&);
// Ignore sort() call because // Ignore sort() call because
virtual void setIgnoreSort(bool onoff) {m_ignoreSort = onoff;} virtual void setIgnoreSort(bool onoff) {m_ignoreSort = onoff;}
@ -69,12 +73,12 @@ signals:
private: private:
mutable RefCntr<DocSequence> m_source; mutable RefCntr<DocSequence> m_source;
vector<string> m_fields; std::vector<std::string> m_fields;
vector<FieldGetter*> m_getters; std::vector<FieldGetter*> m_getters;
static map<string, QString> o_displayableFields; static std::map<std::string, QString> o_displayableFields;
bool m_ignoreSort; bool m_ignoreSort;
FieldGetter* chooseGetter(const string&); FieldGetter* chooseGetter(const std::string&);
HiliteData m_hdata; HighlightData m_hdata;
}; };
class ResTable; class ResTable;

View File

@ -19,19 +19,16 @@
#include <string> #include <string>
#include <list> #include <list>
#include <vector> #include <vector>
#ifndef NO_NAMESPACES
using std::string;
using std::list;
using std::vector;
#endif
#include "rcldoc.h" #include "rcldoc.h"
#include "refcntr.h" #include "refcntr.h"
#include "hldata.h"
// A result list entry. // A result list entry.
struct ResListEntry { struct ResListEntry {
Rcl::Doc doc; Rcl::Doc doc;
string subHeader; std::string subHeader;
}; };
/** Sort specification. */ /** Sort specification. */
@ -40,7 +37,7 @@ class DocSeqSortSpec {
DocSeqSortSpec() : desc(false) {} DocSeqSortSpec() : desc(false) {}
bool isNotNull() const {return !field.empty();} bool isNotNull() const {return !field.empty();}
void reset() {field.erase();} void reset() {field.erase();}
string field; std::string field;
bool desc; bool desc;
}; };
@ -50,12 +47,12 @@ class DocSeqFiltSpec {
public: public:
DocSeqFiltSpec() {} DocSeqFiltSpec() {}
enum Crit {DSFS_MIMETYPE, DSFS_QLANG, DSFS_PASSALL}; enum Crit {DSFS_MIMETYPE, DSFS_QLANG, DSFS_PASSALL};
void orCrit(Crit crit, const string& value) { void orCrit(Crit crit, const std::string& value) {
crits.push_back(crit); crits.push_back(crit);
values.push_back(value); values.push_back(value);
} }
std::vector<Crit> crits; std::vector<Crit> crits;
std::vector<string> values; std::vector<std::string> values;
void reset() {crits.clear(); values.clear();} void reset() {crits.clear(); values.clear();}
bool isNotNull() const {return crits.size() != 0;} bool isNotNull() const {return crits.size() != 0;}
}; };
@ -73,7 +70,7 @@ class DocSeqFiltSpec {
*/ */
class DocSequence { class DocSequence {
public: public:
DocSequence(const string &t) : m_title(t) {} DocSequence(const std::string &t) : m_title(t) {}
virtual ~DocSequence() {} virtual ~DocSequence() {}
/** Get document at given rank. /** Get document at given rank.
@ -84,16 +81,17 @@ class DocSequence {
* inside history) * inside history)
* @return true if ok, false for error or end of data * @return true if ok, false for error or end of data
*/ */
virtual bool getDoc(int num, Rcl::Doc &doc, string *sh = 0) = 0; virtual bool getDoc(int num, Rcl::Doc &doc, std::string *sh = 0) = 0;
/** Get next page of documents. This accumulates entries into the result /** Get next page of documents. This accumulates entries into the result
* list parameter (doesn't reset it). */ * list parameter (doesn't reset it). */
virtual int getSeqSlice(int offs, int cnt, vector<ResListEntry>& result); virtual int getSeqSlice(int offs, int cnt,
std::vector<ResListEntry>& result);
/** Get abstract for document. This is special because it may take time. /** Get abstract for document. This is special because it may take time.
* The default is to return the input doc's abstract fields, but some * The default is to return the input doc's abstract fields, but some
* sequences can compute a better value (ie: docseqdb) */ * sequences can compute a better value (ie: docseqdb) */
virtual bool getAbstract(Rcl::Doc& doc, vector<string>& abs) { virtual bool getAbstract(Rcl::Doc& doc, std::vector<std::string>& abs) {
abs.push_back(doc.meta[Rcl::Doc::keyabs]); abs.push_back(doc.meta[Rcl::Doc::keyabs]);
return true; return true;
} }
@ -103,25 +101,21 @@ class DocSequence {
virtual int getResCnt() = 0; virtual int getResCnt() = 0;
/** Get title for result list */ /** Get title for result list */
virtual string title() {return m_title;} virtual std::string title() {return m_title;}
/** Get description for underlying query */ /** Get description for underlying query */
virtual string getDescription() = 0; virtual std::string getDescription() = 0;
/** Get search terms (for highlighting abstracts). Some sequences /** Get search terms (for highlighting abstracts). Some sequences
* may have no associated search terms. Implement this for them. */ * may have no associated search terms. Implement this for them. */
virtual bool getTerms(vector<string>& terms, virtual void getTerms(HighlightData& hld)
vector<vector<string> >& groups,
vector<int>& gslks)
{ {
terms.clear(); groups.clear(); gslks.clear(); return true; hld.clear();
} }
/** Get user-input terms (before stemming etc.) */ virtual std::list<std::string> expand(Rcl::Doc &)
virtual void getUTerms(vector<string>& terms)
{ {
terms.clear(); return std::list<std::string>();
} }
virtual list<string> expand(Rcl::Doc &) {return list<string>();}
/** Optional functionality. */ /** Optional functionality. */
virtual bool canFilter() {return false;} virtual bool canFilter() {return false;}
@ -130,16 +124,16 @@ class DocSequence {
virtual bool setSortSpec(const DocSeqSortSpec &) {return false;} virtual bool setSortSpec(const DocSeqSortSpec &) {return false;}
virtual RefCntr<DocSequence> getSourceSeq() {return RefCntr<DocSequence>();} virtual RefCntr<DocSequence> getSourceSeq() {return RefCntr<DocSequence>();}
static void set_translations(const string& sort, const string& filt) static void set_translations(const std::string& sort, const std::string& filt)
{ {
o_sort_trans = sort; o_sort_trans = sort;
o_filt_trans = filt; o_filt_trans = filt;
} }
protected: protected:
static string o_sort_trans; static std::string o_sort_trans;
static string o_filt_trans; static std::string o_filt_trans;
private: private:
string m_title; std::string m_title;
}; };
/** A modifier has a child sequence which does the real work and does /** A modifier has a child sequence which does the real work and does
@ -152,25 +146,23 @@ public:
{} {}
virtual ~DocSeqModifier() {} virtual ~DocSeqModifier() {}
virtual bool getAbstract(Rcl::Doc& doc, vector<string>& abs) virtual bool getAbstract(Rcl::Doc& doc, std::vector<std::string>& abs)
{ {
if (m_seq.isNull()) if (m_seq.isNull())
return false; return false;
return m_seq->getAbstract(doc, abs); return m_seq->getAbstract(doc, abs);
} }
virtual string getDescription() virtual std::string getDescription()
{ {
if (m_seq.isNull()) if (m_seq.isNull())
return ""; return "";
return m_seq->getDescription(); return m_seq->getDescription();
} }
virtual bool getTerms(vector<string>& terms, virtual void getTerms(HighlightData& hld)
vector<vector<string> >& groups,
vector<int>& gslks)
{ {
if (m_seq.isNull()) if (m_seq.isNull())
return false; return;
return m_seq->getTerms(terms, groups, gslks); m_seq->getTerms(hld);
} }
virtual bool getEnclosing(Rcl::Doc& doc, Rcl::Doc& pdoc) virtual bool getEnclosing(Rcl::Doc& doc, Rcl::Doc& pdoc)
{ {
@ -178,13 +170,7 @@ public:
return false; return false;
return m_seq->getEnclosing(doc, pdoc); return m_seq->getEnclosing(doc, pdoc);
} }
virtual void getUTerms(vector<string>& terms) virtual std::string title() {return m_seq->title();}
{
if (m_seq.isNull())
return;
m_seq->getUTerms(terms);
}
virtual string title() {return m_seq->title();}
virtual RefCntr<DocSequence> getSourceSeq() {return m_seq;} virtual RefCntr<DocSequence> getSourceSeq() {return m_seq;}
protected: protected:
@ -203,7 +189,7 @@ public:
virtual bool canSort() {return true;} virtual bool canSort() {return true;}
virtual bool setFiltSpec(const DocSeqFiltSpec &); virtual bool setFiltSpec(const DocSeqFiltSpec &);
virtual bool setSortSpec(const DocSeqSortSpec &); virtual bool setSortSpec(const DocSeqSortSpec &);
virtual bool getDoc(int num, Rcl::Doc &doc, string *sh = 0) virtual bool getDoc(int num, Rcl::Doc &doc, std::string *sh = 0)
{ {
if (m_seq.isNull()) if (m_seq.isNull())
return false; return false;
@ -215,7 +201,7 @@ public:
return 0; return 0;
return m_seq->getResCnt(); return m_seq->getResCnt();
} }
virtual string title(); virtual std::string title();
private: private:
bool buildStack(); bool buildStack();
void stripStack(); void stripStack();

View File

@ -39,16 +39,9 @@ DocSequenceDb::~DocSequenceDb()
{ {
} }
bool DocSequenceDb::getTerms(vector<string>& terms, void DocSequenceDb::getTerms(HighlightData& hld)
vector<vector<string> >& groups,
vector<int>& gslks)
{ {
return m_fsdata->getTerms(terms, groups, gslks); m_fsdata->getTerms(hld);
}
void DocSequenceDb::getUTerms(vector<string>& terms)
{
m_sdata->getUTerms(terms);
} }
string DocSequenceDb::getDescription() string DocSequenceDb::getDescription()
@ -180,5 +173,13 @@ bool DocSequenceDb::setQuery()
return true; return true;
m_rescnt = -1; m_rescnt = -1;
m_needSetQuery = !m_q->setQuery(m_fsdata); m_needSetQuery = !m_q->setQuery(m_fsdata);
if (0) {
HighlightData hld;
m_fsdata->getTerms(hld);
string str;
hld.toString(str);
fprintf(stderr, "DocSequenceDb::setQuery: terms: %s\n", str.c_str());
}
return !m_needSetQuery; return !m_needSetQuery;
} }

View File

@ -30,10 +30,7 @@ class DocSequenceDb : public DocSequence {
virtual ~DocSequenceDb(); virtual ~DocSequenceDb();
virtual bool getDoc(int num, Rcl::Doc &doc, string * = 0); virtual bool getDoc(int num, Rcl::Doc &doc, string * = 0);
virtual int getResCnt(); virtual int getResCnt();
virtual bool getTerms(vector<string>& terms, virtual void getTerms(HighlightData& hld);
vector<vector<string> >& groups,
vector<int>& gslks);
virtual void getUTerms(vector<string>& terms);
virtual bool getAbstract(Rcl::Doc &doc, vector<string>&); virtual bool getAbstract(Rcl::Doc &doc, vector<string>&);
virtual bool getEnclosing(Rcl::Doc& doc, Rcl::Doc& pdoc); virtual bool getEnclosing(Rcl::Doc& doc, Rcl::Doc& pdoc);
virtual string getDescription(); virtual string getDescription();

View File

@ -24,12 +24,10 @@
#include <map> #include <map>
#include <algorithm> #include <algorithm>
#ifndef NO_NAMESPACES
using std::vector; using std::vector;
using std::list; using std::list;
using std::pair; using std::pair;
using std::set; using std::set;
#endif /* NO_NAMESPACES */
#include "rcldb.h" #include "rcldb.h"
#include "rclconfig.h" #include "rclconfig.h"
@ -51,28 +49,30 @@ static string vecStringToString(const vector<string>& t)
return sterms; return sterms;
} }
// Text splitter callback used to take note of the position of query terms // Text splitter used to take note of the position of query terms
// inside the result text. This is then used to insert highlight tags. // inside the result text. This is then used to insert highlight tags.
class TextSplitPTR : public TextSplit { class TextSplitPTR : public TextSplit {
public: public:
// Out: begin and end byte positions of query terms/groups in text // Out: begin and end byte positions of query terms/groups in text
vector<pair<int, int> > tboffs; vector<pair<int, int> > tboffs;
TextSplitPTR(const vector<string>& its, TextSplitPTR(const HighlightData& hdata)
const vector<vector<string> >&groups, : m_wcount(0), m_hdata(hdata)
const vector<int>& slacks)
: m_wcount(0), m_groups(groups), m_slacks(slacks)
{ {
for (vector<string>::const_iterator it = its.begin(); // We separate single terms and groups and extract the group
it != its.end(); it++) { // terms for computing positions list before looking for group
m_terms.insert(*it); // matches
}
for (vector<vector<string> >::const_iterator vit = m_groups.begin(); for (vector<vector<string> >::const_iterator vit = hdata.groups.begin();
vit != m_groups.end(); vit++) { vit != hdata.groups.end(); vit++) {
for (vector<string>::const_iterator it = (*vit).begin(); if (vit->size() == 1) {
it != (*vit).end(); it++) { m_terms.insert(vit->front());
m_gterms.insert(*it); } else if (vit->size() > 1) {
for (vector<string>::const_iterator it = vit->begin();
it != vit->end(); it++) {
m_gterms.insert(*it);
}
} }
} }
} }
@ -116,15 +116,16 @@ class TextSplitPTR : public TextSplit {
private: private:
virtual bool matchGroup(const vector<string>& terms, int dist); virtual bool matchGroup(const vector<string>& terms, int dist);
// Word count. Used to call checkCancel from time to time.
int m_wcount; int m_wcount;
// In: user query terms // In: user query terms
set<string> m_terms; set<string> m_terms;
// In: user query groups, for near/phrase searches. // m_gterms holds all the terms in m_groups, as a set for quick lookup
const vector<vector<string> >& m_groups; set<string> m_gterms;
const vector<int>& m_slacks;
set<string> m_gterms; const HighlightData& m_hdata;
// group/near terms word positions. // group/near terms word positions.
map<string, vector<int> > m_plists; map<string, vector<int> > m_plists;
@ -294,10 +295,11 @@ public:
// handle all groups as NEAR (ignore order). // handle all groups as NEAR (ignore order).
bool TextSplitPTR::matchGroups() bool TextSplitPTR::matchGroups()
{ {
vector<vector<string> >::const_iterator vit = m_groups.begin(); for (unsigned int i = 0; i < m_hdata.groups.size(); i++) {
vector<int>::const_iterator sit = m_slacks.begin(); if (m_hdata.groups[i].size() <= 1)
for (; vit != m_groups.end() && sit != m_slacks.end(); vit++, sit++) { continue;
matchGroup(*vit, *sit + (*vit).size()); matchGroup(m_hdata.groups[i],
m_hdata.groups[i].size() + m_hdata.slacks[i]);
} }
// Sort regions by increasing start and decreasing width. // Sort regions by increasing start and decreasing width.
@ -317,39 +319,22 @@ bool TextSplitPTR::matchGroups()
// the input is html, the body is always a single output chunk. // the input is html, the body is always a single output chunk.
bool PlainToRich::plaintorich(const string& in, bool PlainToRich::plaintorich(const string& in,
list<string>& out, // Output chunk list list<string>& out, // Output chunk list
const HiliteData& hdata, const HighlightData& hdata,
int chunksize) int chunksize)
{ {
Chrono chron; Chrono chron;
const vector<string>& terms(hdata.terms);
const vector<vector<string> >& groups(hdata.groups);
const vector<int>& slacks(hdata.gslks);
if (0 && DebugLog::getdbl()->getlevel() >= DEBDEB0) {
string sterms = vecStringToString(terms);
LOGDEB0(("plaintorich: terms: %s\n", sterms.c_str()));
sterms.clear();
for (vector<vector<string> >::const_iterator vit = groups.begin();
vit != groups.end(); vit++) {
sterms += "GROUP: ";
sterms += vecStringToString(*vit);
sterms += "\n";
}
LOGDEB0(("plaintorich: groups:\n %s", sterms.c_str()));
LOGDEB2((" TEXT:[%s]\n", in.c_str()));
}
// Compute the positions for the query terms. We use the text // Compute the positions for the query terms. We use the text
// splitter to break the text into words, and compare the words to // splitter to break the text into words, and compare the words to
// the search terms, // the search terms,
TextSplitPTR splitter(terms, groups, slacks); TextSplitPTR splitter(hdata);
// Note: the splitter returns the term locations in byte, not // Note: the splitter returns the term locations in byte, not
// character, offsets. // character, offsets.
splitter.text_to_words(in); splitter.text_to_words(in);
LOGDEB2(("plaintorich: split done %d mS\n", chron.millis())); LOGDEB2(("plaintorich: split done %d mS\n", chron.millis()));
// Compute the positions for NEAR and PHRASE groups. // Compute the positions for NEAR and PHRASE groups.
splitter.matchGroups(); splitter.matchGroups();
LOGDEB2(("plaintorich: group match done %d mS\n", chron.millis()));
out.clear(); out.clear();
out.push_back(""); out.push_back("");

View File

@ -19,26 +19,8 @@
#include <string> #include <string>
#include <list> #include <list>
using std::list;
using std::string;
/// Holder for plaintorich() input data: words and groups of words to #include "hldata.h"
/// be highlighted
struct HiliteData {
// Single terms
vector<string> terms;
// NEAR and PHRASE elements
vector<vector<string> > groups;
// Group slacks (number of permitted non-matched words).
// Parallel vector to the above 'groups'
vector<int> gslks;
void reset()
{
terms.clear();
groups.clear();
gslks.clear();
}
};
/** /**
* A class for highlighting search results. Overridable methods allow * A class for highlighting search results. Overridable methods allow
@ -83,21 +65,21 @@ public:
* lowercase and unaccented. * lowercase and unaccented.
* @param chunksize max size of chunks in output list * @param chunksize max size of chunks in output list
*/ */
virtual bool plaintorich(const string &in, list<string> &out, virtual bool plaintorich(const std::string &in, std::list<std::string> &out,
const HiliteData& hdata, const HighlightData& hdata,
int chunksize = 50000 int chunksize = 50000
); );
/* Overridable output methods for headers, highlighting and marking tags */ /* Overridable output methods for headers, highlighting and marking tags */
virtual string header() {return snull;} virtual std::string header() {return snull;}
virtual string startMatch() {return snull;} virtual std::string startMatch() {return snull;}
virtual string endMatch() {return snull;} virtual std::string endMatch() {return snull;}
virtual string startAnchor(int) {return snull;} virtual std::string startAnchor(int) {return snull;}
virtual string endAnchor() {return snull;} virtual std::string endAnchor() {return snull;}
virtual string startChunk() {return snull;} virtual std::string startChunk() {return snull;}
protected: protected:
const string snull; const std::string snull;
bool m_inputhtml; bool m_inputhtml;
// Use <br> to break plain text lines (else caller has used a <pre> tag) // Use <br> to break plain text lines (else caller has used a <pre> tag)
bool m_eolbr; bool m_eolbr;

View File

@ -132,9 +132,8 @@ void ResListPager::resultPageFor(int docnum)
m_respage = npage; m_respage = npage;
} }
void ResListPager::displayDoc(RclConfig *config, void ResListPager::displayDoc(RclConfig *config, int i, Rcl::Doc& doc,
int i, Rcl::Doc& doc, const HiliteData& hdata, const HighlightData& hdata, const string& sh)
const string& sh)
{ {
ostringstream chunk; ostringstream chunk;
int percent; int percent;
@ -309,8 +308,9 @@ void ResListPager::displayPage(RclConfig *config)
if (pageEmpty()) { if (pageEmpty()) {
chunk << trans("<p><b>No results found</b><br>"); chunk << trans("<p><b>No results found</b><br>");
vector<string>uterms; HighlightData hldata;
m_docSource->getUTerms(uterms); m_docSource->getTerms(hldata);
vector<string> uterms(hldata.uterms.begin(), hldata.uterms.end());
if (!uterms.empty()) { if (!uterms.empty()) {
map<string, vector<string> > spellings; map<string, vector<string> > spellings;
suggest(uterms, spellings); suggest(uterms, spellings);
@ -366,8 +366,8 @@ void ResListPager::displayPage(RclConfig *config)
if (pageEmpty()) if (pageEmpty())
return; return;
HiliteData hdata; HighlightData hdata;
m_docSource->getTerms(hdata.terms, hdata.groups, hdata.gslks); m_docSource->getTerms(hdata);
// Emit data for result entry paragraph. Do it in chunks that make sense // Emit data for result entry paragraph. Do it in chunks that make sense
// html-wise, else our client may get confused // html-wise, else our client may get confused

View File

@ -19,14 +19,13 @@
#define _reslistpager_h_included_ #define _reslistpager_h_included_
#include <vector> #include <vector>
using std::vector;
#include "refcntr.h" #include "refcntr.h"
#include "docseq.h" #include "docseq.h"
#include "hldata.h"
class RclConfig; class RclConfig;
class PlainToRich; class PlainToRich;
struct HiliteData;
/** /**
* Manage a paged HTML result list. * Manage a paged HTML result list.
@ -85,7 +84,7 @@ public:
void resultPageFor(int docnum); void resultPageFor(int docnum);
void displayPage(RclConfig *); void displayPage(RclConfig *);
void displayDoc(RclConfig *, int idx, Rcl::Doc& doc, void displayDoc(RclConfig *, int idx, Rcl::Doc& doc,
const HiliteData& hdata, const string& sh = ""); const HighlightData& hdata, const string& sh = "");
bool pageEmpty() {return m_respage.size() == 0;} bool pageEmpty() {return m_respage.size() == 0;}
string queryDescription() {return m_docSource.isNull() ? "" : string queryDescription() {return m_docSource.isNull() ? "" :
@ -112,8 +111,9 @@ public:
virtual string pageTop() {return string();} virtual string pageTop() {return string();}
virtual string headerContent() {return string();} virtual string headerContent() {return string();}
virtual string iconUrl(RclConfig *, Rcl::Doc& doc); virtual string iconUrl(RclConfig *, Rcl::Doc& doc);
virtual void suggest(const vector<string>, virtual void suggest(const std::vector<std::string>,
map<string, vector<string> >& sugg) { std::map<std::string, std::vector<std::string> >& sugg)
{
sugg.clear(); sugg.clear();
} }
virtual string absSep() {return "&hellip;";} virtual string absSep() {return "&hellip;";}
@ -126,7 +126,7 @@ private:
bool m_hasNext; bool m_hasNext;
PlainToRich *m_hiliter; PlainToRich *m_hiliter;
RefCntr<DocSequence> m_docSource; RefCntr<DocSequence> m_docSource;
vector<ResListEntry> m_respage; std::vector<ResListEntry> m_respage;
}; };
#endif /* _reslistpager_h_included_ */ #endif /* _reslistpager_h_included_ */

View File

@ -498,23 +498,12 @@ bool SearchData::fileNameOnly()
return true; return true;
} }
// Extract all terms and term groups // Extract all term data
bool SearchData::getTerms(vector<string>& terms, void SearchData::getTerms(HighlightData &hld) const
vector<vector<string> >& groups,
vector<int>& gslks) const
{ {
for (qlist_cit_t it = m_query.begin(); it != m_query.end(); it++) for (qlist_cit_t it = m_query.begin(); it != m_query.end(); it++)
(*it)->getTerms(terms, groups, gslks); (*it)->getTerms(hld);
return true; return;
}
// Extract user terms
void SearchData::getUTerms(vector<string>& terms) const
{
for (qlist_cit_t it = m_query.begin(); it != m_query.end(); it++)
(*it)->getUTerms(terms);
sort(terms.begin(), terms.end());
vector<string>::iterator it = unique(terms.begin(), terms.end());
terms.erase(it, terms.end());
} }
// Splitter callback for breaking a user string into simple terms and // Splitter callback for breaking a user string into simple terms and
@ -590,10 +579,10 @@ private:
// translating. // translating.
class StringToXapianQ { class StringToXapianQ {
public: public:
StringToXapianQ(Db& db, const string& field, StringToXapianQ(Db& db, HighlightData& hld, const string& field,
const string &stmlng, bool boostUser) const string &stmlng, bool boostUser)
: m_db(db), m_field(field), m_stemlang(stmlng), : m_db(db), m_field(field), m_stemlang(stmlng),
m_doBoostUserTerms(boostUser) m_doBoostUserTerms(boostUser), m_hld(hld)
{ } { }
bool processUserString(const string &iq, bool processUserString(const string &iq,
@ -601,20 +590,6 @@ public:
vector<Xapian::Query> &pqueries, vector<Xapian::Query> &pqueries,
const StopList &stops, const StopList &stops,
int slack = 0, bool useNear = false); int slack = 0, bool useNear = false);
// After processing the string: return search terms and term
// groups (ie: for highlighting)
bool getTerms(vector<string>& terms, vector<vector<string> >& groups)
{
terms.insert(terms.end(), m_terms.begin(), m_terms.end());
groups.insert(groups.end(), m_groups.begin(), m_groups.end());
return true;
}
bool getUTerms(vector<string>& terms)
{
terms.insert(terms.end(), m_uterms.begin(), m_uterms.end());
return true;
}
private: private:
void expandTerm(bool dont, const string& term, vector<string>& exp, void expandTerm(bool dont, const string& term, vector<string>& exp,
string& sterm, const string& prefix); string& sterm, const string& prefix);
@ -630,10 +605,7 @@ private:
const string& m_field; const string& m_field;
const string& m_stemlang; const string& m_stemlang;
bool m_doBoostUserTerms; bool m_doBoostUserTerms;
// Single terms and phrases resulting from breaking up text; HighlightData& m_hld;
vector<string> m_uterms;
vector<string> m_terms;
vector<vector<string> > m_groups;
}; };
#if 1 #if 1
@ -647,7 +619,7 @@ static void listVector(const string& what, const vector<string>&l)
} }
#endif #endif
/** Expand stem and wildcards /** Take simple term and expand stem and wildcards
* *
* @param nostemexp don't perform stem expansion. This is mainly used to * @param nostemexp don't perform stem expansion. This is mainly used to
* prevent stem expansion inside phrases (because the user probably * prevent stem expansion inside phrases (because the user probably
@ -680,9 +652,11 @@ void StringToXapianQ::expandTerm(bool nostemexp,
nostemexp = true; nostemexp = true;
} }
if (!haswild)
m_hld.uterms.insert(term);
if (nostemexp && !haswild) { if (nostemexp && !haswild) {
sterm = term; sterm = term;
m_uterms.push_back(sterm);
exp.resize(1); exp.resize(1);
exp[0] = prefix + term; exp[0] = prefix + term;
} else { } else {
@ -692,7 +666,6 @@ void StringToXapianQ::expandTerm(bool nostemexp,
m_field); m_field);
} else { } else {
sterm = term; sterm = term;
m_uterms.push_back(sterm);
m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, res, -1, m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, res, -1,
m_field); m_field);
} }
@ -701,7 +674,6 @@ void StringToXapianQ::expandTerm(bool nostemexp,
exp.push_back(it->term); exp.push_back(it->term);
} }
} }
//listVector("ExpandTerm:uterms now: ", m_uterms);
} }
// Do distribution of string vectors: a,b c,d -> a,c a,d b,c b,d // Do distribution of string vectors: a,b c,d -> a,c a,d b,c b,d
@ -753,12 +725,15 @@ void StringToXapianQ::processSimpleSpan(const string& span, bool nostemexp,
} }
expandTerm(nostemexp, span, exp, sterm, prefix); expandTerm(nostemexp, span, exp, sterm, prefix);
// m_terms is used for highlighting, we don't want prefixes in there. // Set up the highlight data. No prefix should go in there
for (vector<string>::const_iterator it = exp.begin(); for (vector<string>::const_iterator it = exp.begin();
it != exp.end(); it++) { it != exp.end(); it++) {
m_terms.push_back(it->substr(prefix.size())); m_hld.groups.push_back(vector<string>(1, it->substr(prefix.size())));
m_hld.slacks.push_back(0);
m_hld.grpsugidx.push_back(m_hld.ugroups.size() - 1);
} }
// Push either term or OR of stem-expanded set // Push either term or OR of stem-expanded set
Xapian::Query xq(Xapian::Query::OP_OR, exp.begin(), exp.end()); Xapian::Query xq(Xapian::Query::OP_OR, exp.begin(), exp.end());
@ -786,7 +761,9 @@ void StringToXapianQ::processPhraseOrNear(TextSplitQ *splitData,
Xapian::Query::op op = useNear ? Xapian::Query::OP_NEAR : Xapian::Query::op op = useNear ? Xapian::Query::OP_NEAR :
Xapian::Query::OP_PHRASE; Xapian::Query::OP_PHRASE;
vector<Xapian::Query> orqueries; vector<Xapian::Query> orqueries;
#ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF
bool hadmultiple = false; bool hadmultiple = false;
#endif
vector<vector<string> >groups; vector<vector<string> >groups;
string prefix; string prefix;
@ -805,15 +782,19 @@ void StringToXapianQ::processPhraseOrNear(TextSplitQ *splitData,
for (vector<string>::iterator it = splitData->terms.begin(); for (vector<string>::iterator it = splitData->terms.begin();
it != splitData->terms.end(); it++, nxit++) { it != splitData->terms.end(); it++, nxit++) {
LOGDEB0(("ProcessPhrase: processing [%s]\n", it->c_str())); LOGDEB0(("ProcessPhrase: processing [%s]\n", it->c_str()));
// Adjust when we do stem expansion. Not inside phrases, and // Adjust when we do stem expansion. Not if disabled by
// some versions of xapian will accept only one OR clause // caller, not inside phrases, and some versions of xapian
// inside NEAR, all others must be leafs. // will accept only one OR clause inside NEAR.
bool nostemexp = *nxit || (op == Xapian::Query::OP_PHRASE) || hadmultiple; bool nostemexp = *nxit || (op == Xapian::Query::OP_PHRASE)
#ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF
|| hadmultiple
#endif // single OR inside NEAR
;
string sterm; string sterm;
vector<string> exp; vector<string> exp;
expandTerm(nostemexp, *it, exp, sterm, prefix); expandTerm(nostemexp, *it, exp, sterm, prefix);
LOGDEB0(("ProcessPhrase: exp size %d\n", exp.size())); LOGDEB0(("ProcessPhraseOrNear: exp size %d\n", exp.size()));
listVector("", exp); listVector("", exp);
// groups is used for highlighting, we don't want prefixes in there. // groups is used for highlighting, we don't want prefixes in there.
vector<string> noprefs; vector<string> noprefs;
@ -850,7 +831,13 @@ void StringToXapianQ::processPhraseOrNear(TextSplitQ *splitData,
vector<vector<string> > allcombs; vector<vector<string> > allcombs;
vector<string> comb; vector<string> comb;
multiply_groups(groups.begin(), groups.end(), comb, allcombs); multiply_groups(groups.begin(), groups.end(), comb, allcombs);
m_groups.insert(m_groups.end(), allcombs.begin(), allcombs.end());
// Insert the search groups and slacks in the highlight data, with
// a reference to the user entry that generated them:
m_hld.groups.insert(m_hld.groups.end(), allcombs.begin(), allcombs.end());
m_hld.slacks.insert(m_hld.slacks.end(), allcombs.size(), slack);
m_hld.grpsugidx.insert(m_hld.grpsugidx.end(), allcombs.size(),
m_hld.ugroups.size() - 1);
} }
// Trim string beginning with ^ or ending with $ and convert to flags // Trim string beginning with ^ or ending with $ and convert to flags
@ -875,7 +862,16 @@ static int stringToMods(string& s)
* We just separate words and phrases, and do wildcard and stem expansion, * We just separate words and phrases, and do wildcard and stem expansion,
* *
* This is used to process data entered into an OR/AND/NEAR/PHRASE field of * This is used to process data entered into an OR/AND/NEAR/PHRASE field of
* the GUI. * the GUI (in the case of NEAR/PHRASE, clausedist adds dquotes to the user
* entry).
*
* This appears awful, and it would seem that the split into
* terms/phrases should be performed in the upper layer so that we
* only receive pure term or near/phrase pure elements here, but in
* fact there are things that would appear like terms to naive code,
* and which may actually be turned into phrases (ie: tom:jerry),
* in a manner which intimately depends on the index implementation,
* so that it makes sense to process this here.
* *
* The final list contains one query for each term or phrase * The final list contains one query for each term or phrase
* - Elements corresponding to a stem-expanded part are an OP_OR * - Elements corresponding to a stem-expanded part are an OP_OR
@ -895,9 +891,6 @@ bool StringToXapianQ::processUserString(const string &iq,
{ {
LOGDEB(("StringToXapianQ:: query string: [%s], slack %d, near %d\n", iq.c_str(), slack, useNear)); LOGDEB(("StringToXapianQ:: query string: [%s], slack %d, near %d\n", iq.c_str(), slack, useNear));
ermsg.erase(); ermsg.erase();
m_uterms.clear();
m_terms.clear();
m_groups.clear();
// Simple whitespace-split input into user-level words and // Simple whitespace-split input into user-level words and
// double-quoted phrases: word1 word2 "this is a phrase". // double-quoted phrases: word1 word2 "this is a phrase".
@ -952,10 +945,12 @@ bool StringToXapianQ::processUserString(const string &iq,
case 0: case 0:
continue;// ?? continue;// ??
case 1: case 1:
m_hld.ugroups.push_back(vector<string>(1, *it));
processSimpleSpan(splitter.terms.front(), processSimpleSpan(splitter.terms.front(),
splitter.nostemexps.front(), pqueries); splitter.nostemexps.front(), pqueries);
break; break;
default: default:
m_hld.ugroups.push_back(vector<string>(1, *it));
processPhraseOrNear(&splitter, pqueries, useNear, slack, mods); processPhraseOrNear(&splitter, pqueries, useNear, slack, mods);
} }
} }
@ -984,8 +979,6 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p,
LOGDEB2(("SearchDataClauseSimple::toNativeQuery: stemlang [%s]\n", LOGDEB2(("SearchDataClauseSimple::toNativeQuery: stemlang [%s]\n",
stemlang.c_str())); stemlang.c_str()));
m_terms.clear();
m_groups.clear();
Xapian::Query *qp = (Xapian::Query *)p; Xapian::Query *qp = (Xapian::Query *)p;
*qp = Xapian::Query(); *qp = Xapian::Query();
@ -1007,16 +1000,14 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p,
(m_parentSearch && !m_parentSearch->haveWildCards()) || (m_parentSearch && !m_parentSearch->haveWildCards()) ||
(m_parentSearch == 0 && !m_haveWildCards); (m_parentSearch == 0 && !m_haveWildCards);
StringToXapianQ tr(db, m_field, l_stemlang, doBoostUserTerm); StringToXapianQ tr(db, m_hldata, m_field, l_stemlang, doBoostUserTerm);
if (!tr.processUserString(m_text, m_reason, pqueries, db.getStopList())) if (!tr.processUserString(m_text, m_reason, pqueries, db.getStopList()))
return false; return false;
if (pqueries.empty()) { if (pqueries.empty()) {
LOGERR(("SearchDataClauseSimple: resolved to null query\n")); LOGERR(("SearchDataClauseSimple: resolved to null query\n"));
return true; return true;
} }
tr.getTerms(m_terms, m_groups);
tr.getUTerms(m_uterms);
//listVector("SearchDataClauseSimple: Uterms: ", m_uterms);
*qp = Xapian::Query(op, pqueries.begin(), pqueries.end()); *qp = Xapian::Query(op, pqueries.begin(), pqueries.end());
if (m_weight != 1.0) { if (m_weight != 1.0) {
*qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight); *qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
@ -1056,8 +1047,6 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
const string& l_stemlang = (m_modifiers&SDCM_NOSTEMMING)? cstr_null: const string& l_stemlang = (m_modifiers&SDCM_NOSTEMMING)? cstr_null:
stemlang; stemlang;
LOGDEB(("SearchDataClauseDist::toNativeQuery\n")); LOGDEB(("SearchDataClauseDist::toNativeQuery\n"));
m_terms.clear();
m_groups.clear();
Xapian::Query *qp = (Xapian::Query *)p; Xapian::Query *qp = (Xapian::Query *)p;
*qp = Xapian::Query(); *qp = Xapian::Query();
@ -1080,7 +1069,7 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
} }
string s = cstr_dquote + m_text + cstr_dquote; string s = cstr_dquote + m_text + cstr_dquote;
bool useNear = (m_tp == SCLT_NEAR); bool useNear = (m_tp == SCLT_NEAR);
StringToXapianQ tr(db, m_field, l_stemlang, doBoostUserTerm); StringToXapianQ tr(db, m_hldata, m_field, l_stemlang, doBoostUserTerm);
if (!tr.processUserString(s, m_reason, pqueries, db.getStopList(), if (!tr.processUserString(s, m_reason, pqueries, db.getStopList(),
m_slack, useNear)) m_slack, useNear))
return false; return false;
@ -1088,8 +1077,7 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
LOGERR(("SearchDataClauseDist: resolved to null query\n")); LOGERR(("SearchDataClauseDist: resolved to null query\n"));
return true; return true;
} }
tr.getTerms(m_terms, m_groups);
tr.getUTerms(m_uterms);
*qp = *pqueries.begin(); *qp = *pqueries.begin();
if (m_weight != 1.0) { if (m_weight != 1.0) {
*qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight); *qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
@ -1097,21 +1085,4 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
return true; return true;
} }
// Translate subquery
bool SearchDataClauseSub::toNativeQuery(Rcl::Db &db, void *p, const string&)
{
return m_sub->toNativeQuery(db, p);
}
bool SearchDataClauseSub::getTerms(vector<string>& terms,
vector<vector<string> >& groups,
vector<int>& gslks) const
{
return m_sub.getconstptr()->getTerms(terms, groups, gslks);
}
void SearchDataClauseSub::getUTerms(vector<string>& terms) const
{
m_sub.getconstptr()->getUTerms(terms);
}
} // Namespace Rcl } // Namespace Rcl

View File

@ -31,14 +31,11 @@
#include "refcntr.h" #include "refcntr.h"
#include "smallut.h" #include "smallut.h"
#include "cstr.h" #include "cstr.h"
#include "hldata.h"
class RclConfig; class RclConfig;
#ifndef NO_NAMESPACES
using std::vector;
using std::string;
namespace Rcl { namespace Rcl {
#endif // NO_NAMESPACES
/** Search clause types */ /** Search clause types */
enum SClType { enum SClType {
@ -50,33 +47,33 @@ enum SClType {
class SearchDataClause; class SearchDataClause;
/** /**
Data structure representing a Recoll user query, for translation Data structure representing a Recoll user query, for translation
into a Xapian query tree. This could probably better called a 'question'. into a Xapian query tree. This could probably better called a 'question'.
This is a list of search clauses combined through either OR or AND. This is a list of search clauses combined through either OR or AND.
Clauses either reflect user entry in a query field: some text, a Clauses either reflect user entry in a query field: some text, a
clause type (AND/OR/NEAR etc.), possibly a distance, or points to clause type (AND/OR/NEAR etc.), possibly a distance, or points to
another SearchData representing a subquery. another SearchData representing a subquery.
The content of each clause when added may not be fully parsed yet The content of each clause when added may not be fully parsed yet
(may come directly from a gui field). It will be parsed and may be (may come directly from a gui field). It will be parsed and may be
translated to several queries in the Xapian sense, for exemple translated to several queries in the Xapian sense, for exemple
several terms and phrases as would result from several terms and phrases as would result from
["this is a phrase" term1 term2] . ["this is a phrase" term1 term2] .
This is why the clauses also have an AND/OR/... type. This is why the clauses also have an AND/OR/... type.
A phrase clause could be added either explicitly or using double quotes: A phrase clause could be added either explicitly or using double quotes:
{SCLT_PHRASE, [this is a phrase]} or as {SCLT_XXX, ["this is a phrase"]} {SCLT_PHRASE, [this is a phrase]} or as {SCLT_XXX, ["this is a phrase"]}
*/ */
class SearchData { class SearchData {
public: public:
SearchData(SClType tp) SearchData(SClType tp)
: m_tp(tp), m_topdirexcl(false), m_topdirweight(1.0), : m_tp(tp), m_topdirexcl(false), m_topdirweight(1.0),
m_haveDates(false), m_maxSize(size_t(-1)), m_haveDates(false), m_maxSize(size_t(-1)),
m_minSize(size_t(-1)), m_haveWildCards(false) m_minSize(size_t(-1)), m_haveWildCards(false)
{ {
if (m_tp != SCLT_OR && m_tp != SCLT_AND) if (m_tp != SCLT_OR && m_tp != SCLT_AND)
m_tp = SCLT_OR; m_tp = SCLT_OR;
@ -108,7 +105,7 @@ public:
bool maybeAddAutoPhrase(Rcl::Db &db, double threshold); bool maybeAddAutoPhrase(Rcl::Db &db, double threshold);
/** Set/get top subdirectory for filtering results */ /** Set/get top subdirectory for filtering results */
void setTopdir(const string& t, bool excl = false, float w = 1.0) void setTopdir(const std::string& t, bool excl = false, float w = 1.0)
{ {
m_topdir = t; m_topdir = t;
m_topdirexcl = excl; m_topdirexcl = excl;
@ -122,38 +119,37 @@ public:
void setDateSpan(DateInterval *dip) {m_dates = *dip; m_haveDates = true;} void setDateSpan(DateInterval *dip) {m_dates = *dip; m_haveDates = true;}
/** Add file type for filtering results */ /** Add file type for filtering results */
void addFiletype(const string& ft) {m_filetypes.push_back(ft);} void addFiletype(const std::string& ft) {m_filetypes.push_back(ft);}
/** Add file type to not wanted list */ /** Add file type to not wanted list */
void remFiletype(const string& ft) {m_nfiletypes.push_back(ft);} void remFiletype(const std::string& ft) {m_nfiletypes.push_back(ft);}
void setStemlang(const string& lang = "english") {m_stemlang = lang;} void setStemlang(const std::string& lang = "english") {m_stemlang = lang;}
/** Retrieve error description */ /** Retrieve error description */
string getReason() {return m_reason;} std::string getReason() {return m_reason;}
/** Get terms and phrase/near groups. Used in the GUI for highlighting /** Return term expansion data. Mostly used by caller for highlighting
* The groups and gslks vectors are parallel and hold the phrases/near
* string groups and their associated slacks (distance in excess of group
* size)
*/ */
bool getTerms(vector<string>& terms, void getTerms(HighlightData& hldata) const;
vector<vector<string> >& groups, vector<int>& gslks) const;
/** Get user-input terms (before expansion etc.) */
void getUTerms(vector<string>& terms) const;
/** /**
* Get/set the description field which is retrieved from xapian after * Get/set the description field which is retrieved from xapian after
* initializing the query. It is stored here for usage in the GUI. * initializing the query. It is stored here for usage in the GUI.
*/ */
string getDescription() {return m_description;} std::string getDescription() {return m_description;}
void setDescription(const string& d) {m_description = d;} void setDescription(const std::string& d) {m_description = d;}
private: private:
SClType m_tp; // Only SCLT_AND or SCLT_OR here // Combine type. Only SCLT_AND or SCLT_OR here
vector<SearchDataClause*> m_query; SClType m_tp;
vector<string> m_filetypes; // Restrict to filetypes if set. // Complex query descriptor
vector<string> m_nfiletypes; // Unwanted file types std::vector<SearchDataClause*> m_query;
string m_topdir; // Restrict to subtree. // Restricted set of filetypes if not empty.
std::vector<std::string> m_filetypes;
// Excluded set of file types if not empty
std::vector<std::string> m_nfiletypes;
// Restrict to subtree.
std::string m_topdir;
bool m_topdirexcl; // Invert meaning bool m_topdirexcl; // Invert meaning
float m_topdirweight; // affect weight instead of filter float m_topdirweight; // affect weight instead of filter
bool m_haveDates; bool m_haveDates;
@ -162,11 +158,11 @@ private:
size_t m_minSize; size_t m_minSize;
// Printable expanded version of the complete query, retrieved/set // Printable expanded version of the complete query, retrieved/set
// from rcldb after the Xapian::setQuery() call // from rcldb after the Xapian::setQuery() call
string m_description; std::string m_description;
string m_reason; std::string m_reason;
bool m_haveWildCards; bool m_haveWildCards;
string m_stemlang; std::string m_stemlang;
bool expandFileTypes(RclConfig *cfg, vector<string>& exptps); bool expandFileTypes(RclConfig *cfg, std::vector<std::string>& exptps);
/* Copyconst and assignment private and forbidden */ /* Copyconst and assignment private and forbidden */
SearchData(const SearchData &) {} SearchData(const SearchData &) {}
SearchData& operator=(const SearchData&) {return *this;}; SearchData& operator=(const SearchData&) {return *this;};
@ -178,39 +174,56 @@ public:
SDCM_ANCHOREND=4}; SDCM_ANCHOREND=4};
SearchDataClause(SClType tp) SearchDataClause(SClType tp)
: m_tp(tp), m_parentSearch(0), m_haveWildCards(0), : m_tp(tp), m_parentSearch(0), m_haveWildCards(0),
m_modifiers(SDCM_NONE), m_weight(1.0) m_modifiers(SDCM_NONE), m_weight(1.0)
{} {}
virtual ~SearchDataClause() {} virtual ~SearchDataClause() {}
virtual bool toNativeQuery(Rcl::Db &db, void *, const string&) = 0; virtual bool toNativeQuery(Rcl::Db &db, void *, const std::string&) = 0;
bool isFileName() const {return m_tp == SCLT_FILENAME ? true: false;} bool isFileName() const {return m_tp == SCLT_FILENAME ? true: false;}
virtual string getReason() const {return m_reason;} virtual std::string getReason() const {return m_reason;}
virtual bool getTerms(vector<string>&, vector<vector<string> >&, virtual void getTerms(HighlightData & hldata) const = 0;
vector<int>&) const = 0;
virtual void getUTerms(vector<string>&) const = 0;
SClType getTp() {return m_tp;} SClType getTp()
void setParent(SearchData *p) {m_parentSearch = p;} {
virtual void setModifiers(Modifier mod) {m_modifiers = mod;} return m_tp;
virtual int getModifiers() {return m_modifiers;} }
virtual void addModifier(Modifier mod) { void setParent(SearchData *p)
{
m_parentSearch = p;
}
virtual void setModifiers(Modifier mod)
{
m_modifiers = mod;
}
virtual int getModifiers()
{
return m_modifiers;
}
virtual void addModifier(Modifier mod)
{
int imod = getModifiers(); int imod = getModifiers();
imod |= mod; imod |= mod;
setModifiers(Modifier(imod)); setModifiers(Modifier(imod));
} }
virtual void setWeight(float w) {m_weight = w;} virtual void setWeight(float w)
{
m_weight = w;
}
friend class SearchData; friend class SearchData;
protected: protected:
string m_reason; std::string m_reason;
SClType m_tp; SClType m_tp;
SearchData *m_parentSearch; SearchData *m_parentSearch;
bool m_haveWildCards; bool m_haveWildCards;
Modifier m_modifiers; Modifier m_modifiers;
float m_weight; float m_weight;
private: private:
SearchDataClause(const SearchDataClause&) {} SearchDataClause(const SearchDataClause&)
SearchDataClause& operator=(const SearchDataClause&) { {
}
SearchDataClause& operator=(const SearchDataClause&)
{
return *this; return *this;
} }
}; };
@ -221,45 +234,37 @@ private:
*/ */
class SearchDataClauseSimple : public SearchDataClause { class SearchDataClauseSimple : public SearchDataClause {
public: public:
SearchDataClauseSimple(SClType tp, const string& txt, SearchDataClauseSimple(SClType tp, const std::string& txt,
const string& fld = string()) const std::string& fld = std::string())
: SearchDataClause(tp), m_text(txt), m_field(fld), m_slack(0) { : SearchDataClause(tp), m_text(txt), m_field(fld)
m_haveWildCards = (txt.find_first_of(cstr_minwilds) != string::npos); {
m_haveWildCards =
(txt.find_first_of(cstr_minwilds) != std::string::npos);
} }
virtual ~SearchDataClauseSimple() {} virtual ~SearchDataClauseSimple()
{
}
/** Translate to Xapian query */ /** Translate to Xapian query */
virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang); virtual bool toNativeQuery(Rcl::Db &, void *, const std::string& stemlang);
/** Retrieve query terms and term groups. This is used for highlighting */ virtual void getTerms(HighlightData& hldata) const
virtual bool getTerms(vector<string>& terms, /* Single terms */
vector<vector<string> >& groups, /* Prox grps */
vector<int>& gslks) const /* Prox slacks */
{ {
terms.insert(terms.end(), m_terms.begin(), m_terms.end()); hldata.append(m_hldata);
groups.insert(groups.end(), m_groups.begin(), m_groups.end());
gslks.insert(gslks.end(), m_groups.size(), m_slack);
return true;
} }
virtual void getUTerms(vector<string>& terms) const virtual const std::string& gettext()
{ {
terms.insert(terms.end(), m_uterms.begin(), m_uterms.end()); return m_text;
}
virtual const std::string& getfield()
{
return m_field;
} }
virtual const string& gettext() {return m_text;}
virtual const string& getfield() {return m_field;}
protected: protected:
string m_text; // Raw user entry text. std::string m_text; // Raw user entry text.
string m_field; // Field specification if any std::string m_field; // Field specification if any
// Single terms and phrases resulting from breaking up m_text; HighlightData m_hldata;
// valid after toNativeQuery() call
vector<string> m_terms;
vector<vector<string> > m_groups;
// User terms before expansion
vector<string> m_uterms;
// Declare m_slack here. Always 0, but allows getTerms to work for
// SearchDataClauseDist
int m_slack;
}; };
/** /**
@ -272,29 +277,39 @@ protected:
*/ */
class SearchDataClauseFilename : public SearchDataClauseSimple { class SearchDataClauseFilename : public SearchDataClauseSimple {
public: public:
SearchDataClauseFilename(const string& txt) SearchDataClauseFilename(const std::string& txt)
: SearchDataClauseSimple(SCLT_FILENAME, txt) { : SearchDataClauseSimple(SCLT_FILENAME, txt)
{
// File name searches don't count when looking for wild cards. // File name searches don't count when looking for wild cards.
m_haveWildCards = false; m_haveWildCards = false;
} }
virtual ~SearchDataClauseFilename() {}
virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang); virtual ~SearchDataClauseFilename()
{
}
virtual bool toNativeQuery(Rcl::Db &, void *, const std::string& stemlang);
}; };
/** /**
* A clause coming from a NEAR or PHRASE entry field. There is only one * A clause coming from a NEAR or PHRASE entry field. There is only one
* string group, and a specified distance, which applies to it. * std::string group, and a specified distance, which applies to it.
*/ */
class SearchDataClauseDist : public SearchDataClauseSimple { class SearchDataClauseDist : public SearchDataClauseSimple {
public: public:
SearchDataClauseDist(SClType tp, const string& txt, int slack, SearchDataClauseDist(SClType tp, const std::string& txt, int slack,
const string& fld = string()) const std::string& fld = std::string())
: SearchDataClauseSimple(tp, txt, fld) {m_slack = slack;} : SearchDataClauseSimple(tp, txt, fld), m_slack(slack)
virtual ~SearchDataClauseDist() {} {
}
virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang); virtual ~SearchDataClauseDist()
{
}
// m_slack is declared in SearchDataClauseSimple virtual bool toNativeQuery(Rcl::Db &, void *, const std::string& stemlang);
private:
int m_slack;
}; };
/** Subquery */ /** Subquery */
@ -302,15 +317,28 @@ class SearchDataClauseSub : public SearchDataClause {
public: public:
// We take charge of the SearchData * and will delete it. // We take charge of the SearchData * and will delete it.
SearchDataClauseSub(SClType tp, RefCntr<SearchData> sub) SearchDataClauseSub(SClType tp, RefCntr<SearchData> sub)
: SearchDataClause(tp), m_sub(sub) {} : SearchDataClause(tp), m_sub(sub)
virtual ~SearchDataClauseSub() {} {
virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang); }
virtual bool getTerms(vector<string>&, vector<vector<string> >&,
vector<int>&) const; virtual ~SearchDataClauseSub()
virtual void getUTerms(vector<string>&) const; {
}
virtual bool toNativeQuery(Rcl::Db &db, void *p, const std::string&)
{
return m_sub->toNativeQuery(db, p);
}
virtual void getTerms(HighlightData& hldata) const
{
m_sub.getconstptr()->getTerms(hldata);
}
protected: protected:
RefCntr<SearchData> m_sub; RefCntr<SearchData> m_sub;
}; };
} // Namespace Rcl } // Namespace Rcl
#endif /* _SEARCHDATA_H_INCLUDED_ */ #endif /* _SEARCHDATA_H_INCLUDED_ */

54
src/utils/hldata.h Normal file
View File

@ -0,0 +1,54 @@
#ifndef _hldata_h_included_
#define _hldata_h_included_
#include <vector>
#include <string>
#include <set>
/** Data about the user search terms and their expansions. This is used
 * mostly for highlighting result text and for walking the matches.
 */
struct HighlightData {
    /** User terms, except the ones which contain wildcards.
     * The list is meant for orthographic suggestions. The terms are
     * nevertheless unaccented and lowercased, because they get compared
     * to the dictionary generated from the index term list (which is
     * unaccented).
     */
    std::set<std::string> uterms;

    /** Original user terms-or-groups, kept raw (diacritics and case
     * preserved). Used for displaying the matched terms or groups, ie
     * in relation with highlighting or skipping to the next match.
     */
    std::vector<std::vector<std::string> > ugroups;

    /** Processed/expanded terms and groups, used when looking for the
     * regions to highlight. A plain term is just a group with a single
     * entry. Everything in there is unaccented, and the list may hold
     * values produced from the original terms by stem or wildcard
     * expansion.
     */
    std::vector<std::vector<std::string> > groups;

    /** Slack for each group. Parallel to groups. */
    std::vector<int> slacks;

    /** For each entry in groups, the index in ugroups of the user entry
     * it was generated from. Parallel to groups. A single user term or
     * group may yield many processed/expanded terms or groups, and this
     * is how they are related back to it.
     */
    std::vector<unsigned int> grpsugidx;

    /** Empty all the containers. */
    void clear()
    {
        grpsugidx.clear();
        slacks.clear();
        groups.clear();
        ugroups.clear();
        uterms.clear();
    }

    /** Merge the contents of another HighlightData into this one. */
    void append(const HighlightData&);

    /** Append a printable dump of the contents to out (debug). */
    void toString(std::string& out);
};
#endif /* _hldata_h_included_ */

View File

@ -36,6 +36,7 @@
#include "smallut.h" #include "smallut.h"
#include "utf8iter.h" #include "utf8iter.h"
#include "hldata.h"
#ifndef NO_NAMESPACES #ifndef NO_NAMESPACES
using namespace std; using namespace std;
@ -1038,8 +1039,57 @@ void catstrerror(string *reason, const char *what, int _errno)
#endif #endif
} }
/**
 * Append a printable dump of the highlight data to out (debugging aid).
 *
 * The output lists the user terms, then the sizes of the groups /
 * grpsugidx / ugroups vectors, then every expanded group followed by its
 * slack. Each time the originating user group changes, the user group is
 * printed first as "(...) ->".
 *
 * @param out string the text is appended to; existing content is kept.
 */
void HighlightData::toString(std::string& out)
{
    out.append("\nUser terms (orthograph): ");
    for (std::set<std::string>::const_iterator it = uterms.begin();
         it != uterms.end(); it++) {
        out.append(" [").append(*it).append("]");
    }

    out.append("\nGroups: ");
    char cbuf[200];
    snprintf(cbuf, sizeof(cbuf),
             "Groups size %d grpsugidx size %d ugroups size %d",
             int(groups.size()), int(grpsugidx.size()), int(ugroups.size()));
    out.append(cbuf);

    // Index of the user group whose header was last printed. The initial
    // value is a sentinel so that the first group prints its header.
    unsigned int ugidx = (unsigned int)-1;
    for (unsigned int i = 0; i < groups.size(); i++) {
        // grpsugidx and slacks are documented as parallel to groups, but
        // this is a debug printer: never index past the end of a vector
        // if the structure happens to be inconsistent.
        if (i < grpsugidx.size() && ugidx != grpsugidx[i]) {
            ugidx = grpsugidx[i];
            out.append("\n(");
            if (ugidx < ugroups.size()) {
                for (unsigned int j = 0; j < ugroups[ugidx].size(); j++) {
                    out.append("[").append(ugroups[ugidx][j]).append("] ");
                }
            }
            out.append(") ->");
        }
        out.append(" {");
        for (unsigned int j = 0; j < groups[i].size(); j++) {
            out.append("[").append(groups[i][j]).append("]");
        }
        out.append("}");
        if (i < slacks.size()) {
            snprintf(cbuf, sizeof(cbuf), "%d", slacks[i]);
            out.append(cbuf);
        }
    }
    out.append("\n");
}
/**
 * Merge the contents of hl into this object.
 *
 * The user terms are added to the set, and the user groups, expanded
 * groups and slacks are appended at the end of the matching vectors.
 * The grpsugidx values from hl are relative to hl.ugroups, so they are
 * shifted by the number of user groups which were already stored here.
 */
void HighlightData::append(const HighlightData& hl)
{
    // Must be taken before ugroups grows: offset for the incoming indices.
    const size_t ugbase = ugroups.size();

    uterms.insert(hl.uterms.begin(), hl.uterms.end());
    ugroups.insert(ugroups.end(), hl.ugroups.begin(), hl.ugroups.end());
    groups.insert(groups.end(), hl.groups.begin(), hl.groups.end());
    slacks.insert(slacks.end(), hl.slacks.begin(), hl.slacks.end());

    const size_t incoming = hl.grpsugidx.size();
    for (size_t i = 0; i < incoming; i++) {
        grpsugidx.push_back(hl.grpsugidx[i] + ugbase);
    }
}
#else // TEST_SMALLUT
#include <string> #include <string>
using namespace std; using namespace std;