defined data structure to pass around the search term description used for highlighting and other
This commit is contained in:
parent
ebdd6faaf5
commit
dc7b3420a0
@ -686,12 +686,12 @@ class LoadThread : public QThread {
|
|||||||
/* A thread to convert to rich text (mark search terms) */
|
/* A thread to convert to rich text (mark search terms) */
|
||||||
class ToRichThread : public QThread {
|
class ToRichThread : public QThread {
|
||||||
string &in;
|
string &in;
|
||||||
const HiliteData &hdata;
|
const HighlightData &hdata;
|
||||||
list<string> &out;
|
list<string> &out;
|
||||||
int loglevel;
|
int loglevel;
|
||||||
PlainToRichQtPreview *ptr;
|
PlainToRichQtPreview *ptr;
|
||||||
public:
|
public:
|
||||||
ToRichThread(string &i, const HiliteData& hd, list<string> &o,
|
ToRichThread(string &i, const HighlightData& hd, list<string> &o,
|
||||||
PlainToRichQtPreview *_ptr)
|
PlainToRichQtPreview *_ptr)
|
||||||
: in(i), hdata(hd), out(o), ptr(_ptr)
|
: in(i), hdata(hd), out(o), ptr(_ptr)
|
||||||
{
|
{
|
||||||
|
|||||||
@ -91,7 +91,7 @@ class Preview : public QWidget {
|
|||||||
public:
|
public:
|
||||||
|
|
||||||
Preview(int sid, // Search Id
|
Preview(int sid, // Search Id
|
||||||
const HiliteData& hdata) // Search terms etc. for highlighting
|
const HighlightData& hdata) // Search terms etc. for highlighting
|
||||||
: QWidget(0), m_searchId(sid), m_hData(hdata)
|
: QWidget(0), m_searchId(sid), m_hData(hdata)
|
||||||
{
|
{
|
||||||
init();
|
init();
|
||||||
@ -141,7 +141,7 @@ private:
|
|||||||
bool m_canBeep;
|
bool m_canBeep;
|
||||||
bool m_loading;
|
bool m_loading;
|
||||||
QWidget *m_currentW;
|
QWidget *m_currentW;
|
||||||
HiliteData m_hData;
|
HighlightData m_hData;
|
||||||
bool m_justCreated; // First tab create is different
|
bool m_justCreated; // First tab create is different
|
||||||
bool m_haveAnchors; // Search terms are marked in text
|
bool m_haveAnchors; // Search terms are marked in text
|
||||||
int m_lastAnchor; // Number of last anchor. Then rewind to 1
|
int m_lastAnchor; // Number of last anchor. Then rewind to 1
|
||||||
|
|||||||
@ -1220,8 +1220,8 @@ void RclMain::startPreview(int docnum, Rcl::Doc doc, int mod)
|
|||||||
curPreview = 0;
|
curPreview = 0;
|
||||||
}
|
}
|
||||||
if (curPreview == 0) {
|
if (curPreview == 0) {
|
||||||
HiliteData hdata;
|
HighlightData hdata;
|
||||||
m_source->getTerms(hdata.terms, hdata.groups, hdata.gslks);
|
m_source->getTerms(hdata);
|
||||||
curPreview = new Preview(reslist->listId(), hdata);
|
curPreview = new Preview(reslist->listId(), hdata);
|
||||||
|
|
||||||
if (curPreview == 0) {
|
if (curPreview == 0) {
|
||||||
@ -1284,7 +1284,7 @@ void RclMain::updateIdxForDocs(vector<Rcl::Doc>& docs)
|
|||||||
*/
|
*/
|
||||||
void RclMain::startPreview(Rcl::Doc doc)
|
void RclMain::startPreview(Rcl::Doc doc)
|
||||||
{
|
{
|
||||||
Preview *preview = new Preview(0, HiliteData());
|
Preview *preview = new Preview(0, HighlightData());
|
||||||
if (preview == 0) {
|
if (preview == 0) {
|
||||||
QMessageBox::warning(0, tr("Warning"),
|
QMessageBox::warning(0, tr("Warning"),
|
||||||
tr("Can't create preview window"),
|
tr("Can't create preview window"),
|
||||||
|
|||||||
@ -299,8 +299,8 @@ void RecollModel::setDocSource(RefCntr<DocSequence> nsource)
|
|||||||
m_source = RefCntr<DocSequence>();
|
m_source = RefCntr<DocSequence>();
|
||||||
} else {
|
} else {
|
||||||
m_source = RefCntr<DocSequence>(new DocSource(theconfig, nsource));
|
m_source = RefCntr<DocSequence>(new DocSource(theconfig, nsource));
|
||||||
m_hdata.reset();
|
m_hdata.clear();
|
||||||
m_source->getTerms(m_hdata.terms, m_hdata.groups, m_hdata.gslks);
|
m_source->getTerms(m_hdata);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -19,6 +19,10 @@
|
|||||||
|
|
||||||
#include <Qt>
|
#include <Qt>
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <map>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
#include "ui_restable.h"
|
#include "ui_restable.h"
|
||||||
#include "refcntr.h"
|
#include "refcntr.h"
|
||||||
#include "docseq.h"
|
#include "docseq.h"
|
||||||
@ -26,7 +30,7 @@
|
|||||||
|
|
||||||
class ResTable;
|
class ResTable;
|
||||||
|
|
||||||
typedef string (FieldGetter)(const string& fldname, const Rcl::Doc& doc);
|
typedef std::string (FieldGetter)(const std::string& fldname, const Rcl::Doc& doc);
|
||||||
|
|
||||||
class RecollModel : public QAbstractTableModel {
|
class RecollModel : public QAbstractTableModel {
|
||||||
|
|
||||||
@ -49,15 +53,15 @@ public:
|
|||||||
virtual void setDocSource(RefCntr<DocSequence> nsource);
|
virtual void setDocSource(RefCntr<DocSequence> nsource);
|
||||||
virtual RefCntr<DocSequence> getDocSource() {return m_source;}
|
virtual RefCntr<DocSequence> getDocSource() {return m_source;}
|
||||||
virtual void deleteColumn(int);
|
virtual void deleteColumn(int);
|
||||||
virtual const vector<string>& getFields() {return m_fields;}
|
virtual const std::vector<std::string>& getFields() {return m_fields;}
|
||||||
virtual const map<string, QString>& getAllFields()
|
virtual const std::map<std::string, QString>& getAllFields()
|
||||||
{
|
{
|
||||||
return o_displayableFields;
|
return o_displayableFields;
|
||||||
}
|
}
|
||||||
virtual void addColumn(int, const string&);
|
virtual void addColumn(int, const std::string&);
|
||||||
// Some column name are aliases/translator for base document field
|
// Some column name are aliases/translator for base document field
|
||||||
// (ie: date, datetime->mtime). Help deal with this:
|
// (ie: date, datetime->mtime). Help deal with this:
|
||||||
virtual string baseField(const string&);
|
virtual std::string baseField(const std::string&);
|
||||||
|
|
||||||
// Ignore sort() call because
|
// Ignore sort() call because
|
||||||
virtual void setIgnoreSort(bool onoff) {m_ignoreSort = onoff;}
|
virtual void setIgnoreSort(bool onoff) {m_ignoreSort = onoff;}
|
||||||
@ -69,12 +73,12 @@ signals:
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
mutable RefCntr<DocSequence> m_source;
|
mutable RefCntr<DocSequence> m_source;
|
||||||
vector<string> m_fields;
|
std::vector<std::string> m_fields;
|
||||||
vector<FieldGetter*> m_getters;
|
std::vector<FieldGetter*> m_getters;
|
||||||
static map<string, QString> o_displayableFields;
|
static std::map<std::string, QString> o_displayableFields;
|
||||||
bool m_ignoreSort;
|
bool m_ignoreSort;
|
||||||
FieldGetter* chooseGetter(const string&);
|
FieldGetter* chooseGetter(const std::string&);
|
||||||
HiliteData m_hdata;
|
HighlightData m_hdata;
|
||||||
};
|
};
|
||||||
|
|
||||||
class ResTable;
|
class ResTable;
|
||||||
|
|||||||
@ -19,19 +19,16 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <list>
|
#include <list>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#ifndef NO_NAMESPACES
|
|
||||||
using std::string;
|
|
||||||
using std::list;
|
|
||||||
using std::vector;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include "rcldoc.h"
|
#include "rcldoc.h"
|
||||||
#include "refcntr.h"
|
#include "refcntr.h"
|
||||||
|
#include "hldata.h"
|
||||||
|
|
||||||
// A result list entry.
|
// A result list entry.
|
||||||
struct ResListEntry {
|
struct ResListEntry {
|
||||||
Rcl::Doc doc;
|
Rcl::Doc doc;
|
||||||
string subHeader;
|
std::string subHeader;
|
||||||
};
|
};
|
||||||
|
|
||||||
/** Sort specification. */
|
/** Sort specification. */
|
||||||
@ -40,7 +37,7 @@ class DocSeqSortSpec {
|
|||||||
DocSeqSortSpec() : desc(false) {}
|
DocSeqSortSpec() : desc(false) {}
|
||||||
bool isNotNull() const {return !field.empty();}
|
bool isNotNull() const {return !field.empty();}
|
||||||
void reset() {field.erase();}
|
void reset() {field.erase();}
|
||||||
string field;
|
std::string field;
|
||||||
bool desc;
|
bool desc;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -50,12 +47,12 @@ class DocSeqFiltSpec {
|
|||||||
public:
|
public:
|
||||||
DocSeqFiltSpec() {}
|
DocSeqFiltSpec() {}
|
||||||
enum Crit {DSFS_MIMETYPE, DSFS_QLANG, DSFS_PASSALL};
|
enum Crit {DSFS_MIMETYPE, DSFS_QLANG, DSFS_PASSALL};
|
||||||
void orCrit(Crit crit, const string& value) {
|
void orCrit(Crit crit, const std::string& value) {
|
||||||
crits.push_back(crit);
|
crits.push_back(crit);
|
||||||
values.push_back(value);
|
values.push_back(value);
|
||||||
}
|
}
|
||||||
std::vector<Crit> crits;
|
std::vector<Crit> crits;
|
||||||
std::vector<string> values;
|
std::vector<std::string> values;
|
||||||
void reset() {crits.clear(); values.clear();}
|
void reset() {crits.clear(); values.clear();}
|
||||||
bool isNotNull() const {return crits.size() != 0;}
|
bool isNotNull() const {return crits.size() != 0;}
|
||||||
};
|
};
|
||||||
@ -73,7 +70,7 @@ class DocSeqFiltSpec {
|
|||||||
*/
|
*/
|
||||||
class DocSequence {
|
class DocSequence {
|
||||||
public:
|
public:
|
||||||
DocSequence(const string &t) : m_title(t) {}
|
DocSequence(const std::string &t) : m_title(t) {}
|
||||||
virtual ~DocSequence() {}
|
virtual ~DocSequence() {}
|
||||||
|
|
||||||
/** Get document at given rank.
|
/** Get document at given rank.
|
||||||
@ -84,16 +81,17 @@ class DocSequence {
|
|||||||
* inside history)
|
* inside history)
|
||||||
* @return true if ok, false for error or end of data
|
* @return true if ok, false for error or end of data
|
||||||
*/
|
*/
|
||||||
virtual bool getDoc(int num, Rcl::Doc &doc, string *sh = 0) = 0;
|
virtual bool getDoc(int num, Rcl::Doc &doc, std::string *sh = 0) = 0;
|
||||||
|
|
||||||
/** Get next page of documents. This accumulates entries into the result
|
/** Get next page of documents. This accumulates entries into the result
|
||||||
* list parameter (doesn't reset it). */
|
* list parameter (doesn't reset it). */
|
||||||
virtual int getSeqSlice(int offs, int cnt, vector<ResListEntry>& result);
|
virtual int getSeqSlice(int offs, int cnt,
|
||||||
|
std::vector<ResListEntry>& result);
|
||||||
|
|
||||||
/** Get abstract for document. This is special because it may take time.
|
/** Get abstract for document. This is special because it may take time.
|
||||||
* The default is to return the input doc's abstract fields, but some
|
* The default is to return the input doc's abstract fields, but some
|
||||||
* sequences can compute a better value (ie: docseqdb) */
|
* sequences can compute a better value (ie: docseqdb) */
|
||||||
virtual bool getAbstract(Rcl::Doc& doc, vector<string>& abs) {
|
virtual bool getAbstract(Rcl::Doc& doc, std::vector<std::string>& abs) {
|
||||||
abs.push_back(doc.meta[Rcl::Doc::keyabs]);
|
abs.push_back(doc.meta[Rcl::Doc::keyabs]);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -103,25 +101,21 @@ class DocSequence {
|
|||||||
virtual int getResCnt() = 0;
|
virtual int getResCnt() = 0;
|
||||||
|
|
||||||
/** Get title for result list */
|
/** Get title for result list */
|
||||||
virtual string title() {return m_title;}
|
virtual std::string title() {return m_title;}
|
||||||
|
|
||||||
/** Get description for underlying query */
|
/** Get description for underlying query */
|
||||||
virtual string getDescription() = 0;
|
virtual std::string getDescription() = 0;
|
||||||
|
|
||||||
/** Get search terms (for highlighting abstracts). Some sequences
|
/** Get search terms (for highlighting abstracts). Some sequences
|
||||||
* may have no associated search terms. Implement this for them. */
|
* may have no associated search terms. Implement this for them. */
|
||||||
virtual bool getTerms(vector<string>& terms,
|
virtual void getTerms(HighlightData& hld)
|
||||||
vector<vector<string> >& groups,
|
|
||||||
vector<int>& gslks)
|
|
||||||
{
|
{
|
||||||
terms.clear(); groups.clear(); gslks.clear(); return true;
|
hld.clear();
|
||||||
}
|
}
|
||||||
/** Get user-input terms (before stemming etc.) */
|
virtual std::list<std::string> expand(Rcl::Doc &)
|
||||||
virtual void getUTerms(vector<string>& terms)
|
|
||||||
{
|
{
|
||||||
terms.clear();
|
return std::list<std::string>();
|
||||||
}
|
}
|
||||||
virtual list<string> expand(Rcl::Doc &) {return list<string>();}
|
|
||||||
|
|
||||||
/** Optional functionality. */
|
/** Optional functionality. */
|
||||||
virtual bool canFilter() {return false;}
|
virtual bool canFilter() {return false;}
|
||||||
@ -130,16 +124,16 @@ class DocSequence {
|
|||||||
virtual bool setSortSpec(const DocSeqSortSpec &) {return false;}
|
virtual bool setSortSpec(const DocSeqSortSpec &) {return false;}
|
||||||
virtual RefCntr<DocSequence> getSourceSeq() {return RefCntr<DocSequence>();}
|
virtual RefCntr<DocSequence> getSourceSeq() {return RefCntr<DocSequence>();}
|
||||||
|
|
||||||
static void set_translations(const string& sort, const string& filt)
|
static void set_translations(const std::string& sort, const std::string& filt)
|
||||||
{
|
{
|
||||||
o_sort_trans = sort;
|
o_sort_trans = sort;
|
||||||
o_filt_trans = filt;
|
o_filt_trans = filt;
|
||||||
}
|
}
|
||||||
protected:
|
protected:
|
||||||
static string o_sort_trans;
|
static std::string o_sort_trans;
|
||||||
static string o_filt_trans;
|
static std::string o_filt_trans;
|
||||||
private:
|
private:
|
||||||
string m_title;
|
std::string m_title;
|
||||||
};
|
};
|
||||||
|
|
||||||
/** A modifier has a child sequence which does the real work and does
|
/** A modifier has a child sequence which does the real work and does
|
||||||
@ -152,25 +146,23 @@ public:
|
|||||||
{}
|
{}
|
||||||
virtual ~DocSeqModifier() {}
|
virtual ~DocSeqModifier() {}
|
||||||
|
|
||||||
virtual bool getAbstract(Rcl::Doc& doc, vector<string>& abs)
|
virtual bool getAbstract(Rcl::Doc& doc, std::vector<std::string>& abs)
|
||||||
{
|
{
|
||||||
if (m_seq.isNull())
|
if (m_seq.isNull())
|
||||||
return false;
|
return false;
|
||||||
return m_seq->getAbstract(doc, abs);
|
return m_seq->getAbstract(doc, abs);
|
||||||
}
|
}
|
||||||
virtual string getDescription()
|
virtual std::string getDescription()
|
||||||
{
|
{
|
||||||
if (m_seq.isNull())
|
if (m_seq.isNull())
|
||||||
return "";
|
return "";
|
||||||
return m_seq->getDescription();
|
return m_seq->getDescription();
|
||||||
}
|
}
|
||||||
virtual bool getTerms(vector<string>& terms,
|
virtual void getTerms(HighlightData& hld)
|
||||||
vector<vector<string> >& groups,
|
|
||||||
vector<int>& gslks)
|
|
||||||
{
|
{
|
||||||
if (m_seq.isNull())
|
if (m_seq.isNull())
|
||||||
return false;
|
return;
|
||||||
return m_seq->getTerms(terms, groups, gslks);
|
m_seq->getTerms(hld);
|
||||||
}
|
}
|
||||||
virtual bool getEnclosing(Rcl::Doc& doc, Rcl::Doc& pdoc)
|
virtual bool getEnclosing(Rcl::Doc& doc, Rcl::Doc& pdoc)
|
||||||
{
|
{
|
||||||
@ -178,13 +170,7 @@ public:
|
|||||||
return false;
|
return false;
|
||||||
return m_seq->getEnclosing(doc, pdoc);
|
return m_seq->getEnclosing(doc, pdoc);
|
||||||
}
|
}
|
||||||
virtual void getUTerms(vector<string>& terms)
|
virtual std::string title() {return m_seq->title();}
|
||||||
{
|
|
||||||
if (m_seq.isNull())
|
|
||||||
return;
|
|
||||||
m_seq->getUTerms(terms);
|
|
||||||
}
|
|
||||||
virtual string title() {return m_seq->title();}
|
|
||||||
virtual RefCntr<DocSequence> getSourceSeq() {return m_seq;}
|
virtual RefCntr<DocSequence> getSourceSeq() {return m_seq;}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
@ -203,7 +189,7 @@ public:
|
|||||||
virtual bool canSort() {return true;}
|
virtual bool canSort() {return true;}
|
||||||
virtual bool setFiltSpec(const DocSeqFiltSpec &);
|
virtual bool setFiltSpec(const DocSeqFiltSpec &);
|
||||||
virtual bool setSortSpec(const DocSeqSortSpec &);
|
virtual bool setSortSpec(const DocSeqSortSpec &);
|
||||||
virtual bool getDoc(int num, Rcl::Doc &doc, string *sh = 0)
|
virtual bool getDoc(int num, Rcl::Doc &doc, std::string *sh = 0)
|
||||||
{
|
{
|
||||||
if (m_seq.isNull())
|
if (m_seq.isNull())
|
||||||
return false;
|
return false;
|
||||||
@ -215,7 +201,7 @@ public:
|
|||||||
return 0;
|
return 0;
|
||||||
return m_seq->getResCnt();
|
return m_seq->getResCnt();
|
||||||
}
|
}
|
||||||
virtual string title();
|
virtual std::string title();
|
||||||
private:
|
private:
|
||||||
bool buildStack();
|
bool buildStack();
|
||||||
void stripStack();
|
void stripStack();
|
||||||
|
|||||||
@ -39,16 +39,9 @@ DocSequenceDb::~DocSequenceDb()
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DocSequenceDb::getTerms(vector<string>& terms,
|
void DocSequenceDb::getTerms(HighlightData& hld)
|
||||||
vector<vector<string> >& groups,
|
|
||||||
vector<int>& gslks)
|
|
||||||
{
|
{
|
||||||
return m_fsdata->getTerms(terms, groups, gslks);
|
m_fsdata->getTerms(hld);
|
||||||
}
|
|
||||||
|
|
||||||
void DocSequenceDb::getUTerms(vector<string>& terms)
|
|
||||||
{
|
|
||||||
m_sdata->getUTerms(terms);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
string DocSequenceDb::getDescription()
|
string DocSequenceDb::getDescription()
|
||||||
@ -180,5 +173,13 @@ bool DocSequenceDb::setQuery()
|
|||||||
return true;
|
return true;
|
||||||
m_rescnt = -1;
|
m_rescnt = -1;
|
||||||
m_needSetQuery = !m_q->setQuery(m_fsdata);
|
m_needSetQuery = !m_q->setQuery(m_fsdata);
|
||||||
|
|
||||||
|
if (0) {
|
||||||
|
HighlightData hld;
|
||||||
|
m_fsdata->getTerms(hld);
|
||||||
|
string str;
|
||||||
|
hld.toString(str);
|
||||||
|
fprintf(stderr, "DocSequenceDb::setQuery: terms: %s\n", str.c_str());
|
||||||
|
}
|
||||||
return !m_needSetQuery;
|
return !m_needSetQuery;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -30,10 +30,7 @@ class DocSequenceDb : public DocSequence {
|
|||||||
virtual ~DocSequenceDb();
|
virtual ~DocSequenceDb();
|
||||||
virtual bool getDoc(int num, Rcl::Doc &doc, string * = 0);
|
virtual bool getDoc(int num, Rcl::Doc &doc, string * = 0);
|
||||||
virtual int getResCnt();
|
virtual int getResCnt();
|
||||||
virtual bool getTerms(vector<string>& terms,
|
virtual void getTerms(HighlightData& hld);
|
||||||
vector<vector<string> >& groups,
|
|
||||||
vector<int>& gslks);
|
|
||||||
virtual void getUTerms(vector<string>& terms);
|
|
||||||
virtual bool getAbstract(Rcl::Doc &doc, vector<string>&);
|
virtual bool getAbstract(Rcl::Doc &doc, vector<string>&);
|
||||||
virtual bool getEnclosing(Rcl::Doc& doc, Rcl::Doc& pdoc);
|
virtual bool getEnclosing(Rcl::Doc& doc, Rcl::Doc& pdoc);
|
||||||
virtual string getDescription();
|
virtual string getDescription();
|
||||||
|
|||||||
@ -24,12 +24,10 @@
|
|||||||
#include <map>
|
#include <map>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
|
||||||
#ifndef NO_NAMESPACES
|
|
||||||
using std::vector;
|
using std::vector;
|
||||||
using std::list;
|
using std::list;
|
||||||
using std::pair;
|
using std::pair;
|
||||||
using std::set;
|
using std::set;
|
||||||
#endif /* NO_NAMESPACES */
|
|
||||||
|
|
||||||
#include "rcldb.h"
|
#include "rcldb.h"
|
||||||
#include "rclconfig.h"
|
#include "rclconfig.h"
|
||||||
@ -51,28 +49,30 @@ static string vecStringToString(const vector<string>& t)
|
|||||||
return sterms;
|
return sterms;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Text splitter callback used to take note of the position of query terms
|
// Text splitter used to take note of the position of query terms
|
||||||
// inside the result text. This is then used to insert highlight tags.
|
// inside the result text. This is then used to insert highlight tags.
|
||||||
class TextSplitPTR : public TextSplit {
|
class TextSplitPTR : public TextSplit {
|
||||||
public:
|
public:
|
||||||
|
|
||||||
// Out: begin and end byte positions of query terms/groups in text
|
// Out: begin and end byte positions of query terms/groups in text
|
||||||
vector<pair<int, int> > tboffs;
|
vector<pair<int, int> > tboffs;
|
||||||
|
|
||||||
TextSplitPTR(const vector<string>& its,
|
TextSplitPTR(const HighlightData& hdata)
|
||||||
const vector<vector<string> >&groups,
|
: m_wcount(0), m_hdata(hdata)
|
||||||
const vector<int>& slacks)
|
|
||||||
: m_wcount(0), m_groups(groups), m_slacks(slacks)
|
|
||||||
{
|
{
|
||||||
for (vector<string>::const_iterator it = its.begin();
|
// We separate single terms and groups and extract the group
|
||||||
it != its.end(); it++) {
|
// terms for computing positions list before looking for group
|
||||||
m_terms.insert(*it);
|
// matches
|
||||||
}
|
|
||||||
for (vector<vector<string> >::const_iterator vit = m_groups.begin();
|
for (vector<vector<string> >::const_iterator vit = hdata.groups.begin();
|
||||||
vit != m_groups.end(); vit++) {
|
vit != hdata.groups.end(); vit++) {
|
||||||
for (vector<string>::const_iterator it = (*vit).begin();
|
if (vit->size() == 1) {
|
||||||
it != (*vit).end(); it++) {
|
m_terms.insert(vit->front());
|
||||||
m_gterms.insert(*it);
|
} else if (vit->size() > 1) {
|
||||||
|
for (vector<string>::const_iterator it = vit->begin();
|
||||||
|
it != vit->end(); it++) {
|
||||||
|
m_gterms.insert(*it);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -116,15 +116,16 @@ class TextSplitPTR : public TextSplit {
|
|||||||
private:
|
private:
|
||||||
virtual bool matchGroup(const vector<string>& terms, int dist);
|
virtual bool matchGroup(const vector<string>& terms, int dist);
|
||||||
|
|
||||||
|
// Word count. Used to call checkCancel from time to time.
|
||||||
int m_wcount;
|
int m_wcount;
|
||||||
|
|
||||||
// In: user query terms
|
// In: user query terms
|
||||||
set<string> m_terms;
|
set<string> m_terms;
|
||||||
|
|
||||||
// In: user query groups, for near/phrase searches.
|
// m_gterms holds all the terms in m_groups, as a set for quick lookup
|
||||||
const vector<vector<string> >& m_groups;
|
set<string> m_gterms;
|
||||||
const vector<int>& m_slacks;
|
|
||||||
set<string> m_gterms;
|
const HighlightData& m_hdata;
|
||||||
|
|
||||||
// group/near terms word positions.
|
// group/near terms word positions.
|
||||||
map<string, vector<int> > m_plists;
|
map<string, vector<int> > m_plists;
|
||||||
@ -294,10 +295,11 @@ public:
|
|||||||
// handle all groups as NEAR (ignore order).
|
// handle all groups as NEAR (ignore order).
|
||||||
bool TextSplitPTR::matchGroups()
|
bool TextSplitPTR::matchGroups()
|
||||||
{
|
{
|
||||||
vector<vector<string> >::const_iterator vit = m_groups.begin();
|
for (unsigned int i = 0; i < m_hdata.groups.size(); i++) {
|
||||||
vector<int>::const_iterator sit = m_slacks.begin();
|
if (m_hdata.groups[i].size() <= 1)
|
||||||
for (; vit != m_groups.end() && sit != m_slacks.end(); vit++, sit++) {
|
continue;
|
||||||
matchGroup(*vit, *sit + (*vit).size());
|
matchGroup(m_hdata.groups[i],
|
||||||
|
m_hdata.groups[i].size() + m_hdata.slacks[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sort regions by increasing start and decreasing width.
|
// Sort regions by increasing start and decreasing width.
|
||||||
@ -317,39 +319,22 @@ bool TextSplitPTR::matchGroups()
|
|||||||
// the input is html, the body is always a single output chunk.
|
// the input is html, the body is always a single output chunk.
|
||||||
bool PlainToRich::plaintorich(const string& in,
|
bool PlainToRich::plaintorich(const string& in,
|
||||||
list<string>& out, // Output chunk list
|
list<string>& out, // Output chunk list
|
||||||
const HiliteData& hdata,
|
const HighlightData& hdata,
|
||||||
int chunksize)
|
int chunksize)
|
||||||
{
|
{
|
||||||
Chrono chron;
|
Chrono chron;
|
||||||
const vector<string>& terms(hdata.terms);
|
|
||||||
const vector<vector<string> >& groups(hdata.groups);
|
|
||||||
const vector<int>& slacks(hdata.gslks);
|
|
||||||
|
|
||||||
if (0 && DebugLog::getdbl()->getlevel() >= DEBDEB0) {
|
|
||||||
string sterms = vecStringToString(terms);
|
|
||||||
LOGDEB0(("plaintorich: terms: %s\n", sterms.c_str()));
|
|
||||||
sterms.clear();
|
|
||||||
for (vector<vector<string> >::const_iterator vit = groups.begin();
|
|
||||||
vit != groups.end(); vit++) {
|
|
||||||
sterms += "GROUP: ";
|
|
||||||
sterms += vecStringToString(*vit);
|
|
||||||
sterms += "\n";
|
|
||||||
}
|
|
||||||
LOGDEB0(("plaintorich: groups:\n %s", sterms.c_str()));
|
|
||||||
LOGDEB2((" TEXT:[%s]\n", in.c_str()));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Compute the positions for the query terms. We use the text
|
// Compute the positions for the query terms. We use the text
|
||||||
// splitter to break the text into words, and compare the words to
|
// splitter to break the text into words, and compare the words to
|
||||||
// the search terms,
|
// the search terms,
|
||||||
TextSplitPTR splitter(terms, groups, slacks);
|
TextSplitPTR splitter(hdata);
|
||||||
// Note: the splitter returns the term locations in byte, not
|
// Note: the splitter returns the term locations in byte, not
|
||||||
// character, offsets.
|
// character, offsets.
|
||||||
splitter.text_to_words(in);
|
splitter.text_to_words(in);
|
||||||
LOGDEB2(("plaintorich: split done %d mS\n", chron.millis()));
|
LOGDEB2(("plaintorich: split done %d mS\n", chron.millis()));
|
||||||
|
|
||||||
// Compute the positions for NEAR and PHRASE groups.
|
// Compute the positions for NEAR and PHRASE groups.
|
||||||
splitter.matchGroups();
|
splitter.matchGroups();
|
||||||
|
LOGDEB2(("plaintorich: group match done %d mS\n", chron.millis()));
|
||||||
|
|
||||||
out.clear();
|
out.clear();
|
||||||
out.push_back("");
|
out.push_back("");
|
||||||
|
|||||||
@ -19,26 +19,8 @@
|
|||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <list>
|
#include <list>
|
||||||
using std::list;
|
|
||||||
using std::string;
|
|
||||||
|
|
||||||
/// Holder for plaintorich() input data: words and groups of words to
|
#include "hldata.h"
|
||||||
/// be highlighted
|
|
||||||
struct HiliteData {
|
|
||||||
// Single terms
|
|
||||||
vector<string> terms;
|
|
||||||
// NEAR and PHRASE elements
|
|
||||||
vector<vector<string> > groups;
|
|
||||||
// Group slacks (number of permitted non-matched words).
|
|
||||||
// Parallel vector to the above 'groups'
|
|
||||||
vector<int> gslks;
|
|
||||||
void reset()
|
|
||||||
{
|
|
||||||
terms.clear();
|
|
||||||
groups.clear();
|
|
||||||
gslks.clear();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A class for highlighting search results. Overridable methods allow
|
* A class for highlighting search results. Overridable methods allow
|
||||||
@ -83,21 +65,21 @@ public:
|
|||||||
* lowercase and unaccented.
|
* lowercase and unaccented.
|
||||||
* @param chunksize max size of chunks in output list
|
* @param chunksize max size of chunks in output list
|
||||||
*/
|
*/
|
||||||
virtual bool plaintorich(const string &in, list<string> &out,
|
virtual bool plaintorich(const std::string &in, std::list<std::string> &out,
|
||||||
const HiliteData& hdata,
|
const HighlightData& hdata,
|
||||||
int chunksize = 50000
|
int chunksize = 50000
|
||||||
);
|
);
|
||||||
|
|
||||||
/* Overridable output methods for headers, highlighting and marking tags */
|
/* Overridable output methods for headers, highlighting and marking tags */
|
||||||
virtual string header() {return snull;}
|
virtual std::string header() {return snull;}
|
||||||
virtual string startMatch() {return snull;}
|
virtual std::string startMatch() {return snull;}
|
||||||
virtual string endMatch() {return snull;}
|
virtual std::string endMatch() {return snull;}
|
||||||
virtual string startAnchor(int) {return snull;}
|
virtual std::string startAnchor(int) {return snull;}
|
||||||
virtual string endAnchor() {return snull;}
|
virtual std::string endAnchor() {return snull;}
|
||||||
virtual string startChunk() {return snull;}
|
virtual std::string startChunk() {return snull;}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
const string snull;
|
const std::string snull;
|
||||||
bool m_inputhtml;
|
bool m_inputhtml;
|
||||||
// Use <br> to break plain text lines (else caller has used a <pre> tag)
|
// Use <br> to break plain text lines (else caller has used a <pre> tag)
|
||||||
bool m_eolbr;
|
bool m_eolbr;
|
||||||
|
|||||||
@ -132,9 +132,8 @@ void ResListPager::resultPageFor(int docnum)
|
|||||||
m_respage = npage;
|
m_respage = npage;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ResListPager::displayDoc(RclConfig *config,
|
void ResListPager::displayDoc(RclConfig *config, int i, Rcl::Doc& doc,
|
||||||
int i, Rcl::Doc& doc, const HiliteData& hdata,
|
const HighlightData& hdata, const string& sh)
|
||||||
const string& sh)
|
|
||||||
{
|
{
|
||||||
ostringstream chunk;
|
ostringstream chunk;
|
||||||
int percent;
|
int percent;
|
||||||
@ -309,8 +308,9 @@ void ResListPager::displayPage(RclConfig *config)
|
|||||||
|
|
||||||
if (pageEmpty()) {
|
if (pageEmpty()) {
|
||||||
chunk << trans("<p><b>No results found</b><br>");
|
chunk << trans("<p><b>No results found</b><br>");
|
||||||
vector<string>uterms;
|
HighlightData hldata;
|
||||||
m_docSource->getUTerms(uterms);
|
m_docSource->getTerms(hldata);
|
||||||
|
vector<string> uterms(hldata.uterms.begin(), hldata.uterms.end());
|
||||||
if (!uterms.empty()) {
|
if (!uterms.empty()) {
|
||||||
map<string, vector<string> > spellings;
|
map<string, vector<string> > spellings;
|
||||||
suggest(uterms, spellings);
|
suggest(uterms, spellings);
|
||||||
@ -366,8 +366,8 @@ void ResListPager::displayPage(RclConfig *config)
|
|||||||
if (pageEmpty())
|
if (pageEmpty())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
HiliteData hdata;
|
HighlightData hdata;
|
||||||
m_docSource->getTerms(hdata.terms, hdata.groups, hdata.gslks);
|
m_docSource->getTerms(hdata);
|
||||||
|
|
||||||
// Emit data for result entry paragraph. Do it in chunks that make sense
|
// Emit data for result entry paragraph. Do it in chunks that make sense
|
||||||
// html-wise, else our client may get confused
|
// html-wise, else our client may get confused
|
||||||
|
|||||||
@ -19,14 +19,13 @@
|
|||||||
#define _reslistpager_h_included_
|
#define _reslistpager_h_included_
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
using std::vector;
|
|
||||||
|
|
||||||
#include "refcntr.h"
|
#include "refcntr.h"
|
||||||
#include "docseq.h"
|
#include "docseq.h"
|
||||||
|
#include "hldata.h"
|
||||||
|
|
||||||
class RclConfig;
|
class RclConfig;
|
||||||
class PlainToRich;
|
class PlainToRich;
|
||||||
struct HiliteData;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Manage a paged HTML result list.
|
* Manage a paged HTML result list.
|
||||||
@ -85,7 +84,7 @@ public:
|
|||||||
void resultPageFor(int docnum);
|
void resultPageFor(int docnum);
|
||||||
void displayPage(RclConfig *);
|
void displayPage(RclConfig *);
|
||||||
void displayDoc(RclConfig *, int idx, Rcl::Doc& doc,
|
void displayDoc(RclConfig *, int idx, Rcl::Doc& doc,
|
||||||
const HiliteData& hdata, const string& sh = "");
|
const HighlightData& hdata, const string& sh = "");
|
||||||
bool pageEmpty() {return m_respage.size() == 0;}
|
bool pageEmpty() {return m_respage.size() == 0;}
|
||||||
|
|
||||||
string queryDescription() {return m_docSource.isNull() ? "" :
|
string queryDescription() {return m_docSource.isNull() ? "" :
|
||||||
@ -112,8 +111,9 @@ public:
|
|||||||
virtual string pageTop() {return string();}
|
virtual string pageTop() {return string();}
|
||||||
virtual string headerContent() {return string();}
|
virtual string headerContent() {return string();}
|
||||||
virtual string iconUrl(RclConfig *, Rcl::Doc& doc);
|
virtual string iconUrl(RclConfig *, Rcl::Doc& doc);
|
||||||
virtual void suggest(const vector<string>,
|
virtual void suggest(const std::vector<std::string>,
|
||||||
map<string, vector<string> >& sugg) {
|
std::map<std::string, std::vector<std::string> >& sugg)
|
||||||
|
{
|
||||||
sugg.clear();
|
sugg.clear();
|
||||||
}
|
}
|
||||||
virtual string absSep() {return "…";}
|
virtual string absSep() {return "…";}
|
||||||
@ -126,7 +126,7 @@ private:
|
|||||||
bool m_hasNext;
|
bool m_hasNext;
|
||||||
PlainToRich *m_hiliter;
|
PlainToRich *m_hiliter;
|
||||||
RefCntr<DocSequence> m_docSource;
|
RefCntr<DocSequence> m_docSource;
|
||||||
vector<ResListEntry> m_respage;
|
std::vector<ResListEntry> m_respage;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* _reslistpager_h_included_ */
|
#endif /* _reslistpager_h_included_ */
|
||||||
|
|||||||
@ -498,23 +498,12 @@ bool SearchData::fileNameOnly()
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Extract all terms and term groups
|
// Extract all term data
|
||||||
bool SearchData::getTerms(vector<string>& terms,
|
void SearchData::getTerms(HighlightData &hld) const
|
||||||
vector<vector<string> >& groups,
|
|
||||||
vector<int>& gslks) const
|
|
||||||
{
|
{
|
||||||
for (qlist_cit_t it = m_query.begin(); it != m_query.end(); it++)
|
for (qlist_cit_t it = m_query.begin(); it != m_query.end(); it++)
|
||||||
(*it)->getTerms(terms, groups, gslks);
|
(*it)->getTerms(hld);
|
||||||
return true;
|
return;
|
||||||
}
|
|
||||||
// Extract user terms
|
|
||||||
void SearchData::getUTerms(vector<string>& terms) const
|
|
||||||
{
|
|
||||||
for (qlist_cit_t it = m_query.begin(); it != m_query.end(); it++)
|
|
||||||
(*it)->getUTerms(terms);
|
|
||||||
sort(terms.begin(), terms.end());
|
|
||||||
vector<string>::iterator it = unique(terms.begin(), terms.end());
|
|
||||||
terms.erase(it, terms.end());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Splitter callback for breaking a user string into simple terms and
|
// Splitter callback for breaking a user string into simple terms and
|
||||||
@ -590,10 +579,10 @@ private:
|
|||||||
// translating.
|
// translating.
|
||||||
class StringToXapianQ {
|
class StringToXapianQ {
|
||||||
public:
|
public:
|
||||||
StringToXapianQ(Db& db, const string& field,
|
StringToXapianQ(Db& db, HighlightData& hld, const string& field,
|
||||||
const string &stmlng, bool boostUser)
|
const string &stmlng, bool boostUser)
|
||||||
: m_db(db), m_field(field), m_stemlang(stmlng),
|
: m_db(db), m_field(field), m_stemlang(stmlng),
|
||||||
m_doBoostUserTerms(boostUser)
|
m_doBoostUserTerms(boostUser), m_hld(hld)
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
bool processUserString(const string &iq,
|
bool processUserString(const string &iq,
|
||||||
@ -601,20 +590,6 @@ public:
|
|||||||
vector<Xapian::Query> &pqueries,
|
vector<Xapian::Query> &pqueries,
|
||||||
const StopList &stops,
|
const StopList &stops,
|
||||||
int slack = 0, bool useNear = false);
|
int slack = 0, bool useNear = false);
|
||||||
// After processing the string: return search terms and term
|
|
||||||
// groups (ie: for highlighting)
|
|
||||||
bool getTerms(vector<string>& terms, vector<vector<string> >& groups)
|
|
||||||
{
|
|
||||||
terms.insert(terms.end(), m_terms.begin(), m_terms.end());
|
|
||||||
groups.insert(groups.end(), m_groups.begin(), m_groups.end());
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
bool getUTerms(vector<string>& terms)
|
|
||||||
{
|
|
||||||
terms.insert(terms.end(), m_uterms.begin(), m_uterms.end());
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void expandTerm(bool dont, const string& term, vector<string>& exp,
|
void expandTerm(bool dont, const string& term, vector<string>& exp,
|
||||||
string& sterm, const string& prefix);
|
string& sterm, const string& prefix);
|
||||||
@ -630,10 +605,7 @@ private:
|
|||||||
const string& m_field;
|
const string& m_field;
|
||||||
const string& m_stemlang;
|
const string& m_stemlang;
|
||||||
bool m_doBoostUserTerms;
|
bool m_doBoostUserTerms;
|
||||||
// Single terms and phrases resulting from breaking up text;
|
HighlightData& m_hld;
|
||||||
vector<string> m_uterms;
|
|
||||||
vector<string> m_terms;
|
|
||||||
vector<vector<string> > m_groups;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#if 1
|
#if 1
|
||||||
@ -647,7 +619,7 @@ static void listVector(const string& what, const vector<string>&l)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/** Expand stem and wildcards
|
/** Take simple term and expand stem and wildcards
|
||||||
*
|
*
|
||||||
* @param nostemexp don't perform stem expansion. This is mainly used to
|
* @param nostemexp don't perform stem expansion. This is mainly used to
|
||||||
* prevent stem expansion inside phrases (because the user probably
|
* prevent stem expansion inside phrases (because the user probably
|
||||||
@ -680,9 +652,11 @@ void StringToXapianQ::expandTerm(bool nostemexp,
|
|||||||
nostemexp = true;
|
nostemexp = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!haswild)
|
||||||
|
m_hld.uterms.insert(term);
|
||||||
|
|
||||||
if (nostemexp && !haswild) {
|
if (nostemexp && !haswild) {
|
||||||
sterm = term;
|
sterm = term;
|
||||||
m_uterms.push_back(sterm);
|
|
||||||
exp.resize(1);
|
exp.resize(1);
|
||||||
exp[0] = prefix + term;
|
exp[0] = prefix + term;
|
||||||
} else {
|
} else {
|
||||||
@ -692,7 +666,6 @@ void StringToXapianQ::expandTerm(bool nostemexp,
|
|||||||
m_field);
|
m_field);
|
||||||
} else {
|
} else {
|
||||||
sterm = term;
|
sterm = term;
|
||||||
m_uterms.push_back(sterm);
|
|
||||||
m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, res, -1,
|
m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, res, -1,
|
||||||
m_field);
|
m_field);
|
||||||
}
|
}
|
||||||
@ -701,7 +674,6 @@ void StringToXapianQ::expandTerm(bool nostemexp,
|
|||||||
exp.push_back(it->term);
|
exp.push_back(it->term);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
//listVector("ExpandTerm:uterms now: ", m_uterms);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Do distribution of string vectors: a,b c,d -> a,c a,d b,c b,d
|
// Do distribution of string vectors: a,b c,d -> a,c a,d b,c b,d
|
||||||
@ -753,12 +725,15 @@ void StringToXapianQ::processSimpleSpan(const string& span, bool nostemexp,
|
|||||||
}
|
}
|
||||||
|
|
||||||
expandTerm(nostemexp, span, exp, sterm, prefix);
|
expandTerm(nostemexp, span, exp, sterm, prefix);
|
||||||
|
|
||||||
// m_terms is used for highlighting, we don't want prefixes in there.
|
// Set up the highlight data. No prefix should go in there
|
||||||
for (vector<string>::const_iterator it = exp.begin();
|
for (vector<string>::const_iterator it = exp.begin();
|
||||||
it != exp.end(); it++) {
|
it != exp.end(); it++) {
|
||||||
m_terms.push_back(it->substr(prefix.size()));
|
m_hld.groups.push_back(vector<string>(1, it->substr(prefix.size())));
|
||||||
|
m_hld.slacks.push_back(0);
|
||||||
|
m_hld.grpsugidx.push_back(m_hld.ugroups.size() - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Push either term or OR of stem-expanded set
|
// Push either term or OR of stem-expanded set
|
||||||
Xapian::Query xq(Xapian::Query::OP_OR, exp.begin(), exp.end());
|
Xapian::Query xq(Xapian::Query::OP_OR, exp.begin(), exp.end());
|
||||||
|
|
||||||
@ -786,7 +761,9 @@ void StringToXapianQ::processPhraseOrNear(TextSplitQ *splitData,
|
|||||||
Xapian::Query::op op = useNear ? Xapian::Query::OP_NEAR :
|
Xapian::Query::op op = useNear ? Xapian::Query::OP_NEAR :
|
||||||
Xapian::Query::OP_PHRASE;
|
Xapian::Query::OP_PHRASE;
|
||||||
vector<Xapian::Query> orqueries;
|
vector<Xapian::Query> orqueries;
|
||||||
|
#ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF
|
||||||
bool hadmultiple = false;
|
bool hadmultiple = false;
|
||||||
|
#endif
|
||||||
vector<vector<string> >groups;
|
vector<vector<string> >groups;
|
||||||
|
|
||||||
string prefix;
|
string prefix;
|
||||||
@ -805,15 +782,19 @@ void StringToXapianQ::processPhraseOrNear(TextSplitQ *splitData,
|
|||||||
for (vector<string>::iterator it = splitData->terms.begin();
|
for (vector<string>::iterator it = splitData->terms.begin();
|
||||||
it != splitData->terms.end(); it++, nxit++) {
|
it != splitData->terms.end(); it++, nxit++) {
|
||||||
LOGDEB0(("ProcessPhrase: processing [%s]\n", it->c_str()));
|
LOGDEB0(("ProcessPhrase: processing [%s]\n", it->c_str()));
|
||||||
// Adjust when we do stem expansion. Not inside phrases, and
|
// Adjust when we do stem expansion. Not if disabled by
|
||||||
// some versions of xapian will accept only one OR clause
|
// caller, not inside phrases, and some versions of xapian
|
||||||
// inside NEAR, all others must be leafs.
|
// will accept only one OR clause inside NEAR.
|
||||||
bool nostemexp = *nxit || (op == Xapian::Query::OP_PHRASE) || hadmultiple;
|
bool nostemexp = *nxit || (op == Xapian::Query::OP_PHRASE)
|
||||||
|
#ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF
|
||||||
|
|| hadmultiple
|
||||||
|
#endif // single OR inside NEAR
|
||||||
|
;
|
||||||
|
|
||||||
string sterm;
|
string sterm;
|
||||||
vector<string> exp;
|
vector<string> exp;
|
||||||
expandTerm(nostemexp, *it, exp, sterm, prefix);
|
expandTerm(nostemexp, *it, exp, sterm, prefix);
|
||||||
LOGDEB0(("ProcessPhrase: exp size %d\n", exp.size()));
|
LOGDEB0(("ProcessPhraseOrNear: exp size %d\n", exp.size()));
|
||||||
listVector("", exp);
|
listVector("", exp);
|
||||||
// groups is used for highlighting, we don't want prefixes in there.
|
// groups is used for highlighting, we don't want prefixes in there.
|
||||||
vector<string> noprefs;
|
vector<string> noprefs;
|
||||||
@ -850,7 +831,13 @@ void StringToXapianQ::processPhraseOrNear(TextSplitQ *splitData,
|
|||||||
vector<vector<string> > allcombs;
|
vector<vector<string> > allcombs;
|
||||||
vector<string> comb;
|
vector<string> comb;
|
||||||
multiply_groups(groups.begin(), groups.end(), comb, allcombs);
|
multiply_groups(groups.begin(), groups.end(), comb, allcombs);
|
||||||
m_groups.insert(m_groups.end(), allcombs.begin(), allcombs.end());
|
|
||||||
|
// Insert the search groups and slacks in the highlight data, with
|
||||||
|
// a reference to the user entry that generated them:
|
||||||
|
m_hld.groups.insert(m_hld.groups.end(), allcombs.begin(), allcombs.end());
|
||||||
|
m_hld.slacks.insert(m_hld.slacks.end(), allcombs.size(), slack);
|
||||||
|
m_hld.grpsugidx.insert(m_hld.grpsugidx.end(), allcombs.size(),
|
||||||
|
m_hld.ugroups.size() - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Trim string beginning with ^ or ending with $ and convert to flags
|
// Trim string beginning with ^ or ending with $ and convert to flags
|
||||||
@ -875,7 +862,16 @@ static int stringToMods(string& s)
|
|||||||
* We just separate words and phrases, and do wildcard and stem expansion,
|
* We just separate words and phrases, and do wildcard and stem expansion,
|
||||||
*
|
*
|
||||||
* This is used to process data entered into an OR/AND/NEAR/PHRASE field of
|
* This is used to process data entered into an OR/AND/NEAR/PHRASE field of
|
||||||
* the GUI.
|
* the GUI (in the case of NEAR/PHRASE, clausedist adds dquotes to the user
|
||||||
|
* entry).
|
||||||
|
*
|
||||||
|
* This appears awful, and it would seem that the split into
|
||||||
|
* terms/phrases should be performed in the upper layer so that we
|
||||||
|
* only receive pure term or near/phrase pure elements here, but in
|
||||||
|
* fact there are things that would appear like terms to naive code,
|
||||||
|
* and which may actually be turned into phrases (i.e. tom:jerry),
|
||||||
|
* in a manner which intimately depends on the index implementation,
|
||||||
|
* so that it makes sense to process this here.
|
||||||
*
|
*
|
||||||
* The final list contains one query for each term or phrase
|
* The final list contains one query for each term or phrase
|
||||||
* - Elements corresponding to a stem-expanded part are an OP_OR
|
* - Elements corresponding to a stem-expanded part are an OP_OR
|
||||||
@ -895,9 +891,6 @@ bool StringToXapianQ::processUserString(const string &iq,
|
|||||||
{
|
{
|
||||||
LOGDEB(("StringToXapianQ:: query string: [%s], slack %d, near %d\n", iq.c_str(), slack, useNear));
|
LOGDEB(("StringToXapianQ:: query string: [%s], slack %d, near %d\n", iq.c_str(), slack, useNear));
|
||||||
ermsg.erase();
|
ermsg.erase();
|
||||||
m_uterms.clear();
|
|
||||||
m_terms.clear();
|
|
||||||
m_groups.clear();
|
|
||||||
|
|
||||||
// Simple whitespace-split input into user-level words and
|
// Simple whitespace-split input into user-level words and
|
||||||
// double-quoted phrases: word1 word2 "this is a phrase".
|
// double-quoted phrases: word1 word2 "this is a phrase".
|
||||||
@ -952,10 +945,12 @@ bool StringToXapianQ::processUserString(const string &iq,
|
|||||||
case 0:
|
case 0:
|
||||||
continue;// ??
|
continue;// ??
|
||||||
case 1:
|
case 1:
|
||||||
|
m_hld.ugroups.push_back(vector<string>(1, *it));
|
||||||
processSimpleSpan(splitter.terms.front(),
|
processSimpleSpan(splitter.terms.front(),
|
||||||
splitter.nostemexps.front(), pqueries);
|
splitter.nostemexps.front(), pqueries);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
m_hld.ugroups.push_back(vector<string>(1, *it));
|
||||||
processPhraseOrNear(&splitter, pqueries, useNear, slack, mods);
|
processPhraseOrNear(&splitter, pqueries, useNear, slack, mods);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -984,8 +979,6 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p,
|
|||||||
LOGDEB2(("SearchDataClauseSimple::toNativeQuery: stemlang [%s]\n",
|
LOGDEB2(("SearchDataClauseSimple::toNativeQuery: stemlang [%s]\n",
|
||||||
stemlang.c_str()));
|
stemlang.c_str()));
|
||||||
|
|
||||||
m_terms.clear();
|
|
||||||
m_groups.clear();
|
|
||||||
Xapian::Query *qp = (Xapian::Query *)p;
|
Xapian::Query *qp = (Xapian::Query *)p;
|
||||||
*qp = Xapian::Query();
|
*qp = Xapian::Query();
|
||||||
|
|
||||||
@ -1007,16 +1000,14 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p,
|
|||||||
(m_parentSearch && !m_parentSearch->haveWildCards()) ||
|
(m_parentSearch && !m_parentSearch->haveWildCards()) ||
|
||||||
(m_parentSearch == 0 && !m_haveWildCards);
|
(m_parentSearch == 0 && !m_haveWildCards);
|
||||||
|
|
||||||
StringToXapianQ tr(db, m_field, l_stemlang, doBoostUserTerm);
|
StringToXapianQ tr(db, m_hldata, m_field, l_stemlang, doBoostUserTerm);
|
||||||
if (!tr.processUserString(m_text, m_reason, pqueries, db.getStopList()))
|
if (!tr.processUserString(m_text, m_reason, pqueries, db.getStopList()))
|
||||||
return false;
|
return false;
|
||||||
if (pqueries.empty()) {
|
if (pqueries.empty()) {
|
||||||
LOGERR(("SearchDataClauseSimple: resolved to null query\n"));
|
LOGERR(("SearchDataClauseSimple: resolved to null query\n"));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
tr.getTerms(m_terms, m_groups);
|
|
||||||
tr.getUTerms(m_uterms);
|
|
||||||
//listVector("SearchDataClauseSimple: Uterms: ", m_uterms);
|
|
||||||
*qp = Xapian::Query(op, pqueries.begin(), pqueries.end());
|
*qp = Xapian::Query(op, pqueries.begin(), pqueries.end());
|
||||||
if (m_weight != 1.0) {
|
if (m_weight != 1.0) {
|
||||||
*qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
|
*qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
|
||||||
@ -1056,8 +1047,6 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
|
|||||||
const string& l_stemlang = (m_modifiers&SDCM_NOSTEMMING)? cstr_null:
|
const string& l_stemlang = (m_modifiers&SDCM_NOSTEMMING)? cstr_null:
|
||||||
stemlang;
|
stemlang;
|
||||||
LOGDEB(("SearchDataClauseDist::toNativeQuery\n"));
|
LOGDEB(("SearchDataClauseDist::toNativeQuery\n"));
|
||||||
m_terms.clear();
|
|
||||||
m_groups.clear();
|
|
||||||
|
|
||||||
Xapian::Query *qp = (Xapian::Query *)p;
|
Xapian::Query *qp = (Xapian::Query *)p;
|
||||||
*qp = Xapian::Query();
|
*qp = Xapian::Query();
|
||||||
@ -1080,7 +1069,7 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
|
|||||||
}
|
}
|
||||||
string s = cstr_dquote + m_text + cstr_dquote;
|
string s = cstr_dquote + m_text + cstr_dquote;
|
||||||
bool useNear = (m_tp == SCLT_NEAR);
|
bool useNear = (m_tp == SCLT_NEAR);
|
||||||
StringToXapianQ tr(db, m_field, l_stemlang, doBoostUserTerm);
|
StringToXapianQ tr(db, m_hldata, m_field, l_stemlang, doBoostUserTerm);
|
||||||
if (!tr.processUserString(s, m_reason, pqueries, db.getStopList(),
|
if (!tr.processUserString(s, m_reason, pqueries, db.getStopList(),
|
||||||
m_slack, useNear))
|
m_slack, useNear))
|
||||||
return false;
|
return false;
|
||||||
@ -1088,8 +1077,7 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
|
|||||||
LOGERR(("SearchDataClauseDist: resolved to null query\n"));
|
LOGERR(("SearchDataClauseDist: resolved to null query\n"));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
tr.getTerms(m_terms, m_groups);
|
|
||||||
tr.getUTerms(m_uterms);
|
|
||||||
*qp = *pqueries.begin();
|
*qp = *pqueries.begin();
|
||||||
if (m_weight != 1.0) {
|
if (m_weight != 1.0) {
|
||||||
*qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
|
*qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
|
||||||
@ -1097,21 +1085,4 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Translate subquery
|
|
||||||
bool SearchDataClauseSub::toNativeQuery(Rcl::Db &db, void *p, const string&)
|
|
||||||
{
|
|
||||||
return m_sub->toNativeQuery(db, p);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool SearchDataClauseSub::getTerms(vector<string>& terms,
|
|
||||||
vector<vector<string> >& groups,
|
|
||||||
vector<int>& gslks) const
|
|
||||||
{
|
|
||||||
return m_sub.getconstptr()->getTerms(terms, groups, gslks);
|
|
||||||
}
|
|
||||||
void SearchDataClauseSub::getUTerms(vector<string>& terms) const
|
|
||||||
{
|
|
||||||
m_sub.getconstptr()->getUTerms(terms);
|
|
||||||
}
|
|
||||||
|
|
||||||
} // Namespace Rcl
|
} // Namespace Rcl
|
||||||
|
|||||||
@ -31,14 +31,11 @@
|
|||||||
#include "refcntr.h"
|
#include "refcntr.h"
|
||||||
#include "smallut.h"
|
#include "smallut.h"
|
||||||
#include "cstr.h"
|
#include "cstr.h"
|
||||||
|
#include "hldata.h"
|
||||||
|
|
||||||
class RclConfig;
|
class RclConfig;
|
||||||
|
|
||||||
#ifndef NO_NAMESPACES
|
|
||||||
using std::vector;
|
|
||||||
using std::string;
|
|
||||||
namespace Rcl {
|
namespace Rcl {
|
||||||
#endif // NO_NAMESPACES
|
|
||||||
|
|
||||||
/** Search clause types */
|
/** Search clause types */
|
||||||
enum SClType {
|
enum SClType {
|
||||||
@ -50,33 +47,33 @@ enum SClType {
|
|||||||
class SearchDataClause;
|
class SearchDataClause;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
Data structure representing a Recoll user query, for translation
|
Data structure representing a Recoll user query, for translation
|
||||||
into a Xapian query tree. This could probably be better called a 'question'.
|
into a Xapian query tree. This could probably be better called a 'question'.
|
||||||
|
|
||||||
This is a list of search clauses combined through either OR or AND.
|
This is a list of search clauses combined through either OR or AND.
|
||||||
|
|
||||||
Clauses either reflect user entry in a query field: some text, a
|
Clauses either reflect user entry in a query field: some text, a
|
||||||
clause type (AND/OR/NEAR etc.), possibly a distance, or points to
|
clause type (AND/OR/NEAR etc.), possibly a distance, or points to
|
||||||
another SearchData representing a subquery.
|
another SearchData representing a subquery.
|
||||||
|
|
||||||
The content of each clause when added may not be fully parsed yet
|
The content of each clause when added may not be fully parsed yet
|
||||||
(may come directly from a gui field). It will be parsed and may be
|
(may come directly from a gui field). It will be parsed and may be
|
||||||
translated to several queries in the Xapian sense, for example
|
translated to several queries in the Xapian sense, for example
|
||||||
several terms and phrases as would result from
|
several terms and phrases as would result from
|
||||||
["this is a phrase" term1 term2] .
|
["this is a phrase" term1 term2] .
|
||||||
|
|
||||||
This is why the clauses also have an AND/OR/... type.
|
This is why the clauses also have an AND/OR/... type.
|
||||||
|
|
||||||
A phrase clause could be added either explicitly or using double quotes:
|
A phrase clause could be added either explicitly or using double quotes:
|
||||||
{SCLT_PHRASE, [this is a phrase]} or as {SCLT_XXX, ["this is a phrase"]}
|
{SCLT_PHRASE, [this is a phrase]} or as {SCLT_XXX, ["this is a phrase"]}
|
||||||
|
|
||||||
*/
|
*/
|
||||||
class SearchData {
|
class SearchData {
|
||||||
public:
|
public:
|
||||||
SearchData(SClType tp)
|
SearchData(SClType tp)
|
||||||
: m_tp(tp), m_topdirexcl(false), m_topdirweight(1.0),
|
: m_tp(tp), m_topdirexcl(false), m_topdirweight(1.0),
|
||||||
m_haveDates(false), m_maxSize(size_t(-1)),
|
m_haveDates(false), m_maxSize(size_t(-1)),
|
||||||
m_minSize(size_t(-1)), m_haveWildCards(false)
|
m_minSize(size_t(-1)), m_haveWildCards(false)
|
||||||
{
|
{
|
||||||
if (m_tp != SCLT_OR && m_tp != SCLT_AND)
|
if (m_tp != SCLT_OR && m_tp != SCLT_AND)
|
||||||
m_tp = SCLT_OR;
|
m_tp = SCLT_OR;
|
||||||
@ -108,7 +105,7 @@ public:
|
|||||||
bool maybeAddAutoPhrase(Rcl::Db &db, double threshold);
|
bool maybeAddAutoPhrase(Rcl::Db &db, double threshold);
|
||||||
|
|
||||||
/** Set/get top subdirectory for filtering results */
|
/** Set/get top subdirectory for filtering results */
|
||||||
void setTopdir(const string& t, bool excl = false, float w = 1.0)
|
void setTopdir(const std::string& t, bool excl = false, float w = 1.0)
|
||||||
{
|
{
|
||||||
m_topdir = t;
|
m_topdir = t;
|
||||||
m_topdirexcl = excl;
|
m_topdirexcl = excl;
|
||||||
@ -122,38 +119,37 @@ public:
|
|||||||
void setDateSpan(DateInterval *dip) {m_dates = *dip; m_haveDates = true;}
|
void setDateSpan(DateInterval *dip) {m_dates = *dip; m_haveDates = true;}
|
||||||
|
|
||||||
/** Add file type for filtering results */
|
/** Add file type for filtering results */
|
||||||
void addFiletype(const string& ft) {m_filetypes.push_back(ft);}
|
void addFiletype(const std::string& ft) {m_filetypes.push_back(ft);}
|
||||||
/** Add file type to not wanted list */
|
/** Add file type to not wanted list */
|
||||||
void remFiletype(const string& ft) {m_nfiletypes.push_back(ft);}
|
void remFiletype(const std::string& ft) {m_nfiletypes.push_back(ft);}
|
||||||
|
|
||||||
void setStemlang(const string& lang = "english") {m_stemlang = lang;}
|
void setStemlang(const std::string& lang = "english") {m_stemlang = lang;}
|
||||||
|
|
||||||
/** Retrieve error description */
|
/** Retrieve error description */
|
||||||
string getReason() {return m_reason;}
|
std::string getReason() {return m_reason;}
|
||||||
|
|
||||||
/** Get terms and phrase/near groups. Used in the GUI for highlighting
|
/** Return term expansion data. Mostly used by caller for highlighting
|
||||||
* The groups and gslks vectors are parallel and hold the phrases/near
|
|
||||||
* string groups and their associated slacks (distance in excess of group
|
|
||||||
* size)
|
|
||||||
*/
|
*/
|
||||||
bool getTerms(vector<string>& terms,
|
void getTerms(HighlightData& hldata) const;
|
||||||
vector<vector<string> >& groups, vector<int>& gslks) const;
|
|
||||||
/** Get user-input terms (before expansion etc.) */
|
|
||||||
void getUTerms(vector<string>& terms) const;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get/set the description field which is retrieved from xapian after
|
* Get/set the description field which is retrieved from xapian after
|
||||||
* initializing the query. It is stored here for usage in the GUI.
|
* initializing the query. It is stored here for usage in the GUI.
|
||||||
*/
|
*/
|
||||||
string getDescription() {return m_description;}
|
std::string getDescription() {return m_description;}
|
||||||
void setDescription(const string& d) {m_description = d;}
|
void setDescription(const std::string& d) {m_description = d;}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
SClType m_tp; // Only SCLT_AND or SCLT_OR here
|
// Combine type. Only SCLT_AND or SCLT_OR here
|
||||||
vector<SearchDataClause*> m_query;
|
SClType m_tp;
|
||||||
vector<string> m_filetypes; // Restrict to filetypes if set.
|
// Complex query descriptor
|
||||||
vector<string> m_nfiletypes; // Unwanted file types
|
std::vector<SearchDataClause*> m_query;
|
||||||
string m_topdir; // Restrict to subtree.
|
// Restricted set of filetypes if not empty.
|
||||||
|
std::vector<std::string> m_filetypes;
|
||||||
|
// Excluded set of file types if not empty
|
||||||
|
std::vector<std::string> m_nfiletypes;
|
||||||
|
// Restrict to subtree.
|
||||||
|
std::string m_topdir;
|
||||||
bool m_topdirexcl; // Invert meaning
|
bool m_topdirexcl; // Invert meaning
|
||||||
float m_topdirweight; // affect weight instead of filter
|
float m_topdirweight; // affect weight instead of filter
|
||||||
bool m_haveDates;
|
bool m_haveDates;
|
||||||
@ -162,11 +158,11 @@ private:
|
|||||||
size_t m_minSize;
|
size_t m_minSize;
|
||||||
// Printable expanded version of the complete query, retrieved/set
|
// Printable expanded version of the complete query, retrieved/set
|
||||||
// from rcldb after the Xapian::setQuery() call
|
// from rcldb after the Xapian::setQuery() call
|
||||||
string m_description;
|
std::string m_description;
|
||||||
string m_reason;
|
std::string m_reason;
|
||||||
bool m_haveWildCards;
|
bool m_haveWildCards;
|
||||||
string m_stemlang;
|
std::string m_stemlang;
|
||||||
bool expandFileTypes(RclConfig *cfg, vector<string>& exptps);
|
bool expandFileTypes(RclConfig *cfg, std::vector<std::string>& exptps);
|
||||||
/* Copy constructor and assignment are private and forbidden */
|
/* Copy constructor and assignment are private and forbidden */
|
||||||
SearchData(const SearchData &) {}
|
SearchData(const SearchData &) {}
|
||||||
SearchData& operator=(const SearchData&) {return *this;};
|
SearchData& operator=(const SearchData&) {return *this;};
|
||||||
@ -178,39 +174,56 @@ public:
|
|||||||
SDCM_ANCHOREND=4};
|
SDCM_ANCHOREND=4};
|
||||||
|
|
||||||
SearchDataClause(SClType tp)
|
SearchDataClause(SClType tp)
|
||||||
: m_tp(tp), m_parentSearch(0), m_haveWildCards(0),
|
: m_tp(tp), m_parentSearch(0), m_haveWildCards(0),
|
||||||
m_modifiers(SDCM_NONE), m_weight(1.0)
|
m_modifiers(SDCM_NONE), m_weight(1.0)
|
||||||
{}
|
{}
|
||||||
virtual ~SearchDataClause() {}
|
virtual ~SearchDataClause() {}
|
||||||
virtual bool toNativeQuery(Rcl::Db &db, void *, const string&) = 0;
|
virtual bool toNativeQuery(Rcl::Db &db, void *, const std::string&) = 0;
|
||||||
bool isFileName() const {return m_tp == SCLT_FILENAME ? true: false;}
|
bool isFileName() const {return m_tp == SCLT_FILENAME ? true: false;}
|
||||||
virtual string getReason() const {return m_reason;}
|
virtual std::string getReason() const {return m_reason;}
|
||||||
virtual bool getTerms(vector<string>&, vector<vector<string> >&,
|
virtual void getTerms(HighlightData & hldata) const = 0;
|
||||||
vector<int>&) const = 0;
|
|
||||||
virtual void getUTerms(vector<string>&) const = 0;
|
|
||||||
|
|
||||||
SClType getTp() {return m_tp;}
|
SClType getTp()
|
||||||
void setParent(SearchData *p) {m_parentSearch = p;}
|
{
|
||||||
virtual void setModifiers(Modifier mod) {m_modifiers = mod;}
|
return m_tp;
|
||||||
virtual int getModifiers() {return m_modifiers;}
|
}
|
||||||
virtual void addModifier(Modifier mod) {
|
void setParent(SearchData *p)
|
||||||
|
{
|
||||||
|
m_parentSearch = p;
|
||||||
|
}
|
||||||
|
virtual void setModifiers(Modifier mod)
|
||||||
|
{
|
||||||
|
m_modifiers = mod;
|
||||||
|
}
|
||||||
|
virtual int getModifiers()
|
||||||
|
{
|
||||||
|
return m_modifiers;
|
||||||
|
}
|
||||||
|
virtual void addModifier(Modifier mod)
|
||||||
|
{
|
||||||
int imod = getModifiers();
|
int imod = getModifiers();
|
||||||
imod |= mod;
|
imod |= mod;
|
||||||
setModifiers(Modifier(imod));
|
setModifiers(Modifier(imod));
|
||||||
}
|
}
|
||||||
virtual void setWeight(float w) {m_weight = w;}
|
virtual void setWeight(float w)
|
||||||
|
{
|
||||||
|
m_weight = w;
|
||||||
|
}
|
||||||
friend class SearchData;
|
friend class SearchData;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
string m_reason;
|
std::string m_reason;
|
||||||
SClType m_tp;
|
SClType m_tp;
|
||||||
SearchData *m_parentSearch;
|
SearchData *m_parentSearch;
|
||||||
bool m_haveWildCards;
|
bool m_haveWildCards;
|
||||||
Modifier m_modifiers;
|
Modifier m_modifiers;
|
||||||
float m_weight;
|
float m_weight;
|
||||||
private:
|
private:
|
||||||
SearchDataClause(const SearchDataClause&) {}
|
SearchDataClause(const SearchDataClause&)
|
||||||
SearchDataClause& operator=(const SearchDataClause&) {
|
{
|
||||||
|
}
|
||||||
|
SearchDataClause& operator=(const SearchDataClause&)
|
||||||
|
{
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -221,45 +234,37 @@ private:
|
|||||||
*/
|
*/
|
||||||
class SearchDataClauseSimple : public SearchDataClause {
|
class SearchDataClauseSimple : public SearchDataClause {
|
||||||
public:
|
public:
|
||||||
SearchDataClauseSimple(SClType tp, const string& txt,
|
SearchDataClauseSimple(SClType tp, const std::string& txt,
|
||||||
const string& fld = string())
|
const std::string& fld = std::string())
|
||||||
: SearchDataClause(tp), m_text(txt), m_field(fld), m_slack(0) {
|
: SearchDataClause(tp), m_text(txt), m_field(fld)
|
||||||
m_haveWildCards = (txt.find_first_of(cstr_minwilds) != string::npos);
|
{
|
||||||
|
m_haveWildCards =
|
||||||
|
(txt.find_first_of(cstr_minwilds) != std::string::npos);
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual ~SearchDataClauseSimple() {}
|
virtual ~SearchDataClauseSimple()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
/** Translate to Xapian query */
|
/** Translate to Xapian query */
|
||||||
virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang);
|
virtual bool toNativeQuery(Rcl::Db &, void *, const std::string& stemlang);
|
||||||
|
|
||||||
/** Retrieve query terms and term groups. This is used for highlighting */
|
virtual void getTerms(HighlightData& hldata) const
|
||||||
virtual bool getTerms(vector<string>& terms, /* Single terms */
|
|
||||||
vector<vector<string> >& groups, /* Prox grps */
|
|
||||||
vector<int>& gslks) const /* Prox slacks */
|
|
||||||
{
|
{
|
||||||
terms.insert(terms.end(), m_terms.begin(), m_terms.end());
|
hldata.append(m_hldata);
|
||||||
groups.insert(groups.end(), m_groups.begin(), m_groups.end());
|
|
||||||
gslks.insert(gslks.end(), m_groups.size(), m_slack);
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
virtual void getUTerms(vector<string>& terms) const
|
virtual const std::string& gettext()
|
||||||
{
|
{
|
||||||
terms.insert(terms.end(), m_uterms.begin(), m_uterms.end());
|
return m_text;
|
||||||
|
}
|
||||||
|
virtual const std::string& getfield()
|
||||||
|
{
|
||||||
|
return m_field;
|
||||||
}
|
}
|
||||||
virtual const string& gettext() {return m_text;}
|
|
||||||
virtual const string& getfield() {return m_field;}
|
|
||||||
protected:
|
protected:
|
||||||
string m_text; // Raw user entry text.
|
std::string m_text; // Raw user entry text.
|
||||||
string m_field; // Field specification if any
|
std::string m_field; // Field specification if any
|
||||||
// Single terms and phrases resulting from breaking up m_text;
|
HighlightData m_hldata;
|
||||||
// valid after toNativeQuery() call
|
|
||||||
vector<string> m_terms;
|
|
||||||
vector<vector<string> > m_groups;
|
|
||||||
// User terms before expansion
|
|
||||||
vector<string> m_uterms;
|
|
||||||
// Declare m_slack here. Always 0, but allows getTerms to work for
|
|
||||||
// SearchDataClauseDist
|
|
||||||
int m_slack;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -272,29 +277,39 @@ protected:
|
|||||||
*/
|
*/
|
||||||
class SearchDataClauseFilename : public SearchDataClauseSimple {
|
class SearchDataClauseFilename : public SearchDataClauseSimple {
|
||||||
public:
|
public:
|
||||||
SearchDataClauseFilename(const string& txt)
|
SearchDataClauseFilename(const std::string& txt)
|
||||||
: SearchDataClauseSimple(SCLT_FILENAME, txt) {
|
: SearchDataClauseSimple(SCLT_FILENAME, txt)
|
||||||
|
{
|
||||||
// File name searches don't count when looking for wild cards.
|
// File name searches don't count when looking for wild cards.
|
||||||
m_haveWildCards = false;
|
m_haveWildCards = false;
|
||||||
}
|
}
|
||||||
virtual ~SearchDataClauseFilename() {}
|
|
||||||
virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang);
|
virtual ~SearchDataClauseFilename()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual bool toNativeQuery(Rcl::Db &, void *, const std::string& stemlang);
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A clause coming from a NEAR or PHRASE entry field. There is only one
|
* A clause coming from a NEAR or PHRASE entry field. There is only one
|
||||||
* string group, and a specified distance, which applies to it.
|
* string group, and a specified distance, which applies to it.
|
||||||
*/
|
*/
|
||||||
class SearchDataClauseDist : public SearchDataClauseSimple {
|
class SearchDataClauseDist : public SearchDataClauseSimple {
|
||||||
public:
|
public:
|
||||||
SearchDataClauseDist(SClType tp, const string& txt, int slack,
|
SearchDataClauseDist(SClType tp, const std::string& txt, int slack,
|
||||||
const string& fld = string())
|
const std::string& fld = std::string())
|
||||||
: SearchDataClauseSimple(tp, txt, fld) {m_slack = slack;}
|
: SearchDataClauseSimple(tp, txt, fld), m_slack(slack)
|
||||||
virtual ~SearchDataClauseDist() {}
|
{
|
||||||
|
}
|
||||||
|
|
||||||
virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang);
|
virtual ~SearchDataClauseDist()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
// m_slack is declared in SearchDataClauseSimple
|
virtual bool toNativeQuery(Rcl::Db &, void *, const std::string& stemlang);
|
||||||
|
private:
|
||||||
|
int m_slack;
|
||||||
};
|
};
|
||||||
|
|
||||||
/** Subquery */
|
/** Subquery */
|
||||||
@ -302,15 +317,28 @@ class SearchDataClauseSub : public SearchDataClause {
|
|||||||
public:
|
public:
|
||||||
// We take charge of the SearchData * and will delete it.
|
// We take charge of the SearchData * and will delete it.
|
||||||
SearchDataClauseSub(SClType tp, RefCntr<SearchData> sub)
|
SearchDataClauseSub(SClType tp, RefCntr<SearchData> sub)
|
||||||
: SearchDataClause(tp), m_sub(sub) {}
|
: SearchDataClause(tp), m_sub(sub)
|
||||||
virtual ~SearchDataClauseSub() {}
|
{
|
||||||
virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang);
|
}
|
||||||
virtual bool getTerms(vector<string>&, vector<vector<string> >&,
|
|
||||||
vector<int>&) const;
|
virtual ~SearchDataClauseSub()
|
||||||
virtual void getUTerms(vector<string>&) const;
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual bool toNativeQuery(Rcl::Db &db, void *p, const std::string&)
|
||||||
|
{
|
||||||
|
return m_sub->toNativeQuery(db, p);
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual void getTerms(HighlightData& hldata) const
|
||||||
|
{
|
||||||
|
m_sub.getconstptr()->getTerms(hldata);
|
||||||
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
RefCntr<SearchData> m_sub;
|
RefCntr<SearchData> m_sub;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // Namespace Rcl
|
} // Namespace Rcl
|
||||||
|
|
||||||
#endif /* _SEARCHDATA_H_INCLUDED_ */
|
#endif /* _SEARCHDATA_H_INCLUDED_ */
|
||||||
|
|||||||
54
src/utils/hldata.h
Normal file
54
src/utils/hldata.h
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
#ifndef _hldata_h_included_
|
||||||
|
#define _hldata_h_included_
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
#include <string>
|
||||||
|
#include <set>
|
||||||
|
|
||||||
|
/** Stores data about the user terms and their expansions. This is used
 * mostly for highlighting result text and walking the matches.
 */
struct HighlightData {
    /** The user terms, excluding those with wildcards.
     * This list is intended for orthographic suggestions but the terms are
     * unaccented lowercased anyway because they are compared to the dictionary
     * generated from the index term list (which is unaccented).
     */
    std::set<std::string> uterms;

    /** The original user terms-or-groups. This is for displaying the matched
     * terms or groups, i.e. in relation with highlighting or skipping to the
     * next match. These are raw, diacritics and case preserved.
     */
    std::vector<std::vector<std::string> > ugroups;

    /** Processed/expanded terms and groups. Used for looking for
     * regions to highlight. Terms are just groups with 1 entry. All
     * terms in there are unaccented, and the list may include values
     * expanded from the original terms by stem or wildcard expansion.
     */
    std::vector<std::vector<std::string> > groups;

    /** Group slacks. Parallel to groups. */
    std::vector<int> slacks;

    /** Index into ugroups for each group. Parallel to groups. As a
     * user term or group may generate many processed/expanded terms
     * or groups, this is how we relate them.
     */
    std::vector<unsigned int> grpsugidx;

    /** Empty all the containers. */
    void clear()
    {
        uterms.clear();
        ugroups.clear();
        groups.clear();
        slacks.clear();
        grpsugidx.clear();
    }

    /** Add the content of another HighlightData to this one
     * (defined out of line). */
    void append(const HighlightData&);

    /** Print (debug): write a readable form of the data to out
     * (defined out of line). */
    void toString(std::string& out);
};
|
||||||
|
|
||||||
|
#endif /* _hldata_h_included_ */
|
||||||
@ -36,6 +36,7 @@
|
|||||||
|
|
||||||
#include "smallut.h"
|
#include "smallut.h"
|
||||||
#include "utf8iter.h"
|
#include "utf8iter.h"
|
||||||
|
#include "hldata.h"
|
||||||
|
|
||||||
#ifndef NO_NAMESPACES
|
#ifndef NO_NAMESPACES
|
||||||
using namespace std;
|
using namespace std;
|
||||||
@ -1038,8 +1039,57 @@ void catstrerror(string *reason, const char *what, int _errno)
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void HighlightData::toString(std::string& out)
|
||||||
|
{
|
||||||
|
out.append("\nUser terms (orthograph): ");
|
||||||
|
for (std::set<std::string>::const_iterator it = uterms.begin();
|
||||||
|
it != uterms.end(); it++) {
|
||||||
|
out.append(" [").append(*it).append("]");
|
||||||
|
}
|
||||||
|
|
||||||
#else
|
out.append("\nGroups: ");
|
||||||
|
char cbuf[200];
|
||||||
|
sprintf(cbuf, "Groups size %d grpsugidx size %d ugroups size %d",
|
||||||
|
int(groups.size()), int(grpsugidx.size()), int(ugroups.size()));
|
||||||
|
out.append(cbuf);
|
||||||
|
|
||||||
|
unsigned int ugidx = (unsigned int)-1;
|
||||||
|
for (unsigned int i = 0; i < groups.size(); i++) {
|
||||||
|
if (ugidx != grpsugidx[i]) {
|
||||||
|
ugidx = grpsugidx[i];
|
||||||
|
out.append("\n(");
|
||||||
|
for (unsigned int j = 0; j < ugroups[ugidx].size(); j++) {
|
||||||
|
out.append("[").append(ugroups[ugidx][j]).append("] ");
|
||||||
|
}
|
||||||
|
out.append(") ->");
|
||||||
|
}
|
||||||
|
out.append(" {");
|
||||||
|
for (unsigned int j = 0; j < groups[i].size(); j++) {
|
||||||
|
out.append("[").append(groups[i][j]).append("]");
|
||||||
|
}
|
||||||
|
sprintf(cbuf, "%d", slacks[i]);
|
||||||
|
out.append("}").append(cbuf);
|
||||||
|
}
|
||||||
|
out.append("\n");
|
||||||
|
fprintf(stderr, "toString ok\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
void HighlightData::append(const HighlightData& hl)
|
||||||
|
{
|
||||||
|
uterms.insert(hl.uterms.begin(), hl.uterms.end());
|
||||||
|
|
||||||
|
size_t ugsz0 = ugroups.size();
|
||||||
|
ugroups.insert(ugroups.end(), hl.ugroups.begin(), hl.ugroups.end());
|
||||||
|
|
||||||
|
groups.insert(groups.end(), hl.groups.begin(), hl.groups.end());
|
||||||
|
slacks.insert(slacks.end(), hl.slacks.begin(), hl.slacks.end());
|
||||||
|
for (std::vector<unsigned int>::const_iterator it = hl.grpsugidx.begin();
|
||||||
|
it != hl.grpsugidx.end(); it++) {
|
||||||
|
grpsugidx.push_back(*it + ugsz0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#else // TEST_SMALLUT
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user