defined data structure to pass around the search term description used for highlighting and other
This commit is contained in:
parent
ebdd6faaf5
commit
dc7b3420a0
@ -686,12 +686,12 @@ class LoadThread : public QThread {
|
||||
/* A thread to convert to rich text (mark search terms) */
|
||||
class ToRichThread : public QThread {
|
||||
string ∈
|
||||
const HiliteData &hdata;
|
||||
const HighlightData &hdata;
|
||||
list<string> &out;
|
||||
int loglevel;
|
||||
PlainToRichQtPreview *ptr;
|
||||
public:
|
||||
ToRichThread(string &i, const HiliteData& hd, list<string> &o,
|
||||
ToRichThread(string &i, const HighlightData& hd, list<string> &o,
|
||||
PlainToRichQtPreview *_ptr)
|
||||
: in(i), hdata(hd), out(o), ptr(_ptr)
|
||||
{
|
||||
|
||||
@ -91,7 +91,7 @@ class Preview : public QWidget {
|
||||
public:
|
||||
|
||||
Preview(int sid, // Search Id
|
||||
const HiliteData& hdata) // Search terms etc. for highlighting
|
||||
const HighlightData& hdata) // Search terms etc. for highlighting
|
||||
: QWidget(0), m_searchId(sid), m_hData(hdata)
|
||||
{
|
||||
init();
|
||||
@ -141,7 +141,7 @@ private:
|
||||
bool m_canBeep;
|
||||
bool m_loading;
|
||||
QWidget *m_currentW;
|
||||
HiliteData m_hData;
|
||||
HighlightData m_hData;
|
||||
bool m_justCreated; // First tab create is different
|
||||
bool m_haveAnchors; // Search terms are marked in text
|
||||
int m_lastAnchor; // Number of last anchor. Then rewind to 1
|
||||
|
||||
@ -1220,8 +1220,8 @@ void RclMain::startPreview(int docnum, Rcl::Doc doc, int mod)
|
||||
curPreview = 0;
|
||||
}
|
||||
if (curPreview == 0) {
|
||||
HiliteData hdata;
|
||||
m_source->getTerms(hdata.terms, hdata.groups, hdata.gslks);
|
||||
HighlightData hdata;
|
||||
m_source->getTerms(hdata);
|
||||
curPreview = new Preview(reslist->listId(), hdata);
|
||||
|
||||
if (curPreview == 0) {
|
||||
@ -1284,7 +1284,7 @@ void RclMain::updateIdxForDocs(vector<Rcl::Doc>& docs)
|
||||
*/
|
||||
void RclMain::startPreview(Rcl::Doc doc)
|
||||
{
|
||||
Preview *preview = new Preview(0, HiliteData());
|
||||
Preview *preview = new Preview(0, HighlightData());
|
||||
if (preview == 0) {
|
||||
QMessageBox::warning(0, tr("Warning"),
|
||||
tr("Can't create preview window"),
|
||||
|
||||
@ -299,8 +299,8 @@ void RecollModel::setDocSource(RefCntr<DocSequence> nsource)
|
||||
m_source = RefCntr<DocSequence>();
|
||||
} else {
|
||||
m_source = RefCntr<DocSequence>(new DocSource(theconfig, nsource));
|
||||
m_hdata.reset();
|
||||
m_source->getTerms(m_hdata.terms, m_hdata.groups, m_hdata.gslks);
|
||||
m_hdata.clear();
|
||||
m_source->getTerms(m_hdata);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -19,6 +19,10 @@
|
||||
|
||||
#include <Qt>
|
||||
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
#include "ui_restable.h"
|
||||
#include "refcntr.h"
|
||||
#include "docseq.h"
|
||||
@ -26,7 +30,7 @@
|
||||
|
||||
class ResTable;
|
||||
|
||||
typedef string (FieldGetter)(const string& fldname, const Rcl::Doc& doc);
|
||||
typedef std::string (FieldGetter)(const std::string& fldname, const Rcl::Doc& doc);
|
||||
|
||||
class RecollModel : public QAbstractTableModel {
|
||||
|
||||
@ -49,15 +53,15 @@ public:
|
||||
virtual void setDocSource(RefCntr<DocSequence> nsource);
|
||||
virtual RefCntr<DocSequence> getDocSource() {return m_source;}
|
||||
virtual void deleteColumn(int);
|
||||
virtual const vector<string>& getFields() {return m_fields;}
|
||||
virtual const map<string, QString>& getAllFields()
|
||||
virtual const std::vector<std::string>& getFields() {return m_fields;}
|
||||
virtual const std::map<std::string, QString>& getAllFields()
|
||||
{
|
||||
return o_displayableFields;
|
||||
}
|
||||
virtual void addColumn(int, const string&);
|
||||
virtual void addColumn(int, const std::string&);
|
||||
// Some column names are aliases/translations of base document fields
|
||||
// (ie: date, datetime->mtime). Help deal with this:
|
||||
virtual string baseField(const string&);
|
||||
virtual std::string baseField(const std::string&);
|
||||
|
||||
// Ignore sort() call because
|
||||
virtual void setIgnoreSort(bool onoff) {m_ignoreSort = onoff;}
|
||||
@ -69,12 +73,12 @@ signals:
|
||||
|
||||
private:
|
||||
mutable RefCntr<DocSequence> m_source;
|
||||
vector<string> m_fields;
|
||||
vector<FieldGetter*> m_getters;
|
||||
static map<string, QString> o_displayableFields;
|
||||
std::vector<std::string> m_fields;
|
||||
std::vector<FieldGetter*> m_getters;
|
||||
static std::map<std::string, QString> o_displayableFields;
|
||||
bool m_ignoreSort;
|
||||
FieldGetter* chooseGetter(const string&);
|
||||
HiliteData m_hdata;
|
||||
FieldGetter* chooseGetter(const std::string&);
|
||||
HighlightData m_hdata;
|
||||
};
|
||||
|
||||
class ResTable;
|
||||
|
||||
@ -19,19 +19,16 @@
|
||||
#include <string>
|
||||
#include <list>
|
||||
#include <vector>
|
||||
#ifndef NO_NAMESPACES
|
||||
using std::string;
|
||||
using std::list;
|
||||
using std::vector;
|
||||
#endif
|
||||
|
||||
|
||||
#include "rcldoc.h"
|
||||
#include "refcntr.h"
|
||||
#include "hldata.h"
|
||||
|
||||
// A result list entry.
|
||||
struct ResListEntry {
|
||||
Rcl::Doc doc;
|
||||
string subHeader;
|
||||
std::string subHeader;
|
||||
};
|
||||
|
||||
/** Sort specification. */
|
||||
@ -40,7 +37,7 @@ class DocSeqSortSpec {
|
||||
DocSeqSortSpec() : desc(false) {}
|
||||
bool isNotNull() const {return !field.empty();}
|
||||
void reset() {field.erase();}
|
||||
string field;
|
||||
std::string field;
|
||||
bool desc;
|
||||
};
|
||||
|
||||
@ -50,12 +47,12 @@ class DocSeqFiltSpec {
|
||||
public:
|
||||
DocSeqFiltSpec() {}
|
||||
enum Crit {DSFS_MIMETYPE, DSFS_QLANG, DSFS_PASSALL};
|
||||
void orCrit(Crit crit, const string& value) {
|
||||
void orCrit(Crit crit, const std::string& value) {
|
||||
crits.push_back(crit);
|
||||
values.push_back(value);
|
||||
}
|
||||
std::vector<Crit> crits;
|
||||
std::vector<string> values;
|
||||
std::vector<std::string> values;
|
||||
void reset() {crits.clear(); values.clear();}
|
||||
bool isNotNull() const {return crits.size() != 0;}
|
||||
};
|
||||
@ -73,7 +70,7 @@ class DocSeqFiltSpec {
|
||||
*/
|
||||
class DocSequence {
|
||||
public:
|
||||
DocSequence(const string &t) : m_title(t) {}
|
||||
DocSequence(const std::string &t) : m_title(t) {}
|
||||
virtual ~DocSequence() {}
|
||||
|
||||
/** Get document at given rank.
|
||||
@ -84,16 +81,17 @@ class DocSequence {
|
||||
* inside history)
|
||||
* @return true if ok, false for error or end of data
|
||||
*/
|
||||
virtual bool getDoc(int num, Rcl::Doc &doc, string *sh = 0) = 0;
|
||||
virtual bool getDoc(int num, Rcl::Doc &doc, std::string *sh = 0) = 0;
|
||||
|
||||
/** Get next page of documents. This accumulates entries into the result
|
||||
* list parameter (doesn't reset it). */
|
||||
virtual int getSeqSlice(int offs, int cnt, vector<ResListEntry>& result);
|
||||
virtual int getSeqSlice(int offs, int cnt,
|
||||
std::vector<ResListEntry>& result);
|
||||
|
||||
/** Get abstract for document. This is special because it may take time.
|
||||
* The default is to return the input doc's abstract fields, but some
|
||||
* sequences can compute a better value (ie: docseqdb) */
|
||||
virtual bool getAbstract(Rcl::Doc& doc, vector<string>& abs) {
|
||||
virtual bool getAbstract(Rcl::Doc& doc, std::vector<std::string>& abs) {
|
||||
abs.push_back(doc.meta[Rcl::Doc::keyabs]);
|
||||
return true;
|
||||
}
|
||||
@ -103,25 +101,21 @@ class DocSequence {
|
||||
virtual int getResCnt() = 0;
|
||||
|
||||
/** Get title for result list */
|
||||
virtual string title() {return m_title;}
|
||||
virtual std::string title() {return m_title;}
|
||||
|
||||
/** Get description for underlying query */
|
||||
virtual string getDescription() = 0;
|
||||
virtual std::string getDescription() = 0;
|
||||
|
||||
/** Get search terms (for highlighting abstracts). Some sequences
|
||||
* may have no associated search terms. Implement this for them. */
|
||||
virtual bool getTerms(vector<string>& terms,
|
||||
vector<vector<string> >& groups,
|
||||
vector<int>& gslks)
|
||||
virtual void getTerms(HighlightData& hld)
|
||||
{
|
||||
terms.clear(); groups.clear(); gslks.clear(); return true;
|
||||
hld.clear();
|
||||
}
|
||||
/** Get user-input terms (before stemming etc.) */
|
||||
virtual void getUTerms(vector<string>& terms)
|
||||
virtual std::list<std::string> expand(Rcl::Doc &)
|
||||
{
|
||||
terms.clear();
|
||||
return std::list<std::string>();
|
||||
}
|
||||
virtual list<string> expand(Rcl::Doc &) {return list<string>();}
|
||||
|
||||
/** Optional functionality. */
|
||||
virtual bool canFilter() {return false;}
|
||||
@ -130,16 +124,16 @@ class DocSequence {
|
||||
virtual bool setSortSpec(const DocSeqSortSpec &) {return false;}
|
||||
virtual RefCntr<DocSequence> getSourceSeq() {return RefCntr<DocSequence>();}
|
||||
|
||||
static void set_translations(const string& sort, const string& filt)
|
||||
static void set_translations(const std::string& sort, const std::string& filt)
|
||||
{
|
||||
o_sort_trans = sort;
|
||||
o_filt_trans = filt;
|
||||
}
|
||||
protected:
|
||||
static string o_sort_trans;
|
||||
static string o_filt_trans;
|
||||
static std::string o_sort_trans;
|
||||
static std::string o_filt_trans;
|
||||
private:
|
||||
string m_title;
|
||||
std::string m_title;
|
||||
};
|
||||
|
||||
/** A modifier has a child sequence which does the real work and does
|
||||
@ -152,25 +146,23 @@ public:
|
||||
{}
|
||||
virtual ~DocSeqModifier() {}
|
||||
|
||||
virtual bool getAbstract(Rcl::Doc& doc, vector<string>& abs)
|
||||
virtual bool getAbstract(Rcl::Doc& doc, std::vector<std::string>& abs)
|
||||
{
|
||||
if (m_seq.isNull())
|
||||
return false;
|
||||
return m_seq->getAbstract(doc, abs);
|
||||
}
|
||||
virtual string getDescription()
|
||||
virtual std::string getDescription()
|
||||
{
|
||||
if (m_seq.isNull())
|
||||
return "";
|
||||
return m_seq->getDescription();
|
||||
}
|
||||
virtual bool getTerms(vector<string>& terms,
|
||||
vector<vector<string> >& groups,
|
||||
vector<int>& gslks)
|
||||
virtual void getTerms(HighlightData& hld)
|
||||
{
|
||||
if (m_seq.isNull())
|
||||
return false;
|
||||
return m_seq->getTerms(terms, groups, gslks);
|
||||
return;
|
||||
m_seq->getTerms(hld);
|
||||
}
|
||||
virtual bool getEnclosing(Rcl::Doc& doc, Rcl::Doc& pdoc)
|
||||
{
|
||||
@ -178,13 +170,7 @@ public:
|
||||
return false;
|
||||
return m_seq->getEnclosing(doc, pdoc);
|
||||
}
|
||||
virtual void getUTerms(vector<string>& terms)
|
||||
{
|
||||
if (m_seq.isNull())
|
||||
return;
|
||||
m_seq->getUTerms(terms);
|
||||
}
|
||||
virtual string title() {return m_seq->title();}
|
||||
virtual std::string title() {return m_seq->title();}
|
||||
virtual RefCntr<DocSequence> getSourceSeq() {return m_seq;}
|
||||
|
||||
protected:
|
||||
@ -203,7 +189,7 @@ public:
|
||||
virtual bool canSort() {return true;}
|
||||
virtual bool setFiltSpec(const DocSeqFiltSpec &);
|
||||
virtual bool setSortSpec(const DocSeqSortSpec &);
|
||||
virtual bool getDoc(int num, Rcl::Doc &doc, string *sh = 0)
|
||||
virtual bool getDoc(int num, Rcl::Doc &doc, std::string *sh = 0)
|
||||
{
|
||||
if (m_seq.isNull())
|
||||
return false;
|
||||
@ -215,7 +201,7 @@ public:
|
||||
return 0;
|
||||
return m_seq->getResCnt();
|
||||
}
|
||||
virtual string title();
|
||||
virtual std::string title();
|
||||
private:
|
||||
bool buildStack();
|
||||
void stripStack();
|
||||
|
||||
@ -39,16 +39,9 @@ DocSequenceDb::~DocSequenceDb()
|
||||
{
|
||||
}
|
||||
|
||||
bool DocSequenceDb::getTerms(vector<string>& terms,
|
||||
vector<vector<string> >& groups,
|
||||
vector<int>& gslks)
|
||||
void DocSequenceDb::getTerms(HighlightData& hld)
|
||||
{
|
||||
return m_fsdata->getTerms(terms, groups, gslks);
|
||||
}
|
||||
|
||||
void DocSequenceDb::getUTerms(vector<string>& terms)
|
||||
{
|
||||
m_sdata->getUTerms(terms);
|
||||
m_fsdata->getTerms(hld);
|
||||
}
|
||||
|
||||
string DocSequenceDb::getDescription()
|
||||
@ -180,5 +173,13 @@ bool DocSequenceDb::setQuery()
|
||||
return true;
|
||||
m_rescnt = -1;
|
||||
m_needSetQuery = !m_q->setQuery(m_fsdata);
|
||||
|
||||
if (0) {
|
||||
HighlightData hld;
|
||||
m_fsdata->getTerms(hld);
|
||||
string str;
|
||||
hld.toString(str);
|
||||
fprintf(stderr, "DocSequenceDb::setQuery: terms: %s\n", str.c_str());
|
||||
}
|
||||
return !m_needSetQuery;
|
||||
}
|
||||
|
||||
@ -30,10 +30,7 @@ class DocSequenceDb : public DocSequence {
|
||||
virtual ~DocSequenceDb();
|
||||
virtual bool getDoc(int num, Rcl::Doc &doc, string * = 0);
|
||||
virtual int getResCnt();
|
||||
virtual bool getTerms(vector<string>& terms,
|
||||
vector<vector<string> >& groups,
|
||||
vector<int>& gslks);
|
||||
virtual void getUTerms(vector<string>& terms);
|
||||
virtual void getTerms(HighlightData& hld);
|
||||
virtual bool getAbstract(Rcl::Doc &doc, vector<string>&);
|
||||
virtual bool getEnclosing(Rcl::Doc& doc, Rcl::Doc& pdoc);
|
||||
virtual string getDescription();
|
||||
|
||||
@ -24,12 +24,10 @@
|
||||
#include <map>
|
||||
#include <algorithm>
|
||||
|
||||
#ifndef NO_NAMESPACES
|
||||
using std::vector;
|
||||
using std::list;
|
||||
using std::pair;
|
||||
using std::set;
|
||||
#endif /* NO_NAMESPACES */
|
||||
|
||||
#include "rcldb.h"
|
||||
#include "rclconfig.h"
|
||||
@ -51,28 +49,30 @@ static string vecStringToString(const vector<string>& t)
|
||||
return sterms;
|
||||
}
|
||||
|
||||
// Text splitter callback used to take note of the position of query terms
|
||||
// inside the result text. This is then used to insert highlight tags.
|
||||
// Text splitter used to take note of the position of query terms
|
||||
// inside the result text. This is then used to insert highlight tags.
|
||||
class TextSplitPTR : public TextSplit {
|
||||
public:
|
||||
|
||||
// Out: begin and end byte positions of query terms/groups in text
|
||||
vector<pair<int, int> > tboffs;
|
||||
|
||||
TextSplitPTR(const vector<string>& its,
|
||||
const vector<vector<string> >&groups,
|
||||
const vector<int>& slacks)
|
||||
: m_wcount(0), m_groups(groups), m_slacks(slacks)
|
||||
TextSplitPTR(const HighlightData& hdata)
|
||||
: m_wcount(0), m_hdata(hdata)
|
||||
{
|
||||
for (vector<string>::const_iterator it = its.begin();
|
||||
it != its.end(); it++) {
|
||||
m_terms.insert(*it);
|
||||
}
|
||||
for (vector<vector<string> >::const_iterator vit = m_groups.begin();
|
||||
vit != m_groups.end(); vit++) {
|
||||
for (vector<string>::const_iterator it = (*vit).begin();
|
||||
it != (*vit).end(); it++) {
|
||||
m_gterms.insert(*it);
|
||||
// We separate single terms and groups and extract the group
|
||||
// terms for computing positions list before looking for group
|
||||
// matches
|
||||
|
||||
for (vector<vector<string> >::const_iterator vit = hdata.groups.begin();
|
||||
vit != hdata.groups.end(); vit++) {
|
||||
if (vit->size() == 1) {
|
||||
m_terms.insert(vit->front());
|
||||
} else if (vit->size() > 1) {
|
||||
for (vector<string>::const_iterator it = vit->begin();
|
||||
it != vit->end(); it++) {
|
||||
m_gterms.insert(*it);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -116,15 +116,16 @@ class TextSplitPTR : public TextSplit {
|
||||
private:
|
||||
virtual bool matchGroup(const vector<string>& terms, int dist);
|
||||
|
||||
// Word count. Used to call checkCancel from time to time.
|
||||
int m_wcount;
|
||||
|
||||
// In: user query terms
|
||||
set<string> m_terms;
|
||||
|
||||
// In: user query groups, for near/phrase searches.
|
||||
const vector<vector<string> >& m_groups;
|
||||
const vector<int>& m_slacks;
|
||||
set<string> m_gterms;
|
||||
// m_gterms holds all the terms of the multi-term groups in m_hdata.groups, as a set for quick lookup
|
||||
set<string> m_gterms;
|
||||
|
||||
const HighlightData& m_hdata;
|
||||
|
||||
// group/near terms word positions.
|
||||
map<string, vector<int> > m_plists;
|
||||
@ -294,10 +295,11 @@ public:
|
||||
// handle all groups as NEAR (ignore order).
|
||||
bool TextSplitPTR::matchGroups()
|
||||
{
|
||||
vector<vector<string> >::const_iterator vit = m_groups.begin();
|
||||
vector<int>::const_iterator sit = m_slacks.begin();
|
||||
for (; vit != m_groups.end() && sit != m_slacks.end(); vit++, sit++) {
|
||||
matchGroup(*vit, *sit + (*vit).size());
|
||||
for (unsigned int i = 0; i < m_hdata.groups.size(); i++) {
|
||||
if (m_hdata.groups[i].size() <= 1)
|
||||
continue;
|
||||
matchGroup(m_hdata.groups[i],
|
||||
m_hdata.groups[i].size() + m_hdata.slacks[i]);
|
||||
}
|
||||
|
||||
// Sort regions by increasing start and decreasing width.
|
||||
@ -317,39 +319,22 @@ bool TextSplitPTR::matchGroups()
|
||||
// the input is html, the body is always a single output chunk.
|
||||
bool PlainToRich::plaintorich(const string& in,
|
||||
list<string>& out, // Output chunk list
|
||||
const HiliteData& hdata,
|
||||
const HighlightData& hdata,
|
||||
int chunksize)
|
||||
{
|
||||
Chrono chron;
|
||||
const vector<string>& terms(hdata.terms);
|
||||
const vector<vector<string> >& groups(hdata.groups);
|
||||
const vector<int>& slacks(hdata.gslks);
|
||||
|
||||
if (0 && DebugLog::getdbl()->getlevel() >= DEBDEB0) {
|
||||
string sterms = vecStringToString(terms);
|
||||
LOGDEB0(("plaintorich: terms: %s\n", sterms.c_str()));
|
||||
sterms.clear();
|
||||
for (vector<vector<string> >::const_iterator vit = groups.begin();
|
||||
vit != groups.end(); vit++) {
|
||||
sterms += "GROUP: ";
|
||||
sterms += vecStringToString(*vit);
|
||||
sterms += "\n";
|
||||
}
|
||||
LOGDEB0(("plaintorich: groups:\n %s", sterms.c_str()));
|
||||
LOGDEB2((" TEXT:[%s]\n", in.c_str()));
|
||||
}
|
||||
|
||||
// Compute the positions for the query terms. We use the text
|
||||
// splitter to break the text into words, and compare the words to
|
||||
// the search terms,
|
||||
TextSplitPTR splitter(terms, groups, slacks);
|
||||
TextSplitPTR splitter(hdata);
|
||||
// Note: the splitter returns the term locations in byte, not
|
||||
// character, offsets.
|
||||
splitter.text_to_words(in);
|
||||
LOGDEB2(("plaintorich: split done %d mS\n", chron.millis()));
|
||||
|
||||
// Compute the positions for NEAR and PHRASE groups.
|
||||
splitter.matchGroups();
|
||||
LOGDEB2(("plaintorich: group match done %d mS\n", chron.millis()));
|
||||
|
||||
out.clear();
|
||||
out.push_back("");
|
||||
|
||||
@ -19,26 +19,8 @@
|
||||
|
||||
#include <string>
|
||||
#include <list>
|
||||
using std::list;
|
||||
using std::string;
|
||||
|
||||
/// Holder for plaintorich() input data: words and groups of words to
|
||||
/// be highlighted
|
||||
struct HiliteData {
|
||||
// Single terms
|
||||
vector<string> terms;
|
||||
// NEAR and PHRASE elements
|
||||
vector<vector<string> > groups;
|
||||
// Group slacks (number of permitted non-matched words).
|
||||
// Parallel vector to the above 'groups'
|
||||
vector<int> gslks;
|
||||
void reset()
|
||||
{
|
||||
terms.clear();
|
||||
groups.clear();
|
||||
gslks.clear();
|
||||
}
|
||||
};
|
||||
#include "hldata.h"
|
||||
|
||||
/**
|
||||
* A class for highlighting search results. Overridable methods allow
|
||||
@ -83,21 +65,21 @@ public:
|
||||
* lowercase and unaccented.
|
||||
* @param chunksize max size of chunks in output list
|
||||
*/
|
||||
virtual bool plaintorich(const string &in, list<string> &out,
|
||||
const HiliteData& hdata,
|
||||
virtual bool plaintorich(const std::string &in, std::list<std::string> &out,
|
||||
const HighlightData& hdata,
|
||||
int chunksize = 50000
|
||||
);
|
||||
|
||||
/* Overridable output methods for headers, highlighting and marking tags */
|
||||
virtual string header() {return snull;}
|
||||
virtual string startMatch() {return snull;}
|
||||
virtual string endMatch() {return snull;}
|
||||
virtual string startAnchor(int) {return snull;}
|
||||
virtual string endAnchor() {return snull;}
|
||||
virtual string startChunk() {return snull;}
|
||||
virtual std::string header() {return snull;}
|
||||
virtual std::string startMatch() {return snull;}
|
||||
virtual std::string endMatch() {return snull;}
|
||||
virtual std::string startAnchor(int) {return snull;}
|
||||
virtual std::string endAnchor() {return snull;}
|
||||
virtual std::string startChunk() {return snull;}
|
||||
|
||||
protected:
|
||||
const string snull;
|
||||
const std::string snull;
|
||||
bool m_inputhtml;
|
||||
// Use <br> to break plain text lines (else caller has used a <pre> tag)
|
||||
bool m_eolbr;
|
||||
|
||||
@ -132,9 +132,8 @@ void ResListPager::resultPageFor(int docnum)
|
||||
m_respage = npage;
|
||||
}
|
||||
|
||||
void ResListPager::displayDoc(RclConfig *config,
|
||||
int i, Rcl::Doc& doc, const HiliteData& hdata,
|
||||
const string& sh)
|
||||
void ResListPager::displayDoc(RclConfig *config, int i, Rcl::Doc& doc,
|
||||
const HighlightData& hdata, const string& sh)
|
||||
{
|
||||
ostringstream chunk;
|
||||
int percent;
|
||||
@ -309,8 +308,9 @@ void ResListPager::displayPage(RclConfig *config)
|
||||
|
||||
if (pageEmpty()) {
|
||||
chunk << trans("<p><b>No results found</b><br>");
|
||||
vector<string>uterms;
|
||||
m_docSource->getUTerms(uterms);
|
||||
HighlightData hldata;
|
||||
m_docSource->getTerms(hldata);
|
||||
vector<string> uterms(hldata.uterms.begin(), hldata.uterms.end());
|
||||
if (!uterms.empty()) {
|
||||
map<string, vector<string> > spellings;
|
||||
suggest(uterms, spellings);
|
||||
@ -366,8 +366,8 @@ void ResListPager::displayPage(RclConfig *config)
|
||||
if (pageEmpty())
|
||||
return;
|
||||
|
||||
HiliteData hdata;
|
||||
m_docSource->getTerms(hdata.terms, hdata.groups, hdata.gslks);
|
||||
HighlightData hdata;
|
||||
m_docSource->getTerms(hdata);
|
||||
|
||||
// Emit data for result entry paragraph. Do it in chunks that make sense
|
||||
// html-wise, else our client may get confused
|
||||
|
||||
@ -19,14 +19,13 @@
|
||||
#define _reslistpager_h_included_
|
||||
|
||||
#include <vector>
|
||||
using std::vector;
|
||||
|
||||
#include "refcntr.h"
|
||||
#include "docseq.h"
|
||||
#include "hldata.h"
|
||||
|
||||
class RclConfig;
|
||||
class PlainToRich;
|
||||
struct HiliteData;
|
||||
|
||||
/**
|
||||
* Manage a paged HTML result list.
|
||||
@ -85,7 +84,7 @@ public:
|
||||
void resultPageFor(int docnum);
|
||||
void displayPage(RclConfig *);
|
||||
void displayDoc(RclConfig *, int idx, Rcl::Doc& doc,
|
||||
const HiliteData& hdata, const string& sh = "");
|
||||
const HighlightData& hdata, const string& sh = "");
|
||||
bool pageEmpty() {return m_respage.size() == 0;}
|
||||
|
||||
string queryDescription() {return m_docSource.isNull() ? "" :
|
||||
@ -112,8 +111,9 @@ public:
|
||||
virtual string pageTop() {return string();}
|
||||
virtual string headerContent() {return string();}
|
||||
virtual string iconUrl(RclConfig *, Rcl::Doc& doc);
|
||||
virtual void suggest(const vector<string>,
|
||||
map<string, vector<string> >& sugg) {
|
||||
virtual void suggest(const std::vector<std::string>,
|
||||
std::map<std::string, std::vector<std::string> >& sugg)
|
||||
{
|
||||
sugg.clear();
|
||||
}
|
||||
virtual string absSep() {return "…";}
|
||||
@ -126,7 +126,7 @@ private:
|
||||
bool m_hasNext;
|
||||
PlainToRich *m_hiliter;
|
||||
RefCntr<DocSequence> m_docSource;
|
||||
vector<ResListEntry> m_respage;
|
||||
std::vector<ResListEntry> m_respage;
|
||||
};
|
||||
|
||||
#endif /* _reslistpager_h_included_ */
|
||||
|
||||
@ -498,23 +498,12 @@ bool SearchData::fileNameOnly()
|
||||
return true;
|
||||
}
|
||||
|
||||
// Extract all terms and term groups
|
||||
bool SearchData::getTerms(vector<string>& terms,
|
||||
vector<vector<string> >& groups,
|
||||
vector<int>& gslks) const
|
||||
// Extract all term data
|
||||
void SearchData::getTerms(HighlightData &hld) const
|
||||
{
|
||||
for (qlist_cit_t it = m_query.begin(); it != m_query.end(); it++)
|
||||
(*it)->getTerms(terms, groups, gslks);
|
||||
return true;
|
||||
}
|
||||
// Extract user terms
|
||||
void SearchData::getUTerms(vector<string>& terms) const
|
||||
{
|
||||
for (qlist_cit_t it = m_query.begin(); it != m_query.end(); it++)
|
||||
(*it)->getUTerms(terms);
|
||||
sort(terms.begin(), terms.end());
|
||||
vector<string>::iterator it = unique(terms.begin(), terms.end());
|
||||
terms.erase(it, terms.end());
|
||||
(*it)->getTerms(hld);
|
||||
return;
|
||||
}
|
||||
|
||||
// Splitter callback for breaking a user string into simple terms and
|
||||
@ -590,10 +579,10 @@ private:
|
||||
// translating.
|
||||
class StringToXapianQ {
|
||||
public:
|
||||
StringToXapianQ(Db& db, const string& field,
|
||||
StringToXapianQ(Db& db, HighlightData& hld, const string& field,
|
||||
const string &stmlng, bool boostUser)
|
||||
: m_db(db), m_field(field), m_stemlang(stmlng),
|
||||
m_doBoostUserTerms(boostUser)
|
||||
: m_db(db), m_field(field), m_stemlang(stmlng),
|
||||
m_doBoostUserTerms(boostUser), m_hld(hld)
|
||||
{ }
|
||||
|
||||
bool processUserString(const string &iq,
|
||||
@ -601,20 +590,6 @@ public:
|
||||
vector<Xapian::Query> &pqueries,
|
||||
const StopList &stops,
|
||||
int slack = 0, bool useNear = false);
|
||||
// After processing the string: return search terms and term
|
||||
// groups (ie: for highlighting)
|
||||
bool getTerms(vector<string>& terms, vector<vector<string> >& groups)
|
||||
{
|
||||
terms.insert(terms.end(), m_terms.begin(), m_terms.end());
|
||||
groups.insert(groups.end(), m_groups.begin(), m_groups.end());
|
||||
return true;
|
||||
}
|
||||
bool getUTerms(vector<string>& terms)
|
||||
{
|
||||
terms.insert(terms.end(), m_uterms.begin(), m_uterms.end());
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
void expandTerm(bool dont, const string& term, vector<string>& exp,
|
||||
string& sterm, const string& prefix);
|
||||
@ -630,10 +605,7 @@ private:
|
||||
const string& m_field;
|
||||
const string& m_stemlang;
|
||||
bool m_doBoostUserTerms;
|
||||
// Single terms and phrases resulting from breaking up text;
|
||||
vector<string> m_uterms;
|
||||
vector<string> m_terms;
|
||||
vector<vector<string> > m_groups;
|
||||
HighlightData& m_hld;
|
||||
};
|
||||
|
||||
#if 1
|
||||
@ -647,7 +619,7 @@ static void listVector(const string& what, const vector<string>&l)
|
||||
}
|
||||
#endif
|
||||
|
||||
/** Expand stem and wildcards
|
||||
/** Take simple term and expand stem and wildcards
|
||||
*
|
||||
* @param nostemexp don't perform stem expansion. This is mainly used to
|
||||
* prevent stem expansion inside phrases (because the user probably
|
||||
@ -680,9 +652,11 @@ void StringToXapianQ::expandTerm(bool nostemexp,
|
||||
nostemexp = true;
|
||||
}
|
||||
|
||||
if (!haswild)
|
||||
m_hld.uterms.insert(term);
|
||||
|
||||
if (nostemexp && !haswild) {
|
||||
sterm = term;
|
||||
m_uterms.push_back(sterm);
|
||||
exp.resize(1);
|
||||
exp[0] = prefix + term;
|
||||
} else {
|
||||
@ -692,7 +666,6 @@ void StringToXapianQ::expandTerm(bool nostemexp,
|
||||
m_field);
|
||||
} else {
|
||||
sterm = term;
|
||||
m_uterms.push_back(sterm);
|
||||
m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, res, -1,
|
||||
m_field);
|
||||
}
|
||||
@ -701,7 +674,6 @@ void StringToXapianQ::expandTerm(bool nostemexp,
|
||||
exp.push_back(it->term);
|
||||
}
|
||||
}
|
||||
//listVector("ExpandTerm:uterms now: ", m_uterms);
|
||||
}
|
||||
|
||||
// Do distribution of string vectors: a,b c,d -> a,c a,d b,c b,d
|
||||
@ -753,12 +725,15 @@ void StringToXapianQ::processSimpleSpan(const string& span, bool nostemexp,
|
||||
}
|
||||
|
||||
expandTerm(nostemexp, span, exp, sterm, prefix);
|
||||
|
||||
// m_terms is used for highlighting, we don't want prefixes in there.
|
||||
|
||||
// Set up the highlight data. No prefix should go in there
|
||||
for (vector<string>::const_iterator it = exp.begin();
|
||||
it != exp.end(); it++) {
|
||||
m_terms.push_back(it->substr(prefix.size()));
|
||||
m_hld.groups.push_back(vector<string>(1, it->substr(prefix.size())));
|
||||
m_hld.slacks.push_back(0);
|
||||
m_hld.grpsugidx.push_back(m_hld.ugroups.size() - 1);
|
||||
}
|
||||
|
||||
// Push either term or OR of stem-expanded set
|
||||
Xapian::Query xq(Xapian::Query::OP_OR, exp.begin(), exp.end());
|
||||
|
||||
@ -786,7 +761,9 @@ void StringToXapianQ::processPhraseOrNear(TextSplitQ *splitData,
|
||||
Xapian::Query::op op = useNear ? Xapian::Query::OP_NEAR :
|
||||
Xapian::Query::OP_PHRASE;
|
||||
vector<Xapian::Query> orqueries;
|
||||
#ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF
|
||||
bool hadmultiple = false;
|
||||
#endif
|
||||
vector<vector<string> >groups;
|
||||
|
||||
string prefix;
|
||||
@ -805,15 +782,19 @@ void StringToXapianQ::processPhraseOrNear(TextSplitQ *splitData,
|
||||
for (vector<string>::iterator it = splitData->terms.begin();
|
||||
it != splitData->terms.end(); it++, nxit++) {
|
||||
LOGDEB0(("ProcessPhrase: processing [%s]\n", it->c_str()));
|
||||
// Adjust when we do stem expansion. Not inside phrases, and
|
||||
// some versions of xapian will accept only one OR clause
|
||||
// inside NEAR, all others must be leafs.
|
||||
bool nostemexp = *nxit || (op == Xapian::Query::OP_PHRASE) || hadmultiple;
|
||||
// Adjust when we do stem expansion. Not if disabled by
|
||||
// caller, not inside phrases, and some versions of xapian
|
||||
// will accept only one OR clause inside NEAR.
|
||||
bool nostemexp = *nxit || (op == Xapian::Query::OP_PHRASE)
|
||||
#ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF
|
||||
|| hadmultiple
|
||||
#endif // single OR inside NEAR
|
||||
;
|
||||
|
||||
string sterm;
|
||||
vector<string> exp;
|
||||
expandTerm(nostemexp, *it, exp, sterm, prefix);
|
||||
LOGDEB0(("ProcessPhrase: exp size %d\n", exp.size()));
|
||||
LOGDEB0(("ProcessPhraseOrNear: exp size %d\n", exp.size()));
|
||||
listVector("", exp);
|
||||
// groups is used for highlighting, we don't want prefixes in there.
|
||||
vector<string> noprefs;
|
||||
@ -850,7 +831,13 @@ void StringToXapianQ::processPhraseOrNear(TextSplitQ *splitData,
|
||||
vector<vector<string> > allcombs;
|
||||
vector<string> comb;
|
||||
multiply_groups(groups.begin(), groups.end(), comb, allcombs);
|
||||
m_groups.insert(m_groups.end(), allcombs.begin(), allcombs.end());
|
||||
|
||||
// Insert the search groups and slacks in the highlight data, with
|
||||
// a reference to the user entry that generated them:
|
||||
m_hld.groups.insert(m_hld.groups.end(), allcombs.begin(), allcombs.end());
|
||||
m_hld.slacks.insert(m_hld.slacks.end(), allcombs.size(), slack);
|
||||
m_hld.grpsugidx.insert(m_hld.grpsugidx.end(), allcombs.size(),
|
||||
m_hld.ugroups.size() - 1);
|
||||
}
|
||||
|
||||
// Trim string beginning with ^ or ending with $ and convert to flags
|
||||
@ -875,7 +862,16 @@ static int stringToMods(string& s)
|
||||
* We just separate words and phrases, and do wildcard and stem expansion,
|
||||
*
|
||||
* This is used to process data entered into an OR/AND/NEAR/PHRASE field of
|
||||
* the GUI.
|
||||
* the GUI (in the case of NEAR/PHRASE, clausedist adds dquotes to the user
|
||||
* entry).
|
||||
*
|
||||
* This appears awful, and it would seem that the split into
|
||||
* terms/phrases should be performed in the upper layer so that we
|
||||
* only receive pure term or near/phrase pure elements here, but in
|
||||
* fact there are things that would appear like terms to naive code,
|
||||
and which may actually be turned into phrases (ie: tom:jerry),
|
||||
* in a manner which intimately depends on the index implementation,
|
||||
* so that it makes sense to process this here.
|
||||
*
|
||||
* The final list contains one query for each term or phrase
|
||||
* - Elements corresponding to a stem-expanded part are an OP_OR
|
||||
@ -895,9 +891,6 @@ bool StringToXapianQ::processUserString(const string &iq,
|
||||
{
|
||||
LOGDEB(("StringToXapianQ:: query string: [%s], slack %d, near %d\n", iq.c_str(), slack, useNear));
|
||||
ermsg.erase();
|
||||
m_uterms.clear();
|
||||
m_terms.clear();
|
||||
m_groups.clear();
|
||||
|
||||
// Simple whitespace-split input into user-level words and
|
||||
// double-quoted phrases: word1 word2 "this is a phrase".
|
||||
@ -952,10 +945,12 @@ bool StringToXapianQ::processUserString(const string &iq,
|
||||
case 0:
|
||||
continue;// ??
|
||||
case 1:
|
||||
m_hld.ugroups.push_back(vector<string>(1, *it));
|
||||
processSimpleSpan(splitter.terms.front(),
|
||||
splitter.nostemexps.front(), pqueries);
|
||||
break;
|
||||
default:
|
||||
m_hld.ugroups.push_back(vector<string>(1, *it));
|
||||
processPhraseOrNear(&splitter, pqueries, useNear, slack, mods);
|
||||
}
|
||||
}
|
||||
@ -984,8 +979,6 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p,
|
||||
LOGDEB2(("SearchDataClauseSimple::toNativeQuery: stemlang [%s]\n",
|
||||
stemlang.c_str()));
|
||||
|
||||
m_terms.clear();
|
||||
m_groups.clear();
|
||||
Xapian::Query *qp = (Xapian::Query *)p;
|
||||
*qp = Xapian::Query();
|
||||
|
||||
@ -1007,16 +1000,14 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p,
|
||||
(m_parentSearch && !m_parentSearch->haveWildCards()) ||
|
||||
(m_parentSearch == 0 && !m_haveWildCards);
|
||||
|
||||
StringToXapianQ tr(db, m_field, l_stemlang, doBoostUserTerm);
|
||||
StringToXapianQ tr(db, m_hldata, m_field, l_stemlang, doBoostUserTerm);
|
||||
if (!tr.processUserString(m_text, m_reason, pqueries, db.getStopList()))
|
||||
return false;
|
||||
if (pqueries.empty()) {
|
||||
LOGERR(("SearchDataClauseSimple: resolved to null query\n"));
|
||||
return true;
|
||||
}
|
||||
tr.getTerms(m_terms, m_groups);
|
||||
tr.getUTerms(m_uterms);
|
||||
//listVector("SearchDataClauseSimple: Uterms: ", m_uterms);
|
||||
|
||||
*qp = Xapian::Query(op, pqueries.begin(), pqueries.end());
|
||||
if (m_weight != 1.0) {
|
||||
*qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
|
||||
@ -1056,8 +1047,6 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
|
||||
const string& l_stemlang = (m_modifiers&SDCM_NOSTEMMING)? cstr_null:
|
||||
stemlang;
|
||||
LOGDEB(("SearchDataClauseDist::toNativeQuery\n"));
|
||||
m_terms.clear();
|
||||
m_groups.clear();
|
||||
|
||||
Xapian::Query *qp = (Xapian::Query *)p;
|
||||
*qp = Xapian::Query();
|
||||
@ -1080,7 +1069,7 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
|
||||
}
|
||||
string s = cstr_dquote + m_text + cstr_dquote;
|
||||
bool useNear = (m_tp == SCLT_NEAR);
|
||||
StringToXapianQ tr(db, m_field, l_stemlang, doBoostUserTerm);
|
||||
StringToXapianQ tr(db, m_hldata, m_field, l_stemlang, doBoostUserTerm);
|
||||
if (!tr.processUserString(s, m_reason, pqueries, db.getStopList(),
|
||||
m_slack, useNear))
|
||||
return false;
|
||||
@ -1088,8 +1077,7 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
|
||||
LOGERR(("SearchDataClauseDist: resolved to null query\n"));
|
||||
return true;
|
||||
}
|
||||
tr.getTerms(m_terms, m_groups);
|
||||
tr.getUTerms(m_uterms);
|
||||
|
||||
*qp = *pqueries.begin();
|
||||
if (m_weight != 1.0) {
|
||||
*qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
|
||||
@ -1097,21 +1085,4 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
|
||||
return true;
|
||||
}
|
||||
|
||||
// Translate subquery
|
||||
bool SearchDataClauseSub::toNativeQuery(Rcl::Db &db, void *p, const string&)
|
||||
{
|
||||
return m_sub->toNativeQuery(db, p);
|
||||
}
|
||||
|
||||
bool SearchDataClauseSub::getTerms(vector<string>& terms,
|
||||
vector<vector<string> >& groups,
|
||||
vector<int>& gslks) const
|
||||
{
|
||||
return m_sub.getconstptr()->getTerms(terms, groups, gslks);
|
||||
}
|
||||
void SearchDataClauseSub::getUTerms(vector<string>& terms) const
|
||||
{
|
||||
m_sub.getconstptr()->getUTerms(terms);
|
||||
}
|
||||
|
||||
} // Namespace Rcl
|
||||
|
||||
@ -31,14 +31,11 @@
|
||||
#include "refcntr.h"
|
||||
#include "smallut.h"
|
||||
#include "cstr.h"
|
||||
#include "hldata.h"
|
||||
|
||||
class RclConfig;
|
||||
|
||||
#ifndef NO_NAMESPACES
|
||||
using std::vector;
|
||||
using std::string;
|
||||
namespace Rcl {
|
||||
#endif // NO_NAMESPACES
|
||||
|
||||
/** Search clause types */
|
||||
enum SClType {
|
||||
@ -50,33 +47,33 @@ enum SClType {
|
||||
class SearchDataClause;
|
||||
|
||||
/**
|
||||
Data structure representing a Recoll user query, for translation
|
||||
into a Xapian query tree. This could probably better called a 'question'.
|
||||
Data structure representing a Recoll user query, for translation
|
||||
into a Xapian query tree. This could probably be better called a 'question'.
|
||||
|
||||
This is a list of search clauses combined through either OR or AND.
|
||||
This is a list of search clauses combined through either OR or AND.
|
||||
|
||||
Clauses either reflect user entry in a query field: some text, a
|
||||
clause type (AND/OR/NEAR etc.), possibly a distance, or points to
|
||||
another SearchData representing a subquery.
|
||||
Clauses either reflect user entry in a query field: some text, a
|
||||
clause type (AND/OR/NEAR etc.), possibly a distance, or points to
|
||||
another SearchData representing a subquery.
|
||||
|
||||
The content of each clause when added may not be fully parsed yet
|
||||
(may come directly from a gui field). It will be parsed and may be
|
||||
translated to several queries in the Xapian sense, for exemple
|
||||
several terms and phrases as would result from
|
||||
["this is a phrase" term1 term2] .
|
||||
The content of each clause when added may not be fully parsed yet
|
||||
(may come directly from a gui field). It will be parsed and may be
|
||||
translated to several queries in the Xapian sense, for example
|
||||
several terms and phrases as would result from
|
||||
["this is a phrase" term1 term2] .
|
||||
|
||||
This is why the clauses also have an AND/OR/... type.
|
||||
This is why the clauses also have an AND/OR/... type.
|
||||
|
||||
A phrase clause could be added either explicitly or using double quotes:
|
||||
{SCLT_PHRASE, [this is a phrase]} or as {SCLT_XXX, ["this is a phrase"]}
|
||||
A phrase clause could be added either explicitly or using double quotes:
|
||||
{SCLT_PHRASE, [this is a phrase]} or as {SCLT_XXX, ["this is a phrase"]}
|
||||
|
||||
*/
|
||||
*/
|
||||
class SearchData {
|
||||
public:
|
||||
SearchData(SClType tp)
|
||||
: m_tp(tp), m_topdirexcl(false), m_topdirweight(1.0),
|
||||
m_haveDates(false), m_maxSize(size_t(-1)),
|
||||
m_minSize(size_t(-1)), m_haveWildCards(false)
|
||||
: m_tp(tp), m_topdirexcl(false), m_topdirweight(1.0),
|
||||
m_haveDates(false), m_maxSize(size_t(-1)),
|
||||
m_minSize(size_t(-1)), m_haveWildCards(false)
|
||||
{
|
||||
if (m_tp != SCLT_OR && m_tp != SCLT_AND)
|
||||
m_tp = SCLT_OR;
|
||||
@ -108,7 +105,7 @@ public:
|
||||
bool maybeAddAutoPhrase(Rcl::Db &db, double threshold);
|
||||
|
||||
/** Set/get top subdirectory for filtering results */
|
||||
void setTopdir(const string& t, bool excl = false, float w = 1.0)
|
||||
void setTopdir(const std::string& t, bool excl = false, float w = 1.0)
|
||||
{
|
||||
m_topdir = t;
|
||||
m_topdirexcl = excl;
|
||||
@ -122,38 +119,37 @@ public:
|
||||
void setDateSpan(DateInterval *dip) {m_dates = *dip; m_haveDates = true;}
|
||||
|
||||
/** Add file type for filtering results */
|
||||
void addFiletype(const string& ft) {m_filetypes.push_back(ft);}
|
||||
void addFiletype(const std::string& ft) {m_filetypes.push_back(ft);}
|
||||
/** Add file type to not wanted list */
|
||||
void remFiletype(const string& ft) {m_nfiletypes.push_back(ft);}
|
||||
void remFiletype(const std::string& ft) {m_nfiletypes.push_back(ft);}
|
||||
|
||||
void setStemlang(const string& lang = "english") {m_stemlang = lang;}
|
||||
void setStemlang(const std::string& lang = "english") {m_stemlang = lang;}
|
||||
|
||||
/** Retrieve error description */
|
||||
string getReason() {return m_reason;}
|
||||
std::string getReason() {return m_reason;}
|
||||
|
||||
/** Get terms and phrase/near groups. Used in the GUI for highlighting
|
||||
* The groups and gslks vectors are parallel and hold the phrases/near
|
||||
* string groups and their associated slacks (distance in excess of group
|
||||
* size)
|
||||
/** Return term expansion data. Mostly used by caller for highlighting
|
||||
*/
|
||||
bool getTerms(vector<string>& terms,
|
||||
vector<vector<string> >& groups, vector<int>& gslks) const;
|
||||
/** Get user-input terms (before expansion etc.) */
|
||||
void getUTerms(vector<string>& terms) const;
|
||||
void getTerms(HighlightData& hldata) const;
|
||||
|
||||
/**
|
||||
* Get/set the description field which is retrieved from xapian after
|
||||
* initializing the query. It is stored here for usage in the GUI.
|
||||
*/
|
||||
string getDescription() {return m_description;}
|
||||
void setDescription(const string& d) {m_description = d;}
|
||||
std::string getDescription() {return m_description;}
|
||||
void setDescription(const std::string& d) {m_description = d;}
|
||||
|
||||
private:
|
||||
SClType m_tp; // Only SCLT_AND or SCLT_OR here
|
||||
vector<SearchDataClause*> m_query;
|
||||
vector<string> m_filetypes; // Restrict to filetypes if set.
|
||||
vector<string> m_nfiletypes; // Unwanted file types
|
||||
string m_topdir; // Restrict to subtree.
|
||||
// Combine type. Only SCLT_AND or SCLT_OR here
|
||||
SClType m_tp;
|
||||
// Complex query descriptor
|
||||
std::vector<SearchDataClause*> m_query;
|
||||
// Restricted set of filetypes if not empty.
|
||||
std::vector<std::string> m_filetypes;
|
||||
// Excluded set of file types if not empty
|
||||
std::vector<std::string> m_nfiletypes;
|
||||
// Restrict to subtree.
|
||||
std::string m_topdir;
|
||||
bool m_topdirexcl; // Invert meaning
|
||||
float m_topdirweight; // affect weight instead of filter
|
||||
bool m_haveDates;
|
||||
@ -162,11 +158,11 @@ private:
|
||||
size_t m_minSize;
|
||||
// Printable expanded version of the complete query, retrieved/set
|
||||
// from rcldb after the Xapian::setQuery() call
|
||||
string m_description;
|
||||
string m_reason;
|
||||
std::string m_description;
|
||||
std::string m_reason;
|
||||
bool m_haveWildCards;
|
||||
string m_stemlang;
|
||||
bool expandFileTypes(RclConfig *cfg, vector<string>& exptps);
|
||||
std::string m_stemlang;
|
||||
bool expandFileTypes(RclConfig *cfg, std::vector<std::string>& exptps);
|
||||
/* Copyconst and assignment private and forbidden */
|
||||
SearchData(const SearchData &) {}
|
||||
SearchData& operator=(const SearchData&) {return *this;};
|
||||
@ -178,39 +174,56 @@ public:
|
||||
SDCM_ANCHOREND=4};
|
||||
|
||||
SearchDataClause(SClType tp)
|
||||
: m_tp(tp), m_parentSearch(0), m_haveWildCards(0),
|
||||
m_modifiers(SDCM_NONE), m_weight(1.0)
|
||||
: m_tp(tp), m_parentSearch(0), m_haveWildCards(0),
|
||||
m_modifiers(SDCM_NONE), m_weight(1.0)
|
||||
{}
|
||||
virtual ~SearchDataClause() {}
|
||||
virtual bool toNativeQuery(Rcl::Db &db, void *, const string&) = 0;
|
||||
virtual bool toNativeQuery(Rcl::Db &db, void *, const std::string&) = 0;
|
||||
bool isFileName() const {return m_tp == SCLT_FILENAME ? true: false;}
|
||||
virtual string getReason() const {return m_reason;}
|
||||
virtual bool getTerms(vector<string>&, vector<vector<string> >&,
|
||||
vector<int>&) const = 0;
|
||||
virtual void getUTerms(vector<string>&) const = 0;
|
||||
virtual std::string getReason() const {return m_reason;}
|
||||
virtual void getTerms(HighlightData & hldata) const = 0;
|
||||
|
||||
SClType getTp() {return m_tp;}
|
||||
void setParent(SearchData *p) {m_parentSearch = p;}
|
||||
virtual void setModifiers(Modifier mod) {m_modifiers = mod;}
|
||||
virtual int getModifiers() {return m_modifiers;}
|
||||
virtual void addModifier(Modifier mod) {
|
||||
SClType getTp()
|
||||
{
|
||||
return m_tp;
|
||||
}
|
||||
void setParent(SearchData *p)
|
||||
{
|
||||
m_parentSearch = p;
|
||||
}
|
||||
virtual void setModifiers(Modifier mod)
|
||||
{
|
||||
m_modifiers = mod;
|
||||
}
|
||||
virtual int getModifiers()
|
||||
{
|
||||
return m_modifiers;
|
||||
}
|
||||
virtual void addModifier(Modifier mod)
|
||||
{
|
||||
int imod = getModifiers();
|
||||
imod |= mod;
|
||||
setModifiers(Modifier(imod));
|
||||
}
|
||||
virtual void setWeight(float w) {m_weight = w;}
|
||||
virtual void setWeight(float w)
|
||||
{
|
||||
m_weight = w;
|
||||
}
|
||||
friend class SearchData;
|
||||
|
||||
protected:
|
||||
string m_reason;
|
||||
std::string m_reason;
|
||||
SClType m_tp;
|
||||
SearchData *m_parentSearch;
|
||||
bool m_haveWildCards;
|
||||
Modifier m_modifiers;
|
||||
float m_weight;
|
||||
private:
|
||||
SearchDataClause(const SearchDataClause&) {}
|
||||
SearchDataClause& operator=(const SearchDataClause&) {
|
||||
SearchDataClause(const SearchDataClause&)
|
||||
{
|
||||
}
|
||||
SearchDataClause& operator=(const SearchDataClause&)
|
||||
{
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
@ -221,45 +234,37 @@ private:
|
||||
*/
|
||||
class SearchDataClauseSimple : public SearchDataClause {
|
||||
public:
|
||||
SearchDataClauseSimple(SClType tp, const string& txt,
|
||||
const string& fld = string())
|
||||
: SearchDataClause(tp), m_text(txt), m_field(fld), m_slack(0) {
|
||||
m_haveWildCards = (txt.find_first_of(cstr_minwilds) != string::npos);
|
||||
SearchDataClauseSimple(SClType tp, const std::string& txt,
|
||||
const std::string& fld = std::string())
|
||||
: SearchDataClause(tp), m_text(txt), m_field(fld)
|
||||
{
|
||||
m_haveWildCards =
|
||||
(txt.find_first_of(cstr_minwilds) != std::string::npos);
|
||||
}
|
||||
|
||||
virtual ~SearchDataClauseSimple() {}
|
||||
virtual ~SearchDataClauseSimple()
|
||||
{
|
||||
}
|
||||
|
||||
/** Translate to Xapian query */
|
||||
virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang);
|
||||
virtual bool toNativeQuery(Rcl::Db &, void *, const std::string& stemlang);
|
||||
|
||||
/** Retrieve query terms and term groups. This is used for highlighting */
|
||||
virtual bool getTerms(vector<string>& terms, /* Single terms */
|
||||
vector<vector<string> >& groups, /* Prox grps */
|
||||
vector<int>& gslks) const /* Prox slacks */
|
||||
virtual void getTerms(HighlightData& hldata) const
|
||||
{
|
||||
terms.insert(terms.end(), m_terms.begin(), m_terms.end());
|
||||
groups.insert(groups.end(), m_groups.begin(), m_groups.end());
|
||||
gslks.insert(gslks.end(), m_groups.size(), m_slack);
|
||||
return true;
|
||||
hldata.append(m_hldata);
|
||||
}
|
||||
virtual void getUTerms(vector<string>& terms) const
|
||||
virtual const std::string& gettext()
|
||||
{
|
||||
terms.insert(terms.end(), m_uterms.begin(), m_uterms.end());
|
||||
return m_text;
|
||||
}
|
||||
virtual const std::string& getfield()
|
||||
{
|
||||
return m_field;
|
||||
}
|
||||
virtual const string& gettext() {return m_text;}
|
||||
virtual const string& getfield() {return m_field;}
|
||||
protected:
|
||||
string m_text; // Raw user entry text.
|
||||
string m_field; // Field specification if any
|
||||
// Single terms and phrases resulting from breaking up m_text;
|
||||
// valid after toNativeQuery() call
|
||||
vector<string> m_terms;
|
||||
vector<vector<string> > m_groups;
|
||||
// User terms before expansion
|
||||
vector<string> m_uterms;
|
||||
// Declare m_slack here. Always 0, but allows getTerms to work for
|
||||
// SearchDataClauseDist
|
||||
int m_slack;
|
||||
std::string m_text; // Raw user entry text.
|
||||
std::string m_field; // Field specification if any
|
||||
HighlightData m_hldata;
|
||||
};
|
||||
|
||||
/**
|
||||
@ -272,29 +277,39 @@ protected:
|
||||
*/
|
||||
class SearchDataClauseFilename : public SearchDataClauseSimple {
|
||||
public:
|
||||
SearchDataClauseFilename(const string& txt)
|
||||
: SearchDataClauseSimple(SCLT_FILENAME, txt) {
|
||||
SearchDataClauseFilename(const std::string& txt)
|
||||
: SearchDataClauseSimple(SCLT_FILENAME, txt)
|
||||
{
|
||||
// File name searches don't count when looking for wild cards.
|
||||
m_haveWildCards = false;
|
||||
}
|
||||
virtual ~SearchDataClauseFilename() {}
|
||||
virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang);
|
||||
|
||||
virtual ~SearchDataClauseFilename()
|
||||
{
|
||||
}
|
||||
|
||||
virtual bool toNativeQuery(Rcl::Db &, void *, const std::string& stemlang);
|
||||
};
|
||||
|
||||
/**
|
||||
* A clause coming from a NEAR or PHRASE entry field. There is only one
|
||||
* string group, and a specified distance, which applies to it.
|
||||
* string group, and a specified distance, which applies to it.
|
||||
*/
|
||||
class SearchDataClauseDist : public SearchDataClauseSimple {
|
||||
public:
|
||||
SearchDataClauseDist(SClType tp, const string& txt, int slack,
|
||||
const string& fld = string())
|
||||
: SearchDataClauseSimple(tp, txt, fld) {m_slack = slack;}
|
||||
virtual ~SearchDataClauseDist() {}
|
||||
SearchDataClauseDist(SClType tp, const std::string& txt, int slack,
|
||||
const std::string& fld = std::string())
|
||||
: SearchDataClauseSimple(tp, txt, fld), m_slack(slack)
|
||||
{
|
||||
}
|
||||
|
||||
virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang);
|
||||
virtual ~SearchDataClauseDist()
|
||||
{
|
||||
}
|
||||
|
||||
// m_slack is declared in SearchDataClauseSimple
|
||||
virtual bool toNativeQuery(Rcl::Db &, void *, const std::string& stemlang);
|
||||
private:
|
||||
int m_slack;
|
||||
};
|
||||
|
||||
/** Subquery */
|
||||
@ -302,15 +317,28 @@ class SearchDataClauseSub : public SearchDataClause {
|
||||
public:
|
||||
// We take charge of the SearchData * and will delete it.
|
||||
SearchDataClauseSub(SClType tp, RefCntr<SearchData> sub)
|
||||
: SearchDataClause(tp), m_sub(sub) {}
|
||||
virtual ~SearchDataClauseSub() {}
|
||||
virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang);
|
||||
virtual bool getTerms(vector<string>&, vector<vector<string> >&,
|
||||
vector<int>&) const;
|
||||
virtual void getUTerms(vector<string>&) const;
|
||||
: SearchDataClause(tp), m_sub(sub)
|
||||
{
|
||||
}
|
||||
|
||||
virtual ~SearchDataClauseSub()
|
||||
{
|
||||
}
|
||||
|
||||
virtual bool toNativeQuery(Rcl::Db &db, void *p, const std::string&)
|
||||
{
|
||||
return m_sub->toNativeQuery(db, p);
|
||||
}
|
||||
|
||||
virtual void getTerms(HighlightData& hldata) const
|
||||
{
|
||||
m_sub.getconstptr()->getTerms(hldata);
|
||||
}
|
||||
|
||||
protected:
|
||||
RefCntr<SearchData> m_sub;
|
||||
};
|
||||
|
||||
} // Namespace Rcl
|
||||
|
||||
#endif /* _SEARCHDATA_H_INCLUDED_ */
|
||||
|
||||
54
src/utils/hldata.h
Normal file
54
src/utils/hldata.h
Normal file
@ -0,0 +1,54 @@
|
||||
#ifndef _hldata_h_included_
|
||||
#define _hldata_h_included_
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <set>
|
||||
|
||||
/**
 * Data about the user search terms and their expansions. This is used
 * mostly for highlighting result text and walking the matches.
 *
 * The groups, slacks and grpsugidx vectors are parallel: entry i of each
 * one describes the same processed term or group.
 */
struct HighlightData {
    /** The user terms, excluding those with wildcards.
     * This list is intended for orthographic suggestions but the terms are
     * unaccented lowercased anyway because they are compared to the dictionary
     * generated from the index term list (which is unaccented).
     */
    std::set<std::string> uterms;

    /** The original user terms-or-groups. This is for displaying the matched
     * terms or groups, ie in relation with highlighting or skipping to the
     * next match. These are raw, diacritics and case preserved.
     */
    std::vector<std::vector<std::string> > ugroups;

    /** Processed/expanded terms and groups. Used for looking for
     * regions to highlight. Terms are just groups with 1 entry. All
     * terms in there are unaccented, and the list may include values
     * expanded from the original terms by stem or wildcard expansion.
     */
    std::vector<std::vector<std::string> > groups;

    /** Group slacks. Parallel to groups */
    std::vector<int> slacks;

    /** Index into ugroups for each group. Parallel to groups. As a
     * user term or group may generate many processed/expanded terms
     * or groups, this is how we relate them
     */
    std::vector<unsigned int> grpsugidx;

    /** Empty all the containers. */
    void clear()
    {
        uterms.clear();
        ugroups.clear();
        groups.clear();
        slacks.clear();
        grpsugidx.clear();
    }

    /** Merge the data held by another instance into this one. */
    void append(const HighlightData&);

    /** Append a printable description of the data to out (debug). */
    void toString(std::string& out);
};
|
||||
|
||||
#endif /* _hldata_h_included_ */
|
||||
@ -36,6 +36,7 @@
|
||||
|
||||
#include "smallut.h"
|
||||
#include "utf8iter.h"
|
||||
#include "hldata.h"
|
||||
|
||||
#ifndef NO_NAMESPACES
|
||||
using namespace std;
|
||||
@ -1038,8 +1039,57 @@ void catstrerror(string *reason, const char *what, int _errno)
|
||||
#endif
|
||||
}
|
||||
|
||||
void HighlightData::toString(std::string& out)
|
||||
{
|
||||
out.append("\nUser terms (orthograph): ");
|
||||
for (std::set<std::string>::const_iterator it = uterms.begin();
|
||||
it != uterms.end(); it++) {
|
||||
out.append(" [").append(*it).append("]");
|
||||
}
|
||||
|
||||
#else
|
||||
out.append("\nGroups: ");
|
||||
char cbuf[200];
|
||||
sprintf(cbuf, "Groups size %d grpsugidx size %d ugroups size %d",
|
||||
int(groups.size()), int(grpsugidx.size()), int(ugroups.size()));
|
||||
out.append(cbuf);
|
||||
|
||||
unsigned int ugidx = (unsigned int)-1;
|
||||
for (unsigned int i = 0; i < groups.size(); i++) {
|
||||
if (ugidx != grpsugidx[i]) {
|
||||
ugidx = grpsugidx[i];
|
||||
out.append("\n(");
|
||||
for (unsigned int j = 0; j < ugroups[ugidx].size(); j++) {
|
||||
out.append("[").append(ugroups[ugidx][j]).append("] ");
|
||||
}
|
||||
out.append(") ->");
|
||||
}
|
||||
out.append(" {");
|
||||
for (unsigned int j = 0; j < groups[i].size(); j++) {
|
||||
out.append("[").append(groups[i][j]).append("]");
|
||||
}
|
||||
sprintf(cbuf, "%d", slacks[i]);
|
||||
out.append("}").append(cbuf);
|
||||
}
|
||||
out.append("\n");
|
||||
fprintf(stderr, "toString ok\n");
|
||||
}
|
||||
|
||||
void HighlightData::append(const HighlightData& hl)
|
||||
{
|
||||
uterms.insert(hl.uterms.begin(), hl.uterms.end());
|
||||
|
||||
size_t ugsz0 = ugroups.size();
|
||||
ugroups.insert(ugroups.end(), hl.ugroups.begin(), hl.ugroups.end());
|
||||
|
||||
groups.insert(groups.end(), hl.groups.begin(), hl.groups.end());
|
||||
slacks.insert(slacks.end(), hl.slacks.begin(), hl.slacks.end());
|
||||
for (std::vector<unsigned int>::const_iterator it = hl.grpsugidx.begin();
|
||||
it != hl.grpsugidx.end(); it++) {
|
||||
grpsugidx.push_back(*it + ugsz0);
|
||||
}
|
||||
}
|
||||
|
||||
#else // TEST_SMALLUT
|
||||
|
||||
#include <string>
|
||||
using namespace std;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user