Implement anchored searches: terms to be found at a maximum distance from the start or end of the text
commit ee0d602ab3
parent 5a6534113b
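With this change, a quoted phrase in the query language can be anchored to the start of a field with a leading '^' and to the end with a trailing '$', and the 'o<N>' modifier after the closing quote sets the phrase slack (maximum number of intervening terms), as in '"^anchortermeunpeuplusloin"o30' from tests/anchor/anchor.sh below. On the indexing side, each field is bracketed with the new marker terms XXST and XXND so that anchored phrase queries can match against the field boundaries.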
@@ -20,9 +20,10 @@
 #include <string.h>
 #include <regex.h>
 
+#include "smallut.h"
 #include "wasastringtoquery.h"
 
-// #define DEB_WASASTRINGTOQ 1
+#undef DEB_WASASTRINGTOQ
 #ifdef DEB_WASASTRINGTOQ
 #define DPRINT(X) fprintf X
 #define DUMPQ(Q) {string D;Q->describe(D);fprintf(stderr, "%s\n", D.c_str());}
@@ -89,13 +90,18 @@ void WasaQuery::describe(string &desc) const
         if (m_modifiers & WQM_DIACSENS) desc += "DIACSENS|";
         if (m_modifiers & WQM_FUZZY) desc += "FUZZY|";
         if (m_modifiers & WQM_NOSTEM) desc += "NOSTEM|";
-        if (m_modifiers & WQM_PHRASESLACK) desc += "PHRASESLACK|";
+        if (m_modifiers & WQM_PHRASESLACK) {
+            char buf[100];
+            sprintf(buf, "%d", m_slack);
+            desc += "PHRASESLACK(" + string(buf) + string(")|");
+        }
         if (m_modifiers & WQM_PROX) desc += "PROX|";
         if (m_modifiers & WQM_REGEX) desc += "REGEX|";
         if (m_modifiers & WQM_SLOPPY) desc += "SLOPPY|";
         if (m_modifiers & WQM_WORDS) desc += "WORDS|";
+
         if (desc.length() > 0 && desc[desc.length()-1] == '|')
-            desc = desc.substr(0, desc.length()-1);
+            desc.erase(desc.length()-1);
     }
     desc += " ";
 }
@@ -224,7 +230,11 @@ StringToWasaQuery::~StringToWasaQuery()
 WasaQuery *
 StringToWasaQuery::stringToQuery(const string& str, string& reason)
 {
-    return internal ? internal->stringToQuery(str, reason) : 0;
+    if (internal == 0)
+        return 0;
+    WasaQuery *wq = internal->stringToQuery(str, reason);
+    DUMPQ(wq);
+    return wq;
 }
 
 WasaQuery *
@@ -316,6 +326,7 @@ StringToWasaQuery::Internal::stringToQuery(const string& str, string& reason)
         }
 
         // Check for quoted or unquoted value
+        unsigned int mods = 0;
         if (checkSubMatch(SMI_QUOTED, match, reason)) {
             nclause->m_value = match;
         } else if (checkSubMatch(SMI_TERM, match, reason)) {
@@ -332,7 +343,6 @@ StringToWasaQuery::Internal::stringToQuery(const string& str, string& reason)
 
         if (checkSubMatch(SMI_MODIF, match, reason)) {
             DPRINT((stderr, "Got modifiers: [%s]\n", match));
-            unsigned int mods = 0;
             for (unsigned int i = 0; i < strlen(match); i++) {
                 switch (match[i]) {
                 case 'b':
@@ -350,7 +360,19 @@ StringToWasaQuery::Internal::stringToQuery(const string& str, string& reason)
                 case 'f': mods |= WasaQuery::WQM_FUZZY; break;
                 case 'l': mods |= WasaQuery::WQM_NOSTEM; break;
                 case 'L': break;
-                case 'o': mods |= WasaQuery::WQM_PHRASESLACK; break;
+                case 'o':
+                    mods |= WasaQuery::WQM_PHRASESLACK;
+                    // Default slack if specified only by 'o' is 10.
+                    nclause->m_slack = 10;
+                    if (i < strlen(match) - 1) {
+                        char *endptr;
+                        int slack = strtol(match+i+1, &endptr, 10);
+                        if (endptr != match+i+1) {
+                            i += endptr - (match+i+1);
+                            nclause->m_slack = slack;
+                        }
+                    }
+                    break;
                 case 'p': mods |= WasaQuery::WQM_PROX; break;
                 case 'r': mods |= WasaQuery::WQM_REGEX; break;
                 case 's': mods |= WasaQuery::WQM_SLOPPY; break;
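As a worked example of the 'o' case above: for a query element such as "un deux trois"o8 (hypothetical input), WQM_PHRASESLACK is set and strtol() parses the digits following 'o', so nclause->m_slack becomes 8 and the loop index is advanced past them; a bare 'o' with no digits keeps the default slack of 10.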
@@ -370,8 +392,8 @@ StringToWasaQuery::Internal::stringToQuery(const string& str, string& reason)
                     }
                 }
             }
-            nclause->m_modifiers = WasaQuery::Modifier(mods);
         }
+        nclause->m_modifiers = WasaQuery::Modifier(mods);
 
         // Field indicator ?
         if (checkSubMatch(SMI_FIELD, match, reason)) {
@@ -63,7 +63,7 @@ public:
     typedef vector<WasaQuery*> subqlist_t;
 
     WasaQuery()
-        : m_op(OP_NULL), m_modifiers(0), m_weight(1.0)
+        : m_op(OP_NULL), m_modifiers(0), m_slack(0), m_weight(1.0)
     {}
 
     ~WasaQuery();
@@ -86,6 +86,7 @@ public:
     vector<WasaQuery*> m_subs;
 
     unsigned int m_modifiers;
+    int m_slack;
     float m_weight;
 };
 
@@ -134,8 +134,9 @@ static Rcl::SearchData *wasaQueryToRcl(RclConfig *config, WasaQuery *wasa,
             continue;
 
         case WasaQuery::OP_LEAF: {
-            LOGDEB2(("wasaQueryToRcl: leaf clause [%s]:[%s]\n",
-                     (*it)->m_fieldspec.c_str(), (*it)->m_value.c_str()));
+            LOGDEB(("wasaQueryToRcl: leaf clause [%s]:[%s] slack %d\n",
+                    (*it)->m_fieldspec.c_str(), (*it)->m_value.c_str(),
+                    (*it)->m_slack));
 
             // Change terms found in the "autosuffs" list into "ext"
             // field queries
@@ -152,15 +153,17 @@ static Rcl::SearchData *wasaQueryToRcl(RclConfig *config, WasaQuery *wasa,
 
             unsigned int mods = (unsigned int)(*it)->m_modifiers;
 
-            if (TextSplit::hasVisibleWhite((*it)->m_value)) {
-                int slack = (mods & WasaQuery::WQM_PHRASESLACK) ? 10 : 0;
+            // I'm not sure I understand the phrase/near detection
+            // thereafter anymore, maybe it would be better to have an
+            // explicit flag. Mods can only be set after a double
+            // quote.
+            if (TextSplit::hasVisibleWhite((*it)->m_value) || mods) {
                 Rcl::SClType tp = Rcl::SCLT_PHRASE;
                 if (mods & WasaQuery::WQM_PROX) {
                     tp = Rcl::SCLT_NEAR;
-                    slack = 10;
                 }
                 nclause = new Rcl::SearchDataClauseDist(tp, (*it)->m_value,
-                                                        slack,
+                                                        (*it)->m_slack,
                                                         (*it)->m_fieldspec);
             } else {
                 nclause = new Rcl::SearchDataClauseSimple(Rcl::SCLT_AND,
@@ -173,7 +176,7 @@ static Rcl::SearchData *wasaQueryToRcl(RclConfig *config, WasaQuery *wasa,
                 return 0;
             }
             if (mods & WasaQuery::WQM_NOSTEM) {
-                nclause->setModifiers(Rcl::SearchDataClause::SDCM_NOSTEMMING);
+                nclause->addModifier(Rcl::SearchDataClause::SDCM_NOSTEMMING);
             }
             if ((*it)->m_weight != 1.0)
                 nclause->setWeight((*it)->m_weight);
@@ -73,6 +73,9 @@ namespace Rcl {
 #endif
 
 const string pathelt_prefix = "XP";
+const string start_of_field_term = "XXST";
+const string end_of_field_term = "XXND";
+
 // This is used as a marker inside the abstract frag lists, but
 // normally doesn't remain in final output (which is built with a
 // custom sep. by our caller).
@@ -831,6 +834,8 @@ class TextSplitDb : public TextSplit {
                 Xapian::Document &d, StopList &_stops)
         : db(idb), doc(d), basepos(1), curpos(0), stops(_stops), wdfinc(1)
     {}
+    // Reimplement text_to_words to add start and end special terms
+    virtual bool text_to_words(const string &in);
     bool takeword(const std::string &term, int pos, int, int);
     void setprefix(const string& pref) {prefix = pref;}
     void setwdfinc(int i) {wdfinc = i;}
@@ -843,6 +848,38 @@ private:
     int wdfinc;
 };
 
+
+bool TextSplitDb::text_to_words(const string &in)
+{
+    LOGDEB(("TextSplitDb::text_to_words\n"));
+    string ermsg;
+    try {
+        // Index the possibly prefixed start term.
+        doc.add_posting(prefix + start_of_field_term, basepos, wdfinc);
+        ++basepos;
+    } XCATCHERROR(ermsg);
+    if (!ermsg.empty()) {
+        LOGERR(("Db: xapian add_posting error %s\n", ermsg.c_str()));
+        return false;
+    }
+
+    if (!TextSplit::text_to_words(in)) {
+        LOGDEB(("TextSplitDb: TextSplit::text_to_words failed\n"));
+        return false;
+    }
+
+    try {
+        // Index the possibly prefixed end term.
+        doc.add_posting(prefix + end_of_field_term, basepos+curpos+1, wdfinc);
+        ++basepos;
+    } XCATCHERROR(ermsg);
+    if (!ermsg.empty()) {
+        LOGERR(("Db: xapian add_posting error %s\n", ermsg.c_str()));
+        return false;
+    }
+    return true;
+}
+
 // Get one term from the doc, remove accents and lowercase, then add posting
 bool TextSplitDb::takeword(const std::string &_term, int pos, int, int)
 {
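The TextSplitDb::text_to_words() override above brackets each indexed field with the new marker terms. The following standalone sketch is not code from this commit; it only illustrates, with the plain Xapian API, the mechanism that the indexing and query sides rely on (the database path and sample words are made up): a start marker is posted before the first word, an end marker after the last, and an anchored term is then matched by an OP_PHRASE query that includes the marker with a window of two.

// Standalone sketch, not part of the commit: the Xapian mechanism behind
// anchored searches. Database path and sample terms are illustrative only.
#include <xapian.h>
#include <iostream>
#include <string>
#include <vector>

int main()
{
    Xapian::WritableDatabase db("/tmp/anchorsketch", Xapian::DB_CREATE_OR_OPEN);

    // Index one document, bracketing the field with the marker terms.
    Xapian::Document doc;
    Xapian::termpos pos = 1;
    doc.add_posting("XXST", pos++);   // start-of-field marker
    doc.add_posting("alpha", pos++);
    doc.add_posting("beta", pos++);
    doc.add_posting("gamma", pos++);
    doc.add_posting("XXND", pos++);   // end-of-field marker
    db.replace_document(1, doc);

    // An anchored search like "^alpha" becomes a phrase (XXST alpha) with a
    // window of 2: it only matches if "alpha" is the first word of the field.
    std::vector<std::string> terms;
    terms.push_back("XXST");
    terms.push_back("alpha");
    Xapian::Query query(Xapian::Query::OP_PHRASE, terms.begin(), terms.end(), 2);

    Xapian::Enquire enquire(db);
    enquire.set_query(query);
    Xapian::MSet matches = enquire.get_mset(0, 10);
    std::cout << "matches: " << matches.size() << std::endl;
    return 0;
}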
@@ -287,6 +287,8 @@ private:
 string version_string();
 
 extern const string pathelt_prefix;
+extern const string start_of_field_term;
+extern const string end_of_field_term;
 #ifndef NO_NAMESPACES
 }
 #endif // NO_NAMESPACES
@@ -510,13 +510,13 @@ public:
 
 private:
     void expandTerm(bool dont, const string& term, list<string>& exp,
-                    string& sterm, string *prefix = 0);
+                    string& sterm, const string& prefix);
     // After splitting entry on whitespace: process non-phrase element
     void processSimpleSpan(const string& span, bool nostemexp, list<Xapian::Query> &pqueries);
     // Process phrase/near element
    void processPhraseOrNear(TextSplitQ *splitData,
                             list<Xapian::Query> &pqueries,
-                            bool useNear, int slack);
+                            bool useNear, int slack, int mods);
 
    Db& m_db;
    const string& m_field;
@@ -554,7 +554,7 @@ static void listVector(const string& what, const vector<string>&l)
 void StringToXapianQ::expandTerm(bool nostemexp,
                                  const string& term,
                                  list<string>& exp,
-                                 string &sterm, string *prefix)
+                                 string &sterm, const string& prefix)
 {
     LOGDEB2(("expandTerm: field [%s] term [%s] stemlang [%s] nostemexp %d\n",
              m_field.c_str(), term.c_str(), m_stemlang.c_str(), nostemexp));
@@ -571,29 +571,20 @@ void StringToXapianQ::expandTerm(bool nostemexp,
         nostemexp = true;
 
     if (nostemexp && !haswild) {
-        // Neither stemming nor wildcard expansion: just the word
-        string pfx;
-        const FieldTraits *ftp;
-        if (!m_field.empty() && m_db.fieldToTraits(m_field, &ftp)) {
-            pfx = ftp->pfx;
-        }
-
         sterm = term;
         m_uterms.push_back(sterm);
-        exp.push_front(pfx+term);
+        exp.push_front(prefix + term);
         exp.resize(1);
-        if (prefix)
-            *prefix = pfx;
     } else {
         TermMatchResult res;
         if (haswild) {
             m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, term, res, -1,
-                           m_field, prefix);
+                           m_field);
         } else {
             sterm = term;
             m_uterms.push_back(sterm);
-            m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, res, -1, m_field,
-                           prefix);
+            m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, res, -1,
+                           m_field);
         }
         for (list<TermMatchEntry>::const_iterator it = res.entries.begin();
              it != res.entries.end(); it++) {
@@ -642,8 +633,15 @@ void StringToXapianQ::processSimpleSpan(const string& span, bool nostemexp,
 {
     list<string> exp;
     string sterm; // dumb version of user term
+
     string prefix;
-    expandTerm(nostemexp, span, exp, sterm, &prefix);
+    const FieldTraits *ftp;
+    if (!m_field.empty() && m_db.fieldToTraits(m_field, &ftp)) {
+        prefix = ftp->pfx;
+    }
+
+    expandTerm(nostemexp, span, exp, sterm, prefix);
+
     // m_terms is used for highlighting, we don't want prefixes in there.
     for (list<string>::const_iterator it = exp.begin();
          it != exp.end(); it++) {
@@ -658,10 +656,9 @@ void StringToXapianQ::processSimpleSpan(const string& span, bool nostemexp,
     // less wqf). This does not happen if there are wildcards anywhere
     // in the search.
     if (m_doBoostUserTerms && !sterm.empty()) {
-        xq = Xapian::Query(Xapian::Query::OP_OR,
-                           xq,
-                           Xapian::Query(prefix+sterm,
-                                         original_term_wqf_booster));
+        xq = Xapian::Query(Xapian::Query::OP_OR, xq,
+                           Xapian::Query(prefix+sterm,
+                                         original_term_wqf_booster));
     }
     pqueries.push_back(xq);
 }
@@ -672,7 +669,7 @@ void StringToXapianQ::processSimpleSpan(const string& span, bool nostemexp,
 // don't do stemming for PHRASE though)
 void StringToXapianQ::processPhraseOrNear(TextSplitQ *splitData,
                                           list<Xapian::Query> &pqueries,
-                                          bool useNear, int slack)
+                                          bool useNear, int slack, int mods)
 {
     Xapian::Query::op op = useNear ? Xapian::Query::OP_NEAR :
         Xapian::Query::OP_PHRASE;
@@ -680,6 +677,17 @@ void StringToXapianQ::processPhraseOrNear(TextSplitQ *splitData,
     bool hadmultiple = false;
     vector<vector<string> >groups;
 
+    string prefix;
+    const FieldTraits *ftp;
+    if (!m_field.empty() && m_db.fieldToTraits(m_field, &ftp)) {
+        prefix = ftp->pfx;
+    }
+
+    if (mods & Rcl::SearchDataClause::SDCM_ANCHORSTART) {
+        orqueries.push_back(Xapian::Query(prefix + start_of_field_term));
+        slack++;
+    }
+
     // Go through the list and perform stem/wildcard expansion for each element
     vector<bool>::iterator nxit = splitData->nostemexps.begin();
     for (vector<string>::iterator it = splitData->terms.begin();
@@ -691,8 +699,7 @@ void StringToXapianQ::processPhraseOrNear(TextSplitQ *splitData,
 
         string sterm;
         list<string>exp;
-        string prefix;
-        expandTerm(nostemexp, *it, exp, sterm, &prefix);
+        expandTerm(nostemexp, *it, exp, sterm, prefix);
 
         // groups is used for highlighting, we don't want prefixes in there.
         vector<string> noprefs;
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (mods & Rcl::SearchDataClause::SDCM_ANCHOREND) {
|
||||||
|
orqueries.push_back(Xapian::Query(prefix + end_of_field_term));
|
||||||
|
slack++;
|
||||||
|
}
|
||||||
|
|
||||||
// Generate an appropriate PHRASE/NEAR query with adjusted slack
|
// Generate an appropriate PHRASE/NEAR query with adjusted slack
|
||||||
// For phrases, give a relevance boost like we do for original terms
|
// For phrases, give a relevance boost like we do for original terms
|
||||||
LOGDEB2(("PHRASE/NEAR: alltermcount %d lastpos %d\n",
|
LOGDEB2(("PHRASE/NEAR: alltermcount %d lastpos %d\n",
|
||||||
@ -727,6 +739,23 @@ void StringToXapianQ::processPhraseOrNear(TextSplitQ *splitData,
|
|||||||
m_groups.insert(m_groups.end(), allcombs.begin(), allcombs.end());
|
m_groups.insert(m_groups.end(), allcombs.begin(), allcombs.end());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Trim string beginning with ^ or ending with $ and convert to flags
|
||||||
|
static int stringToMods(string& s)
|
||||||
|
{
|
||||||
|
int mods = 0;
|
||||||
|
// Check for an anchored search
|
||||||
|
trimstring(s);
|
||||||
|
if (s.length() > 0 && s[0] == '^') {
|
||||||
|
mods |= Rcl::SearchDataClause::SDCM_ANCHORSTART;
|
||||||
|
s.erase(0, 1);
|
||||||
|
}
|
||||||
|
if (s.length() > 0 && s[s.length()-1] == '$') {
|
||||||
|
mods |= Rcl::SearchDataClause::SDCM_ANCHOREND;
|
||||||
|
s.erase(s.length()-1);
|
||||||
|
}
|
||||||
|
return mods;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Turn user entry string (NOT query language) into a list of xapian queries.
|
* Turn user entry string (NOT query language) into a list of xapian queries.
|
||||||
* We just separate words and phrases, and do wildcard and stem expansion,
|
* We just separate words and phrases, and do wildcard and stem expansion,
|
||||||
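As an example (hypothetical input), stringToMods() applied to ^some words$ strips both anchors, leaves "some words" in s and returns SDCM_ANCHORSTART|SDCM_ANCHOREND; a string without a leading '^' or trailing '$' comes back unchanged with a return value of 0.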
@@ -772,7 +801,8 @@ bool StringToXapianQ::processUserString(const string &iq,
         for (list<string>::iterator it = phrases.begin();
              it != phrases.end(); it++) {
             LOGDEB0(("strToXapianQ: phrase/word: [%s]\n", it->c_str()));
+            int mods = stringToMods(*it);
+            int terminc = mods != 0 ? 1 : 0;
             // If there are multiple spans in this element, including
             // at least one composite, we have to increase the slack
             // else a phrase query including a span would fail.
@@ -803,7 +833,7 @@ bool StringToXapianQ::processUserString(const string &iq,
             }
 
             LOGDEB0(("strToXapianQ: termcount: %d\n", splitter->terms.size()));
-            switch (splitter->terms.size()) {
+            switch (splitter->terms.size() + terminc) {
             case 0:
                 continue;// ??
             case 1:
@@ -811,7 +841,7 @@ bool StringToXapianQ::processUserString(const string &iq,
                                   splitter->nostemexps.front(), pqueries);
                 break;
             default:
-                processPhraseOrNear(splitter, pqueries, useNear, slack);
+                processPhraseOrNear(splitter, pqueries, useNear, slack, mods);
             }
         }
     } catch (const Xapian::Error &e) {
@@ -165,7 +165,8 @@ private:
 
 class SearchDataClause {
 public:
-    enum Modifier {SDCM_NONE=0, SDCM_NOSTEMMING=1};
+    enum Modifier {SDCM_NONE=0, SDCM_NOSTEMMING=1, SDCM_ANCHORSTART=2,
+                   SDCM_ANCHOREND=4};
 
     SearchDataClause(SClType tp)
         : m_tp(tp), m_parentSearch(0), m_haveWildCards(0),
@@ -182,6 +183,12 @@ public:
     SClType getTp() {return m_tp;}
     void setParent(SearchData *p) {m_parentSearch = p;}
     virtual void setModifiers(Modifier mod) {m_modifiers = mod;}
+    virtual int getModifiers() {return m_modifiers;}
+    virtual void addModifier(Modifier mod) {
+        int imod = getModifiers();
+        imod |= mod;
+        setModifiers(Modifier(imod));
+    }
     virtual void setWeight(float w) {m_weight = w;}
     friend class SearchData;
 
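The new getModifiers()/addModifier() pair lets callers OR an extra flag into the existing modifier set instead of replacing it, which is presumably why wasaQueryToRcl() now calls addModifier(SDCM_NOSTEMMING): a clause can then carry the no-stemming flag together with the new anchor flags.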
tests/anchor/anchor.sh  (new executable file, 31 lines)
@@ -0,0 +1,31 @@
+#!/bin/sh
+
+topdir=`dirname $0`/..
+. $topdir/shared.sh
+
+initvariables $0
+(
+for q in \
+    '"^anchortermeaudebut"' \
+    '"^ anchortermeunpeuplusloin"' \
+    '"^anchortermeunpeuplusloin"o30' \
+    '"^ anchortermeunpeuplusloin"o30' \
+    '"anchortermenullepart"' \
+    '"^anchortermenullepart"' \
+    '"anchortermenullepart $"' \
+    '"anchortermeunpeumoinsloin$"o30' \
+    '"anchortermeunpeumoinsloin$"' \
+    '"anchortermealafin$"' \
+    'title:"^anchortitlebegin"' \
+    'title:"^anchortitleend"' \
+    'title:"anchortitleend$"' \
+    ; do
+    echo $q
+    recollq -q $q
+done
+
+) 2> $mystderr | egrep -v '^Recoll query: ' > $mystdout
+
+diff -w ${myname}.txt $mystdout > $mydiffs 2>&1
+
+checkresult
tests/anchor/anchor.txt  (new file, 34 lines)
@@ -0,0 +1,34 @@
+"^anchortermeaudebut"
+1 results
+text/html [file:///home/dockes/projets/fulltext/testrecoll/anchor/tryanchor.html] [anchortitlebegin anchortitlemiddle anchortitleend] 1463 bytes
+"^ anchortermeunpeuplusloin"
+0 results
+"^anchortermeunpeuplusloin"o30
+1 results
+text/html [file:///home/dockes/projets/fulltext/testrecoll/anchor/tryanchor.html] [anchortitlebegin anchortitlemiddle anchortitleend] 1463 bytes
+"^ anchortermeunpeuplusloin"o30
+1 results
+text/html [file:///home/dockes/projets/fulltext/testrecoll/anchor/tryanchor.html] [anchortitlebegin anchortitlemiddle anchortitleend] 1463 bytes
+"anchortermenullepart"
+1 results
+text/html [file:///home/dockes/projets/fulltext/testrecoll/anchor/tryanchor.html] [anchortitlebegin anchortitlemiddle anchortitleend] 1463 bytes
+"^anchortermenullepart"
+0 results
+"anchortermenullepart $"
+0 results
+"anchortermeunpeumoinsloin$"o30
+1 results
+text/html [file:///home/dockes/projets/fulltext/testrecoll/anchor/tryanchor.html] [anchortitlebegin anchortitlemiddle anchortitleend] 1463 bytes
+"anchortermeunpeumoinsloin$"
+0 results
+"anchortermealafin$"
+1 results
+text/html [file:///home/dockes/projets/fulltext/testrecoll/anchor/tryanchor.html] [anchortitlebegin anchortitlemiddle anchortitleend] 1463 bytes
+title:"^anchortitlebegin"
+1 results
+text/html [file:///home/dockes/projets/fulltext/testrecoll/anchor/tryanchor.html] [anchortitlebegin anchortitlemiddle anchortitleend] 1463 bytes
+title:"^anchortitleend"
+0 results
+title:"anchortitleend$"
+1 results
+text/html [file:///home/dockes/projets/fulltext/testrecoll/anchor/tryanchor.html] [anchortitlebegin anchortitlemiddle anchortitleend] 1463 bytes