make searchdata a more flexible struct

dockes 2006-11-13 08:50:07 +00:00
parent 1d7f103fe7
commit cdbf026738
6 changed files with 696 additions and 522 deletions
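This commit replaces the flat AdvSearchData holder with a SearchData object carrying a list of typed clauses, and Db::setQuery() now takes a refcounted SearchData. Below is a minimal caller-side sketch of the new interface, assuming the headers changed in this commit and Recoll's RefCntr smart pointer; the clause texts, option values and function name are illustrative, not code from the commit:

#include "rcldb.h"
#include "searchdata.h"
#include "refcntr.h"

using namespace Rcl;

// Hypothetical caller: build an AND-rooted clause list and hand it to the db.
// addClause() takes ownership of the clause objects.
bool runSearch(Db& db)
{
    RefCntr<SearchData> sd(new SearchData(SCLT_AND));
    // Words which must all be present
    sd->addClause(new SearchDataClauseSimple(SCLT_AND, "xapian index"));
    // Words which must not appear (only allowed in an AND-rooted list)
    sd->addClause(new SearchDataClauseSimple(SCLT_EXCL, "obsolete"));
    // setQuery() translates the clause list through toNativeQuery()
    return db.setQuery(sd, Db::QO_STEM, "english");
}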

View File

@ -8,8 +8,8 @@ LIBS = librcl.a
all: $(LIBS)
OBJS = conftree.o csguess.o debuglog.o execmd.o idfile.o md5.o wipedir.o fstreewalk.o mh_html.o mh_mail.o mh_exec.o mh_text.o htmlparse.o indexer.o internfile.o mimehandler.o mimeparse.o mimetype.o myhtmlparse.o pathhash.o pathut.o rclconfig.o rcldb.o rclinit.o stemdb.o base64.o readfile.o smallut.o textsplit.o transcode.o unacpp.o history.o docseq.o sortseq.o copyfile.o rclaspell.o
DEPS = conftree.dep.stamp csguess.dep.stamp debuglog.dep.stamp execmd.dep.stamp idfile.dep.stamp md5.dep.stamp wipedir.dep.stamp fstreewalk.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp mh_exec.dep.stamp mh_text.dep.stamp htmlparse.dep.stamp indexer.dep.stamp internfile.dep.stamp mimehandler.dep.stamp mimeparse.dep.stamp mimetype.dep.stamp myhtmlparse.dep.stamp pathhash.dep.stamp pathut.dep.stamp rclconfig.dep.stamp rcldb.dep.stamp rclinit.dep.stamp stemdb.dep.stamp base64.dep.stamp readfile.dep.stamp smallut.dep.stamp textsplit.dep.stamp transcode.dep.stamp unacpp.dep.stamp history.dep.stamp docseq.dep.stamp sortseq.dep.stamp copyfile.dep.stamp rclaspell.dep.stamp
OBJS = conftree.o csguess.o debuglog.o execmd.o idfile.o md5.o wipedir.o fstreewalk.o mh_html.o mh_mail.o searchdata.o mh_exec.o mh_text.o htmlparse.o indexer.o internfile.o mimehandler.o mimeparse.o mimetype.o myhtmlparse.o pathhash.o pathut.o rclconfig.o rcldb.o rclinit.o stemdb.o base64.o readfile.o smallut.o textsplit.o transcode.o unacpp.o history.o docseq.o sortseq.o copyfile.o rclaspell.o
DEPS = conftree.dep.stamp csguess.dep.stamp debuglog.dep.stamp execmd.dep.stamp idfile.dep.stamp md5.dep.stamp wipedir.dep.stamp fstreewalk.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp searchdata.dep.stamp mh_exec.dep.stamp mh_text.dep.stamp htmlparse.dep.stamp indexer.dep.stamp internfile.dep.stamp mimehandler.dep.stamp mimeparse.dep.stamp mimetype.dep.stamp myhtmlparse.dep.stamp pathhash.dep.stamp pathut.dep.stamp rclconfig.dep.stamp rcldb.dep.stamp rclinit.dep.stamp stemdb.dep.stamp base64.dep.stamp readfile.dep.stamp smallut.dep.stamp textsplit.dep.stamp transcode.dep.stamp unacpp.dep.stamp history.dep.stamp docseq.dep.stamp sortseq.dep.stamp copyfile.dep.stamp rclaspell.dep.stamp
librcl.a : $(DEPS) $(OBJS) unac.o
ar ru librcl.a $(OBJS) unac.o
@ -37,6 +37,8 @@ mh_html.o : ../common/mh_html.cpp
$(CXX) $(ALL_CXXFLAGS) -c ../common/mh_html.cpp
mh_mail.o : ../common/mh_mail.cpp
$(CXX) $(ALL_CXXFLAGS) -c ../common/mh_mail.cpp
searchdata.o : ../common/searchdata.cpp
$(CXX) $(ALL_CXXFLAGS) -c ../common/searchdata.cpp
mh_exec.o : ../common/mh_exec.cpp
$(CXX) $(ALL_CXXFLAGS) -c ../common/mh_exec.cpp
mh_text.o : ../common/mh_text.cpp
@ -125,6 +127,9 @@ mh_html.dep.stamp : ../common/mh_html.cpp
mh_mail.dep.stamp : ../common/mh_mail.cpp
$(CXX) -M $(ALL_CXXFLAGS) ../common/mh_mail.cpp > mh_mail.dep
touch mh_mail.dep.stamp
searchdata.dep.stamp : ../common/searchdata.cpp
$(CXX) -M $(ALL_CXXFLAGS) ../common/searchdata.cpp > searchdata.dep
touch searchdata.dep.stamp
mh_exec.dep.stamp : ../common/mh_exec.cpp
$(CXX) -M $(ALL_CXXFLAGS) ../common/mh_exec.cpp > mh_exec.dep
touch mh_exec.dep.stamp
@ -213,6 +218,7 @@ include wipedir.dep
include fstreewalk.dep
include mh_html.dep
include mh_mail.dep
include searchdata.dep
include mh_exec.dep
include mh_text.dep
include htmlparse.dep

View File

@ -8,6 +8,7 @@ SRCS="${depth}/utils/conftree.cpp ${depth}/index/csguess.cpp \
${depth}/utils/idfile.cpp ${depth}/utils/md5.cpp \
${depth}/utils/wipedir.cpp ${depth}/utils/fstreewalk.cpp \
${depth}/common/mh_html.cpp ${depth}/common/mh_mail.cpp \
${depth}/common/searchdata.cpp \
${depth}/common/mh_exec.cpp ${depth}/common/mh_text.cpp \
${depth}/common/htmlparse.cpp ${depth}/index/indexer.cpp \
${depth}/common/internfile.cpp ${depth}/common/mimehandler.cpp \

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.90 2006-11-12 08:35:11 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.91 2006-11-13 08:49:44 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
@ -174,6 +174,229 @@ bool Native::subDocs(const string &hash, vector<Xapian::docid>& docids)
return false;
}
bool Native::dbDataToRclDoc(std::string &data, Doc &doc,
int qopts,
Xapian::docid docid, const list<string>& terms)
{
LOGDEB1(("Db::dbDataToRclDoc: opts %x data: %s\n", qopts, data.c_str()));
ConfSimple parms(&data);
if (!parms.ok())
return false;
parms.get(string("url"), doc.url);
parms.get(string("mtype"), doc.mimetype);
parms.get(string("fmtime"), doc.fmtime);
parms.get(string("dmtime"), doc.dmtime);
parms.get(string("origcharset"), doc.origcharset);
parms.get(string("caption"), doc.title);
parms.get(string("keywords"), doc.keywords);
parms.get(string("abstract"), doc.abstract);
// Possibly remove synthetic abstract indicator (if it's there, we
// used to index the beginning of the text as abstract).
bool syntabs = false;
if (doc.abstract.find(rclSyntAbs) == 0) {
doc.abstract = doc.abstract.substr(rclSyntAbs.length());
syntabs = true;
}
// If the option is set and the abstract is synthetic or empty, build
// abstract from position data.
if ((qopts & Db::QO_BUILD_ABSTRACT) && !terms.empty()) {
LOGDEB(("dbDataToRclDoc:: building abstract from position data\n"));
if (doc.abstract.empty() || syntabs ||
(qopts & Db::QO_REPLACE_ABSTRACT))
doc.abstract = makeAbstract(docid, terms);
}
parms.get(string("ipath"), doc.ipath);
parms.get(string("fbytes"), doc.fbytes);
parms.get(string("dbytes"), doc.dbytes);
doc.xdocid = docid;
return true;
}
// We build a possibly full size but sparsely populated (only around
// the search term occurrences) reconstruction of the document. It
// would be possible to compress the array by keeping only the
// chunks around the terms, but this would seriously complicate the
// data structure.
string Native::makeAbstract(Xapian::docid docid, const list<string>& terms)
{
LOGDEB(("Native::makeAbstract: maxlen %d wWidth %d\n",
m_db->m_synthAbsLen, m_db->m_synthAbsWordCtxLen));
Chrono chron;
// For each of the query terms, query xapian for its positions
// list in the document. For each position entry, remember it in qtermposs
// and insert it and its neighbours in the set of 'interesting' positions
// The terms 'array' that we partially populate with the document
// terms, at their positions around the search terms positions:
map<unsigned int, string> sparseDoc;
// All the query term positions. We remember this mainly because we are
// going to random-shuffle it for selecting the chunks that we actually
// print.
vector<unsigned int> qtermposs;
// Limit the total number of slots we populate.
const unsigned int maxtotaloccs = 300;
// Max occurrences per term. We initially know nothing about the
// occurrences repartition (it would be possible that only one
// term in the list occurs, or that all do). So this is a rather
// arbitrary choice.
const unsigned int maxoccperterm = maxtotaloccs / 10;
unsigned int totaloccs = 0;
for (list<string>::const_iterator qit = terms.begin(); qit != terms.end();
qit++) {
Xapian::PositionIterator pos;
// There may be query terms not in this doc. This raises an
// exception when requesting the position list, we catch it.
string emptys;
try {
unsigned int occurrences = 0;
for (pos = db.positionlist_begin(docid, *qit);
pos != db.positionlist_end(docid, *qit); pos++) {
unsigned int ipos = *pos;
LOGDEB2(("Abstract: [%s] at %d\n", qit->c_str(), ipos));
// Remember the term position
qtermposs.push_back(ipos);
// Add adjacent slots to the set to populate at next step
unsigned int sta = MAX(0, ipos-m_db->m_synthAbsWordCtxLen);
unsigned int sto = ipos+m_db->m_synthAbsWordCtxLen;
for (unsigned int ii = sta; ii <= sto; ii++) {
if (ii == ipos)
sparseDoc[ii] = *qit;
else
sparseDoc[ii] = emptys;
}
// Limit the number of occurrences we keep for each
// term. The abstract has a finite length anyway!
if (occurrences++ > maxoccperterm)
break;
}
} catch (...) {
// Term does not occur. No problem.
}
// Limit total size
if (totaloccs++ > maxtotaloccs)
break;
}
LOGDEB(("Abstract:%d:chosen number of positions %d. Populating\n",
chron.millis(), qtermposs.size()));
// Walk the full document position list (for each term walk
// position list) and populate slots around the query terms. We
// arbitrarily truncate the list to avoid taking forever. If we do
// cutoff, the abstract may be inconsistent, which is bad...
{
Xapian::TermIterator term;
int cutoff = 500 * 1000;
for (term = db.termlist_begin(docid);
term != db.termlist_end(docid); term++) {
if (cutoff-- < 0) {
LOGDEB(("Abstract: max term count cutoff\n"));
break;
}
Xapian::PositionIterator pos;
for (pos = db.positionlist_begin(docid, *term);
pos != db.positionlist_end(docid, *term); pos++) {
if (cutoff-- < 0) {
LOGDEB(("Abstract: max term count cutoff\n"));
break;
}
map<unsigned int, string>::iterator vit;
if ((vit=sparseDoc.find(*pos)) != sparseDoc.end()) {
// Don't replace a term: the terms list is in
// alphabetic order, and we may have several terms
// at the same position, we want to keep only the
// first one (ie: dockes and dockes@wanadoo.fr)
if (vit->second.empty()) {
LOGDEB2(("Abstract: populating: [%s] at %d\n",
(*term).c_str(), *pos));
sparseDoc[*pos] = *term;
}
}
}
}
}
#if 0
// Debug only: output the full term[position] vector
bool epty = false;
int ipos = 0;
for (map<unsigned int, string>::iterator it = sparseDoc.begin();
it != sparseDoc.end();
it++, ipos++) {
if (it->second.empty()) {
if (!epty)
LOGDEB(("Abstract:vec[%d]: [%s]\n", ipos, it->second.c_str()));
epty = true;
} else {
epty = false;
LOGDEB(("Abstract:vec[%d]: [%s]\n", ipos, it->second.c_str()));
}
}
#endif
LOGDEB(("Abstract:%d: randomizing and extracting\n", chron.millis()));
// We randomize the selection of term positions, from which we
// shall pull, starting at the beginning, until the abstract is
// big enough. The abstract is finally built in correct position
// order, thanks to the position map.
random_shuffle(qtermposs.begin(), qtermposs.end());
map<unsigned int, string> mabs;
unsigned int abslen = 0;
// Extract data around the N first (in random order) query term
// positions, and store the terms in the map. Don't concatenate
// immediately into chunks because there might be overlaps
for (vector<unsigned int>::const_iterator pos = qtermposs.begin();
pos != qtermposs.end(); pos++) {
if (int(abslen) > m_db->m_synthAbsLen)
break;
unsigned int sta = MAX(0, *pos - m_db->m_synthAbsWordCtxLen);
unsigned int sto = *pos + m_db->m_synthAbsWordCtxLen;
LOGDEB2(("Abstract: %d<-%d->%d\n", sta, *pos, sto));
for (unsigned int ii = sta; ii <= sto; ii++) {
if (int(abslen) > m_db->m_synthAbsLen)
break;
map<unsigned int, string>::const_iterator vit =
sparseDoc.find(ii);
if (vit != sparseDoc.end() && !vit->second.empty()) {
LOGDEB2(("Abstract: position %d -> [%s]\n",
ii, vit->second.c_str()));
mabs[ii] = vit->second;
abslen += vit->second.length();
} else {
LOGDEB2(("Abstract: empty position at %d\n", ii));
}
}
// Possibly add a ... at the end of chunk if it's not
// overlapping
if (mabs.find(sto+1) == mabs.end())
mabs[sto+1] = "...";
}
// Build the abstract by walking the map (in order of position)
string abstract;
for (map<unsigned int, string>::const_iterator it = mabs.begin();
it != mabs.end(); it++) {
LOGDEB2(("Abtract:output %u -> [%s]\n", it->first,it->second.c_str()));
abstract += it->second + " ";
}
LOGDEB(("Abtract: done in %d mS\n", chron.millis()));
return abstract;
}
/* Rcl::Db methods ///////////////////////////////// */
@ -909,279 +1132,67 @@ bool Db::purgeFile(const string &fn)
return false;
}
// Splitter callback for breaking query into terms
class wsQData : public TextSplitCB {
public:
vector<string> terms;
string catterms() {
string s;
for (unsigned int i=0;i<terms.size();i++) {
s += "[" + terms[i] + "] ";
}
return s;
}
bool takeword(const std::string &term, int , int, int) {
LOGDEB1(("wsQData::takeword: %s\n", term.c_str()));
terms.push_back(term);
return true;
}
void dumball() {
for (vector<string>::iterator it=terms.begin(); it !=terms.end();it++){
string dumb;
dumb_string(*it, dumb);
*it = dumb;
}
}
};
// Turn string into list of xapian queries. There is little
// interpretation done on the string (no +term -term or filename:term
// stuff). We just separate words and phrases, and interpret
// capitalized terms as wanting no stem expansion.
// The final list contains one query for each term or phrase
// - Elements corresponding to a stem-expanded part are an OP_OR
// composition of the stem-expanded terms (or a single term query).
// - Elements corresponding to a phrase are an OP_PHRASE composition of the
// phrase terms (no stem expansion in this case)
static void stringToXapianQueries(const string &iq,
const string& stemlang,
Db *db,
list<Xapian::Query> &pqueries,
unsigned int opts = Db::QO_NONE)
bool Db::filenameWildExp(const string& fnexp, list<string>& names)
{
string qstring = iq;
// File name search, with possible wildcards.
// We expand wildcards by scanning the filename terms (prefixed
// with XSFN) from the database.
// We build an OR query with the expanded values if any.
string pattern;
dumb_string(fnexp, pattern);
// Split into (possibly single word) phrases ("this is a phrase"):
list<string> phrases;
stringToStrings(qstring, phrases);
// If pattern is not quoted, and has no wildcards, we add * at
// each end: match any substring
if (pattern[0] == '"' && pattern[pattern.size()-1] == '"') {
pattern = pattern.substr(1, pattern.size() -2);
} else if (pattern.find_first_of("*?[") == string::npos) {
pattern = "*" + pattern + "*";
} // else let it be
// Then process each phrase: split into terms and transform into
// appropriate Xapian Query
LOGDEB((" pattern: [%s]\n", pattern.c_str()));
for (list<string>::iterator it=phrases.begin(); it !=phrases.end(); it++) {
LOGDEB(("strToXapianQ: phrase or word: [%s]\n", it->c_str()));
// If there are both spans and single words in this element,
// we need to use a word split, else a phrase query including
// a span would fail if we didn't adjust the proximity to
// account for the additional span term which is complicated.
wsQData splitDataS, splitDataW;
TextSplit splitterS(&splitDataS, TextSplit::TXTS_ONLYSPANS);
splitterS.text_to_words(*it);
TextSplit splitterW(&splitDataW, TextSplit::TXTS_NOSPANS);
splitterW.text_to_words(*it);
wsQData& splitData = splitDataS;
if (splitDataS.terms.size() > 1 && splitDataS.terms.size() !=
splitDataW.terms.size())
splitData = splitDataW;
LOGDEB1(("strToXapianQ: splitter term count: %d\n",
splitData.terms.size()));
switch(splitData.terms.size()) {
case 0: continue;// ??
case 1: // Not a real phrase: one term
{
string term = splitData.terms.front();
bool nostemexp = false;
// Check if the first letter is a majuscule in which
// case we do not want to do stem expansion. Note that
// the test is convoluted and possibly problematic
if (term.length() > 0) {
string noacterm,noaclowterm;
if (unacmaybefold(term, noacterm, "UTF-8", false) &&
unacmaybefold(noacterm, noaclowterm, "UTF-8", true)) {
Utf8Iter it1(noacterm);
Utf8Iter it2(noaclowterm);
if (*it1 != *it2)
nostemexp = true;
}
}
LOGDEB1(("Term: %s stem expansion: %s\n",
term.c_str(), nostemexp?"no":"yes"));
list<string> exp;
string term1;
dumb_string(term, term1);
// Possibly perform stem compression/expansion
if (!nostemexp && (opts & Db::QO_STEM)) {
exp = db->stemExpand(stemlang, term1);
} else {
exp.push_back(term1);
}
// Push either term or OR of stem-expanded set
pqueries.push_back(Xapian::Query(Xapian::Query::OP_OR,
exp.begin(), exp.end()));
}
// Match pattern against all file names in the db
Xapian::TermIterator it = m_ndb->db.allterms_begin();
it.skip_to("XSFN");
for (;it != m_ndb->db.allterms_end(); it++) {
if ((*it).find("XSFN") != 0)
break;
string fn = (*it).substr(4);
LOGDEB2(("Matching [%s] and [%s]\n", pattern.c_str(), fn.c_str()));
if (fnmatch(pattern.c_str(), fn.c_str(), 0) != FNM_NOMATCH) {
names.push_back((*it).c_str());
}
// Limit the match count
if (names.size() > 1000) {
LOGERR(("Db::SetQuery: too many matched file names\n"));
break;
default:
// Phrase: no stem expansion
splitData.dumball();
LOGDEB(("Pushing phrase: [%s]\n", splitData.catterms().c_str()));
pqueries.push_back(Xapian::Query(Xapian::Query::OP_PHRASE,
splitData.terms.begin(),
splitData.terms.end()));
}
}
if (names.empty()) {
// Build an impossible query: we know it's impossible because we
// control the prefixes!
names.push_back("XIMPOSSIBLE");
}
return true;
}
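The comments above describe how file name patterns are normalized before being matched with fnmatch() against the XSFN-prefixed terms. As a standalone sketch of just that normalization step (the database scan, the 1000-match cap and the XIMPOSSIBLE fallback are omitted; the helper name is made up for illustration):

#include <fnmatch.h>
#include <string>

// Quoted pattern: match it literally, minus the quotes. No wildcard
// characters: match any substring. Otherwise use the pattern as given.
static std::string normalizeFnPattern(const std::string& in)
{
    std::string pattern(in);
    if (pattern.size() > 1 && pattern[0] == '"' &&
        pattern[pattern.size() - 1] == '"') {
        pattern = pattern.substr(1, pattern.size() - 2);
    } else if (pattern.find_first_of("*?[") == std::string::npos) {
        pattern = "*" + pattern + "*";
    }
    return pattern;
}

// Example: fnmatch(normalizeFnPattern("report").c_str(), "myreport.odt", 0)
// returns 0 (a match) because the pattern becomes "*report*".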
// Prepare query out of "advanced search" data
bool Db::setQuery(AdvSearchData &sdata, int opts, const string& stemlang)
bool Db::setQuery(RefCntr<SearchData> sdata, int opts,
const string& stemlang)
{
LOGDEB(("Db::setQuery: adv:\n"));
LOGDEB((" allwords: %s\n", sdata.allwords.c_str()));
LOGDEB((" phrase: %s\n", sdata.phrase.c_str()));
LOGDEB((" orwords: %s\n", sdata.orwords.c_str()));
LOGDEB((" orwords1: %s\n", sdata.orwords1.c_str()));
LOGDEB((" nowords: %s\n", sdata.nowords.c_str()));
LOGDEB((" filename: %s\n", sdata.filename.c_str()));
string ft;
for (list<string>::iterator it = sdata.filetypes.begin();
it != sdata.filetypes.end(); it++) {ft += *it + " ";}
if (!ft.empty())
LOGDEB((" searched file types: %s\n", ft.c_str()));
if (!sdata.topdir.empty())
LOGDEB((" restricted to: %s\n", sdata.topdir.c_str()));
LOGDEB((" Options: 0x%x\n", opts));
m_filterTopDir = sdata.topdir;
m_dbindices.clear();
if (!m_ndb)
if (!m_ndb) {
LOGERR(("Db::setQuery: no db!\n"));
return false;
list<Xapian::Query> pqueries;
Xapian::Query xq;
}
LOGDEB(("Db::setQuery:\n"));
m_filterTopDir = sdata->m_topdir;
m_dbindices.clear();
m_qOpts = opts;
if (!sdata.filename.empty()) {
LOGDEB((" filename search\n"));
// File name search, with possible wildcards.
// We expand wildcards by scanning the filename terms (prefixed
// with XSFN) from the database.
// We build an OR query with the expanded values if any.
string pattern;
dumb_string(sdata.filename, pattern);
// If pattern is not quoted, and has no wildcards, we add * at
// each end: match any substring
if (pattern[0] == '"' && pattern[pattern.size()-1] == '"') {
pattern = pattern.substr(1, pattern.size() -2);
} else if (pattern.find_first_of("*?[") == string::npos) {
pattern = "*" + pattern + "*";
} // else let it be
LOGDEB((" pattern: [%s]\n", pattern.c_str()));
// Match pattern against all file names in the db
Xapian::TermIterator it = m_ndb->db.allterms_begin();
it.skip_to("XSFN");
list<string> names;
for (;it != m_ndb->db.allterms_end(); it++) {
if ((*it).find("XSFN") != 0)
break;
string fn = (*it).substr(4);
LOGDEB2(("Matching [%s] and [%s]\n", pattern.c_str(), fn.c_str()));
if (fnmatch(pattern.c_str(), fn.c_str(), 0) != FNM_NOMATCH) {
names.push_back((*it).c_str());
}
// Limit the match count
if (names.size() > 1000) {
LOGERR(("Db::SetQuery: too many matched file names\n"));
break;
}
}
if (names.empty()) {
// Build an impossible query: we know it's impossible because we
// control the prefixes!
names.push_back("XIMPOSSIBLE");
}
// Build a query out of the matching file name terms.
xq = Xapian::Query(Xapian::Query::OP_OR, names.begin(), names.end());
}
if (!sdata.allwords.empty()) {
stringToXapianQueries(sdata.allwords, stemlang, this,pqueries,m_qOpts);
if (!pqueries.empty()) {
Xapian::Query nq =
Xapian::Query(Xapian::Query::OP_AND, pqueries.begin(),
pqueries.end());
xq = xq.empty() ? nq :
Xapian::Query(Xapian::Query::OP_AND, xq, nq);
pqueries.clear();
}
}
if (!sdata.orwords.empty()) {
stringToXapianQueries(sdata.orwords, stemlang, this,pqueries,m_qOpts);
if (!pqueries.empty()) {
Xapian::Query nq =
Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(),
pqueries.end());
xq = xq.empty() ? nq :
Xapian::Query(Xapian::Query::OP_AND, xq, nq);
pqueries.clear();
}
}
if (!sdata.orwords1.empty()) {
stringToXapianQueries(sdata.orwords1, stemlang, this,pqueries,m_qOpts);
if (!pqueries.empty()) {
Xapian::Query nq =
Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(),
pqueries.end());
xq = xq.empty() ? nq :
Xapian::Query(Xapian::Query::OP_AND, xq, nq);
pqueries.clear();
}
}
if (!sdata.phrase.empty()) {
Xapian::Query nq;
string s = string("\"") + sdata.phrase + string("\"");
stringToXapianQueries(s, stemlang, this, pqueries);
if (!pqueries.empty()) {
// There should be a single list element phrase query.
xq = xq.empty() ? *pqueries.begin() :
Xapian::Query(Xapian::Query::OP_AND, xq, *pqueries.begin());
pqueries.clear();
}
}
if (!sdata.filetypes.empty()) {
Xapian::Query tq;
for (list<string>::iterator it = sdata.filetypes.begin();
it != sdata.filetypes.end(); it++) {
string term = "T" + *it;
LOGDEB(("Adding file type term: [%s]\n", term.c_str()));
tq = tq.empty() ? Xapian::Query(term) :
Xapian::Query(Xapian::Query::OP_OR, tq, Xapian::Query(term));
}
xq = xq.empty() ? tq : Xapian::Query(Xapian::Query::OP_FILTER, xq, tq);
}
// "And not" part. Must come last, as we have to check it's not
// the only term in the query. We do no stem expansion on 'No'
// words. Should we ?
if (!sdata.nowords.empty()) {
stringToXapianQueries(sdata.nowords, stemlang, this, pqueries);
if (!pqueries.empty()) {
Xapian::Query nq;
nq = Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(),
pqueries.end());
if (xq.empty()) {
// Xapian can't do this currently. We have to have a positive
// part!
sdata.description = "Error: pure negative query\n";
LOGERR(("Rcl::Db::setQuery: error: pure negative query\n"));
return false;
}
xq = Xapian::Query(Xapian::Query::OP_AND_NOT, xq, nq);
pqueries.clear();
}
}
Xapian::Query xq;
sdata->toNativeQuery(*this, &xq, (opts & Db::QO_STEM) ? stemlang : "");
m_ndb->query = xq;
delete m_ndb->enquire;
@ -1189,10 +1200,11 @@ bool Db::setQuery(AdvSearchData &sdata, int opts, const string& stemlang)
m_ndb->enquire->set_query(m_ndb->query);
m_ndb->mset = Xapian::MSet();
// Get the query description and trim the "Xapian::Query"
sdata.description = m_ndb->query.get_description();
if (sdata.description.find("Xapian::Query") == 0)
sdata.description = sdata.description.substr(strlen("Xapian::Query"));
LOGDEB(("Db::SetQuery: Q: %s\n", sdata.description.c_str()));
sdata->m_description = m_ndb->query.get_description();
if (sdata->m_description.find("Xapian::Query") == 0)
sdata->m_description =
sdata->m_description.substr(strlen("Xapian::Query"));
LOGDEB(("Db::SetQuery: Q: %s\n", sdata->m_description.c_str()));
return true;
}
@ -1422,43 +1434,6 @@ int Db::getResCnt()
return m_ndb->mset.get_matches_lower_bound();
}
bool Native::dbDataToRclDoc(std::string &data, Doc &doc,
int qopts,
Xapian::docid docid, const list<string>& terms)
{
LOGDEB1(("Db::dbDataToRclDoc: opts %x data: %s\n", qopts, data.c_str()));
ConfSimple parms(&data);
if (!parms.ok())
return false;
parms.get(string("url"), doc.url);
parms.get(string("mtype"), doc.mimetype);
parms.get(string("fmtime"), doc.fmtime);
parms.get(string("dmtime"), doc.dmtime);
parms.get(string("origcharset"), doc.origcharset);
parms.get(string("caption"), doc.title);
parms.get(string("keywords"), doc.keywords);
parms.get(string("abstract"), doc.abstract);
// Possibly remove synthetic abstract indicator (if it's there, we
// used to index the beginning of the text as abstract).
bool syntabs = false;
if (doc.abstract.find(rclSyntAbs) == 0) {
doc.abstract = doc.abstract.substr(rclSyntAbs.length());
syntabs = true;
}
// If the option is set and the abstract is synthetic or empty, build
// abstract from position data.
if ((qopts & Db::QO_BUILD_ABSTRACT) && !terms.empty()) {
LOGDEB(("dbDataToRclDoc:: building abstract from position data\n"));
if (doc.abstract.empty() || syntabs ||
(qopts & Db::QO_REPLACE_ABSTRACT))
doc.abstract = makeAbstract(docid, terms);
}
parms.get(string("ipath"), doc.ipath);
parms.get(string("fbytes"), doc.fbytes);
parms.get(string("dbytes"), doc.dbytes);
doc.xdocid = docid;
return true;
}
// Get document at rank i in query (i is the index in the whole result
// set, as in the enquire class. We check if the current mset has the
@ -1641,191 +1616,6 @@ list<string> Db::expand(const Doc &doc)
}
// We build a possibly full size but sparsely populated (only around
// the search term occurrences) reconstruction of the document. It
// would be possible to compress the array by keeping only the
// chunks around the terms, but this would seriously complicate the
// data structure.
string Native::makeAbstract(Xapian::docid docid, const list<string>& terms)
{
LOGDEB(("Native::makeAbstract: maxlen %d wWidth %d\n",
m_db->m_synthAbsLen, m_db->m_synthAbsWordCtxLen));
Chrono chron;
// For each of the query terms, query xapian for its positions
// list in the document. For each position entry, remember it in qtermposs
// and insert it and its neighbours in the set of 'interesting' positions
// The terms 'array' that we partially populate with the document
// terms, at their positions around the search terms positions:
map<unsigned int, string> sparseDoc;
// All the query term positions. We remember this mainly because we are
// going to random-shuffle it for selecting the chunks that we actually
// print.
vector<unsigned int> qtermposs;
// Limit the total number of slots we populate.
const unsigned int maxtotaloccs = 300;
// Max occurrences per term. We initially know nothing about the
// occurrences repartition (it would be possible that only one
// term in the list occurs, or that all do). So this is a rather
// arbitrary choice.
const unsigned int maxoccperterm = maxtotaloccs / 10;
unsigned int totaloccs = 0;
for (list<string>::const_iterator qit = terms.begin(); qit != terms.end();
qit++) {
Xapian::PositionIterator pos;
// There may be query terms not in this doc. This raises an
// exception when requesting the position list, we catch it.
string emptys;
try {
unsigned int occurrences = 0;
for (pos = db.positionlist_begin(docid, *qit);
pos != db.positionlist_end(docid, *qit); pos++) {
unsigned int ipos = *pos;
LOGDEB2(("Abstract: [%s] at %d\n", qit->c_str(), ipos));
// Remember the term position
qtermposs.push_back(ipos);
// Add adjacent slots to the set to populate at next step
unsigned int sta = MAX(0, ipos-m_db->m_synthAbsWordCtxLen);
unsigned int sto = ipos+m_db->m_synthAbsWordCtxLen;
for (unsigned int ii = sta; ii <= sto; ii++) {
if (ii == ipos)
sparseDoc[ii] = *qit;
else
sparseDoc[ii] = emptys;
}
// Limit the number of occurrences we keep for each
// term. The abstract has a finite length anyway!
if (occurrences++ > maxoccperterm)
break;
}
} catch (...) {
// Term does not occur. No problem.
}
// Limit total size
if (totaloccs++ > maxtotaloccs)
break;
}
LOGDEB(("Abstract:%d:chosen number of positions %d. Populating\n",
chron.millis(), qtermposs.size()));
// Walk the full document position list (for each term walk
// position list) and populate slots around the query terms. We
// arbitrarily truncate the list to avoid taking forever. If we do
// cutoff, the abstract may be inconsistent, which is bad...
{
Xapian::TermIterator term;
int cutoff = 500 * 1000;
for (term = db.termlist_begin(docid);
term != db.termlist_end(docid); term++) {
if (cutoff-- < 0) {
LOGDEB(("Abstract: max term count cutoff\n"));
break;
}
Xapian::PositionIterator pos;
for (pos = db.positionlist_begin(docid, *term);
pos != db.positionlist_end(docid, *term); pos++) {
if (cutoff-- < 0) {
LOGDEB(("Abstract: max term count cutoff\n"));
break;
}
map<unsigned int, string>::iterator vit;
if ((vit=sparseDoc.find(*pos)) != sparseDoc.end()) {
// Don't replace a term: the terms list is in
// alphabetic order, and we may have several terms
// at the same position, we want to keep only the
// first one (ie: dockes and dockes@wanadoo.fr)
if (vit->second.empty()) {
LOGDEB2(("Abstract: populating: [%s] at %d\n",
(*term).c_str(), *pos));
sparseDoc[*pos] = *term;
}
}
}
}
}
#if 0
// Debug only: output the full term[position] vector
bool epty = false;
int ipos = 0;
for (map<unsigned int, string>::iterator it = sparseDoc.begin();
it != sparseDoc.end();
it++, ipos++) {
if (it->second.empty()) {
if (!epty)
LOGDEB(("Abstract:vec[%d]: [%s]\n", ipos, it->second.c_str()));
epty = true;
} else {
epty = false;
LOGDEB(("Abstract:vec[%d]: [%s]\n", ipos, it->second.c_str()));
}
}
#endif
LOGDEB(("Abstract:%d: randomizing and extracting\n", chron.millis()));
// We randomize the selection of term positions, from which we
// shall pull, starting at the beginning, until the abstract is
// big enough. The abstract is finally built in correct position
// order, thanks to the position map.
random_shuffle(qtermposs.begin(), qtermposs.end());
map<unsigned int, string> mabs;
unsigned int abslen = 0;
// Extract data around the N first (in random order) query term
// positions, and store the terms in the map. Don't concatenate
// immediately into chunks because there might be overlaps
for (vector<unsigned int>::const_iterator pos = qtermposs.begin();
pos != qtermposs.end(); pos++) {
if (int(abslen) > m_db->m_synthAbsLen)
break;
unsigned int sta = MAX(0, *pos - m_db->m_synthAbsWordCtxLen);
unsigned int sto = *pos + m_db->m_synthAbsWordCtxLen;
LOGDEB2(("Abstract: %d<-%d->%d\n", sta, *pos, sto));
for (unsigned int ii = sta; ii <= sto; ii++) {
if (int(abslen) > m_db->m_synthAbsLen)
break;
map<unsigned int, string>::const_iterator vit =
sparseDoc.find(ii);
if (vit != sparseDoc.end() && !vit->second.empty()) {
LOGDEB2(("Abstract: position %d -> [%s]\n",
ii, vit->second.c_str()));
mabs[ii] = vit->second;
abslen += vit->second.length();
} else {
LOGDEB2(("Abstract: empty position at %d\n", ii));
}
}
// Possibly add a ... at the end of chunk if it's not
// overlapping
if (mabs.find(sto+1) == mabs.end())
mabs[sto+1] = "...";
}
// Build the abstract by walking the map (in order of position)
string abstract;
for (map<unsigned int, string>::const_iterator it = mabs.begin();
it != mabs.end(); it++) {
LOGDEB2(("Abtract:output %u -> [%s]\n", it->first,it->second.c_str()));
abstract += it->second + " ";
}
LOGDEB(("Abtract: done in %d mS\n", chron.millis()));
return abstract;
}
#ifndef NO_NAMESPACES
}
#endif

View File

@ -16,12 +16,14 @@
*/
#ifndef _DB_H_INCLUDED_
#define _DB_H_INCLUDED_
/* @(#$Id: rcldb.h,v 1.40 2006-10-30 12:59:44 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: rcldb.h,v 1.41 2006-11-13 08:49:44 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string>
#include <list>
#include <vector>
#include "refcntr.h"
#ifndef NO_NAMESPACES
using std::string;
using std::list;
@ -103,7 +105,7 @@ class Doc {
}
};
class AdvSearchData;
class SearchData;
class Native;
class TermIter;
@ -155,7 +157,7 @@ class Db {
/* Query-related functions */
// Parse query string and initialize query
bool setQuery(AdvSearchData &q, int opts = QO_NONE,
bool setQuery(RefCntr<SearchData> q, int opts = QO_NONE,
const string& stemlang = "english");
bool getQueryTerms(list<string>& terms);
bool getMatchTerms(const Doc& doc, list<string>& terms);
@ -213,6 +215,9 @@ class Db {
/** Perform stem expansion across all dbs configured for searching */
list<string> stemExpand(const string& lang, const string& term);
/** Filename wildcard expansion */
bool filenameWildExp(const string& exp, list<string>& names);
private:
string m_filterTopDir; // Current query filter on subtree top directory
@ -248,6 +253,7 @@ private:
vector<bool> updated;
bool reOpen(); // Close/open, same mode/opts
/* Copy construction and assignment private and forbidden */
Db(const Db &) {}
Db & operator=(const Db &) {return *this;};

src/rcldb/searchdata.cpp (new file, 299 lines)
View File

@ -0,0 +1,299 @@
#ifndef lint
static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.1 2006-11-13 08:49:44 dockes Exp $ (C) 2006 J.F.Dockes";
#endif
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
// Handle translation from rcl's SearchData structures to Xapian Queries
#include <string>
#include <list>
#ifndef NO_NAMESPACES
using namespace std;
#endif
#include "xapian.h"
#include "rcldb.h"
#include "searchdata.h"
#include "debuglog.h"
#include "smallut.h"
#include "textsplit.h"
#include "unacpp.h"
#include "utf8iter.h"
namespace Rcl {
typedef list<SearchDataClause *>::iterator qlist_it_t;
bool SearchData::toNativeQuery(Rcl::Db &db, void *d, const string& stemlang)
{
Xapian::Query xq;
// Walk the clause list translating each in turn and building the
// Xapian query tree
for (qlist_it_t it = m_query.begin(); it != m_query.end(); it++) {
Xapian::Query nq;
(*it)->toNativeQuery(db, &nq, stemlang);
Xapian::Query::op op;
// If this structure is an AND list, must use AND_NOT for excl clauses.
// Else this is an OR list, and there can't be excl clauses
if (m_tp == SCLT_AND) {
op = (*it)->m_tp == SCLT_EXCL ?
Xapian::Query::OP_AND_NOT: Xapian::Query::OP_AND;
} else {
op = Xapian::Query::OP_OR;
}
xq = xq.empty() ? nq : Xapian::Query(op, xq, nq);
}
// Add the file type filtering clause if any
if (!m_filetypes.empty()) {
list<Xapian::Query> pqueries;
Xapian::Query tq;
for (list<string>::iterator it = m_filetypes.begin();
it != m_filetypes.end(); it++) {
string term = "T" + *it;
LOGDEB(("Adding file type term: [%s]\n", term.c_str()));
tq = tq.empty() ? Xapian::Query(term) :
Xapian::Query(Xapian::Query::OP_OR, tq, Xapian::Query(term));
}
xq = xq.empty() ? tq : Xapian::Query(Xapian::Query::OP_FILTER, xq, tq);
}
*((Xapian::Query *)d) = xq;
return true;
}
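For illustration only (not code from this commit): with an AND-rooted clause list holding a plain clause for "foo" and an EXCL clause for "bar", the loop above combines the per-clause queries roughly as follows; the exact term queries depend on splitting and stem expansion.

#include <xapian.h>
#include <iostream>

int main()
{
    // What the AND/EXCL branch builds: positive part AND_NOT excluded part
    Xapian::Query positive("foo");
    Xapian::Query excluded("bar");
    Xapian::Query xq(Xapian::Query::OP_AND_NOT, positive, excluded);
    // Prints something like: Xapian::Query((foo AND_NOT bar))
    std::cout << xq.get_description() << std::endl;
    return 0;
}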
// Add clause to current list. OR lists can't have EXCL clauses.
bool SearchData::addClause(SearchDataClause* cl)
{
if (m_tp == SCLT_OR && (cl->m_tp == SCLT_EXCL)) {
LOGERR(("SearchData::addClause: cant add EXCL to OR list\n"));
return false;
}
m_query.push_back(cl);
return true;
}
// Make me all new
void SearchData::erase() {
for (qlist_it_t it = m_query.begin(); it != m_query.end(); it++)
delete *it;
m_query.clear();
m_filetypes.clear();
m_topdir.erase();
m_description.erase();
}
// Am I a file name only search? This is used to turn off term highlighting.
bool SearchData::fileNameOnly() {
for (qlist_it_t it = m_query.begin(); it != m_query.end(); it++)
if (!(*it)->isFileName())
return false;
return true;
}
// Splitter callback for breaking a user query string into simple
// terms and phrases
class wsQData : public TextSplitCB {
public:
vector<string> terms;
// Debug
string catterms() {
string s;
for (unsigned int i = 0; i < terms.size(); i++) {
s += "[" + terms[i] + "] ";
}
return s;
}
bool takeword(const std::string &term, int , int, int) {
LOGDEB1(("wsQData::takeword: %s\n", term.c_str()));
terms.push_back(term);
return true;
}
// Lowercase and remove accents from all terms
void dumball() {
for (vector<string>::iterator it=terms.begin(); it !=terms.end();it++){
string dumb;
dumb_string(*it, dumb);
*it = dumb;
}
}
};
// Turn string into list of xapian queries. There is little
// interpretation done on the string (no +term -term or filename:term
// stuff). We just separate words and phrases, and interpret
// capitalized terms as wanting no stem expansion.
// The final list contains one query for each term or phrase
// - Elements corresponding to a stem-expanded part are an OP_OR
// composition of the stem-expanded terms (or a single term query).
// - Elements corresponding to a phrase are an OP_PHRASE composition of the
// phrase terms (no stem expansion in this case)
static void stringToXapianQueries(const string &iq,
const string& stemlang,
Db& db,
list<Xapian::Query> &pqueries)
{
string qstring = iq;
bool opt_stemexp = !stemlang.empty();
// Split into (possibly single word) phrases ("this is a phrase"):
list<string> phrases;
stringToStrings(qstring, phrases);
// Then process each phrase: split into terms and transform into
// appropriate Xapian Query
for (list<string>::iterator it=phrases.begin(); it !=phrases.end(); it++) {
LOGDEB(("strToXapianQ: phrase or word: [%s]\n", it->c_str()));
// If there are both spans and single words in this element,
// we need to use a word split, else a phrase query including
// a span would fail if we didn't adjust the proximity to
// account for the additional span term which is complicated.
wsQData splitDataS, splitDataW;
TextSplit splitterS(&splitDataS, TextSplit::TXTS_ONLYSPANS);
splitterS.text_to_words(*it);
TextSplit splitterW(&splitDataW, TextSplit::TXTS_NOSPANS);
splitterW.text_to_words(*it);
wsQData& splitData = splitDataS;
if (splitDataS.terms.size() > 1 && splitDataS.terms.size() !=
splitDataW.terms.size())
splitData = splitDataW;
LOGDEB1(("strToXapianQ: splitter term count: %d\n",
splitData.terms.size()));
switch(splitData.terms.size()) {
case 0: continue;// ??
case 1: // Not a real phrase: one term
{
string term = splitData.terms.front();
bool nostemexp = false;
// Check if the first letter is a majuscule in which
// case we do not want to do stem expansion. Note that
// the test is convoluted and possibly problematic
if (term.length() > 0) {
string noacterm,noaclowterm;
if (unacmaybefold(term, noacterm, "UTF-8", false) &&
unacmaybefold(noacterm, noaclowterm, "UTF-8", true)) {
Utf8Iter it1(noacterm);
Utf8Iter it2(noaclowterm);
if (*it1 != *it2)
nostemexp = true;
}
}
LOGDEB1(("Term: %s stem expansion: %s\n",
term.c_str(), nostemexp?"no":"yes"));
list<string> exp;
string term1;
dumb_string(term, term1);
// Possibly perform stem compression/expansion
if (!nostemexp && opt_stemexp) {
exp = db.stemExpand(stemlang, term1);
} else {
exp.push_back(term1);
}
// Push either term or OR of stem-expanded set
pqueries.push_back(Xapian::Query(Xapian::Query::OP_OR,
exp.begin(), exp.end()));
}
break;
default:
// Phrase: no stem expansion
splitData.dumball();
LOGDEB(("Pushing phrase: [%s]\n", splitData.catterms().c_str()));
pqueries.push_back(Xapian::Query(Xapian::Query::OP_PHRASE,
splitData.terms.begin(),
splitData.terms.end()));
}
}
}
// Translate a simple OR, AND, or EXCL search clause.
bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p,
const string& stemlang)
{
Xapian::Query *qp = (Xapian::Query *)p;
*qp = Xapian::Query();
Xapian::Query::op op;
switch (m_tp) {
case SCLT_AND: op = Xapian::Query::OP_AND; break;
case SCLT_OR:
case SCLT_EXCL: op = Xapian::Query::OP_OR; break;
default:
LOGERR(("SearchDataClauseSimple: bad m_tp %d\n", m_tp));
return false;
}
list<Xapian::Query> pqueries;
stringToXapianQueries(m_text, stemlang, db, pqueries);
if (pqueries.empty()) {
LOGERR(("SearchDataClauseSimple: resolved to null query\n"));
return true;
}
*qp = Xapian::Query(op, pqueries.begin(), pqueries.end());
return true;
}
// Translate a FILENAME search clause.
bool SearchDataClauseFilename::toNativeQuery(Rcl::Db &db, void *p,
const string& stemlang)
{
Xapian::Query *qp = (Xapian::Query *)p;
*qp = Xapian::Query();
list<string> names;
db.filenameWildExp(m_text, names);
// Build a query out of the matching file name terms.
*qp = Xapian::Query(Xapian::Query::OP_OR, names.begin(), names.end());
return true;
}
// Translate NEAR or PHRASE clause. We're not handling the distance parameter
// yet.
bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
const string& stemlang)
{
Xapian::Query *qp = (Xapian::Query *)p;
*qp = Xapian::Query();
Xapian::Query::op op = m_tp == SCLT_PHRASE ? Xapian::Query::OP_PHRASE :
Xapian::Query::OP_NEAR;
list<Xapian::Query> pqueries;
Xapian::Query nq;
string s = string("\"") + m_text + string("\"");
// Use stringToXapianQueries anyway to lowercase and simplify the
// phrase terms etc. The result should be a single element list
stringToXapianQueries(s, stemlang, db, pqueries);
if (pqueries.empty()) {
LOGERR(("SearchDataClauseDist: resolved to null query\n"));
return true;
}
*qp = *pqueries.begin();
return true;
}
} // Namespace Rcl

View File

@ -1,40 +1,112 @@
#ifndef _SEARCHDATA_H_INCLUDED_
#define _SEARCHDATA_H_INCLUDED_
/* @(#$Id: searchdata.h,v 1.2 2006-04-22 06:27:37 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: searchdata.h,v 1.3 2006-11-13 08:49:45 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string>
#include <list>
#include "rcldb.h"
#ifndef NO_NAMESPACES
using std::list;
using std::string;
#endif
namespace Rcl {
/**
* Holder for query data
*/
class AdvSearchData {
public:
string allwords;
string phrase;
string orwords;
string orwords1; // Have two instances of orwords for and'ing them
string nowords;
string filename;
list<string> filetypes; // restrict to types. Empty if inactive
string topdir; // restrict to subtree. Empty if inactive
string description; // Printable expanded version of the complete query
// returned after setQuery.
void erase() {
allwords.erase();
phrase.erase();
orwords.erase();
orwords1.erase();
nowords.erase();
filetypes.clear();
topdir.erase();
filename.erase();
description.erase();
}
bool fileNameOnly() {
return allwords.empty() && phrase.empty() && orwords.empty() &&
orwords1.empty() && nowords.empty();
}
/** Search clause types */
enum SClType {
SCLT_AND,
SCLT_OR, SCLT_EXCL, SCLT_FILENAME, SCLT_PHRASE, SCLT_NEAR,
SCLT_SUB
};
}
class SearchDataClause;
/**
* Holder for a list of search clauses. Some of the clauses can be complex
* subqueries.
*/
class SearchData {
public:
SClType m_tp; // Only SCLT_AND or SCLT_OR here
list<SearchDataClause *> m_query;
list<string> m_filetypes; // Restrict to filetypes if set.
string m_topdir; // Restrict to subtree.
// Printable expanded version of the complete query, obtained from Xapian;
// valid after the setQuery() call.
string m_description;
SearchData(SClType tp) : m_tp(tp) {}
~SearchData() {erase();}
/** Make pristine */
void erase();
/** Is there anything but a file name search in here ? */
bool fileNameOnly();
/** Translate to Xapian query. rcldb knows about the void* */
bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang);
/** We become the owner of cl and will delete it */
bool addClause(SearchDataClause *cl);
private:
/* Copy construction and assignment private and forbidden */
SearchData(const SearchData &) {}
SearchData& operator=(const SearchData&) {return *this;};
};
class SearchDataClause {
public:
SClType m_tp;
SearchDataClause(SClType tp) : m_tp(tp) {}
virtual ~SearchDataClause() {}
virtual bool toNativeQuery(Rcl::Db &db, void *, const string&) = 0;
virtual bool isFileName() {return m_tp == SCLT_FILENAME;}
};
class SearchDataClauseSimple : public SearchDataClause {
public:
SearchDataClauseSimple(SClType tp, string txt)
: SearchDataClause(tp), m_text(txt) {}
virtual ~SearchDataClauseSimple() {}
virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang);
protected:
string m_text;
};
class SearchDataClauseFilename : public SearchDataClauseSimple {
public:
SearchDataClauseFilename(string txt)
: SearchDataClauseSimple(SCLT_FILENAME, txt) {}
virtual ~SearchDataClauseFilename() {}
virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang);
};
class SearchDataClauseDist : public SearchDataClauseSimple {
public:
SearchDataClauseDist(SClType tp, string txt, int dist)
: SearchDataClauseSimple(tp, txt), m_distance(dist) {}
virtual ~SearchDataClauseDist() {}
virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang);
protected:
int m_distance;
};
class SearchDataClauseSub : public SearchDataClause {
public:
SearchDataClauseSub(SClType tp, SClType stp)
: SearchDataClause(tp), m_sub(stp) {}
virtual ~SearchDataClauseSub() {}
virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang);
protected:
SearchData m_sub;
};
} // Namespace Rcl
#endif /* _SEARCHDATA_H_INCLUDED_ */
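To relate the new interface to the removed AdvSearchData fields, here is a hedged mapping sketch (the field values are placeholders and the function is hypothetical; addClause() takes ownership of the pointers, while filetypes and topdir stay plain public members):

#include "searchdata.h"

using namespace Rcl;

// Old AdvSearchData field -> new clause or member
SearchData *fromOldAdvSearch()
{
    SearchData *sd = new SearchData(SCLT_AND);
    sd->addClause(new SearchDataClauseSimple(SCLT_AND, "all these words"));  // allwords
    sd->addClause(new SearchDataClauseSimple(SCLT_OR, "any of these"));      // orwords
    sd->addClause(new SearchDataClauseSimple(SCLT_OR, "or any of these"));   // orwords1
    sd->addClause(new SearchDataClauseSimple(SCLT_EXCL, "none of these"));   // nowords
    sd->addClause(new SearchDataClauseDist(SCLT_PHRASE, "exact phrase", 0)); // phrase
    sd->addClause(new SearchDataClauseFilename("*.cpp"));                    // filename
    sd->m_filetypes.push_back("text/html");                                  // filetypes
    sd->m_topdir = "/home/me/docs";                                          // topdir
    return sd;
}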