diff --git a/src/lib/Makefile b/src/lib/Makefile index f2586b51..df866aff 100644 --- a/src/lib/Makefile +++ b/src/lib/Makefile @@ -8,8 +8,8 @@ LIBS = librcl.a all: $(LIBS) -OBJS = conftree.o csguess.o debuglog.o execmd.o idfile.o md5.o wipedir.o fstreewalk.o mh_html.o mh_mail.o mh_exec.o mh_text.o htmlparse.o indexer.o internfile.o mimehandler.o mimeparse.o mimetype.o myhtmlparse.o pathhash.o pathut.o rclconfig.o rcldb.o rclinit.o stemdb.o base64.o readfile.o smallut.o textsplit.o transcode.o unacpp.o history.o docseq.o sortseq.o copyfile.o rclaspell.o -DEPS = conftree.dep.stamp csguess.dep.stamp debuglog.dep.stamp execmd.dep.stamp idfile.dep.stamp md5.dep.stamp wipedir.dep.stamp fstreewalk.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp mh_exec.dep.stamp mh_text.dep.stamp htmlparse.dep.stamp indexer.dep.stamp internfile.dep.stamp mimehandler.dep.stamp mimeparse.dep.stamp mimetype.dep.stamp myhtmlparse.dep.stamp pathhash.dep.stamp pathut.dep.stamp rclconfig.dep.stamp rcldb.dep.stamp rclinit.dep.stamp stemdb.dep.stamp base64.dep.stamp readfile.dep.stamp smallut.dep.stamp textsplit.dep.stamp transcode.dep.stamp unacpp.dep.stamp history.dep.stamp docseq.dep.stamp sortseq.dep.stamp copyfile.dep.stamp rclaspell.dep.stamp +OBJS = conftree.o csguess.o debuglog.o execmd.o idfile.o md5.o wipedir.o fstreewalk.o mh_html.o mh_mail.o searchdata.o mh_exec.o mh_text.o htmlparse.o indexer.o internfile.o mimehandler.o mimeparse.o mimetype.o myhtmlparse.o pathhash.o pathut.o rclconfig.o rcldb.o rclinit.o stemdb.o base64.o readfile.o smallut.o textsplit.o transcode.o unacpp.o history.o docseq.o sortseq.o copyfile.o rclaspell.o +DEPS = conftree.dep.stamp csguess.dep.stamp debuglog.dep.stamp execmd.dep.stamp idfile.dep.stamp md5.dep.stamp wipedir.dep.stamp fstreewalk.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp searchdata.dep.stamp mh_exec.dep.stamp mh_text.dep.stamp htmlparse.dep.stamp indexer.dep.stamp internfile.dep.stamp mimehandler.dep.stamp mimeparse.dep.stamp mimetype.dep.stamp myhtmlparse.dep.stamp pathhash.dep.stamp pathut.dep.stamp rclconfig.dep.stamp rcldb.dep.stamp rclinit.dep.stamp stemdb.dep.stamp base64.dep.stamp readfile.dep.stamp smallut.dep.stamp textsplit.dep.stamp transcode.dep.stamp unacpp.dep.stamp history.dep.stamp docseq.dep.stamp sortseq.dep.stamp copyfile.dep.stamp rclaspell.dep.stamp librcl.a : $(DEPS) $(OBJS) unac.o ar ru librcl.a $(OBJS) unac.o @@ -37,6 +37,8 @@ mh_html.o : ../common/mh_html.cpp $(CXX) $(ALL_CXXFLAGS) -c ../common/mh_html.cpp mh_mail.o : ../common/mh_mail.cpp $(CXX) $(ALL_CXXFLAGS) -c ../common/mh_mail.cpp +searchdata.o : ../common/searchdata.cpp + $(CXX) $(ALL_CXXFLAGS) -c ../common/searchdata.cpp mh_exec.o : ../common/mh_exec.cpp $(CXX) $(ALL_CXXFLAGS) -c ../common/mh_exec.cpp mh_text.o : ../common/mh_text.cpp @@ -125,6 +127,9 @@ mh_html.dep.stamp : ../common/mh_html.cpp mh_mail.dep.stamp : ../common/mh_mail.cpp $(CXX) -M $(ALL_CXXFLAGS) ../common/mh_mail.cpp > mh_mail.dep touch mh_mail.dep.stamp +searchdata.dep.stamp : ../common/searchdata.cpp + $(CXX) -M $(ALL_CXXFLAGS) ../common/searchdata.cpp > searchdata.dep + touch searchdata.dep.stamp mh_exec.dep.stamp : ../common/mh_exec.cpp $(CXX) -M $(ALL_CXXFLAGS) ../common/mh_exec.cpp > mh_exec.dep touch mh_exec.dep.stamp @@ -213,6 +218,7 @@ include wipedir.dep include fstreewalk.dep include mh_html.dep include mh_mail.dep +include searchdata.dep include mh_exec.dep include mh_text.dep include htmlparse.dep diff --git a/src/lib/mkMake b/src/lib/mkMake index 97b388c4..fee70451 100755 --- a/src/lib/mkMake +++ 
b/src/lib/mkMake @@ -8,6 +8,7 @@ SRCS="${depth}/utils/conftree.cpp ${depth}/index/csguess.cpp \ ${depth}/utils/idfile.cpp ${depth}/utils/md5.cpp \ ${depth}/utils/wipedir.cpp ${depth}/utils/fstreewalk.cpp \ ${depth}/common/mh_html.cpp ${depth}/common/mh_mail.cpp \ + ${depth}/common/searchdata.cpp \ ${depth}/common/mh_exec.cpp ${depth}/common/mh_text.cpp \ ${depth}/common/htmlparse.cpp ${depth}/index/indexer.cpp \ ${depth}/common/internfile.cpp ${depth}/common/mimehandler.cpp \ diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index 24a282d7..79431d9c 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.90 2006-11-12 08:35:11 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.91 2006-11-13 08:49:44 dockes Exp $ (C) 2004 J.F.Dockes"; #endif /* * This program is free software; you can redistribute it and/or modify @@ -174,6 +174,229 @@ bool Native::subDocs(const string &hash, vector& docids) return false; } +bool Native::dbDataToRclDoc(std::string &data, Doc &doc, + int qopts, + Xapian::docid docid, const list& terms) +{ + LOGDEB1(("Db::dbDataToRclDoc: opts %x data: %s\n", qopts, data.c_str())); + ConfSimple parms(&data); + if (!parms.ok()) + return false; + parms.get(string("url"), doc.url); + parms.get(string("mtype"), doc.mimetype); + parms.get(string("fmtime"), doc.fmtime); + parms.get(string("dmtime"), doc.dmtime); + parms.get(string("origcharset"), doc.origcharset); + parms.get(string("caption"), doc.title); + parms.get(string("keywords"), doc.keywords); + parms.get(string("abstract"), doc.abstract); + // Possibly remove synthetic abstract indicator (if it's there, we + // used to index the beginning of the text as abstract). + bool syntabs = false; + if (doc.abstract.find(rclSyntAbs) == 0) { + doc.abstract = doc.abstract.substr(rclSyntAbs.length()); + syntabs = true; + } + // If the option is set and the abstract is synthetic or empty , build + // abstract from position data. + if ((qopts & Db::QO_BUILD_ABSTRACT) && !terms.empty()) { + LOGDEB(("dbDataToRclDoc:: building abstract from position data\n")); + if (doc.abstract.empty() || syntabs || + (qopts & Db::QO_REPLACE_ABSTRACT)) + doc.abstract = makeAbstract(docid, terms); + } + parms.get(string("ipath"), doc.ipath); + parms.get(string("fbytes"), doc.fbytes); + parms.get(string("dbytes"), doc.dbytes); + doc.xdocid = docid; + return true; +} + +// We build a possibly full size but sparsely populated (only around +// the search term occurrences) reconstruction of the document. It +// would be possible to compress the array, by having only multiple +// chunks around the terms, but this would seriously complicate the +// data structure. +string Native::makeAbstract(Xapian::docid docid, const list& terms) +{ + LOGDEB(("Native::makeAbstract: maxlen %d wWidth %d\n", + m_db->m_synthAbsLen, m_db->m_synthAbsWordCtxLen)); + + Chrono chron; + + // For each of the query terms, query xapian for its positions + // list in the document. For each position entry, remember it in qtermposs + // and insert it and its neighbours in the set of 'interesting' positions + + // The terms 'array' that we partially populate with the document + // terms, at their positions around the search terms positions: + map sparseDoc; + + // All the query term positions. We remember this mainly because we are + // going to random-shuffle it for selecting the chunks that we actually + // print. 
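// Editor's sketch of the window-marking step that the comments above describe,
// reduced to a self-contained helper (hypothetical name, not part of the Recoll
// sources). It marks each query-term hit in the sparse position map and
// reserves empty slots for the surrounding context; a later pass fills the
// empty slots with whatever terms actually occupy those positions. In this
// sketch the window start is clamped before the subtraction so it cannot wrap
// for an unsigned position near zero, and a slot already holding a term is not
// blanked by a neighbouring window.
#include <map>
#include <string>
#include <vector>

void markHits(const std::vector<unsigned int>& hits, const std::string& term,
              unsigned int ctxlen, std::map<unsigned int, std::string>& sparse)
{
    for (std::vector<unsigned int>::const_iterator it = hits.begin();
         it != hits.end(); it++) {
        unsigned int sta = (*it > ctxlen) ? *it - ctxlen : 0;
        unsigned int sto = *it + ctxlen;
        for (unsigned int ii = sta; ii <= sto; ii++) {
            if (ii == *it)
                sparse[ii] = term;           // the hit itself
            else if (sparse.find(ii) == sparse.end())
                sparse[ii] = std::string();  // context slot, filled later
        }
    }
}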
+ vector qtermposs; + + // Limit the total number of slots we populate. + const unsigned int maxtotaloccs = 300; + // Max occurrences per term. We initially know nothing about the + // occurrences repartition (it would be possible that only one + // term in the list occurs, or that all do). So this is a rather + // arbitrary choice. + const unsigned int maxoccperterm = maxtotaloccs / 10; + unsigned int totaloccs = 0; + + for (list::const_iterator qit = terms.begin(); qit != terms.end(); + qit++) { + Xapian::PositionIterator pos; + // There may be query terms not in this doc. This raises an + // exception when requesting the position list, we catch it. + string emptys; + try { + unsigned int occurrences = 0; + for (pos = db.positionlist_begin(docid, *qit); + pos != db.positionlist_end(docid, *qit); pos++) { + unsigned int ipos = *pos; + LOGDEB2(("Abstract: [%s] at %d\n", qit->c_str(), ipos)); + // Remember the term position + qtermposs.push_back(ipos); + // Add adjacent slots to the set to populate at next step + unsigned int sta = MAX(0, ipos-m_db->m_synthAbsWordCtxLen); + unsigned int sto = ipos+m_db->m_synthAbsWordCtxLen; + for (unsigned int ii = sta; ii <= sto; ii++) { + if (ii == ipos) + sparseDoc[ii] = *qit; + else + sparseDoc[ii] = emptys; + } + // Limit the number of occurences we keep for each + // term. The abstract has a finite length anyway ! + if (occurrences++ > maxoccperterm) + break; + } + } catch (...) { + // Term does not occur. No problem. + } + // Limit total size + if (totaloccs++ > maxtotaloccs) + break; + } + + LOGDEB(("Abstract:%d:chosen number of positions %d. Populating\n", + chron.millis(), qtermposs.size())); + + // Walk the full document position list (for each term walk + // position list) and populate slots around the query terms. We + // arbitrarily truncate the list to avoid taking forever. If we do + // cutoff, the abstract may be inconsistant, which is bad... + { + Xapian::TermIterator term; + int cutoff = 500 * 1000; + + for (term = db.termlist_begin(docid); + term != db.termlist_end(docid); term++) { + if (cutoff-- < 0) { + LOGDEB(("Abstract: max term count cutoff\n")); + break; + } + + Xapian::PositionIterator pos; + for (pos = db.positionlist_begin(docid, *term); + pos != db.positionlist_end(docid, *term); pos++) { + if (cutoff-- < 0) { + LOGDEB(("Abstract: max term count cutoff\n")); + break; + } + map::iterator vit; + if ((vit=sparseDoc.find(*pos)) != sparseDoc.end()) { + // Don't replace a term: the terms list is in + // alphabetic order, and we may have several terms + // at the same position, we want to keep only the + // first one (ie: dockes and dockes@wanadoo.fr) + if (vit->second.empty()) { + LOGDEB2(("Abstract: populating: [%s] at %d\n", + (*term).c_str(), *pos)); + sparseDoc[*pos] = *term; + } + } + } + } + } + +#if 0 + // Debug only: output the full term[position] vector + bool epty = false; + int ipos = 0; + for (map::iterator it = sparseDoc.begin(); + it != sparseDoc.end(); + it++, ipos++) { + if (it->empty()) { + if (!epty) + LOGDEB(("Abstract:vec[%d]: [%s]\n", ipos, it->c_str())); + epty=true; + } else { + epty = false; + LOGDEB(("Abstract:vec[%d]: [%s]\n", ipos, it->c_str())); + } + } +#endif + + LOGDEB(("Abstract:%d: randomizing and extracting\n", chron.millis())); + + // We randomize the selection of term positions, from which we + // shall pull, starting at the beginning, until the abstract is + // big enough. The abstract is finally built in correct position + // order, thanks to the position map. 
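// Editor's sketch of the selection and assembly step described just above, as
// a standalone helper (hypothetical name, not part of the Recoll sources).
// Positions are visited in random order so every query term gets a chance to
// contribute before the length budget runs out, but the output map is keyed by
// position, so the final walk emits the chunks in document order.
#include <algorithm>
#include <map>
#include <string>
#include <vector>

std::string assembleAbstract(std::vector<unsigned int> positions,
                             const std::map<unsigned int, std::string>& sparse,
                             unsigned int ctxlen, unsigned int maxlen)
{
    std::random_shuffle(positions.begin(), positions.end());
    std::map<unsigned int, std::string> chosen;
    unsigned int len = 0;
    for (std::vector<unsigned int>::const_iterator p = positions.begin();
         p != positions.end() && len < maxlen; p++) {
        unsigned int sta = (*p > ctxlen) ? *p - ctxlen : 0;
        for (unsigned int ii = sta; ii <= *p + ctxlen && len < maxlen; ii++) {
            std::map<unsigned int, std::string>::const_iterator it =
                sparse.find(ii);
            if (it != sparse.end() && !it->second.empty()) {
                chosen[ii] = it->second;
                len += it->second.length();
            }
        }
        // Mark a break between non-contiguous chunks
        if (chosen.find(*p + ctxlen + 1) == chosen.end())
            chosen[*p + ctxlen + 1] = "...";
    }
    std::string abs;
    for (std::map<unsigned int, std::string>::const_iterator it =
             chosen.begin(); it != chosen.end(); it++)
        abs += it->second + " ";
    return abs;
}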
+ random_shuffle(qtermposs.begin(), qtermposs.end()); + map mabs; + unsigned int abslen = 0; + + // Extract data around the N first (in random order) query term + // positions, and store the terms in the map. Don't concatenate + // immediately into chunks because there might be overlaps + for (vector::const_iterator pos = qtermposs.begin(); + pos != qtermposs.end(); pos++) { + + if (int(abslen) > m_db->m_synthAbsLen) + break; + + unsigned int sta = MAX(0, *pos - m_db->m_synthAbsWordCtxLen); + unsigned int sto = *pos + m_db->m_synthAbsWordCtxLen; + + LOGDEB2(("Abstract: %d<-%d->%d\n", sta, *pos, sto)); + + for (unsigned int ii = sta; ii <= sto; ii++) { + + if (int(abslen) > m_db->m_synthAbsLen) + break; + map::const_iterator vit = + sparseDoc.find(ii); + if (vit != sparseDoc.end() && !vit->second.empty()) { + LOGDEB2(("Abstract: position %d -> [%s]\n", + ii, vit->second.c_str())); + mabs[ii] = vit->second; + abslen += vit->second.length(); + } else { + LOGDEB2(("Abstract: empty position at %d\n", ii)); + } + } + + // Possibly add a ... at the end of chunk if it's not + // overlapping + if (mabs.find(sto+1) == mabs.end()) + mabs[sto+1] = "..."; + } + + // Build the abstract by walking the map (in order of position) + string abstract; + for (map::const_iterator it = mabs.begin(); + it != mabs.end(); it++) { + LOGDEB2(("Abtract:output %u -> [%s]\n", it->first,it->second.c_str())); + abstract += it->second + " "; + } + LOGDEB(("Abtract: done in %d mS\n", chron.millis())); + return abstract; +} /* Rcl::Db methods ///////////////////////////////// */ @@ -909,279 +1132,67 @@ bool Db::purgeFile(const string &fn) return false; } -// Splitter callback for breaking query into terms -class wsQData : public TextSplitCB { - public: - vector terms; - string catterms() { - string s; - for (unsigned int i=0;i::iterator it=terms.begin(); it !=terms.end();it++){ - string dumb; - dumb_string(*it, dumb); - *it = dumb; - } - } -}; - -// Turn string into list of xapian queries. There is little -// interpretation done on the string (no +term -term or filename:term -// stuff). We just separate words and phrases, and interpret -// capitalized terms as wanting no stem expansion. -// The final list contains one query for each term or phrase -// - Elements corresponding to a stem-expanded part are an OP_OR -// composition of the stem-expanded terms (or a single term query). -// - Elements corresponding to a phrase are an OP_PHRASE composition of the -// phrase terms (no stem expansion in this case) -static void stringToXapianQueries(const string &iq, - const string& stemlang, - Db *db, - list &pqueries, - unsigned int opts = Db::QO_NONE) +bool Db::filenameWildExp(const string& fnexp, list& names) { - string qstring = iq; + // File name search, with possible wildcards. + // We expand wildcards by scanning the filename terms (prefixed + // with XSFN) from the database. + // We build an OR query with the expanded values if any. 
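// Editor's sketch of how the user's file name expression is interpreted before
// the XSFN term scan (hypothetical standalone helper; the real logic follows
// just below in Db::filenameWildExp). Unquoted patterns without wildcards
// become substring matches, quoted patterns are taken literally, and patterns
// that already contain wildcards are left alone:
//   report         -> *report*
//   "report.doc"   -> report.doc
//   rep*.odt       -> rep*.odt
#include <string>

std::string normalizeFnPattern(std::string pattern)
{
    if (pattern.size() > 1 && pattern[0] == '"' &&
        pattern[pattern.size()-1] == '"')
        pattern = pattern.substr(1, pattern.size() - 2);
    else if (pattern.find_first_of("*?[") == std::string::npos)
        pattern = "*" + pattern + "*";
    return pattern;
}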
+ string pattern; + dumb_string(fnexp, pattern); - // Split into (possibly single word) phrases ("this is a phrase"): - list phrases; - stringToStrings(qstring, phrases); + // If pattern is not quoted, and has no wildcards, we add * at + // each end: match any substring + if (pattern[0] == '"' && pattern[pattern.size()-1] == '"') { + pattern = pattern.substr(1, pattern.size() -2); + } else if (pattern.find_first_of("*?[") == string::npos) { + pattern = "*" + pattern + "*"; + } // else let it be - // Then process each phrase: split into terms and transform into - // appropriate Xapian Query + LOGDEB((" pattern: [%s]\n", pattern.c_str())); - for (list::iterator it=phrases.begin(); it !=phrases.end(); it++) { - LOGDEB(("strToXapianQ: phrase or word: [%s]\n", it->c_str())); - - // If there are both spans and single words in this element, - // we need to use a word split, else a phrase query including - // a span would fail if we didn't adjust the proximity to - // account for the additional span term which is complicated. - wsQData splitDataS, splitDataW; - TextSplit splitterS(&splitDataS, TextSplit::TXTS_ONLYSPANS); - splitterS.text_to_words(*it); - TextSplit splitterW(&splitDataW, TextSplit::TXTS_NOSPANS); - splitterW.text_to_words(*it); - wsQData& splitData = splitDataS; - if (splitDataS.terms.size() > 1 && splitDataS.terms.size() != - splitDataW.terms.size()) - splitData = splitDataW; - - LOGDEB1(("strToXapianQ: splitter term count: %d\n", - splitData.terms.size())); - switch(splitData.terms.size()) { - case 0: continue;// ?? - case 1: // Not a real phrase: one term - { - string term = splitData.terms.front(); - bool nostemexp = false; - // Check if the first letter is a majuscule in which - // case we do not want to do stem expansion. Note that - // the test is convoluted and possibly problematic - if (term.length() > 0) { - string noacterm,noaclowterm; - if (unacmaybefold(term, noacterm, "UTF-8", false) && - unacmaybefold(noacterm, noaclowterm, "UTF-8", true)) { - Utf8Iter it1(noacterm); - Utf8Iter it2(noaclowterm); - if (*it1 != *it2) - nostemexp = true; - } - } - LOGDEB1(("Term: %s stem expansion: %s\n", - term.c_str(), nostemexp?"no":"yes")); - - list exp; - string term1; - dumb_string(term, term1); - // Possibly perform stem compression/expansion - if (!nostemexp && (opts & Db::QO_STEM)) { - exp = db->stemExpand(stemlang, term1); - } else { - exp.push_back(term1); - } - - // Push either term or OR of stem-expanded set - pqueries.push_back(Xapian::Query(Xapian::Query::OP_OR, - exp.begin(), exp.end())); - } + // Match pattern against all file names in the db + Xapian::TermIterator it = m_ndb->db.allterms_begin(); + it.skip_to("XSFN"); + for (;it != m_ndb->db.allterms_end(); it++) { + if ((*it).find("XSFN") != 0) + break; + string fn = (*it).substr(4); + LOGDEB2(("Matching [%s] and [%s]\n", pattern.c_str(), fn.c_str())); + if (fnmatch(pattern.c_str(), fn.c_str(), 0) != FNM_NOMATCH) { + names.push_back((*it).c_str()); + } + // Limit the match count + if (names.size() > 1000) { + LOGERR(("Db::SetQuery: too many matched file names\n")); break; - - default: - // Phrase: no stem expansion - splitData.dumball(); - LOGDEB(("Pushing phrase: [%s]\n", splitData.catterms().c_str())); - pqueries.push_back(Xapian::Query(Xapian::Query::OP_PHRASE, - splitData.terms.begin(), - splitData.terms.end())); } } + if (names.empty()) { + // Build an impossible query: we know its impossible because we + // control the prefixes! 
+ names.push_back("XIMPOSSIBLE"); + } + return true; } // Prepare query out of "advanced search" data -bool Db::setQuery(AdvSearchData &sdata, int opts, const string& stemlang) +bool Db::setQuery(RefCntr sdata, int opts, + const string& stemlang) { - LOGDEB(("Db::setQuery: adv:\n")); - LOGDEB((" allwords: %s\n", sdata.allwords.c_str())); - LOGDEB((" phrase: %s\n", sdata.phrase.c_str())); - LOGDEB((" orwords: %s\n", sdata.orwords.c_str())); - LOGDEB((" orwords1: %s\n", sdata.orwords1.c_str())); - LOGDEB((" nowords: %s\n", sdata.nowords.c_str())); - LOGDEB((" filename: %s\n", sdata.filename.c_str())); - - string ft; - for (list::iterator it = sdata.filetypes.begin(); - it != sdata.filetypes.end(); it++) {ft += *it + " ";} - if (!ft.empty()) - LOGDEB((" searched file types: %s\n", ft.c_str())); - if (!sdata.topdir.empty()) - LOGDEB((" restricted to: %s\n", sdata.topdir.c_str())); - LOGDEB((" Options: 0x%x\n", opts)); - - m_filterTopDir = sdata.topdir; - m_dbindices.clear(); - - if (!m_ndb) + if (!m_ndb) { + LOGERR(("Db::setQuery: no db!\n")); return false; - list pqueries; - Xapian::Query xq; + } + LOGDEB(("Db::setQuery:\n")); + + m_filterTopDir = sdata->m_topdir; + m_dbindices.clear(); m_qOpts = opts; - if (!sdata.filename.empty()) { - LOGDEB((" filename search\n")); - // File name search, with possible wildcards. - // We expand wildcards by scanning the filename terms (prefixed - // with XSFN) from the database. - // We build an OR query with the expanded values if any. - string pattern; - dumb_string(sdata.filename, pattern); - - // If pattern is not quoted, and has no wildcards, we add * at - // each end: match any substring - if (pattern[0] == '"' && pattern[pattern.size()-1] == '"') { - pattern = pattern.substr(1, pattern.size() -2); - } else if (pattern.find_first_of("*?[") == string::npos) { - pattern = "*" + pattern + "*"; - } // else let it be - - LOGDEB((" pattern: [%s]\n", pattern.c_str())); - - // Match pattern against all file names in the db - Xapian::TermIterator it = m_ndb->db.allterms_begin(); - it.skip_to("XSFN"); - list names; - for (;it != m_ndb->db.allterms_end(); it++) { - if ((*it).find("XSFN") != 0) - break; - string fn = (*it).substr(4); - LOGDEB2(("Matching [%s] and [%s]\n", pattern.c_str(), fn.c_str())); - if (fnmatch(pattern.c_str(), fn.c_str(), 0) != FNM_NOMATCH) { - names.push_back((*it).c_str()); - } - // Limit the match count - if (names.size() > 1000) { - LOGERR(("Db::SetQuery: too many matched file names\n")); - break; - } - } - if (names.empty()) { - // Build an impossible query: we know its impossible because we - // control the prefixes! - names.push_back("XIMPOSSIBLE"); - } - // Build a query out of the matching file name terms. - xq = Xapian::Query(Xapian::Query::OP_OR, names.begin(), names.end()); - } - - if (!sdata.allwords.empty()) { - stringToXapianQueries(sdata.allwords, stemlang, this,pqueries,m_qOpts); - if (!pqueries.empty()) { - Xapian::Query nq = - Xapian::Query(Xapian::Query::OP_AND, pqueries.begin(), - pqueries.end()); - xq = xq.empty() ? nq : - Xapian::Query(Xapian::Query::OP_AND, xq, nq); - pqueries.clear(); - } - } - - if (!sdata.orwords.empty()) { - stringToXapianQueries(sdata.orwords, stemlang, this,pqueries,m_qOpts); - if (!pqueries.empty()) { - Xapian::Query nq = - Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(), - pqueries.end()); - xq = xq.empty() ? 
nq : - Xapian::Query(Xapian::Query::OP_AND, xq, nq); - pqueries.clear(); - } - } - - if (!sdata.orwords1.empty()) { - stringToXapianQueries(sdata.orwords1, stemlang, this,pqueries,m_qOpts); - if (!pqueries.empty()) { - Xapian::Query nq = - Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(), - pqueries.end()); - xq = xq.empty() ? nq : - Xapian::Query(Xapian::Query::OP_AND, xq, nq); - pqueries.clear(); - } - } - - if (!sdata.phrase.empty()) { - Xapian::Query nq; - string s = string("\"") + sdata.phrase + string("\""); - stringToXapianQueries(s, stemlang, this, pqueries); - if (!pqueries.empty()) { - // There should be a single list element phrase query. - xq = xq.empty() ? *pqueries.begin() : - Xapian::Query(Xapian::Query::OP_AND, xq, *pqueries.begin()); - pqueries.clear(); - } - } - - if (!sdata.filetypes.empty()) { - Xapian::Query tq; - for (list::iterator it = sdata.filetypes.begin(); - it != sdata.filetypes.end(); it++) { - string term = "T" + *it; - LOGDEB(("Adding file type term: [%s]\n", term.c_str())); - tq = tq.empty() ? Xapian::Query(term) : - Xapian::Query(Xapian::Query::OP_OR, tq, Xapian::Query(term)); - } - xq = xq.empty() ? tq : Xapian::Query(Xapian::Query::OP_FILTER, xq, tq); - } - - // "And not" part. Must come last, as we have to check it's not - // the only term in the query. We do no stem expansion on 'No' - // words. Should we ? - if (!sdata.nowords.empty()) { - stringToXapianQueries(sdata.nowords, stemlang, this, pqueries); - if (!pqueries.empty()) { - Xapian::Query nq; - nq = Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(), - pqueries.end()); - if (xq.empty()) { - // Xapian cant do this currently. Have to have a positive - // part! - sdata.description = "Error: pure negative query\n"; - LOGERR(("Rcl::Db::setQuery: error: pure negative query\n")); - return false; - } - xq = Xapian::Query(Xapian::Query::OP_AND_NOT, xq, nq); - pqueries.clear(); - } - } + Xapian::Query xq; + sdata->toNativeQuery(*this, &xq, (opts & Db::QO_STEM) ? 
stemlang : ""); m_ndb->query = xq; delete m_ndb->enquire; @@ -1189,10 +1200,11 @@ bool Db::setQuery(AdvSearchData &sdata, int opts, const string& stemlang) m_ndb->enquire->set_query(m_ndb->query); m_ndb->mset = Xapian::MSet(); // Get the query description and trim the "Xapian::Query" - sdata.description = m_ndb->query.get_description(); - if (sdata.description.find("Xapian::Query") == 0) - sdata.description = sdata.description.substr(strlen("Xapian::Query")); - LOGDEB(("Db::SetQuery: Q: %s\n", sdata.description.c_str())); + sdata->m_description = m_ndb->query.get_description(); + if (sdata->m_description.find("Xapian::Query") == 0) + sdata->m_description = + sdata->m_description.substr(strlen("Xapian::Query")); + LOGDEB(("Db::SetQuery: Q: %s\n", sdata->m_description.c_str())); return true; } @@ -1422,43 +1434,6 @@ int Db::getResCnt() return m_ndb->mset.get_matches_lower_bound(); } -bool Native::dbDataToRclDoc(std::string &data, Doc &doc, - int qopts, - Xapian::docid docid, const list& terms) -{ - LOGDEB1(("Db::dbDataToRclDoc: opts %x data: %s\n", qopts, data.c_str())); - ConfSimple parms(&data); - if (!parms.ok()) - return false; - parms.get(string("url"), doc.url); - parms.get(string("mtype"), doc.mimetype); - parms.get(string("fmtime"), doc.fmtime); - parms.get(string("dmtime"), doc.dmtime); - parms.get(string("origcharset"), doc.origcharset); - parms.get(string("caption"), doc.title); - parms.get(string("keywords"), doc.keywords); - parms.get(string("abstract"), doc.abstract); - // Possibly remove synthetic abstract indicator (if it's there, we - // used to index the beginning of the text as abstract). - bool syntabs = false; - if (doc.abstract.find(rclSyntAbs) == 0) { - doc.abstract = doc.abstract.substr(rclSyntAbs.length()); - syntabs = true; - } - // If the option is set and the abstract is synthetic or empty , build - // abstract from position data. - if ((qopts & Db::QO_BUILD_ABSTRACT) && !terms.empty()) { - LOGDEB(("dbDataToRclDoc:: building abstract from position data\n")); - if (doc.abstract.empty() || syntabs || - (qopts & Db::QO_REPLACE_ABSTRACT)) - doc.abstract = makeAbstract(docid, terms); - } - parms.get(string("ipath"), doc.ipath); - parms.get(string("fbytes"), doc.fbytes); - parms.get(string("dbytes"), doc.dbytes); - doc.xdocid = docid; - return true; -} // Get document at rank i in query (i is the index in the whole result // set, as in the enquire class. We check if the current mset has the @@ -1641,191 +1616,6 @@ list Db::expand(const Doc &doc) } -// We build a possibly full size but sparsely populated (only around -// the search term occurrences) reconstruction of the document. It -// would be possible to compress the array, by having only multiple -// chunks around the terms, but this would seriously complicate the -// data structure. -string Native::makeAbstract(Xapian::docid docid, const list& terms) -{ - LOGDEB(("Native::makeAbstract: maxlen %d wWidth %d\n", - m_db->m_synthAbsLen, m_db->m_synthAbsWordCtxLen)); - - Chrono chron; - - // For each of the query terms, query xapian for its positions - // list in the document. For each position entry, remember it in qtermposs - // and insert it and its neighbours in the set of 'interesting' positions - - // The terms 'array' that we partially populate with the document - // terms, at their positions around the search terms positions: - map sparseDoc; - - // All the query term positions. We remember this mainly because we are - // going to random-shuffle it for selecting the chunks that we actually - // print. 
- vector qtermposs; - - // Limit the total number of slots we populate. - const unsigned int maxtotaloccs = 300; - // Max occurrences per term. We initially know nothing about the - // occurrences repartition (it would be possible that only one - // term in the list occurs, or that all do). So this is a rather - // arbitrary choice. - const unsigned int maxoccperterm = maxtotaloccs / 10; - unsigned int totaloccs = 0; - - for (list::const_iterator qit = terms.begin(); qit != terms.end(); - qit++) { - Xapian::PositionIterator pos; - // There may be query terms not in this doc. This raises an - // exception when requesting the position list, we catch it. - string emptys; - try { - unsigned int occurrences = 0; - for (pos = db.positionlist_begin(docid, *qit); - pos != db.positionlist_end(docid, *qit); pos++) { - unsigned int ipos = *pos; - LOGDEB2(("Abstract: [%s] at %d\n", qit->c_str(), ipos)); - // Remember the term position - qtermposs.push_back(ipos); - // Add adjacent slots to the set to populate at next step - unsigned int sta = MAX(0, ipos-m_db->m_synthAbsWordCtxLen); - unsigned int sto = ipos+m_db->m_synthAbsWordCtxLen; - for (unsigned int ii = sta; ii <= sto; ii++) { - if (ii == ipos) - sparseDoc[ii] = *qit; - else - sparseDoc[ii] = emptys; - } - // Limit the number of occurences we keep for each - // term. The abstract has a finite length anyway ! - if (occurrences++ > maxoccperterm) - break; - } - } catch (...) { - // Term does not occur. No problem. - } - // Limit total size - if (totaloccs++ > maxtotaloccs) - break; - } - - LOGDEB(("Abstract:%d:chosen number of positions %d. Populating\n", - chron.millis(), qtermposs.size())); - - // Walk the full document position list (for each term walk - // position list) and populate slots around the query terms. We - // arbitrarily truncate the list to avoid taking forever. If we do - // cutoff, the abstract may be inconsistant, which is bad... - { - Xapian::TermIterator term; - int cutoff = 500 * 1000; - - for (term = db.termlist_begin(docid); - term != db.termlist_end(docid); term++) { - if (cutoff-- < 0) { - LOGDEB(("Abstract: max term count cutoff\n")); - break; - } - - Xapian::PositionIterator pos; - for (pos = db.positionlist_begin(docid, *term); - pos != db.positionlist_end(docid, *term); pos++) { - if (cutoff-- < 0) { - LOGDEB(("Abstract: max term count cutoff\n")); - break; - } - map::iterator vit; - if ((vit=sparseDoc.find(*pos)) != sparseDoc.end()) { - // Don't replace a term: the terms list is in - // alphabetic order, and we may have several terms - // at the same position, we want to keep only the - // first one (ie: dockes and dockes@wanadoo.fr) - if (vit->second.empty()) { - LOGDEB2(("Abstract: populating: [%s] at %d\n", - (*term).c_str(), *pos)); - sparseDoc[*pos] = *term; - } - } - } - } - } - -#if 0 - // Debug only: output the full term[position] vector - bool epty = false; - int ipos = 0; - for (map::iterator it = sparseDoc.begin(); - it != sparseDoc.end(); - it++, ipos++) { - if (it->empty()) { - if (!epty) - LOGDEB(("Abstract:vec[%d]: [%s]\n", ipos, it->c_str())); - epty=true; - } else { - epty = false; - LOGDEB(("Abstract:vec[%d]: [%s]\n", ipos, it->c_str())); - } - } -#endif - - LOGDEB(("Abstract:%d: randomizing and extracting\n", chron.millis())); - - // We randomize the selection of term positions, from which we - // shall pull, starting at the beginning, until the abstract is - // big enough. The abstract is finally built in correct position - // order, thanks to the position map. 
- random_shuffle(qtermposs.begin(), qtermposs.end()); - map mabs; - unsigned int abslen = 0; - - // Extract data around the N first (in random order) query term - // positions, and store the terms in the map. Don't concatenate - // immediately into chunks because there might be overlaps - for (vector::const_iterator pos = qtermposs.begin(); - pos != qtermposs.end(); pos++) { - - if (int(abslen) > m_db->m_synthAbsLen) - break; - - unsigned int sta = MAX(0, *pos - m_db->m_synthAbsWordCtxLen); - unsigned int sto = *pos + m_db->m_synthAbsWordCtxLen; - - LOGDEB2(("Abstract: %d<-%d->%d\n", sta, *pos, sto)); - - for (unsigned int ii = sta; ii <= sto; ii++) { - - if (int(abslen) > m_db->m_synthAbsLen) - break; - map::const_iterator vit = - sparseDoc.find(ii); - if (vit != sparseDoc.end() && !vit->second.empty()) { - LOGDEB2(("Abstract: position %d -> [%s]\n", - ii, vit->second.c_str())); - mabs[ii] = vit->second; - abslen += vit->second.length(); - } else { - LOGDEB2(("Abstract: empty position at %d\n", ii)); - } - } - - // Possibly add a ... at the end of chunk if it's not - // overlapping - if (mabs.find(sto+1) == mabs.end()) - mabs[sto+1] = "..."; - } - - // Build the abstract by walking the map (in order of position) - string abstract; - for (map::const_iterator it = mabs.begin(); - it != mabs.end(); it++) { - LOGDEB2(("Abtract:output %u -> [%s]\n", it->first,it->second.c_str())); - abstract += it->second + " "; - } - LOGDEB(("Abtract: done in %d mS\n", chron.millis())); - return abstract; -} #ifndef NO_NAMESPACES } #endif diff --git a/src/rcldb/rcldb.h b/src/rcldb/rcldb.h index 7e90eb5c..46948d74 100644 --- a/src/rcldb/rcldb.h +++ b/src/rcldb/rcldb.h @@ -16,12 +16,14 @@ */ #ifndef _DB_H_INCLUDED_ #define _DB_H_INCLUDED_ -/* @(#$Id: rcldb.h,v 1.40 2006-10-30 12:59:44 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: rcldb.h,v 1.41 2006-11-13 08:49:44 dockes Exp $ (C) 2004 J.F.Dockes */ #include #include #include +#include "refcntr.h" + #ifndef NO_NAMESPACES using std::string; using std::list; @@ -103,7 +105,7 @@ class Doc { } }; -class AdvSearchData; +class SearchData; class Native; class TermIter; @@ -155,7 +157,7 @@ class Db { /* Query-related functions */ // Parse query string and initialize query - bool setQuery(AdvSearchData &q, int opts = QO_NONE, + bool setQuery(RefCntr q, int opts = QO_NONE, const string& stemlang = "english"); bool getQueryTerms(list& terms); bool getMatchTerms(const Doc& doc, list& terms); @@ -213,6 +215,9 @@ class Db { /** Perform stem expansion across all dbs configured for searching */ list stemExpand(const string& lang, const string& term); + /** Filename wildcard expansion */ + bool filenameWildExp(const string& exp, list& names); + private: string m_filterTopDir; // Current query filter on subtree top directory @@ -248,6 +253,7 @@ private: vector updated; bool reOpen(); // Close/open, same mode/opts + /* Copyconst and assignemt private and forbidden */ Db(const Db &) {} Db & operator=(const Db &) {return *this;}; diff --git a/src/rcldb/searchdata.cpp b/src/rcldb/searchdata.cpp new file mode 100644 index 00000000..16bde44c --- /dev/null +++ b/src/rcldb/searchdata.cpp @@ -0,0 +1,299 @@ +#ifndef lint +static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.1 2006-11-13 08:49:44 dockes Exp $ (C) 2006 J.F.Dockes"; +#endif +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later 
version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the + * Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +// Handle translation from rcl's SearchData structures to Xapian Queries + +#include +#include +#ifndef NO_NAMESPACES +using namespace std; +#endif + +#include "xapian.h" + +#include "rcldb.h" +#include "searchdata.h" +#include "debuglog.h" +#include "smallut.h" +#include "textsplit.h" +#include "unacpp.h" +#include "utf8iter.h" + +namespace Rcl { + +typedef list::iterator qlist_it_t; + +bool SearchData::toNativeQuery(Rcl::Db &db, void *d, const string& stemlang) +{ + Xapian::Query xq; + + // Walk the clause list translating each in turn and building the + // Xapian query tree + for (qlist_it_t it = m_query.begin(); it != m_query.end(); it++) { + Xapian::Query nq; + (*it)->toNativeQuery(db, &nq, stemlang); + Xapian::Query::op op; + + // If this structure is an AND list, must use AND_NOT for excl clauses. + // Else this is an OR list, and there can't be excl clauses + if (m_tp == SCLT_AND) { + op = (*it)->m_tp == SCLT_EXCL ? + Xapian::Query::OP_AND_NOT: Xapian::Query::OP_AND; + } else { + op = Xapian::Query::OP_OR; + } + xq = xq.empty() ? nq : Xapian::Query(op, xq, nq); + } + + // Add the file type filtering clause if any + if (!m_filetypes.empty()) { + list pqueries; + Xapian::Query tq; + for (list::iterator it = m_filetypes.begin(); + it != m_filetypes.end(); it++) { + string term = "T" + *it; + LOGDEB(("Adding file type term: [%s]\n", term.c_str())); + tq = tq.empty() ? Xapian::Query(term) : + Xapian::Query(Xapian::Query::OP_OR, tq, Xapian::Query(term)); + } + xq = xq.empty() ? tq : Xapian::Query(Xapian::Query::OP_FILTER, xq, tq); + } + + *((Xapian::Query *)d) = xq; + return true; +} + +// Add clause to current list. OR lists cant have EXCL clauses. +bool SearchData::addClause(SearchDataClause* cl) +{ + if (m_tp == SCLT_OR && (cl->m_tp == SCLT_EXCL)) { + LOGERR(("SearchData::addClause: cant add EXCL to OR list\n")); + return false; + } + m_query.push_back(cl); + return true; +} + +// Make me all new +void SearchData::erase() { + for (qlist_it_t it = m_query.begin(); it != m_query.end(); it++) + delete *it; + m_query.clear(); + m_filetypes.clear(); + m_topdir.erase(); + m_description.erase(); +} + +// Am I a file name only search ? 
This is to turn off term highlighting +bool SearchData::fileNameOnly() { + for (qlist_it_t it = m_query.begin(); it != m_query.end(); it++) + if (!(*it)->isFileName()) + return false; + return true; +} + +// Splitter callback for breaking a user query string into simple +// terms and phrases +class wsQData : public TextSplitCB { + public: + vector terms; + // Debug + string catterms() { + string s; + for (unsigned int i = 0; i < terms.size(); i++) { + s += "[" + terms[i] + "] "; + } + return s; + } + bool takeword(const std::string &term, int , int, int) { + LOGDEB1(("wsQData::takeword: %s\n", term.c_str())); + terms.push_back(term); + return true; + } + // Decapital + deaccent all terms + void dumball() { + for (vector::iterator it=terms.begin(); it !=terms.end();it++){ + string dumb; + dumb_string(*it, dumb); + *it = dumb; + } + } +}; + + +// Turn string into list of xapian queries. There is little +// interpretation done on the string (no +term -term or filename:term +// stuff). We just separate words and phrases, and interpret +// capitalized terms as wanting no stem expansion. +// The final list contains one query for each term or phrase +// - Elements corresponding to a stem-expanded part are an OP_OR +// composition of the stem-expanded terms (or a single term query). +// - Elements corresponding to a phrase are an OP_PHRASE composition of the +// phrase terms (no stem expansion in this case) +static void stringToXapianQueries(const string &iq, + const string& stemlang, + Db& db, + list &pqueries) +{ + string qstring = iq; + bool opt_stemexp = !stemlang.empty(); + + // Split into (possibly single word) phrases ("this is a phrase"): + list phrases; + stringToStrings(qstring, phrases); + + // Then process each phrase: split into terms and transform into + // appropriate Xapian Query + + for (list::iterator it=phrases.begin(); it !=phrases.end(); it++) { + LOGDEB(("strToXapianQ: phrase or word: [%s]\n", it->c_str())); + + // If there are both spans and single words in this element, + // we need to use a word split, else a phrase query including + // a span would fail if we didn't adjust the proximity to + // account for the additional span term which is complicated. + wsQData splitDataS, splitDataW; + TextSplit splitterS(&splitDataS, TextSplit::TXTS_ONLYSPANS); + splitterS.text_to_words(*it); + TextSplit splitterW(&splitDataW, TextSplit::TXTS_NOSPANS); + splitterW.text_to_words(*it); + wsQData& splitData = splitDataS; + if (splitDataS.terms.size() > 1 && splitDataS.terms.size() != + splitDataW.terms.size()) + splitData = splitDataW; + + LOGDEB1(("strToXapianQ: splitter term count: %d\n", + splitData.terms.size())); + switch(splitData.terms.size()) { + case 0: continue;// ?? + case 1: // Not a real phrase: one term + { + string term = splitData.terms.front(); + bool nostemexp = false; + // Check if the first letter is a majuscule in which + // case we do not want to do stem expansion. 
Note that + // the test is convoluted and possibly problematic + if (term.length() > 0) { + string noacterm,noaclowterm; + if (unacmaybefold(term, noacterm, "UTF-8", false) && + unacmaybefold(noacterm, noaclowterm, "UTF-8", true)) { + Utf8Iter it1(noacterm); + Utf8Iter it2(noaclowterm); + if (*it1 != *it2) + nostemexp = true; + } + } + LOGDEB1(("Term: %s stem expansion: %s\n", + term.c_str(), nostemexp?"no":"yes")); + + list exp; + string term1; + dumb_string(term, term1); + // Possibly perform stem compression/expansion + if (!nostemexp && opt_stemexp) { + exp = db.stemExpand(stemlang, term1); + } else { + exp.push_back(term1); + } + + // Push either term or OR of stem-expanded set + pqueries.push_back(Xapian::Query(Xapian::Query::OP_OR, + exp.begin(), exp.end())); + } + break; + + default: + // Phrase: no stem expansion + splitData.dumball(); + LOGDEB(("Pushing phrase: [%s]\n", splitData.catterms().c_str())); + pqueries.push_back(Xapian::Query(Xapian::Query::OP_PHRASE, + splitData.terms.begin(), + splitData.terms.end())); + } + } +} + +// Translate a simple OR, AND, or EXCL search clause. +bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p, + const string& stemlang) +{ + Xapian::Query *qp = (Xapian::Query *)p; + *qp = Xapian::Query(); + + Xapian::Query::op op; + switch (m_tp) { + case SCLT_AND: op = Xapian::Query::OP_AND; break; + case SCLT_OR: + case SCLT_EXCL: op = Xapian::Query::OP_OR; break; + default: + LOGERR(("SearchDataClauseSimple: bad m_tp %d\n", m_tp)); + return false; + } + list pqueries; + stringToXapianQueries(m_text, stemlang, db, pqueries); + if (pqueries.empty()) { + LOGERR(("SearchDataClauseSimple: resolved to null query\n")); + return true; + } + *qp = Xapian::Query(op, pqueries.begin(), pqueries.end()); + return true; +} + +// Translate a FILENAME search clause. +bool SearchDataClauseFilename::toNativeQuery(Rcl::Db &db, void *p, + const string& stemlang) +{ + Xapian::Query *qp = (Xapian::Query *)p; + *qp = Xapian::Query(); + + list names; + db.filenameWildExp(m_text, names); + // Build a query out of the matching file name terms. + *qp = Xapian::Query(Xapian::Query::OP_OR, names.begin(), names.end()); + return true; +} + +// Translate NEAR or PHRASE clause. We're not handling the distance parameter +// yet. +bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p, + const string& stemlang) +{ + Xapian::Query *qp = (Xapian::Query *)p; + *qp = Xapian::Query(); + + Xapian::Query::op op = m_tp == SCLT_PHRASE ? Xapian::Query::OP_PHRASE : + Xapian::Query::OP_NEAR; + + list pqueries; + Xapian::Query nq; + string s = string("\"") + m_text + string("\""); + + // Use stringToXapianQueries anyway to lowercase and simplify the + // phrase terms etc. 
The result should be a single element list + stringToXapianQueries(s, stemlang, db, pqueries); + if (pqueries.empty()) { + LOGERR(("SearchDataClauseDist: resolved to null query\n")); + return true; + } + *qp = *pqueries.begin(); + return true; +} + +} // Namespace Rcl diff --git a/src/rcldb/searchdata.h b/src/rcldb/searchdata.h index c68c6099..852127ee 100644 --- a/src/rcldb/searchdata.h +++ b/src/rcldb/searchdata.h @@ -1,40 +1,112 @@ #ifndef _SEARCHDATA_H_INCLUDED_ #define _SEARCHDATA_H_INCLUDED_ -/* @(#$Id: searchdata.h,v 1.2 2006-04-22 06:27:37 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: searchdata.h,v 1.3 2006-11-13 08:49:45 dockes Exp $ (C) 2004 J.F.Dockes */ + +#include +#include + +#include "rcldb.h" + +#ifndef NO_NAMESPACES +using std::list; +using std::string; +#endif namespace Rcl { -/** - * Holder for query data - */ -class AdvSearchData { - public: - string allwords; - string phrase; - string orwords; - string orwords1; // Have two instances of orwords for and'ing them - string nowords; - string filename; - list filetypes; // restrict to types. Empty if inactive - string topdir; // restrict to subtree. Empty if inactive - string description; // Printable expanded version of the complete query - // returned after setQuery. - void erase() { - allwords.erase(); - phrase.erase(); - orwords.erase(); - orwords1.erase(); - nowords.erase(); - filetypes.clear(); - topdir.erase(); - filename.erase(); - description.erase(); - } - bool fileNameOnly() { - return allwords.empty() && phrase.empty() && orwords.empty() && - orwords1.empty() && nowords.empty(); - } + +/** Search clause types */ +enum SClType { + SCLT_AND, + SCLT_OR, SCLT_EXCL, SCLT_FILENAME, SCLT_PHRASE, SCLT_NEAR, + SCLT_SUB }; -} +class SearchDataClause; +/** + * Holder for a list of search clauses. Some of the clauses can be comples + * subqueries. + */ +class SearchData { + public: + SClType m_tp; // Only SCLT_AND or SCLT_OR here + list m_query; + list m_filetypes; // Restrict to filetypes if set. + string m_topdir; // Restrict to subtree. + // Printable expanded version of the complete query, obtained from Xapian + // valid after setQuery() call + string m_description; + + SearchData(SClType tp) : m_tp(tp) {} + ~SearchData() {erase();} + + /** Make pristine */ + void erase(); + + /** Is there anything but a file name search in here ? */ + bool fileNameOnly(); + + /** Translate to Xapian query. rcldb knows about the void* */ + bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang); + + /** We become the owner of cl and will delete it */ + bool addClause(SearchDataClause *cl); + + private: + /* Copyconst and assignment private and forbidden */ + SearchData(const SearchData &) {} + SearchData& operator=(const SearchData&) {return *this;}; +}; + +class SearchDataClause { + public: + SClType m_tp; + + SearchDataClause(SClType tp) : m_tp(tp) {} + virtual ~SearchDataClause() {} + virtual bool toNativeQuery(Rcl::Db &db, void *, const string&) = 0; + virtual bool isFileName() {return m_tp == SCLT_FILENAME ? 
true : false;} +}; + +class SearchDataClauseSimple : public SearchDataClause { +public: + SearchDataClauseSimple(SClType tp, string txt) + : SearchDataClause(tp), m_text(txt) {} + virtual ~SearchDataClauseSimple() {} + virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang); +protected: + string m_text; +}; + +class SearchDataClauseFilename : public SearchDataClauseSimple { + public: + SearchDataClauseFilename(string txt) + : SearchDataClauseSimple(SCLT_FILENAME, m_text) {} + virtual ~SearchDataClauseFilename() {} + virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang); +}; + +class SearchDataClauseDist : public SearchDataClauseSimple { +public: + SearchDataClauseDist(SClType tp, string txt, int dist) + : SearchDataClauseSimple(tp, txt), m_distance(dist) {} + virtual ~SearchDataClauseDist() {} + virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang); + +protected: + int m_distance; +}; + +class SearchDataClauseSub : public SearchDataClause { + public: + SearchDataClauseSub(SClType tp, SClType stp) + : SearchDataClause(tp), m_sub(stp) {} + virtual ~SearchDataClauseSub() {} + virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang); + +protected: + SearchData m_sub; +}; + +} // Namespace Rcl #endif /* _SEARCHDATA_H_INCLUDED_ */
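For reference, here is a sketch of how a caller might drive the new interface declared in searchdata.h and rcldb.h. The GUI-side changes are not part of this diff, so everything outside those headers (the function name, the clause text, the file type and directory values) is illustrative only, and RefCntr is assumed to take ownership of a heap-allocated SearchData, as the setQuery() signature suggests. Note in passing that SearchDataClauseFilename's constructor, as declared above, initializes its base with m_text rather than txt, which looks like it would leave the clause text empty.

    #include <list>
    #include <string>

    #include "rcldb.h"
    #include "refcntr.h"
    #include "searchdata.h"

    bool runSampleQuery(Rcl::Db& db)
    {
        // An AND list: stem-expanded words, an excluded term, a file name
        // clause, a MIME type restriction and a subtree restriction.
        Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_AND);
        sd->addClause(new Rcl::SearchDataClauseSimple(Rcl::SCLT_AND,
                                                      "recoll indexing"));
        sd->addClause(new Rcl::SearchDataClauseSimple(Rcl::SCLT_EXCL,
                                                      "xapian"));
        sd->addClause(new Rcl::SearchDataClauseFilename("notes"));
        sd->m_filetypes.push_back("text/html");
        sd->m_topdir = "/home/me/docs";

        // SearchData deletes its clauses; the RefCntr manages sd itself.
        RefCntr<Rcl::SearchData> rq(sd);
        if (!db.setQuery(rq, Rcl::Db::QO_STEM | Rcl::Db::QO_BUILD_ABSTRACT,
                         "english"))
            return false;
        // After setQuery(), m_description holds the expanded Xapian query.
        return !rq->m_description.empty();
    }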