diff --git a/src/internfile/uncomp.cpp b/src/internfile/uncomp.cpp
index 46bc608d..b6eedde9 100644
--- a/src/internfile/uncomp.cpp
+++ b/src/internfile/uncomp.cpp
@@ -36,36 +36,36 @@ using std::vector;
 
 Uncomp::UncompCache Uncomp::o_cache;
 
 Uncomp::Uncomp(bool docache)
-    : m_docache(docache)
+    : m_docache(docache)
 {
     LOGDEB0("Uncomp::Uncomp: m_docache: " << m_docache << "\n");
 }
 
 bool Uncomp::uncompressfile(const string& ifn,
-			    const vector<string>& cmdv, string& tfile)
+                            const vector<string>& cmdv, string& tfile)
 {
     if (m_docache) {
         std::unique_lock<std::mutex> lock(o_cache.m_lock);
-	if (!o_cache.m_srcpath.compare(ifn)) {
-	    m_dir = o_cache.m_dir;
-	    m_tfile = tfile = o_cache.m_tfile;
-	    m_srcpath = ifn;
-	    o_cache.m_dir = 0;
-	    o_cache.m_srcpath.clear();
-	    return true;
-	}
+        if (!o_cache.m_srcpath.compare(ifn)) {
+            m_dir = o_cache.m_dir;
+            m_tfile = tfile = o_cache.m_tfile;
+            m_srcpath = ifn;
+            o_cache.m_dir = 0;
+            o_cache.m_srcpath.clear();
+            return true;
+        }
     }
     m_srcpath.clear();
     m_tfile.clear();
     if (m_dir == 0) {
-	m_dir = new TempDir;
+        m_dir = new TempDir;
     }
     // Make sure tmp dir is empty. we guarantee this to filters
     if (!m_dir || !m_dir->ok() || !m_dir->wipe()) {
-	LOGERR("uncompressfile: can't clear temp dir " << m_dir->dirname() <<
+        LOGERR("uncompressfile: can't clear temp dir " << m_dir->dirname() <<
                "\n");
-	return false;
+        return false;
     }
 
     // Check that we have enough available space to have some hope of
@@ -77,7 +77,7 @@ bool Uncomp::uncompressfile(const string& ifn,
                 m_dir->dirname() << "\n");
         // Hope for the best
     } else {
-	long long fsize = path_filesize(ifn);
+        long long fsize = path_filesize(ifn);
         if (fsize < 0) {
             LOGERR("uncompressfile: stat input file " << ifn << " errno " <<
                    errno << "\n");
@@ -109,22 +109,22 @@ bool Uncomp::uncompressfile(const string& ifn,
     subs['f'] = ifn;
     subs['t'] = m_dir->dirname();
     for (; it != cmdv.end(); it++) {
-	string ns;
-	pcSubst(*it, ns, subs);
-	args.push_back(ns);
+        string ns;
+        pcSubst(*it, ns, subs);
+        args.push_back(ns);
     }
 
     // Execute command and retrieve output file name, check that it exists
     ExecCmd ex;
     int status = ex.doexec(cmd, args, 0, &tfile);
     if (status || tfile.empty()) {
-	LOGERR("uncompressfile: doexec: " << cmd << " " <<
+        LOGERR("uncompressfile: doexec: " << cmd << " " <<
                stringsToString(args) << " failed for [" << ifn << "] status 0x" <<
                status << "\n");
-	if (!m_dir->wipe()) {
-	    LOGERR("uncompressfile: wipedir failed\n");
-	}
-	return false;
+        if (!m_dir->wipe()) {
+            LOGERR("uncompressfile: wipedir failed\n");
+        }
+        return false;
     }
     rtrimstring(tfile, "\n\r");
     m_tfile = tfile;
@@ -138,12 +138,12 @@ Uncomp::~Uncomp()
             (m_dir?m_dir->dirname():"(null)") << "\n");
     if (m_docache) {
         std::unique_lock<std::mutex> lock(o_cache.m_lock);
-	delete o_cache.m_dir;
-	o_cache.m_dir = m_dir;
-	o_cache.m_tfile = m_tfile;
-	o_cache.m_srcpath = m_srcpath;
+        delete o_cache.m_dir;
+        o_cache.m_dir = m_dir;
+        o_cache.m_tfile = m_tfile;
+        o_cache.m_srcpath = m_srcpath;
     } else {
-	delete m_dir;
+        delete m_dir;
     }
 }
diff --git a/src/internfile/uncomp.h b/src/internfile/uncomp.h
index a1f93898..7d51bca3 100644
--- a/src/internfile/uncomp.h
+++ b/src/internfile/uncomp.h
@@ -36,8 +36,8 @@ public:
      * temporary directory).
      */
     bool uncompressfile(const std::string& ifn,
-			const std::vector<std::string>& cmdv,
-			std::string& tfile);
+                        const std::vector<std::string>& cmdv,
+                        std::string& tfile);
 
     static void clearcache();
 
 private:
@@ -48,14 +48,14 @@ private:
     class UncompCache {
     public:
-	UncompCache() {}
-	~UncompCache() {
-	    delete m_dir;
-	}
+        UncompCache() {}
+        ~UncompCache() {
+            delete m_dir;
+        }
         std::mutex m_lock;
-	TempDir *m_dir{0};
-	std::string m_tfile;
-	std::string m_srcpath;
+        TempDir *m_dir{0};
+        std::string m_tfile;
+        std::string m_srcpath;
     };
     static UncompCache o_cache;
 };
diff --git a/src/rcldb/searchdatatox.cpp b/src/rcldb/searchdatatox.cpp
index 8fb788d0..be281cbf 100644
--- a/src/rcldb/searchdatatox.cpp
+++ b/src/rcldb/searchdatatox.cpp
@@ -61,31 +61,31 @@ bool SearchData::expandFileTypes(Db &db, vector<string>& tps)
 {
     const RclConfig *cfg = db.getConf();
     if (!cfg) {
-	LOGFATAL("Db::expandFileTypes: null configuration!!\n");
-	return false;
+        LOGFATAL("Db::expandFileTypes: null configuration!!\n");
+        return false;
     }
     vector<string> exptps;
 
     for (vector<string>::iterator it = tps.begin(); it != tps.end(); it++) {
-	if (cfg->isMimeCategory(*it)) {
-	    vector<string>tps;
-	    cfg->getMimeCatTypes(*it, tps);
-	    exptps.insert(exptps.end(), tps.begin(), tps.end());
-	} else {
-	    TermMatchResult res;
-	    string mt = stringtolower((const string&)*it);
-	    // We set casesens|diacsens to get an equivalent of ixTermMatch()
-	    db.termMatch(Db::ET_WILD|Db::ET_CASESENS|Db::ET_DIACSENS, string(),
-			 mt, res, -1, "mtype");
-	    if (res.entries.empty()) {
-		exptps.push_back(it->c_str());
-	    } else {
-		for (vector<TermMatchEntry>::const_iterator rit =
-			 res.entries.begin(); rit != res.entries.end(); rit++) {
-		    exptps.push_back(strip_prefix(rit->term));
-		}
-	    }
-	}
+        if (cfg->isMimeCategory(*it)) {
+            vector<string>tps;
+            cfg->getMimeCatTypes(*it, tps);
+            exptps.insert(exptps.end(), tps.begin(), tps.end());
+        } else {
+            TermMatchResult res;
+            string mt = stringtolower((const string&)*it);
+            // We set casesens|diacsens to get an equivalent of ixTermMatch()
+            db.termMatch(Db::ET_WILD|Db::ET_CASESENS|Db::ET_DIACSENS, string(),
+                         mt, res, -1, "mtype");
+            if (res.entries.empty()) {
+                exptps.push_back(it->c_str());
+            } else {
+                for (vector<TermMatchEntry>::const_iterator rit =
+                         res.entries.begin(); rit != res.entries.end(); rit++) {
+                    exptps.push_back(strip_prefix(rit->term));
+                }
+            }
+        }
     }
     sort(exptps.begin(), exptps.end());
     exptps.erase(unique(exptps.begin(), exptps.end()), exptps.end());
@@ -105,35 +105,35 @@ static const char *maxXapClauseCaseDiacMsg =
 
 // Walk the clauses list, translate each and add to top Xapian Query
 bool SearchData::clausesToQuery(Rcl::Db &db, SClType tp,
-				vector<SearchDataClause*>& query,
-				string& reason, void *d)
+                                vector<SearchDataClause*>& query,
+                                string& reason, void *d)
 {
     Xapian::Query xq;
     for (auto& clausep : query) {
-	Xapian::Query nq;
-	if (!clausep->toNativeQuery(db, &nq)) {
-	    LOGERR("SearchData::clausesToQuery: toNativeQuery failed: "
+        Xapian::Query nq;
+        if (!clausep->toNativeQuery(db, &nq)) {
+            LOGERR("SearchData::clausesToQuery: toNativeQuery failed: "
                    << clausep->getReason() << "\n");
-	    reason += clausep->getReason() + " ";
-	    return false;
-	}
+            reason += clausep->getReason() + " ";
+            return false;
+        }
         if (nq.empty()) {
             LOGDEB("SearchData::clausesToQuery: skipping empty clause\n");
             continue;
         }
-	// If this structure is an AND list, must use AND_NOT for excl clauses.
-	// Else this is an OR list, and there can't be excl clauses (checked by
-	// addClause())
-	Xapian::Query::op op;
-	if (tp == SCLT_AND) {
+        // If this structure is an AND list, must use AND_NOT for excl clauses.
+        // Else this is an OR list, and there can't be excl clauses (checked by
+        // addClause())
+        Xapian::Query::op op;
+        if (tp == SCLT_AND) {
             if (clausep->getexclude()) {
                 op = Xapian::Query::OP_AND_NOT;
             } else {
                 op = Xapian::Query::OP_AND;
             }
-	} else {
-	    op = Xapian::Query::OP_OR;
-	}
+        } else {
+            op = Xapian::Query::OP_OR;
+        }
         if (xq.empty()) {
             if (op == Xapian::Query::OP_AND_NOT)
                 xq = Xapian::Query(op, Xapian::Query::MatchAll, nq);
@@ -142,21 +142,21 @@ bool SearchData::clausesToQuery(Rcl::Db &db, SClType tp,
         } else {
             xq = Xapian::Query(op, xq, nq);
         }
-	if (int(xq.get_length()) >= getMaxCl()) {
-	    LOGERR("" << maxXapClauseMsg << "\n");
-	    m_reason += maxXapClauseMsg;
-	    if (!o_index_stripchars)
-		m_reason += maxXapClauseCaseDiacMsg;
-	    return false;
-	}
+        if (int(xq.get_length()) >= getMaxCl()) {
+            LOGERR("" << maxXapClauseMsg << "\n");
+            m_reason += maxXapClauseMsg;
+            if (!o_index_stripchars)
+                m_reason += maxXapClauseCaseDiacMsg;
+            return false;
+        }
     }
 
     LOGDEB0("SearchData::clausesToQuery: got " << xq.get_length()<<" clauses\n");
     if (xq.empty())
-	xq = Xapian::Query::MatchAll;
+        xq = Xapian::Query::MatchAll;
 
-    *((Xapian::Query *)d) = xq;
+    *((Xapian::Query *)d) = xq;
     return true;
 }
 
@@ -174,9 +174,9 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
     // Xapian query tree
     Xapian::Query xq;
     if (!clausesToQuery(db, m_tp, m_query, m_reason, &xq)) {
-	LOGERR("SearchData::toNativeQuery: clausesToQuery failed. reason: "
+        LOGERR("SearchData::toNativeQuery: clausesToQuery failed. reason: "
                << m_reason << "\n");
-	return false;
+        return false;
     }
 
     if (m_haveDates) {
@@ -203,7 +203,7 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
                "-" << m_dates.m1 << "-" << m_dates.d1 << "/" <<
                m_dates.y2 << "-" << m_dates.m2 << "-" << m_dates.d2 << "\n");
         Xapian::Query dq = date_range_filter(m_dates.y1, m_dates.m1, m_dates.d1,
-					     m_dates.y2, m_dates.m2, m_dates.d2);
+                                             m_dates.y2, m_dates.m2, m_dates.d2);
         if (dq.empty()) {
             LOGINFO("Db::toNativeQuery: date filter is empty\n");
         }
@@ -220,25 +220,25 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
 
     if (m_minSize != -1 || m_maxSize != -1) {
         Xapian::Query sq;
-	string min = lltodecstr(m_minSize);
-	string max = lltodecstr(m_maxSize);
-	if (m_minSize == -1) {
-	    string value(max);
-	    leftzeropad(value, 12);
-	    sq = Xapian::Query(Xapian::Query::OP_VALUE_LE, VALUE_SIZE, value);
-	} else if (m_maxSize == -1) {
-	    string value(min);
-	    leftzeropad(value, 12);
-	    sq = Xapian::Query(Xapian::Query::OP_VALUE_GE, VALUE_SIZE, value);
-	} else {
-	    string minvalue(min);
-	    leftzeropad(minvalue, 12);
-	    string maxvalue(max);
-	    leftzeropad(maxvalue, 12);
-	    sq = Xapian::Query(Xapian::Query::OP_VALUE_RANGE, VALUE_SIZE,
-			       minvalue, maxvalue);
-	}
-	
+        string min = lltodecstr(m_minSize);
+        string max = lltodecstr(m_maxSize);
+        if (m_minSize == -1) {
+            string value(max);
+            leftzeropad(value, 12);
+            sq = Xapian::Query(Xapian::Query::OP_VALUE_LE, VALUE_SIZE, value);
+        } else if (m_maxSize == -1) {
+            string value(min);
+            leftzeropad(value, 12);
+            sq = Xapian::Query(Xapian::Query::OP_VALUE_GE, VALUE_SIZE, value);
+        } else {
+            string minvalue(min);
+            leftzeropad(minvalue, 12);
+            string maxvalue(max);
+            leftzeropad(maxvalue, 12);
+            sq = Xapian::Query(Xapian::Query::OP_VALUE_RANGE, VALUE_SIZE,
+                               minvalue, maxvalue);
+        }
+
         // If no probabilistic query is provided then promote the
         // filter to be THE query instead of filtering an empty query.
         if (xq.empty()) {
@@ -251,41 +251,41 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
 
     // Add the autophrase if any
     if (m_autophrase) {
-	Xapian::Query apq;
-	if (m_autophrase->toNativeQuery(db, &apq)) {
-	    xq = xq.empty() ? apq :
-		Xapian::Query(Xapian::Query::OP_AND_MAYBE, xq, apq);
-	}
+        Xapian::Query apq;
+        if (m_autophrase->toNativeQuery(db, &apq)) {
+            xq = xq.empty() ? apq :
+                Xapian::Query(Xapian::Query::OP_AND_MAYBE, xq, apq);
+        }
     }
 
     // Add the file type filtering clause if any
     if (!m_filetypes.empty()) {
-	expandFileTypes(db, m_filetypes);
-	
-	Xapian::Query tq;
-	for (vector<string>::iterator it = m_filetypes.begin();
-	     it != m_filetypes.end(); it++) {
-	    string term = wrap_prefix(mimetype_prefix) + *it;
-	    LOGDEB0("Adding file type term: [" << term << "]\n");
-	    tq = tq.empty() ? Xapian::Query(term) :
-		Xapian::Query(Xapian::Query::OP_OR, tq, Xapian::Query(term));
-	}
-	xq = xq.empty() ? tq : Xapian::Query(Xapian::Query::OP_FILTER, xq, tq);
+        expandFileTypes(db, m_filetypes);
+
+        Xapian::Query tq;
+        for (vector<string>::iterator it = m_filetypes.begin();
+             it != m_filetypes.end(); it++) {
+            string term = wrap_prefix(mimetype_prefix) + *it;
+            LOGDEB0("Adding file type term: [" << term << "]\n");
+            tq = tq.empty() ? Xapian::Query(term) :
+                Xapian::Query(Xapian::Query::OP_OR, tq, Xapian::Query(term));
+        }
+        xq = xq.empty() ? tq : Xapian::Query(Xapian::Query::OP_FILTER, xq, tq);
     }
 
     // Add the neg file type filtering clause if any
     if (!m_nfiletypes.empty()) {
-	expandFileTypes(db, m_nfiletypes);
-	
-	Xapian::Query tq;
-	for (vector<string>::iterator it = m_nfiletypes.begin();
-	     it != m_nfiletypes.end(); it++) {
-	    string term = wrap_prefix(mimetype_prefix) + *it;
-	    LOGDEB0("Adding negative file type term: [" << term << "]\n");
-	    tq = tq.empty() ? Xapian::Query(term) :
-		Xapian::Query(Xapian::Query::OP_OR, tq, Xapian::Query(term));
-	}
-	xq = xq.empty() ? tq : Xapian::Query(Xapian::Query::OP_AND_NOT, xq, tq);
+        expandFileTypes(db, m_nfiletypes);
+
+        Xapian::Query tq;
+        for (vector<string>::iterator it = m_nfiletypes.begin();
+             it != m_nfiletypes.end(); it++) {
+            string term = wrap_prefix(mimetype_prefix) + *it;
+            LOGDEB0("Adding negative file type term: [" << term << "]\n");
+            tq = tq.empty() ? Xapian::Query(term) :
+                Xapian::Query(Xapian::Query::OP_OR, tq, Xapian::Query(term));
+        }
+        xq = xq.empty() ? tq : Xapian::Query(Xapian::Query::OP_AND_NOT, xq, tq);
     }
 
     *((Xapian::Query *)d) = xq;
@@ -298,18 +298,18 @@ bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
 // actually multiple terms to rcldb (ie term1,term2). Still, most of
 // the time, the result of our splitting will be a single term.
 class TextSplitQ : public TextSplitP {
- public:
+public:
     TextSplitQ(Flags flags, TermProc *prc)
-	: TextSplitP(prc, flags), m_nostemexp(false) {
+        : TextSplitP(prc, flags), m_nostemexp(false) {
     }
 
     bool takeword(const std::string &term, int pos, int bs, int be) {
-	// Check if the first letter is a majuscule in which
-	// case we do not want to do stem expansion. Need to do this
-	// before unac of course...
-	m_nostemexp = unaciscapital(term);
+        // Check if the first letter is a majuscule in which
+        // case we do not want to do stem expansion. Need to do this
+        // before unac of course...
+        m_nostemexp = unaciscapital(term);
 
-	return TextSplitP::takeword(term, pos, bs, be);
+        return TextSplitP::takeword(term, pos, bs, be);
     }
 
     bool nostemexp() const {
@@ -332,26 +332,26 @@ public:
     }
 
     bool takeword(const std::string &term, int pos, int, int be) {
-	m_alltermcount++;
-	if (m_lastpos < pos)
-	    m_lastpos = pos;
-	bool noexpand = be ? m_ts->nostemexp() : true;
-	LOGDEB1("TermProcQ::takeword: pushing [" << term << "] pos " <<
+        m_alltermcount++;
+        if (m_lastpos < pos)
+            m_lastpos = pos;
+        bool noexpand = be ? m_ts->nostemexp() : true;
+        LOGDEB1("TermProcQ::takeword: pushing [" << term << "] pos " <<
                 pos << " noexp " << noexpand << "\n");
-	if (m_terms[pos].size() < term.size()) {
-	    m_terms[pos] = term;
-	    m_nste[pos] = noexpand;
-	}
-	return true;
+        if (m_terms[pos].size() < term.size()) {
+            m_terms[pos] = term;
+            m_nste[pos] = noexpand;
+        }
+        return true;
     }
 
     bool flush() {
-	for (map<int, string>::const_iterator it = m_terms.begin();
-	     it != m_terms.end(); it++) {
-	    m_vterms.push_back(it->second);
-	    m_vnostemexps.push_back(m_nste[it->first]);
-	}
-	return true;
+        for (map<int, string>::const_iterator it = m_terms.begin();
+             it != m_terms.end(); it++) {
+            m_vterms.push_back(it->second);
+            m_vnostemexps.push_back(m_nste[it->first]);
+        }
+        return true;
     }
 
     int alltermcount() const {
@@ -387,7 +387,7 @@ static const vector<CharFlags> expandModStrings{
     {SearchDataClause::SDCM_NOTERMS, "noterms"},
     {SearchDataClause::SDCM_NOSYNS, "nosyns"},
     {SearchDataClause::SDCM_PATHELT, "pathelt"},
-    };
+};
 
 /** Expand term into term list, using appropriate mode: stem, wildcards,
  * diacritics...
@@ -403,11 +403,11 @@ static const vector<CharFlags> expandModStrings{
  * expansions which should be processed as phrases.
  */
 bool SearchDataClauseSimple::expandTerm(Rcl::Db &db,
-					string& ermsg, int mods,
-					const string& term,
-					vector<string>& oexp, string &sterm,
-					const string& prefix,
-					vector<string>* multiwords
+                                        string& ermsg, int mods,
+                                        const string& term,
+                                        vector<string>& oexp, string &sterm,
+                                        const string& prefix,
+                                        vector<string>* multiwords
     )
 {
     LOGDEB0("expandTerm: mods: [" << flagsToString(expandModStrings, mods) <<
@@ -416,7 +416,7 @@ bool SearchDataClauseSimple::expandTerm(Rcl::Db &db,
     sterm.clear();
     oexp.clear();
     if (term.empty())
-	return true;
+        return true;
 
     if (mods & SDCM_PATHELT) {
         // Path element are so special. Only wildcards, and they are
@@ -427,23 +427,23 @@ bool SearchDataClauseSimple::expandTerm(Rcl::Db &db,
     bool maxexpissoft = false;
     int maxexpand = getSoftMaxExp();
     if (maxexpand != -1) {
-	maxexpissoft = true;
+        maxexpissoft = true;
     } else {
-	maxexpand = getMaxExp();
+        maxexpand = getMaxExp();
     }
 
     bool haswild = term.find_first_of(cstr_minwilds) != string::npos;
 
     // If there are no wildcards, add term to the list of user-entered terms
     if (!haswild) {
-	m_hldata.uterms.insert(term);
+        m_hldata.uterms.insert(term);
         sterm = term;
     }
 
     // No stem expansion if there are wildcards or if prevented by caller
     bool nostemexp = (mods & SDCM_NOSTEMMING) != 0;
     if (haswild || getStemLang().empty()) {
-	LOGDEB2("expandTerm: found wildcards or stemlang empty: no exp\n");
-	nostemexp = true;
+        LOGDEB2("expandTerm: found wildcards or stemlang empty: no exp\n");
+        nostemexp = true;
     }
 
     bool diac_sensitive = (mods & SDCM_DIACSENS) != 0;
@@ -455,82 +455,82 @@ bool SearchDataClauseSimple::expandTerm(Rcl::Db &db,
     bool noexpansion = nostemexp && !haswild && !synonyms;
 
     if (o_index_stripchars) {
-	diac_sensitive = case_sensitive = false;
+        diac_sensitive = case_sensitive = false;
     } else {
-	// If we are working with a raw index, apply the rules for case and
-	// diacritics sensitivity.
+        // If we are working with a raw index, apply the rules for case and
+        // diacritics sensitivity.
 
-	// If any character has a diacritic, we become
-	// diacritic-sensitive. Note that the way that the test is
-	// performed (conversion+comparison) will automatically ignore
-	// accented characters which are actually a separate letter
-	if (getAutoDiac() && unachasaccents(term)) {
-	    LOGDEB0("expandTerm: term has accents -> diac-sensitive\n");
-	    diac_sensitive = true;
-	}
+        // If any character has a diacritic, we become
+        // diacritic-sensitive. Note that the way that the test is
+        // performed (conversion+comparison) will automatically ignore
+        // accented characters which are actually a separate letter
+        if (getAutoDiac() && unachasaccents(term)) {
+            LOGDEB0("expandTerm: term has accents -> diac-sensitive\n");
+            diac_sensitive = true;
+        }
 
-	// If any character apart the first is uppercase, we become
-	// case-sensitive. The first character is reserved for
-	// turning off stemming. You need to use a query language
-	// modifier to search for Floor in a case-sensitive way.
-	Utf8Iter it(term);
-	it++;
-	if (getAutoCase() && unachasuppercase(term.substr(it.getBpos()))) {
-	    LOGDEB0("expandTerm: term has uppercase -> case-sensitive\n");
-	    case_sensitive = true;
-	}
+        // If any character apart the first is uppercase, we become
+        // case-sensitive. The first character is reserved for
+        // turning off stemming. You need to use a query language
+        // modifier to search for Floor in a case-sensitive way.
+        Utf8Iter it(term);
+        it++;
+        if (getAutoCase() && unachasuppercase(term.substr(it.getBpos()))) {
+            LOGDEB0("expandTerm: term has uppercase -> case-sensitive\n");
+            case_sensitive = true;
+        }
 
-	// If we are sensitive to case or diacritics turn stemming off
-	if (diac_sensitive || case_sensitive) {
-	    LOGDEB0("expandTerm: diac or case sens set -> stemexpand and "
+        // If we are sensitive to case or diacritics turn stemming off
+        if (diac_sensitive || case_sensitive) {
+            LOGDEB0("expandTerm: diac or case sens set -> stemexpand and "
                     "synonyms off\n");
-	    nostemexp = true;
+            nostemexp = true;
             synonyms = false;
-	}
+        }
 
-	if (!case_sensitive || !diac_sensitive)
-	    noexpansion = false;
+        if (!case_sensitive || !diac_sensitive)
+            noexpansion = false;
     }
 
     if (!m_exclude && noexpansion) {
-	oexp.push_back(prefix + term);
-	m_hldata.terms[term] = term;
-	LOGDEB("ExpandTerm: noexpansion: final: " << term << "\n");
-	return true;
+        oexp.push_back(prefix + term);
+        m_hldata.terms[term] = term;
+        LOGDEB("ExpandTerm: noexpansion: final: " << term << "\n");
+        return true;
     }
 
     if ((int)res.entries.size() >= maxexpand && !maxexpissoft) {
-	ermsg = "Maximum term expansion size exceeded."
-	    " Maybe use case/diacritics sensitivity or increase maxTermExpand.";
-	return false;
+ " Maybe use case/diacritics sensitivity or increase maxTermExpand."; + return false; } for (const auto& entry : res.entries) { - oexp.push_back(entry.term); + oexp.push_back(entry.term); } // If the term does not exist at all in the db, the return from // termMatch() is going to be empty, which is not what we want (we // would then compute an empty Xapian query) if (oexp.empty()) - oexp.push_back(prefix + term); + oexp.push_back(prefix + term); // Remember the uterm-to-expansion links if (!m_exclude) { @@ -545,7 +545,7 @@ bool SearchDataClauseSimple::expandTerm(Rcl::Db &db, static void prefix_vector(vector& v, const string& prefix) { for (vector::iterator it = v.begin(); it != v.end(); it++) { - *it = prefix + *it; + *it = prefix + *it; } } @@ -561,14 +561,14 @@ void SearchDataClauseSimple::processSimpleSpan( string prefix; const FieldTraits *ftp; if (!m_field.empty() && db.fieldToTraits(m_field, &ftp, true)) { - if (ftp->noterms) - addModifier(SDCM_NOTERMS); // Don't add terms to highlight data - prefix = wrap_prefix(ftp->pfx); + if (ftp->noterms) + addModifier(SDCM_NOTERMS); // Don't add terms to highlight data + prefix = wrap_prefix(ftp->pfx); } vector multiwords; if (!expandTerm(db, ermsg, mods, span, exp, sterm, prefix, &multiwords)) - return; + return; // Set up the highlight data. No prefix should go in there if (!m_exclude) { @@ -593,28 +593,28 @@ void SearchDataClauseSimple::processSimpleSpan( // do it if there are wildcards anywhere, this would skew the results. Also // no need to do it if there was no expansion. bool doBoostUserTerm = - (m_parentSearch && !m_parentSearch->haveWildCards()) || - (m_parentSearch == 0 && !m_haveWildCards); + (m_parentSearch && !m_parentSearch->haveWildCards()) || + (m_parentSearch == 0 && !m_haveWildCards); if (exp.size() > 1 && doBoostUserTerm && !sterm.empty()) { xq = Xapian::Query(Xapian::Query::OP_OR, xq, - Xapian::Query(prefix+sterm, - original_term_wqf_booster)); + Xapian::Query(prefix+sterm, + original_term_wqf_booster)); } // Push phrases for the multi-word expansions for (vector::const_iterator mwp = multiwords.begin(); - mwp != multiwords.end(); mwp++) { - vector phr; - // We just do a basic split to keep things a bit simpler here - // (no textsplit). This means though that no punctuation is - // allowed in multi-word synonyms. - stringToTokens(*mwp, phr); - if (!prefix.empty()) - prefix_vector(phr, prefix); - xq = Xapian::Query(Xapian::Query::OP_OR, xq, - Xapian::Query(Xapian::Query::OP_PHRASE, - phr.begin(), phr.end())); - m_curcl++; + mwp != multiwords.end(); mwp++) { + vector phr; + // We just do a basic split to keep things a bit simpler here + // (no textsplit). This means though that no punctuation is + // allowed in multi-word synonyms. + stringToTokens(*mwp, phr); + if (!prefix.empty()) + prefix_vector(phr, prefix); + xq = Xapian::Query(Xapian::Query::OP_OR, xq, + Xapian::Query(Xapian::Query::OP_PHRASE, + phr.begin(), phr.end())); + m_curcl++; } pqueries.push_back(xq); @@ -625,13 +625,13 @@ void SearchDataClauseSimple::processSimpleSpan( // queries if the terms get expanded by stemming or wildcards (we // don't do stemming for PHRASE though) void SearchDataClauseSimple::processPhraseOrNear(Rcl::Db &db, string& ermsg, - TermProcQ *splitData, - int mods, void *pq, - bool useNear, int slack) + TermProcQ *splitData, + int mods, void *pq, + bool useNear, int slack) { vector &pqueries(*(vector*)pq); Xapian::Query::op op = useNear ? 
-	Xapian::Query::OP_PHRASE;
+        Xapian::Query::OP_PHRASE;
     vector<Xapian::Query> orqueries;
 #ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF
     bool hadmultiple = false;
 #endif
@@ -641,57 +641,57 @@ void SearchDataClauseSimple::processPhraseOrNear(Rcl::Db &db, string& ermsg,
     string prefix;
     const FieldTraits *ftp;
     if (!m_field.empty() && db.fieldToTraits(m_field, &ftp, true)) {
-	prefix = wrap_prefix(ftp->pfx);
+        prefix = wrap_prefix(ftp->pfx);
     }
 
     if (mods & Rcl::SearchDataClause::SDCM_ANCHORSTART) {
-	orqueries.push_back(Xapian::Query(prefix + start_of_field_term));
-	slack++;
+        orqueries.push_back(Xapian::Query(prefix + start_of_field_term));
+        slack++;
     }
 
     // Go through the list and perform stem/wildcard expansion for each element
     vector<bool>::const_iterator nxit = splitData->nostemexps().begin();
     for (vector<string>::const_iterator it = splitData->terms().begin();
-	 it != splitData->terms().end(); it++, nxit++) {
-	LOGDEB0("ProcessPhrase: processing [" << *it << "]\n");
-	// Adjust when we do stem expansion. Not if disabled by
-	// caller, not inside phrases, and some versions of xapian
-	// will accept only one OR clause inside NEAR.
-	bool nostemexp = *nxit || (op == Xapian::Query::OP_PHRASE)
+         it != splitData->terms().end(); it++, nxit++) {
+        LOGDEB0("ProcessPhrase: processing [" << *it << "]\n");
+        // Adjust when we do stem expansion. Not if disabled by
+        // caller, not inside phrases, and some versions of xapian
+        // will accept only one OR clause inside NEAR.
+        bool nostemexp = *nxit || (op == Xapian::Query::OP_PHRASE)
 #ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF
-	    || hadmultiple
+            || hadmultiple
 #endif // single OR inside NEAR
-	    ;
-	int lmods = mods;
-	if (nostemexp)
-	    lmods |= SearchDataClause::SDCM_NOSTEMMING;
-	string sterm;
-	vector<string> exp;
-	if (!expandTerm(db, ermsg, lmods, *it, exp, sterm, prefix))
-	    return;
-	LOGDEB0("ProcessPhraseOrNear: exp size " << exp.size() << ", exp: " <<
+            ;
+        int lmods = mods;
+        if (nostemexp)
+            lmods |= SearchDataClause::SDCM_NOSTEMMING;
+        string sterm;
+        vector<string> exp;
+        if (!expandTerm(db, ermsg, lmods, *it, exp, sterm, prefix))
+            return;
+        LOGDEB0("ProcessPhraseOrNear: exp size " << exp.size() << ", exp: " <<
                 stringsToString(exp) << "\n");
-	// groups is used for highlighting, we don't want prefixes in there.
-	vector<string> noprefs;
-	for (vector<string>::const_iterator it = exp.begin();
-	     it != exp.end(); it++) {
-	    noprefs.push_back(it->substr(prefix.size()));
-	}
-	groups.push_back(noprefs);
-	orqueries.push_back(Xapian::Query(Xapian::Query::OP_OR,
-					  exp.begin(), exp.end()));
-	m_curcl += exp.size();
-	if (m_curcl >= getMaxCl())
-	    return;
+        // groups is used for highlighting, we don't want prefixes in there.
+        vector<string> noprefs;
+        for (vector<string>::const_iterator it = exp.begin();
+             it != exp.end(); it++) {
+            noprefs.push_back(it->substr(prefix.size()));
+        }
+        groups.push_back(noprefs);
+        orqueries.push_back(Xapian::Query(Xapian::Query::OP_OR,
+                                          exp.begin(), exp.end()));
+        m_curcl += exp.size();
+        if (m_curcl >= getMaxCl())
+            return;
 #ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF
-	if (exp.size() > 1)
-	    hadmultiple = true;
+        if (exp.size() > 1)
+            hadmultiple = true;
 #endif
     }
 
     if (mods & Rcl::SearchDataClause::SDCM_ANCHOREND) {
-	orqueries.push_back(Xapian::Query(prefix + end_of_field_term));
-	slack++;
+        orqueries.push_back(Xapian::Query(prefix + end_of_field_term));
+        slack++;
     }
 
     // Generate an appropriate PHRASE/NEAR query with adjusted slack
@@ -699,10 +699,10 @@ void SearchDataClauseSimple::processPhraseOrNear(Rcl::Db &db, string& ermsg,
     LOGDEB2("PHRASE/NEAR: alltermcount " << splitData->alltermcount() <<
             " lastpos " << splitData->lastpos() << "\n");
     Xapian::Query xq(op, orqueries.begin(), orqueries.end(),
-		     orqueries.size() + slack);
+                     orqueries.size() + slack);
     if (op == Xapian::Query::OP_PHRASE)
-	xq = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, xq,
-			   original_term_wqf_booster);
+        xq = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, xq,
+                           original_term_wqf_booster);
     pqueries.push_back(xq);
 
     // Insert the search groups and slacks in the highlight data, with
@@ -726,12 +726,12 @@ static int stringToMods(string& s)
     // Check for an anchored search
     trimstring(s);
     if (s.length() > 0 && s[0] == '^') {
-	mods |= Rcl::SearchDataClause::SDCM_ANCHORSTART;
-	s.erase(0, 1);
+        mods |= Rcl::SearchDataClause::SDCM_ANCHORSTART;
+        s.erase(0, 1);
     }
     if (s.length() > 0 && s[s.length()-1] == '$') {
-	mods |= Rcl::SearchDataClause::SDCM_ANCHOREND;
-	s.erase(s.length()-1);
+        mods |= Rcl::SearchDataClause::SDCM_ANCHOREND;
+        s.erase(s.length()-1);
     }
     return mods;
 }
@@ -761,8 +761,8 @@
  * count)
  */
 bool SearchDataClauseSimple::processUserString(Rcl::Db &db, const string &iq,
-					       string &ermsg, void *pq,
-					       int slack, bool useNear)
+                                               string &ermsg, void *pq,
+                                               int slack, bool useNear)
 {
     vector<Xapian::Query> &pqueries(*(vector<Xapian::Query>*)pq);
     int mods = m_modifiers;
@@ -786,87 +786,87 @@ bool SearchDataClauseSimple::processUserString(Rcl::Db &db, const string &iq,
     // Process each element: textsplit into terms, handle stem/wildcard
     // expansion and transform into an appropriate Xapian::Query
     try {
-	for (vector<string>::iterator it = phrases.begin();
-	     it != phrases.end(); it++) {
-	    LOGDEB0("strToXapianQ: phrase/word: [" << *it << "]\n");
-	    // Anchoring modifiers
-	    int amods = stringToMods(*it);
-	    int terminc = amods != 0 ? 1 : 0;
-	    mods |= amods;
-	    // If there are multiple spans in this element, including
-	    // at least one composite, we have to increase the slack
-	    // else a phrase query including a span would fail.
-	    // Ex: "term0@term1 term2" is onlyspans-split as:
-	    // 0 term0@term1 0 12
-	    // 2 term2 13 18
-	    // The position of term2 is 2, not 1, so a phrase search
-	    // would fail.
-	    // We used to do word split, searching for
-	    // "term0 term1 term2" instead, which may have worse
-	    // performance, but will succeed.
-	    // We now adjust the phrase/near slack by comparing the term count
-	    // and the last position
+        for (vector<string>::iterator it = phrases.begin();
+             it != phrases.end(); it++) {
+            LOGDEB0("strToXapianQ: phrase/word: [" << *it << "]\n");
+            // Anchoring modifiers
+            int amods = stringToMods(*it);
+            int terminc = amods != 0 ? 1 : 0;
+            mods |= amods;
+            // If there are multiple spans in this element, including
+            // at least one composite, we have to increase the slack
+            // else a phrase query including a span would fail.
+            // Ex: "term0@term1 term2" is onlyspans-split as:
+            // 0 term0@term1 0 12
+            // 2 term2 13 18
+            // The position of term2 is 2, not 1, so a phrase search
+            // would fail.
+            // We used to do word split, searching for
+            // "term0 term1 term2" instead, which may have worse
+            // performance, but will succeed.
+            // We now adjust the phrase/near slack by comparing the term count
+            // and the last position
 
-	    // The term processing pipeline:
+            // The term processing pipeline:
             // split -> [unac/case ->] stops -> store terms
-	    TermProcQ tpq;
-	    TermProc *nxt = &tpq;
+            TermProcQ tpq;
+            TermProc *nxt = &tpq;
             TermProcStop tpstop(nxt, stops); nxt = &tpstop;
             //TermProcCommongrams tpcommon(nxt, stops); nxt = &tpcommon;
             //tpcommon.onlygrams(true);
-	    TermProcPrep tpprep(nxt);
-	    if (o_index_stripchars)
-		nxt = &tpprep;
+            TermProcPrep tpprep(nxt);
+            if (o_index_stripchars)
+                nxt = &tpprep;
 
-	    TextSplitQ splitter(TextSplit::Flags(TextSplit::TXTS_ONLYSPANS |
-						 TextSplit::TXTS_KEEPWILD),
-				nxt);
-	    tpq.setTSQ(&splitter);
-	    splitter.text_to_words(*it);
+            TextSplitQ splitter(TextSplit::Flags(TextSplit::TXTS_ONLYSPANS |
+                                                 TextSplit::TXTS_KEEPWILD),
+                                nxt);
+            tpq.setTSQ(&splitter);
+            splitter.text_to_words(*it);
 
-	    slack += tpq.lastpos() - int(tpq.terms().size()) + 1;
+            slack += tpq.lastpos() - int(tpq.terms().size()) + 1;
 
-	    LOGDEB0("strToXapianQ: termcount: " << tpq.terms().size() << "\n");
-	    switch (tpq.terms().size() + terminc) {
-	    case 0:
-		continue;// ??
-	    case 1: {
-		int lmods = mods;
-		if (tpq.nostemexps().front())
-		    lmods |= SearchDataClause::SDCM_NOSTEMMING;
+            LOGDEB0("strToXapianQ: termcount: " << tpq.terms().size() << "\n");
+            switch (tpq.terms().size() + terminc) {
+            case 0:
+                continue;// ??
+            case 1: {
+                int lmods = mods;
+                if (tpq.nostemexps().front())
+                    lmods |= SearchDataClause::SDCM_NOSTEMMING;
                 if (!m_exclude) {
                     m_hldata.ugroups.push_back(tpq.terms());
                 }
-		processSimpleSpan(db, ermsg, tpq.terms().front(),
-				  lmods, &pqueries);
-	    }
-		break;
-	    default:
+                processSimpleSpan(db, ermsg, tpq.terms().front(),
+                                  lmods, &pqueries);
+            }
+                break;
+            default:
                 if (!m_exclude) {
                     m_hldata.ugroups.push_back(tpq.terms());
                 }
-		processPhraseOrNear(db, ermsg, &tpq, mods, &pqueries,
-				    useNear, slack);
-	    }
-	    if (m_curcl >= getMaxCl()) {
-		ermsg = maxXapClauseMsg;
-		if (!o_index_stripchars)
-		    ermsg += maxXapClauseCaseDiacMsg;
-		break;
-	    }
-	}
+                processPhraseOrNear(db, ermsg, &tpq, mods, &pqueries,
+                                    useNear, slack);
+            }
+            if (m_curcl >= getMaxCl()) {
+                ermsg = maxXapClauseMsg;
+                if (!o_index_stripchars)
+                    ermsg += maxXapClauseCaseDiacMsg;
+                break;
+            }
+        }
     } catch (const Xapian::Error &e) {
-	ermsg = e.get_msg();
+        ermsg = e.get_msg();
     } catch (const string &s) {
-	ermsg = s;
+        ermsg = s;
     } catch (const char *s) {
-	ermsg = s;
+        ermsg = s;
     } catch (...) {
-	ermsg = "Caught unknown exception";
+        ermsg = "Caught unknown exception";
     }
     if (!ermsg.empty()) {
-	LOGERR("stringToXapianQueries: " << ermsg << "\n");
-	return false;
+        LOGERR("stringToXapianQueries: " << ermsg << "\n");
+        return false;
     }
     return true;
 }
 
@@ -912,24 +912,24 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p)
     case SCLT_AND: op = Xapian::Query::OP_AND; break;
     case SCLT_OR: op = Xapian::Query::OP_OR; break;
     default:
-	LOGERR("SearchDataClauseSimple: bad m_tp " << m_tp << "\n");
+        LOGERR("SearchDataClauseSimple: bad m_tp " << m_tp << "\n");
         m_reason = "Internal error";
-	return false;
+        return false;
     }
 
     vector<Xapian::Query> pqueries;
     if (!processUserString(db, m_text, m_reason, &pqueries))
-	return false;
+        return false;
     if (pqueries.empty()) {
-	LOGERR("SearchDataClauseSimple: resolved to null query\n");
+        LOGERR("SearchDataClauseSimple: resolved to null query\n");
         m_reason = string("Resolved to null query. Term too long ? : [" +
                           m_text + string("]"));
-	return false;
+        return false;
     }
 
     *qp = Xapian::Query(op, pqueries.begin(), pqueries.end());
     if (m_weight != 1.0) {
-	*qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
+        *qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
     }
     return true;
 }
@@ -1002,14 +1002,14 @@ bool SearchDataClauseFilename::toNativeQuery(Rcl::Db &db, void *p)
 
     int maxexp = getSoftMaxExp();
     if (maxexp == -1)
-	maxexp = getMaxExp();
+        maxexp = getMaxExp();
 
     vector<string> names;
     db.filenameWildExp(m_text, names, maxexp);
     *qp = Xapian::Query(Xapian::Query::OP_OR, names.begin(), names.end());
 
     if (m_weight != 1.0) {
-	*qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
+        *qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
     }
     return true;
 }
@@ -1031,15 +1031,15 @@ bool SearchDataClausePath::toNativeQuery(Rcl::Db &db, void *p)
 #endif
 
     if (ltext.empty()) {
-	LOGERR("SearchDataClausePath: empty path??\n");
-	m_reason = "Empty path ?";
-	return false;
+        LOGERR("SearchDataClausePath: empty path??\n");
+        m_reason = "Empty path ?";
+        return false;
     }
 
     vector<Xapian::Query> orqueries;
 
     if (path_isabsolute(ltext))
-	orqueries.push_back(Xapian::Query(wrap_prefix(pathelt_prefix)));
+        orqueries.push_back(Xapian::Query(wrap_prefix(pathelt_prefix)));
     else
         ltext = path_tildexpand(ltext);
 
@@ -1047,31 +1047,31 @@ bool SearchDataClausePath::toNativeQuery(Rcl::Db &db, void *p)
     vector<string> vpath;
     stringToTokens(ltext, vpath, "/");
 
     for (vector<string>::const_iterator pit = vpath.begin();
-	 pit != vpath.end(); pit++){
+         pit != vpath.end(); pit++){
 
-	string sterm;
-	vector<string> exp;
-	if (!expandTerm(db, m_reason, SDCM_PATHELT,
-			*pit, exp, sterm, wrap_prefix(pathelt_prefix))) {
-	    return false;
-	}
-	LOGDEB0("SDataPath::toNative: exp size " << exp.size() << ". Exp: " <<
+        string sterm;
+        vector<string> exp;
+        if (!expandTerm(db, m_reason, SDCM_PATHELT,
+                        *pit, exp, sterm, wrap_prefix(pathelt_prefix))) {
+            return false;
+        }
+        LOGDEB0("SDataPath::toNative: exp size " << exp.size() << ". Exp: " <<
Exp: " << stringsToString(exp) << "\n"); - if (exp.size() == 1) - orqueries.push_back(Xapian::Query(exp[0])); - else - orqueries.push_back(Xapian::Query(Xapian::Query::OP_OR, - exp.begin(), exp.end())); - m_curcl += exp.size(); - if (m_curcl >= getMaxCl()) - return false; + if (exp.size() == 1) + orqueries.push_back(Xapian::Query(exp[0])); + else + orqueries.push_back(Xapian::Query(Xapian::Query::OP_OR, + exp.begin(), exp.end())); + m_curcl += exp.size(); + if (m_curcl >= getMaxCl()) + return false; } *qp = Xapian::Query(Xapian::Query::OP_PHRASE, - orqueries.begin(), orqueries.end()); + orqueries.begin(), orqueries.end()); if (m_weight != 1.0) { - *qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight); + *qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight); } return true; } @@ -1091,22 +1091,22 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p) // terms etc. This will result into a single (complex) // Xapian::Query. if (m_text.find('\"') != string::npos) { - m_text = neutchars(m_text, "\""); + m_text = neutchars(m_text, "\""); } string s = cstr_dquote + m_text + cstr_dquote; bool useNear = (m_tp == SCLT_NEAR); if (!processUserString(db, s, m_reason, &pqueries, m_slack, useNear)) - return false; + return false; if (pqueries.empty()) { - LOGERR("SearchDataClauseDist: resolved to null query\n"); + LOGERR("SearchDataClauseDist: resolved to null query\n"); m_reason = string("Resolved to null query. Term too long ? : [" + m_text + string("]")); - return false; + return false; } *qp = *pqueries.begin(); if (m_weight != 1.0) { - *qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight); + *qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight); } return true; }