/* Copyright (C) 2006 J.F.Dockes * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the * Free Software Foundation, Inc., * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #include #include #include #include using std::string; using std::list; #include "rclconfig.h" #include "wasastringtoquery.h" #include "rcldb.h" #include "searchdata.h" #include "wasatorcl.h" #include "debuglog.h" #include "smallut.h" #include "rclconfig.h" #include "refcntr.h" #include "textsplit.h" static Rcl::SearchData *wasaQueryToRcl(const RclConfig *config, const string& stemlang, WasaQuery *wasa, const string& autosuffs, string& reason) { if (wasa == 0) { reason = "NULL query"; return 0; } if (wasa->m_op != WasaQuery::OP_AND && wasa->m_op != WasaQuery::OP_OR) { reason = "Top query neither AND nor OR ?"; LOGERR(("wasaQueryToRcl: top query neither AND nor OR!\n")); return 0; } Rcl::SearchData *sdata = new Rcl::SearchData(wasa->m_op == WasaQuery::OP_AND ? Rcl::SCLT_AND : Rcl::SCLT_OR, stemlang); LOGDEB2(("wasaQueryToRcl: %s chain\n", wasa->m_op == WasaQuery::OP_AND ? "AND" : "OR")); WasaQuery::subqlist_t::iterator it; Rcl::SearchDataClause *nclause; // Walk the list of clauses. Some pseudo-field types need special // processing, which results in setting data in the top struct // instead of adding a clause. We check for these first for (it = wasa->m_subs.begin(); it != wasa->m_subs.end(); it++) { if (!stringicmp("mime", (*it)->m_fieldspec) || !stringicmp("format", (*it)->m_fieldspec)) { if ((*it)->m_op == WasaQuery::OP_LEAF) { if ((*it)->m_exclude) { sdata->remFiletype((*it)->m_value); } else { sdata->addFiletype((*it)->m_value); } } else { reason = "internal error: mime clause not leaf??"; return 0; } continue; } // Xesam uses "type", we also support "rclcat", for broad // categories like "audio", "presentation", etc. if (!stringicmp("rclcat", (*it)->m_fieldspec) || !stringicmp("type", (*it)->m_fieldspec)) { if ((*it)->m_op != WasaQuery::OP_LEAF) { reason = "internal error: rclcat/type clause not leaf??"; return 0; } vector mtypes; if (config && config->getMimeCatTypes((*it)->m_value, mtypes) && !mtypes.empty()) { for (vector::iterator mit = mtypes.begin(); mit != mtypes.end(); mit++) { if ((*it)->m_exclude) { sdata->remFiletype(*mit); } else { sdata->addFiletype(*mit); } } } else { reason = "Unknown rclcat/type value: no mime types found"; return 0; } continue; } // Handle "date" spec if (!stringicmp("date", (*it)->m_fieldspec)) { if ((*it)->m_op != WasaQuery::OP_LEAF) { reason = "Negative date filtering not supported"; return 0; } DateInterval di; if (!parsedateinterval((*it)->m_value, &di)) { LOGERR(("wasaQueryToRcl: bad date interval format\n")); reason = "Bad date interval format"; return 0; } LOGDEB(("wasaQueryToRcl:: date span: %d-%d-%d/%d-%d-%d\n", di.y1,di.m1,di.d1, di.y2,di.m2,di.d2)); sdata->setDateSpan(&di); continue; } // Handle "size" spec if (!stringicmp("size", (*it)->m_fieldspec)) { if ((*it)->m_op != WasaQuery::OP_LEAF) { reason = "Negative size filtering not supported"; return 0; } char *cp; size_t size = strtoll((*it)->m_value.c_str(), &cp, 10); if (*cp != 0) { switch (*cp) { case 'k': case 'K': size *= 1E3;break; case 'm': case 'M': size *= 1E6;break; case 'g': case 'G': size *= 1E9;break; case 't': case 'T': size *= 1E12;break; default: reason = string("Bad multiplier suffix: ") + *cp; return 0; } } switch ((*it)->m_rel) { case WasaQuery::REL_EQUALS: sdata->setMaxSize(size); sdata->setMinSize(size); break; case WasaQuery::REL_LT: case WasaQuery::REL_LTE: sdata->setMaxSize(size); break; case WasaQuery::REL_GT: case WasaQuery::REL_GTE: sdata->setMinSize(size); break; default: reason = "Bad relation operator with size query. Use > < or ="; return 0; } continue; } // "Regular" processing follows: unsigned int mods = (unsigned int)(*it)->m_modifiers; LOGDEB0(("wasaQueryToRcl: clause modifiers 0x%x\n", mods)); nclause = 0; switch ((*it)->m_op) { case WasaQuery::OP_NULL: case WasaQuery::OP_AND: default: reason = "Found bad NULL or AND query type in list"; LOGERR(("wasaQueryToRcl: found bad NULL or AND q type in list\n")); continue; case WasaQuery::OP_LEAF: { LOGDEB0(("wasaQueryToRcl: leaf clause [%s:%s] slack %d excl %d\n", (*it)->m_fieldspec.c_str(), (*it)->m_value.c_str(), (*it)->m_slack, (*it)->m_exclude)); // Change terms found in the "autosuffs" list into "ext" // field queries if ((*it)->m_fieldspec.empty() && !autosuffs.empty()) { vector asfv; if (stringToStrings(autosuffs, asfv)) { if (find_if(asfv.begin(), asfv.end(), StringIcmpPred((*it)->m_value)) != asfv.end()) { (*it)->m_fieldspec = "ext"; (*it)->m_modifiers |= WasaQuery::WQM_NOSTEM; } } } if (!stringicmp("dir", (*it)->m_fieldspec)) { // dir filtering special case nclause = new Rcl::SearchDataClausePath((*it)->m_value, (*it)->m_exclude); } else { if ((*it)->m_exclude && wasa->m_op != WasaQuery::OP_AND) { LOGERR(("wasaQueryToRcl: excl clause inside OR list!\n")); continue; } // I'm not sure I understand the phrase/near detection // thereafter anymore, maybe it would be better to have an // explicit flag. Mods can only be set after a double // quote. if (TextSplit::hasVisibleWhite((*it)->m_value) || mods) { Rcl::SClType tp = (mods & WasaQuery::WQM_PROX) ? Rcl::SCLT_NEAR : Rcl::SCLT_PHRASE; nclause = new Rcl::SearchDataClauseDist(tp, (*it)->m_value, (*it)->m_slack, (*it)->m_fieldspec); } else { Rcl::SClType tp = (*it)->m_exclude ? Rcl::SCLT_OR: Rcl::SCLT_AND; nclause = new Rcl::SearchDataClauseSimple(tp, (*it)->m_value, (*it)->m_fieldspec); } nclause->setexclude((*it)->m_exclude); } if (nclause == 0) { reason = "Out of memory"; LOGERR(("wasaQueryToRcl: out of memory\n")); return 0; } } break; case WasaQuery::OP_OR: LOGDEB2(("wasaQueryToRcl: OR clause [%s]:[%s]\n", (*it)->m_fieldspec.c_str(), (*it)->m_value.c_str())); // Create a subquery. Rcl::SearchData *sub = wasaQueryToRcl(config, stemlang, *it, autosuffs, reason); if (sub == 0) { continue; } nclause = new Rcl::SearchDataClauseSub(RefCntr(sub)); if (nclause == 0) { LOGERR(("wasaQueryToRcl: out of memory\n")); reason = "Out of memory"; return 0; } } if (mods & WasaQuery::WQM_NOSTEM) nclause->addModifier(Rcl::SearchDataClause::SDCM_NOSTEMMING); if (mods & WasaQuery::WQM_DIACSENS) nclause->addModifier(Rcl::SearchDataClause::SDCM_DIACSENS); if (mods & WasaQuery::WQM_CASESENS) nclause->addModifier(Rcl::SearchDataClause::SDCM_CASESENS); if ((*it)->m_weight != 1.0) nclause->setWeight((*it)->m_weight); sdata->addClause(nclause); } return sdata; } Rcl::SearchData *wasaStringToRcl(const RclConfig *config, const string& stemlang, const string &qs, string &reason, const string& autosuffs) { StringToWasaQuery parser; WasaQuery *wq = parser.stringToQuery(qs, reason); if (wq == 0) return 0; return wasaQueryToRcl(config, stemlang, wq, autosuffs, reason); }