diff --git a/src/common/rclconfig.cpp b/src/common/rclconfig.cpp index 0f6479d3..d5a96aaa 100644 --- a/src/common/rclconfig.cpp +++ b/src/common/rclconfig.cpp @@ -879,6 +879,21 @@ bool RclConfig::readFieldsConfig(const string& cnferrloc) } } + // Query aliases map + tps = m_fields->getNames("queryaliases"); + for (vector::const_iterator it = tps.begin(); + it != tps.end(); it++){ + string canonic = stringtolower(*it); // canonic name + string aliases; + m_fields->get(canonic, aliases, "queryaliases"); + vector l; + stringToStrings(aliases, l); + for (vector::const_iterator ait = l.begin(); + ait != l.end(); ait++) { + m_aliastoqcanon[stringtolower(*ait)] = canonic; + } + } + #if 0 for (map::const_iterator it = m_fldtotraits.begin(); it != m_fldtotraits.end(); it++) { @@ -910,10 +925,10 @@ bool RclConfig::readFieldsConfig(const string& cnferrloc) } // Return specifics for field name: -bool RclConfig::getFieldTraits(const string& _fld, const FieldTraits **ftpp) - const +bool RclConfig::getFieldTraits(const string& _fld, const FieldTraits **ftpp, + bool isquery) const { - string fld = fieldCanon(_fld); + string fld = isquery ? fieldQCanon(_fld) : fieldCanon(_fld); map::const_iterator pit = m_fldtotraits.find(fld); if (pit != m_fldtotraits.end()) { *ftpp = &pit->second; @@ -952,6 +967,18 @@ string RclConfig::fieldCanon(const string& f) const return fld; } +string RclConfig::fieldQCanon(const string& f) const +{ + string fld = stringtolower(f); + map::const_iterator it = m_aliastoqcanon.find(fld); + if (it != m_aliastoqcanon.end()) { + LOGDEB1(("RclConfig::fieldQCanon: [%s] -> [%s]\n", + f.c_str(), it->second.c_str())); + return it->second; + } + return fieldCanon(f); +} + vector RclConfig::getFieldSectNames(const string &sk, const char* patrn) const { @@ -1423,6 +1450,7 @@ void RclConfig::initFrom(const RclConfig& r) m_ptrans = new ConfSimple(*(r.m_ptrans)); m_fldtotraits = r.m_fldtotraits; m_aliastocanon = r.m_aliastocanon; + m_aliastoqcanon = r.m_aliastoqcanon; m_storedFields = r.m_storedFields; m_xattrtofld = r.m_xattrtofld; if (r.m_stopsuffixes) diff --git a/src/common/rclconfig.h b/src/common/rclconfig.h index a28b59d9..9254d5da 100644 --- a/src/common/rclconfig.h +++ b/src/common/rclconfig.h @@ -241,8 +241,10 @@ class RclConfig { /** mimeconf: get query lang frag for named filter */ bool getGuiFilter(const string& filtername, string& frag) const; - /** fields: get field prefix from field name */ - bool getFieldTraits(const string& fldname, const FieldTraits **) const; + /** fields: get field prefix from field name. Use additional query + aliases if isquery is set */ + bool getFieldTraits(const string& fldname, const FieldTraits **, + bool isquery = false) const; const set& getStoredFields() const {return m_storedFields;} @@ -251,6 +253,9 @@ class RclConfig { /** Get canonic name for possible alias */ string fieldCanon(const string& fld) const; + /** Get canonic name for possible alias, including query-only aliases */ + string fieldQCanon(const string& fld) const; + /** Get xattr name to field names translations */ const map& getXattrToField() const {return m_xattrtofld;} @@ -323,6 +328,7 @@ class RclConfig { ConfSimple *m_ptrans; // Paths translations map m_fldtotraits; // Field to field params map m_aliastocanon; + map m_aliastoqcanon; set m_storedFields; map m_xattrtofld; diff --git a/src/python/recoll/pyrecoll.cpp b/src/python/recoll/pyrecoll.cpp index 8b4d4286..2c16b74f 100644 --- a/src/python/recoll/pyrecoll.cpp +++ b/src/python/recoll/pyrecoll.cpp @@ -518,7 +518,7 @@ Doc_getattro(recoll_DocObject *self, PyObject *nameobj) Py_RETURN_NONE; } - key = rclconfig->fieldCanon(string(name)); + key = rclconfig->fieldQCanon(string(name)); switch (key.at(0)) { case 'u': @@ -640,7 +640,7 @@ Doc_setattr(recoll_DocObject *self, char *name, PyObject *value) } char* uvalue = PyBytes_AsString(putf8); Py_DECREF(putf8); - string key = rclconfig->fieldCanon(string(name)); + string key = rclconfig->fieldQCanon(string(name)); LOGDEB0(("Doc_setattr: [%s] (%s) -> [%s]\n", key.c_str(), name, uvalue)); // We set the value in the meta array in all cases. Good idea ? or do it diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index a8bbc799..2b118fad 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -1033,9 +1033,10 @@ bool Db::isopen() } // Try to translate field specification into field prefix. -bool Db::fieldToTraits(const string& fld, const FieldTraits **ftpp) +bool Db::fieldToTraits(const string& fld, const FieldTraits **ftpp, + bool isquery) { - if (m_config && m_config->getFieldTraits(fld, ftpp)) + if (m_config && m_config->getFieldTraits(fld, ftpp, isquery)) return true; *ftpp = 0; diff --git a/src/rcldb/rcldb.h b/src/rcldb/rcldb.h index bc9b5350..eb10ae42 100644 --- a/src/rcldb/rcldb.h +++ b/src/rcldb/rcldb.h @@ -223,7 +223,8 @@ class Db { /* Return configured stop words */ const StopList& getStopList() const {return m_stops;} /* Field name to prefix translation (ie: author -> 'A') */ - bool fieldToTraits(const string& fldname, const FieldTraits **ftpp); + bool fieldToTraits(const string& fldname, const FieldTraits **ftpp, + bool isquery = false); /* Update-related methods ******************************************/ diff --git a/src/rcldb/rclquery.cpp b/src/rcldb/rclquery.cpp index b39f4498..fdf811c7 100644 --- a/src/rcldb/rclquery.cpp +++ b/src/rcldb/rclquery.cpp @@ -160,7 +160,7 @@ void Query::setSortBy(const string& fld, bool ascending) { if (fld.empty()) { m_sortField.erase(); } else { - m_sortField = m_db->getConf()->fieldCanon(fld); + m_sortField = m_db->getConf()->fieldQCanon(fld); m_sortAscending = ascending; } LOGDEB0(("RclQuery::setSortBy: [%s] %s\n", m_sortField.c_str(), diff --git a/src/rcldb/rclterms.cpp b/src/rcldb/rclterms.cpp index e1c27c51..f9246131 100644 --- a/src/rcldb/rclterms.cpp +++ b/src/rcldb/rclterms.cpp @@ -328,7 +328,7 @@ bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root, string prefix; if (!field.empty()) { const FieldTraits *ftp = 0; - if (!fieldToTraits(field, &ftp) || ftp->pfx.empty()) { + if (!fieldToTraits(field, &ftp, true) || ftp->pfx.empty()) { LOGDEB(("Db::termMatch: field is not indexed (no prefix): [%s]\n", field.c_str())); } else { diff --git a/src/rcldb/searchdata.cpp b/src/rcldb/searchdata.cpp index 3b8ebcd9..e6807991 100644 --- a/src/rcldb/searchdata.cpp +++ b/src/rcldb/searchdata.cpp @@ -686,7 +686,7 @@ void SearchDataClauseSimple::processSimpleSpan(Rcl::Db &db, string& ermsg, string prefix; const FieldTraits *ftp; - if (!m_field.empty() && db.fieldToTraits(m_field, &ftp)) { + if (!m_field.empty() && db.fieldToTraits(m_field, &ftp, true)) { prefix = wrap_prefix(ftp->pfx); } @@ -743,7 +743,7 @@ void SearchDataClauseSimple::processPhraseOrNear(Rcl::Db &db, string& ermsg, string prefix; const FieldTraits *ftp; - if (!m_field.empty() && db.fieldToTraits(m_field, &ftp)) { + if (!m_field.empty() && db.fieldToTraits(m_field, &ftp, true)) { prefix = wrap_prefix(ftp->pfx); } diff --git a/src/sampleconf/fields b/src/sampleconf/fields index 47ea0d01..6780e136 100644 --- a/src/sampleconf/fields +++ b/src/sampleconf/fields @@ -109,6 +109,13 @@ mtype = mime mimetype xesam:mimetype contenttype xesam:contenttype dc:format recipient = to xesam:recipient url = dc:identifier xesam:url +################## +# The queryaliases section defines aliases which are used exclusively at +# query time: there is no risk to pick up a random field from a document +# (e.g. an HTML meta field) and index it. +[queryaliases] +#filename = fn + [xattrtofields] ###################### # Processing for extended file attributes.