Add new kind of aliases for field names, to be used only at query time

This commit is contained in:
Jean-Francois Dockes 2014-07-23 16:52:08 +02:00
parent 578511c3e2
commit 19da7020d6
9 changed files with 57 additions and 14 deletions

View File

@ -879,6 +879,21 @@ bool RclConfig::readFieldsConfig(const string& cnferrloc)
} }
} }
// Query aliases map
tps = m_fields->getNames("queryaliases");
for (vector<string>::const_iterator it = tps.begin();
it != tps.end(); it++){
string canonic = stringtolower(*it); // canonic name
string aliases;
m_fields->get(canonic, aliases, "queryaliases");
vector<string> l;
stringToStrings(aliases, l);
for (vector<string>::const_iterator ait = l.begin();
ait != l.end(); ait++) {
m_aliastoqcanon[stringtolower(*ait)] = canonic;
}
}
#if 0 #if 0
for (map<string, FieldTraits>::const_iterator it = m_fldtotraits.begin(); for (map<string, FieldTraits>::const_iterator it = m_fldtotraits.begin();
it != m_fldtotraits.end(); it++) { it != m_fldtotraits.end(); it++) {
@ -910,10 +925,10 @@ bool RclConfig::readFieldsConfig(const string& cnferrloc)
} }
// Return specifics for field name: // Return specifics for field name:
bool RclConfig::getFieldTraits(const string& _fld, const FieldTraits **ftpp) bool RclConfig::getFieldTraits(const string& _fld, const FieldTraits **ftpp,
const bool isquery) const
{ {
string fld = fieldCanon(_fld); string fld = isquery ? fieldQCanon(_fld) : fieldCanon(_fld);
map<string, FieldTraits>::const_iterator pit = m_fldtotraits.find(fld); map<string, FieldTraits>::const_iterator pit = m_fldtotraits.find(fld);
if (pit != m_fldtotraits.end()) { if (pit != m_fldtotraits.end()) {
*ftpp = &pit->second; *ftpp = &pit->second;
@ -952,6 +967,18 @@ string RclConfig::fieldCanon(const string& f) const
return fld; return fld;
} }
// Return the canonic field name for a name used in a query.
// The lowercased input is first looked up in the query-only alias map
// (m_aliastoqcanon); when it is not found there, the original name is
// handed to fieldCanon() for the regular alias resolution.
string RclConfig::fieldQCanon(const string& f) const
{
    const string lowered = stringtolower(f);
    const map<string, string>::const_iterator hit =
        m_aliastoqcanon.find(lowered);
    if (hit == m_aliastoqcanon.end()) {
        // Not a query-only alias: fall back to the regular canonisation,
        // passing the caller's original (non-lowercased) string.
        return fieldCanon(f);
    }
    LOGDEB1(("RclConfig::fieldQCanon: [%s] -> [%s]\n",
             f.c_str(), hit->second.c_str()));
    return hit->second;
}
vector<string> RclConfig::getFieldSectNames(const string &sk, const char* patrn) vector<string> RclConfig::getFieldSectNames(const string &sk, const char* patrn)
const const
{ {
@ -1423,6 +1450,7 @@ void RclConfig::initFrom(const RclConfig& r)
m_ptrans = new ConfSimple(*(r.m_ptrans)); m_ptrans = new ConfSimple(*(r.m_ptrans));
m_fldtotraits = r.m_fldtotraits; m_fldtotraits = r.m_fldtotraits;
m_aliastocanon = r.m_aliastocanon; m_aliastocanon = r.m_aliastocanon;
m_aliastoqcanon = r.m_aliastoqcanon;
m_storedFields = r.m_storedFields; m_storedFields = r.m_storedFields;
m_xattrtofld = r.m_xattrtofld; m_xattrtofld = r.m_xattrtofld;
if (r.m_stopsuffixes) if (r.m_stopsuffixes)

View File

@ -241,8 +241,10 @@ class RclConfig {
/** mimeconf: get query lang frag for named filter */ /** mimeconf: get query lang frag for named filter */
bool getGuiFilter(const string& filtername, string& frag) const; bool getGuiFilter(const string& filtername, string& frag) const;
/** fields: get field prefix from field name */ /** fields: get field prefix from field name. Use additional query
bool getFieldTraits(const string& fldname, const FieldTraits **) const; aliases if isquery is set */
bool getFieldTraits(const string& fldname, const FieldTraits **,
bool isquery = false) const;
const set<string>& getStoredFields() const {return m_storedFields;} const set<string>& getStoredFields() const {return m_storedFields;}
@ -251,6 +253,9 @@ class RclConfig {
/** Get canonic name for possible alias */ /** Get canonic name for possible alias */
string fieldCanon(const string& fld) const; string fieldCanon(const string& fld) const;
/** Get canonic name for possible alias, including query-only aliases */
string fieldQCanon(const string& fld) const;
/** Get xattr name to field names translations */ /** Get xattr name to field names translations */
const map<string, string>& getXattrToField() const {return m_xattrtofld;} const map<string, string>& getXattrToField() const {return m_xattrtofld;}
@ -323,6 +328,7 @@ class RclConfig {
ConfSimple *m_ptrans; // Paths translations ConfSimple *m_ptrans; // Paths translations
map<string, FieldTraits> m_fldtotraits; // Field to field params map<string, FieldTraits> m_fldtotraits; // Field to field params
map<string, string> m_aliastocanon; map<string, string> m_aliastocanon;
map<string, string> m_aliastoqcanon;
set<string> m_storedFields; set<string> m_storedFields;
map<string, string> m_xattrtofld; map<string, string> m_xattrtofld;

View File

@ -518,7 +518,7 @@ Doc_getattro(recoll_DocObject *self, PyObject *nameobj)
Py_RETURN_NONE; Py_RETURN_NONE;
} }
key = rclconfig->fieldCanon(string(name)); key = rclconfig->fieldQCanon(string(name));
switch (key.at(0)) { switch (key.at(0)) {
case 'u': case 'u':
@ -640,7 +640,7 @@ Doc_setattr(recoll_DocObject *self, char *name, PyObject *value)
} }
char* uvalue = PyBytes_AsString(putf8); char* uvalue = PyBytes_AsString(putf8);
Py_DECREF(putf8); Py_DECREF(putf8);
string key = rclconfig->fieldCanon(string(name)); string key = rclconfig->fieldQCanon(string(name));
LOGDEB0(("Doc_setattr: [%s] (%s) -> [%s]\n", key.c_str(), name, uvalue)); LOGDEB0(("Doc_setattr: [%s] (%s) -> [%s]\n", key.c_str(), name, uvalue));
// We set the value in the meta array in all cases. Good idea ? or do it // We set the value in the meta array in all cases. Good idea ? or do it

View File

@ -1033,9 +1033,10 @@ bool Db::isopen()
} }
// Try to translate field specification into field prefix. // Try to translate field specification into field prefix.
bool Db::fieldToTraits(const string& fld, const FieldTraits **ftpp) bool Db::fieldToTraits(const string& fld, const FieldTraits **ftpp,
bool isquery)
{ {
if (m_config && m_config->getFieldTraits(fld, ftpp)) if (m_config && m_config->getFieldTraits(fld, ftpp, isquery))
return true; return true;
*ftpp = 0; *ftpp = 0;

View File

@ -223,7 +223,8 @@ class Db {
/* Return configured stop words */ /* Return configured stop words */
const StopList& getStopList() const {return m_stops;} const StopList& getStopList() const {return m_stops;}
/* Field name to prefix translation (ie: author -> 'A') */ /* Field name to prefix translation (ie: author -> 'A') */
bool fieldToTraits(const string& fldname, const FieldTraits **ftpp); bool fieldToTraits(const string& fldname, const FieldTraits **ftpp,
bool isquery = false);
/* Update-related methods ******************************************/ /* Update-related methods ******************************************/

View File

@ -160,7 +160,7 @@ void Query::setSortBy(const string& fld, bool ascending) {
if (fld.empty()) { if (fld.empty()) {
m_sortField.erase(); m_sortField.erase();
} else { } else {
m_sortField = m_db->getConf()->fieldCanon(fld); m_sortField = m_db->getConf()->fieldQCanon(fld);
m_sortAscending = ascending; m_sortAscending = ascending;
} }
LOGDEB0(("RclQuery::setSortBy: [%s] %s\n", m_sortField.c_str(), LOGDEB0(("RclQuery::setSortBy: [%s] %s\n", m_sortField.c_str(),

View File

@ -328,7 +328,7 @@ bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root,
string prefix; string prefix;
if (!field.empty()) { if (!field.empty()) {
const FieldTraits *ftp = 0; const FieldTraits *ftp = 0;
if (!fieldToTraits(field, &ftp) || ftp->pfx.empty()) { if (!fieldToTraits(field, &ftp, true) || ftp->pfx.empty()) {
LOGDEB(("Db::termMatch: field is not indexed (no prefix): [%s]\n", LOGDEB(("Db::termMatch: field is not indexed (no prefix): [%s]\n",
field.c_str())); field.c_str()));
} else { } else {

View File

@ -686,7 +686,7 @@ void SearchDataClauseSimple::processSimpleSpan(Rcl::Db &db, string& ermsg,
string prefix; string prefix;
const FieldTraits *ftp; const FieldTraits *ftp;
if (!m_field.empty() && db.fieldToTraits(m_field, &ftp)) { if (!m_field.empty() && db.fieldToTraits(m_field, &ftp, true)) {
prefix = wrap_prefix(ftp->pfx); prefix = wrap_prefix(ftp->pfx);
} }
@ -743,7 +743,7 @@ void SearchDataClauseSimple::processPhraseOrNear(Rcl::Db &db, string& ermsg,
string prefix; string prefix;
const FieldTraits *ftp; const FieldTraits *ftp;
if (!m_field.empty() && db.fieldToTraits(m_field, &ftp)) { if (!m_field.empty() && db.fieldToTraits(m_field, &ftp, true)) {
prefix = wrap_prefix(ftp->pfx); prefix = wrap_prefix(ftp->pfx);
} }

View File

@ -109,6 +109,13 @@ mtype = mime mimetype xesam:mimetype contenttype xesam:contenttype dc:format
recipient = to xesam:recipient recipient = to xesam:recipient
url = dc:identifier xesam:url url = dc:identifier xesam:url
##################
# The queryaliases section defines aliases which are used exclusively at
# query time: there is no risk of picking up a random field from a document
# (e.g. an HTML meta field) and indexing it.
[queryaliases]
#filename = fn
[xattrtofields] [xattrtofields]
###################### ######################
# Processing for extended file attributes. # Processing for extended file attributes.