implement dynamic field name to prefix translation, query side

This commit is contained in:
dockes 2007-06-18 13:04:15 +00:00
parent fcf027b22f
commit 18b3573358
6 changed files with 116 additions and 66 deletions

View File

@ -1,5 +1,5 @@
#ifndef lint #ifndef lint
static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.45 2007-06-08 12:31:54 dockes Exp $ (C) 2004 J.F.Dockes"; static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.46 2007-06-18 13:04:14 dockes Exp $ (C) 2004 J.F.Dockes";
#endif #endif
/* /*
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
@ -373,6 +373,15 @@ string RclConfig::getMimeHandlerDef(const std::string &mtype)
return hs; return hs;
} }
// Fetch the index prefix associated with a field name, as stored under
// the "prefixes" section of mimeconf. When the lookup fails, a debug
// message is logged and the string is returned as the lookup left it
// (presumably empty -- depends on the behaviour of get()).
string RclConfig::getFieldPrefix(const string& fld)
{
    string prefix;
    const bool found = mimeconf->get(fld, prefix, "prefixes");
    if (!found) {
        LOGDEB(("getFieldPrefix: no prefix defined for '%s'\n", fld.c_str()));
    }
    return prefix;
}
string RclConfig::getMimeViewerDef(const string &mtype) string RclConfig::getMimeViewerDef(const string &mtype)
{ {
string hs; string hs;

View File

@ -16,7 +16,7 @@
*/ */
#ifndef _RCLCONFIG_H_INCLUDED_ #ifndef _RCLCONFIG_H_INCLUDED_
#define _RCLCONFIG_H_INCLUDED_ #define _RCLCONFIG_H_INCLUDED_
/* @(#$Id: rclconfig.h,v 1.33 2007-06-08 16:47:19 dockes Exp $ (C) 2004 J.F.Dockes */ /* @(#$Id: rclconfig.h,v 1.34 2007-06-18 13:04:15 dockes Exp $ (C) 2004 J.F.Dockes */
#include <list> #include <list>
#include <string> #include <string>
@ -35,15 +35,21 @@ using std::pair;
class RclConfig { class RclConfig {
public: public:
// Constructor: we normally look for a configuration file, except
// if this was specified on the command line and passed through
// argcnf
RclConfig(const string *argcnf = 0); RclConfig(const string *argcnf = 0);
// Main programs should implement this, it avoids having to carry
// Main programs must implement this, it avoids having to carry
// the configuration parameter everywhere. Places where several // the configuration parameter everywhere. Places where several
// instances might be needed will take care of themselves. // RclConfig instances might be needed will take care of
// themselves.
static RclConfig* getMainConfig(); static RclConfig* getMainConfig();
bool ok() {return m_ok;} bool ok() {return m_ok;}
const string &getReason() {return m_reason;} const string &getReason() {return m_reason;}
/** Return the directory where this config is stored */
/** Return the directory where this configuration is stored */
string getConfDir() {return m_confdir;} string getConfDir() {return m_confdir;}
/** Set current directory reference, and fetch automatic parameters. */ /** Set current directory reference, and fetch automatic parameters. */
@ -113,30 +119,32 @@ class RclConfig {
*/ */
bool getUncompressor(const string &mtpe, list<string>& cmd); bool getUncompressor(const string &mtpe, list<string>& cmd);
/** Use mimemap to compute mimetype */ /** mimemap: compute mimetype */
string getMimeTypeFromSuffix(const string &suffix); string getMimeTypeFromSuffix(const string &suffix);
/** mimemap: get a list of all indexable mime types defined */
/** Get appropriate suffix for mime type. This is inefficient */ list<string> getAllMimeTypes();
/** mimemap: Get appropriate suffix for mime type. This is inefficient */
string getSuffixFromMimeType(const string &mt); string getSuffixFromMimeType(const string &mt);
/** Get input filter from mimeconf for mimetype */ /** mimeconf: get input filter for mimetype */
string getMimeHandlerDef(const string &mimetype); string getMimeHandlerDef(const string &mimetype);
/** Get external viewer exec string from mimeconf for mimetype */ /** mimeconf: get icon name for mimetype */
string getMimeIconName(const string &mtype, string *path = 0);
/** mimeconf: get list of file categories */
bool getMimeCategories(list<string>&);
/** mimeconf: get list of mime types for category */
bool getMimeCatTypes(const string& cat, list<string>&);
/** mimeconf: get field prefix from field name */
string getFieldPrefix(const string& fldname);
/** mimeview: get/set external viewer exec string(s) for mimetype(s) */
string getMimeViewerDef(const string &mimetype); string getMimeViewerDef(const string &mimetype);
bool getMimeViewerDefs(vector<pair<string, string> >&); bool getMimeViewerDefs(vector<pair<string, string> >&);
bool setMimeViewerDef(const string& mimetype, const string& cmd); bool setMimeViewerDef(const string& mimetype, const string& cmd);
/** Get icon name from mimeconf for mimetype */
string getMimeIconName(const string &mtype, string *path = 0);
/** Get list of file categories from mimeconf */
bool getMimeCategories(list<string>&);
/** Get list of mime types for category from mimeconf */
bool getMimeCatTypes(const string& cat, list<string>&);
/** Get a list of all indexable mime types defined in mimemap */
list<string> getAllMimeTypes();
/** Find exec file for external filter. cmd is the command name from the /** Find exec file for external filter. cmd is the command name from the
* command string returned by getMimeHandlerDef */ * command string returned by getMimeHandlerDef */

View File

@ -1,5 +1,5 @@
#ifndef lint #ifndef lint
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.113 2007-06-14 08:20:13 dockes Exp $ (C) 2004 J.F.Dockes"; static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.114 2007-06-18 13:04:15 dockes Exp $ (C) 2004 J.F.Dockes";
#endif #endif
/* /*
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
@ -740,6 +740,43 @@ bool Db::isopen()
return m_ndb->m_isopen; return m_ndb->m_isopen;
} }
// Translate a field name (ie: "author") into the index term prefix
// used for that field (ie: "A").
//
// The name is first lowercased with stringtolower(), then looked up
// through the main configuration (RclConfig::getFieldPrefix()). If
// there is no main configuration, or if it yields an empty prefix (old
// config not updated ?), we fall back to the built-in default table
// below.
//
// @param fldname field name as specified by the caller.
// @return the prefix, or an empty string if the field is found neither
//   in the configuration nor in the default table.
string Db::fieldToPrefix(const string& fldname)
{
    // This is the default table, filled on first call
    static map<string, string> fldToPrefs;
    if (fldToPrefs.empty()) {
        fldToPrefs["title"] = "S";
        fldToPrefs["caption"] = "S";
        fldToPrefs["subject"] = "S";
        fldToPrefs["author"] = "A";
        fldToPrefs["creator"] = "A";
        fldToPrefs["from"] = "A";
        fldToPrefs["keyword"] = "K";
        fldToPrefs["tag"] = "K";
        fldToPrefs["keywords"] = "K";
        fldToPrefs["tags"] = "K";
    }

    string fld(fldname), pfx;
    stringtolower(fld);

    // The configuration has priority over the built-in defaults
    RclConfig *config = RclConfig::getMainConfig();
    if (config)
        pfx = config->getFieldPrefix(fld);

    if (pfx.empty()) {
        map<string, string>::const_iterator it = fldToPrefs.find(fld);
        // The default must be stored into the returned prefix: storing
        // it into fld would leave pfx empty and make the fallback a no-op
        if (it != fldToPrefs.end())
            pfx = it->second;
    }
    return pfx;
}
// The text splitter callback class which receives words from the // The text splitter callback class which receives words from the
// splitter and adds postings to the Xapian document. // splitter and adds postings to the Xapian document.
class mySplitterCB : public TextSplitCB { class mySplitterCB : public TextSplitCB {
@ -882,7 +919,13 @@ bool Db::add(const string &fn, const Doc &idoc, const struct stat *stp)
TextSplit splitter(&splitData); TextSplit splitter(&splitData);
// /////// Split and index terms in document body and auxiliary fields // Index the title, document text, keywords and other textual
// metadata. These are all indexed as text with positions, as we
// may want to do phrase searches with them (this makes no sense
// for keywords by the way, but wtf).
//
// The order has no importance, and we set a position gap of 100
// between fields to avoid false proximity matches.
string noacc; string noacc;
// Split and index file name as document term(s) // Split and index file name as document term(s)

View File

@ -16,7 +16,7 @@
*/ */
#ifndef _DB_H_INCLUDED_ #ifndef _DB_H_INCLUDED_
#define _DB_H_INCLUDED_ #define _DB_H_INCLUDED_
/* @(#$Id: rcldb.h,v 1.50 2007-06-08 16:47:19 dockes Exp $ (C) 2004 J.F.Dockes */ /* @(#$Id: rcldb.h,v 1.51 2007-06-18 13:04:15 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string> #include <string>
#include <list> #include <list>
@ -94,6 +94,8 @@ class Db {
/** Return list of configured stop words */ /** Return list of configured stop words */
const StopList& getStopList() const {return m_stops;} const StopList& getStopList() const {return m_stops;}
/** Field name to prefix translation (ie: author -> 'A') */
string fieldToPrefix(const string& fldname);
/* Update-related methods ******************************************/ /* Update-related methods ******************************************/

View File

@ -1,5 +1,5 @@
#ifndef lint #ifndef lint
static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.14 2007-06-02 08:30:42 dockes Exp $ (C) 2006 J.F.Dockes"; static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.15 2007-06-18 13:04:15 dockes Exp $ (C) 2006 J.F.Dockes";
#endif #endif
/* /*
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
@ -33,6 +33,7 @@ static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.14 2007-06-02 08:30:42 dockes
#include "unacpp.h" #include "unacpp.h"
#include "utf8iter.h" #include "utf8iter.h"
#include "stoplist.h" #include "stoplist.h"
#include "rclconfig.h"
#ifndef NO_NAMESPACES #ifndef NO_NAMESPACES
using namespace std; using namespace std;
@ -460,34 +461,6 @@ bool StringToXapianQ::processUserString(const string &iq,
return true; return true;
} }
// Map a field specification to the matching index term prefix, using a
// fixed built-in table. The field name is passed through
// stringtolower() before the lookup. Unknown fields yield an empty
// string. This should probably be an Rcl::Db method and much more
// configurable (store the prefix translation list in the config ?)
static string fieldToPrefix(const string& i_field)
{
    // Built-in translation table, populated on first use
    static map<string, string> prefixTable;
    if (prefixTable.empty()) {
        prefixTable["title"] = "S";
        prefixTable["caption"] = "S";
        prefixTable["subject"] = "S";
        prefixTable["author"] = "A";
        prefixTable["creator"] = "A";
        prefixTable["from"] = "A";
        prefixTable["keyword"] = "K";
        prefixTable["tag"] = "K";
        prefixTable["keywords"] = "K";
        prefixTable["tags"] = "K";
    }

    string lowered(i_field);
    stringtolower(lowered);

    map<string, string>::const_iterator pos = prefixTable.find(lowered);
    if (pos == prefixTable.end())
        return "";
    return pos->second;
}
static const string nullstemlang; static const string nullstemlang;
// Translate a simple OR, AND, or EXCL search clause. // Translate a simple OR, AND, or EXCL search clause.
@ -514,7 +487,7 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p,
} }
string prefix; string prefix;
if (!m_field.empty()) if (!m_field.empty())
prefix = fieldToPrefix(m_field); prefix = db.fieldToPrefix(m_field);
list<Xapian::Query> pqueries; list<Xapian::Query> pqueries;
// We normally boost the original term in the stem expansion list. Don't // We normally boost the original term in the stem expansion list. Don't
@ -568,7 +541,7 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
string prefix; string prefix;
if (!m_field.empty()) if (!m_field.empty())
prefix = fieldToPrefix(m_field); prefix = db.fieldToPrefix(m_field);
// We normally boost the original term in the stem expansion list. Don't // We normally boost the original term in the stem expansion list. Don't
// do it if there are wildcards anywhere, this would skew the results. // do it if there are wildcards anywhere, this would skew the results.

View File

@ -1,4 +1,4 @@
# @(#$Id: mimeconf,v 1.28 2007-06-15 11:41:50 dockes Exp $ (C) 2004 J.F.Dockes # @(#$Id: mimeconf,v 1.29 2007-06-18 13:04:15 dockes Exp $ (C) 2004 J.F.Dockes
# Recoll : associations of mime types to processing filters. # Recoll : associations of mime types to processing filters.
# There are different sections for decompression, 'interning' for indexing # There are different sections for decompression, 'interning' for indexing
@ -109,23 +109,38 @@ texts = \
text/rtf text/rtf
spreadsheets = application/vnd.ms-excel \ spreadsheets = application/vnd.ms-excel \
application/vnd.sun.xml.calc \ application/vnd.sun.xml.calc \
application/vnd.sun.xml.calc.template application/vnd.sun.xml.calc.template
presentations = application/vnd.ms-powerpoint \ presentations = application/vnd.ms-powerpoint \
application/vnd.sun.xml.impress \ application/vnd.sun.xml.impress \
application/vnd.sun.xml.impress.template application/vnd.sun.xml.impress.template
media = audio/mpeg \ media = audio/mpeg \
image/jpeg \ image/jpeg \
image/png \ image/png \
messages = message/rfc822 \ messages = message/rfc822 \
text/x-gaim-log \ text/x-gaim-log \
text/x-mail \ text/x-mail \
other = application/vnd.sun.xml.draw \ other = application/vnd.sun.xml.draw \
application/vnd.sun.xml.draw.template \ application/vnd.sun.xml.draw.template \
application/vnd.sun.xml.math \ application/vnd.sun.xml.math \
application/x-fsdirectory application/x-fsdirectory
[prefixes]
title = S
caption = S
subject = S
author = A
creator = A
from = A
keyword = K
tag = K
keywords = K
tags = K