implement dynamic field name to prefix translation, query side
This commit is contained in:
parent
fcf027b22f
commit
18b3573358
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.45 2007-06-08 12:31:54 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.46 2007-06-18 13:04:14 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -373,6 +373,15 @@ string RclConfig::getMimeHandlerDef(const std::string &mtype)
|
||||
return hs;
|
||||
}
|
||||
|
||||
// Look up the index term prefix configured for a field name.
//
// The name is searched as a key in the "prefixes" section of the mimeconf
// configuration object. The value stored by the lookup is returned; when
// the lookup reports failure a debug message is logged and the (normally
// still empty) string is returned, so callers can test the result with
// empty().
string RclConfig::getFieldPrefix(const string& fld)
{
    string prefix;
    const bool found = mimeconf->get(fld, prefix, "prefixes");
    if (!found) {
        LOGDEB(("getFieldPrefix: no prefix defined for '%s'\n", fld.c_str()));
    }
    return prefix;
}
|
||||
|
||||
string RclConfig::getMimeViewerDef(const string &mtype)
|
||||
{
|
||||
string hs;
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
*/
|
||||
#ifndef _RCLCONFIG_H_INCLUDED_
|
||||
#define _RCLCONFIG_H_INCLUDED_
|
||||
/* @(#$Id: rclconfig.h,v 1.33 2007-06-08 16:47:19 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: rclconfig.h,v 1.34 2007-06-18 13:04:15 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include <list>
|
||||
#include <string>
|
||||
@ -35,15 +35,21 @@ using std::pair;
|
||||
class RclConfig {
|
||||
public:
|
||||
|
||||
// Constructor: we normally look for a configuration file, except
|
||||
// if this was specified on the command line and passed through
|
||||
// argcnf
|
||||
RclConfig(const string *argcnf = 0);
|
||||
// Main programs should implement this, it avoids having to carry
|
||||
|
||||
// Main programs must implement this, it avoids having to carry
|
||||
// the configuration parameter everywhere. Places where several
|
||||
// instances might be needed will take care of themselves.
|
||||
// RclConfig instances might be needed will take care of
|
||||
// themselves.
|
||||
static RclConfig* getMainConfig();
|
||||
|
||||
bool ok() {return m_ok;}
|
||||
const string &getReason() {return m_reason;}
|
||||
/** Return the directory where this config is stored */
|
||||
|
||||
/** Return the directory where this configuration is stored */
|
||||
string getConfDir() {return m_confdir;}
|
||||
|
||||
/** Set current directory reference, and fetch automatic parameters. */
|
||||
@ -113,30 +119,32 @@ class RclConfig {
|
||||
*/
|
||||
bool getUncompressor(const string &mtpe, list<string>& cmd);
|
||||
|
||||
/** Use mimemap to compute mimetype */
|
||||
/** mimemap: compute mimetype */
|
||||
string getMimeTypeFromSuffix(const string &suffix);
|
||||
|
||||
/** Get appropriate suffix for mime type. This is inefficient */
|
||||
/** mimemap: get a list of all indexable mime types defined */
|
||||
list<string> getAllMimeTypes();
|
||||
/** mimemap: Get appropriate suffix for mime type. This is inefficient */
|
||||
string getSuffixFromMimeType(const string &mt);
|
||||
|
||||
/** Get input filter from mimeconf for mimetype */
|
||||
/** mimeconf: get input filter for mimetype */
|
||||
string getMimeHandlerDef(const string &mimetype);
|
||||
|
||||
/** Get external viewer exec string from mimeconf for mimetype */
|
||||
/** mimeconf: get icon name for mimetype */
|
||||
string getMimeIconName(const string &mtype, string *path = 0);
|
||||
|
||||
/** mimeconf: get list of file categories */
|
||||
bool getMimeCategories(list<string>&);
|
||||
/** mimeconf: get list of mime types for category */
|
||||
bool getMimeCatTypes(const string& cat, list<string>&);
|
||||
|
||||
/** mimeconf: get field prefix from field name */
|
||||
string getFieldPrefix(const string& fldname);
|
||||
|
||||
/** mimeview: get/set external viewer exec string(s) for mimetype(s) */
|
||||
string getMimeViewerDef(const string &mimetype);
|
||||
bool getMimeViewerDefs(vector<pair<string, string> >&);
|
||||
bool setMimeViewerDef(const string& mimetype, const string& cmd);
|
||||
|
||||
/** Get icon name from mimeconf for mimetype */
|
||||
string getMimeIconName(const string &mtype, string *path = 0);
|
||||
|
||||
/** Get list of file categories from mimeconf */
|
||||
bool getMimeCategories(list<string>&);
|
||||
/** Get list of mime types for category from mimeconf */
|
||||
bool getMimeCatTypes(const string& cat, list<string>&);
|
||||
|
||||
/** Get a list of all indexable mime types defined in mimemap */
|
||||
list<string> getAllMimeTypes();
|
||||
|
||||
/** Find exec file for external filter. cmd is the command name from the
|
||||
* command string returned by getMimeHandlerDef */
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.113 2007-06-14 08:20:13 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.114 2007-06-18 13:04:15 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -740,6 +740,43 @@ bool Db::isopen()
|
||||
return m_ndb->m_isopen;
|
||||
}
|
||||
|
||||
// Try to translate field specification into field prefix. We have a
|
||||
// default table used if translations are not in the config for some
|
||||
// reason (old config not updated ?). We use it only if the config
|
||||
// translation fails
|
||||
string Db::fieldToPrefix(const string& fldname)
|
||||
{
|
||||
// This is the default table
|
||||
static map<string, string> fldToPrefs;
|
||||
if (fldToPrefs.empty()) {
|
||||
fldToPrefs["title"] = "S";
|
||||
fldToPrefs["caption"] = "S";
|
||||
fldToPrefs["subject"] = "S";
|
||||
|
||||
fldToPrefs["author"] = "A";
|
||||
fldToPrefs["creator"] = "A";
|
||||
fldToPrefs["from"] = "A";
|
||||
|
||||
fldToPrefs["keyword"] = "K";
|
||||
fldToPrefs["tag"] = "K";
|
||||
fldToPrefs["keywords"] = "K";
|
||||
fldToPrefs["tags"] = "K";
|
||||
}
|
||||
|
||||
string fld(fldname), pfx;
|
||||
stringtolower(fld);
|
||||
RclConfig *config = RclConfig::getMainConfig();
|
||||
if (config)
|
||||
pfx = config->getFieldPrefix(fld);
|
||||
if (pfx.empty()) {
|
||||
map<string, string>::const_iterator it = fldToPrefs.find(fld);
|
||||
if (it != fldToPrefs.end())
|
||||
fld = it->second;
|
||||
}
|
||||
return pfx;
|
||||
}
|
||||
|
||||
|
||||
// The text splitter callback class which receives words from the
|
||||
// splitter and adds postings to the Xapian document.
|
||||
class mySplitterCB : public TextSplitCB {
|
||||
@ -882,7 +919,13 @@ bool Db::add(const string &fn, const Doc &idoc, const struct stat *stp)
|
||||
|
||||
TextSplit splitter(&splitData);
|
||||
|
||||
// /////// Split and index terms in document body and auxiliary fields
|
||||
// Index the title, document text, keywords and other textual
|
||||
// metadata. These are all indexed as text with positions, as we
|
||||
// may want to do phrase searches with them (this makes no sense
|
||||
// for keywords by the way, but wtf).
|
||||
//
|
||||
// The order has no importance, and we set a position gap of 100
|
||||
// between fields to avoid false proximity matches.
|
||||
string noacc;
|
||||
|
||||
// Split and index file name as document term(s)
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
*/
|
||||
#ifndef _DB_H_INCLUDED_
|
||||
#define _DB_H_INCLUDED_
|
||||
/* @(#$Id: rcldb.h,v 1.50 2007-06-08 16:47:19 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: rcldb.h,v 1.51 2007-06-18 13:04:15 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include <string>
|
||||
#include <list>
|
||||
@ -94,6 +94,8 @@ class Db {
|
||||
/** Return list of configured stop words */
|
||||
const StopList& getStopList() const {return m_stops;}
|
||||
|
||||
/** Field name to prefix translation (ie: author -> 'A') */
|
||||
string fieldToPrefix(const string& fldname);
|
||||
|
||||
/* Update-related methods ******************************************/
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.14 2007-06-02 08:30:42 dockes Exp $ (C) 2006 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.15 2007-06-18 13:04:15 dockes Exp $ (C) 2006 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -33,6 +33,7 @@ static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.14 2007-06-02 08:30:42 dockes
|
||||
#include "unacpp.h"
|
||||
#include "utf8iter.h"
|
||||
#include "stoplist.h"
|
||||
#include "rclconfig.h"
|
||||
|
||||
#ifndef NO_NAMESPACES
|
||||
using namespace std;
|
||||
@ -460,34 +461,6 @@ bool StringToXapianQ::processUserString(const string &iq,
|
||||
return true;
|
||||
}
|
||||
|
||||
// Try to translate field specification into field prefix. This should
|
||||
// probably be an Rcl::Db method and much more configurable (store
|
||||
// prefix translation list in config ?)
|
||||
static string fieldToPrefix(const string& i_field)
|
||||
{
|
||||
static map<string, string> fldToPrefs;
|
||||
if (fldToPrefs.empty()) {
|
||||
fldToPrefs["title"] = "S";
|
||||
fldToPrefs["caption"] = "S";
|
||||
fldToPrefs["subject"] = "S";
|
||||
|
||||
fldToPrefs["author"] = "A";
|
||||
fldToPrefs["creator"] = "A";
|
||||
fldToPrefs["from"] = "A";
|
||||
|
||||
fldToPrefs["keyword"] = "K";
|
||||
fldToPrefs["tag"] = "K";
|
||||
fldToPrefs["keywords"] = "K";
|
||||
fldToPrefs["tags"] = "K";
|
||||
}
|
||||
string fld(i_field);
|
||||
stringtolower(fld);
|
||||
map<string, string>::const_iterator it = fldToPrefs.find(fld);
|
||||
if (it != fldToPrefs.end())
|
||||
return it->second;
|
||||
return "";
|
||||
}
|
||||
|
||||
static const string nullstemlang;
|
||||
|
||||
// Translate a simple OR, AND, or EXCL search clause.
|
||||
@ -514,7 +487,7 @@ bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p,
|
||||
}
|
||||
string prefix;
|
||||
if (!m_field.empty())
|
||||
prefix = fieldToPrefix(m_field);
|
||||
prefix = db.fieldToPrefix(m_field);
|
||||
list<Xapian::Query> pqueries;
|
||||
|
||||
// We normally boost the original term in the stem expansion list. Don't
|
||||
@ -568,7 +541,7 @@ bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
|
||||
|
||||
string prefix;
|
||||
if (!m_field.empty())
|
||||
prefix = fieldToPrefix(m_field);
|
||||
prefix = db.fieldToPrefix(m_field);
|
||||
|
||||
// We normally boost the original term in the stem expansion list. Don't
|
||||
// do it if there are wildcards anywhere, this would skew the results.
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
# @(#$Id: mimeconf,v 1.28 2007-06-15 11:41:50 dockes Exp $ (C) 2004 J.F.Dockes
|
||||
# @(#$Id: mimeconf,v 1.29 2007-06-18 13:04:15 dockes Exp $ (C) 2004 J.F.Dockes
|
||||
|
||||
# Recoll : associations of mime types to processing filters.
|
||||
# There are different sections for decompression, 'interning' for indexing
|
||||
@ -109,23 +109,38 @@ texts = \
|
||||
text/rtf
|
||||
|
||||
spreadsheets = application/vnd.ms-excel \
|
||||
application/vnd.sun.xml.calc \
|
||||
application/vnd.sun.xml.calc.template
|
||||
application/vnd.sun.xml.calc \
|
||||
application/vnd.sun.xml.calc.template
|
||||
|
||||
presentations = application/vnd.ms-powerpoint \
|
||||
application/vnd.sun.xml.impress \
|
||||
application/vnd.sun.xml.impress.template
|
||||
application/vnd.sun.xml.impress \
|
||||
application/vnd.sun.xml.impress.template
|
||||
|
||||
media = audio/mpeg \
|
||||
image/jpeg \
|
||||
image/png \
|
||||
image/jpeg \
|
||||
image/png \
|
||||
|
||||
messages = message/rfc822 \
|
||||
text/x-gaim-log \
|
||||
text/x-mail \
|
||||
text/x-gaim-log \
|
||||
text/x-mail \
|
||||
|
||||
other = application/vnd.sun.xml.draw \
|
||||
application/vnd.sun.xml.draw.template \
|
||||
application/vnd.sun.xml.math \
|
||||
application/x-fsdirectory
|
||||
application/vnd.sun.xml.draw.template \
|
||||
application/vnd.sun.xml.math \
|
||||
application/x-fsdirectory
|
||||
|
||||
|
||||
[prefixes]
|
||||
|
||||
title = S
|
||||
caption = S
|
||||
subject = S
|
||||
|
||||
author = A
|
||||
creator = A
|
||||
from = A
|
||||
|
||||
keyword = K
|
||||
tag = K
|
||||
keywords = K
|
||||
tags = K
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user