simplified field config a bit by moving some hard coded values from the c++ to the fields file

This commit is contained in:
Jean-Francois Dockes 2012-08-28 14:44:53 +02:00
parent 776800f47a
commit 2d6e11c0aa
2 changed files with 34 additions and 105 deletions

View File

@ -93,75 +93,12 @@ string version_string(){
// found in document)
static const string cstr_syntAbs("?!#@");
// Only ONE field name inside the index data record differs from the
// Rcl::Doc ones: caption<->title, for a remnant of compatibility with
// omega
// Static/default table for field name -> traits (index prefix, weights)
// translation.
// This is logically const after initialization. We can't use a
// static object to initialize it, as the static std::string objects
// it depends on may not be constructed yet at that point, so it is
// filled at run time by initFldToTraits().
//
// This map is searched if a match is not found in the dynamic
// "fields" configuration (cf: Db::fieldToTraits()), meaning that the
// entries can be overridden in the configuration, but not
// suppressed.
static map<string, FieldTraits> fldToTraits;
// Serializes the one-time filling of fldToTraits (held by
// initFldToTraits()).
static PTMutexInit o_fldToTraits_mutex;
// A bogus fldToTraits key (bogus because it is not a real field) used to
// retrieve the prefix used for specific filename searches (unsplit
// filename, not "filename as 'filename:' field" searches).
static const string keySysFilenamePrefix("rclUnsplitFN");
// The prefix for regular "filename:" field searches.
static const string cstr_fnAsFieldPrefix("XSFN");
// The prefix for unsplit filename terms used with specific -f or
// "File Name" GUI entries. There is a compile option to use the same prefix
// for both: uncomment the following define to enable it.
// #define UNSPLIT_FN_PREFIX_SAME_AS_SPLIT
#if defined(UNSPLIT_FN_PREFIX_SAME_AS_SPLIT)
// Unsplit file names share the "filename:" field prefix.
static const string cstr_fnUnsplitPrefix(cstr_fnAsFieldPrefix);
#else
// Unsplit file names get their own prefix, distinct from the field one.
static const string cstr_fnUnsplitPrefix("XSFS");
#endif
static void initFldToTraits()
{
PTMutexLocker locker(o_fldToTraits_mutex);
// As we perform non-locked testing of initialization, check again with
// the lock held
if (fldToTraits.size())
return;
// Can't remember why "abstract" is indexed without a prefix
// (result: it's indexed twice actually). Maybe I'll dare change
// this one day
fldToTraits[Doc::keyabs] = FieldTraits();
fldToTraits["ext"] = FieldTraits("XE");
fldToTraits[Doc::keyfn] = FieldTraits(cstr_fnAsFieldPrefix);
fldToTraits[keySysFilenamePrefix] = FieldTraits(cstr_fnUnsplitPrefix);
fldToTraits[cstr_caption] = FieldTraits("S");
fldToTraits[Doc::keytt] = FieldTraits("S");
fldToTraits["subject"] = FieldTraits("S");
fldToTraits[Doc::keyau] = FieldTraits("A");
fldToTraits["creator"] = FieldTraits("A");
fldToTraits["from"] = FieldTraits("A");
fldToTraits[Doc::keykw] = FieldTraits("K");
fldToTraits["keyword"] = FieldTraits("K");
fldToTraits["tag"] = FieldTraits("K");
fldToTraits["tags"] = FieldTraits("K");
fldToTraits["xapyear"] = FieldTraits("Y");
fldToTraits["xapyearmon"] = FieldTraits("M");
fldToTraits["xapdate"] = FieldTraits("D");
fldToTraits[Doc::keytp] = FieldTraits("T");
}
// Compute the unique term used to link documents to their origin.
// "Q" + external udi
@ -171,6 +108,7 @@ static inline string make_uniterm(const string& udi)
uniterm.append(udi);
return uniterm;
}
// Compute parent term used to link documents to their parent document (if any)
// "F" + parent external udi
static inline string make_parentterm(const string& udi)
@ -685,9 +623,6 @@ Db::Db(RclConfig *cfp)
m_curtxtsz(0), m_flushtxtsz(0), m_occtxtsz(0), m_occFirstCheck(1),
m_maxFsOccupPc(0), m_mode(Db::DbRO)
{
if (!fldToTraits.size())
initFldToTraits();
m_ndb = new Native(this);
if (m_config) {
m_config->getConfParam("maxfsoccuppc", &m_maxFsOccupPc);
@ -976,22 +911,12 @@ bool Db::isopen()
return m_ndb->m_isopen;
}
// Try to translate field specification into field prefix. We have a
// default table used if translations are not in the config for some
// reason (old config not updated ?). We use it only if the config
// translation fails. Also we add in there fields which should be
// indexed with no prefix (ie: abstract)
// Try to translate field specification into field prefix.
bool Db::fieldToTraits(const string& fld, const FieldTraits **ftpp)
{
if (m_config && m_config->getFieldTraits(fld, ftpp))
return true;
// No data in rclconfig? Check default values
map<string, FieldTraits>::const_iterator it = fldToTraits.find(fld);
if (it != fldToTraits.end()) {
*ftpp = &it->second;
return true;
}
*ftpp = 0;
return false;
}
@ -1310,9 +1235,10 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi,
// Mime type
newdocument.add_term("T" + doc.mimetype);
// Simple file name indexed unsplit for file name searches with a
// term prefix. We also add a term for the filename extension if
// any.
// Simple file name indexed unsplit for specific "file name"
// searches. This is not the same as a filename: clause inside the
// query language.
// We also add a term for the filename extension if any.
string utf8fn;
if (doc.getmeta(Doc::keyfn, &utf8fn) && !utf8fn.empty()) {
string fn;

View File

@ -8,14 +8,17 @@
# - the field names MUST be all lowercase alphabetic ascii here. They can
# be anycased in the documents.
[prefixes]
#####################################################
# This section defines what prefix the terms inside named fields will be
# indexed with (in addition to prefix-less indexing for general search)
# ALL prefixes MUST be all UPPERCASE. Extension prefixes begin with X
# ALL prefixes MUST be all UPPERCASE.
#
# The choice of field names is rather arbitrary. Use of any of the aliases
# defined in the following section will yield exactly the same results,
# (both for indexing and search).
# The field names should be the canonical ones, not the aliases defined in
# the following section. Don't change those which are predefined here:
# quite a few are hard-coded in the C++ code. But you can add more (for new
# fields emitted by filters).
#
# Fields can have two relevance boost factors defined, such as in:
# caption = S ; wdfinc=10
@ -27,22 +30,30 @@
# caption-based field query (ie: caption:mytitle or title:mytitle) at query
# time.
[prefixes]
# Native fields matching omega uses, which we index without an X first
# letter. Don't change these. Caption is used for 'title' to keep a last
# remnant of omega compatibility inside the data record.
# Also reserved/hardcoded: D(ate), M(onth), Y(ear),
# F(parentid), Q(uniqueid), T(mime type)
caption = S ; wdfinc = 10
# The following ones are probably hard-coded in the c code, can't change at
# all.
# Also reserved: F(parentid), Q(uniqueid)
title = S ; wdfinc = 10
author = A
keywords = K
abstract =
caption = S
title = S
subject = S
author = A
keywords= K
xapyear = Y
xapyearmon = M
xapdate = D
mtype = T
rclUnsplitFN = XSFS
filename = XSFN
ext = XE
# Extension examples. These are actually used by default by Recoll, you can
# add your own to search for fields produced by the filters and not handled
# by default.
# Some values are internally reserved by recoll:
# XE (file ext), XP (for path elements), XSFN, XSFS, XXST, XXND,
# XE (file ext), XP (for path elements), XSFN, XSFS, XXST, XXND, XXPG
# Using XX was not a good idea.
#
# I hereby commit to not using XY for Recoll:
@ -50,6 +61,7 @@ keywords = K
# myfield = XYMYPREF
recipient = XTO
[stored]
############################
# Some fields are stored in the document data record inside the index and
# can be returned in result lists. There is no necessity that stored fields
@ -64,12 +76,12 @@ recipient = XTO
# "rclaptg" is used for viewer specialization (depending on local config)
# "rclbes" defines the backend type (ie normal fs, firefox cache). Should
# probably be hardcoded, don't remove it
[stored]
author=
rclaptg=
rclbes=
filename=
[aliases]
##########################
# This section defines field names aliases or synonyms. Any right hand side
# value will be turned into the lhs canonic name before further treatment
@ -80,8 +92,6 @@ filename=
#
# Filters should only add canonic names to the meta array when indexing,
# not aliases.
[aliases]
abstract = summary dc:summary description xesam:description
author = creator dc:creator xesam:author xesam:creator from
title = caption title dc:title subject
@ -97,18 +107,11 @@ mtype = mime mimetype xesam:mimetype contenttype xesam:contenttype dc:format
recipient = to xesam:recipient
url = dc:identifier xesam:url
#########################
# This section defines a hierarchy for field names. Searching for a lhs
# ancestor will be expanded to a search for itself and all rhs descendants.
# This is not used for now.
[specialisations]
author = from
[xattrtofields]
######################
# Section to define translations from extended file attribute names to
# field names. xattr use must be enabled at compile time for this to be
# used. Enter translations as "xattrname = fieldname". Case matters.
[xattrtofields]
########################