From 2d6e11c0aaea0f32823f903e9d44d0e5edcfe97e Mon Sep 17 00:00:00 2001
From: Jean-Francois Dockes <jfd@recoll.org>
Date: Tue, 28 Aug 2012 14:44:53 +0200
Subject: [PATCH] simplified field config a bit by moving some hard coded
 values from the c++ to the fields file

---
 src/rcldb/rcldb.cpp   | 86 +++----------------------------------------
 src/sampleconf/fields | 53 +++++++++++++-------------
 2 files changed, 34 insertions(+), 105 deletions(-)

diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp
index 8a00cb95..77a804c6 100644
--- a/src/rcldb/rcldb.cpp
+++ b/src/rcldb/rcldb.cpp
@@ -93,75 +93,12 @@ string version_string(){
 // found in document)
 static const string cstr_syntAbs("?!#@");
 
-// Only ONE field name inside the index data record differs from the
-// Rcl::Doc ones: caption<->title, for a remnant of compatibility with
-// omega
-
-// Static/Default table for field->prefix/weight translation. 
-// This is logically const after initialization. Can't use a
-// static object to init this as the static std::string objects may
-// not be ready.
-//
-// This map is searched if a match is not found in the dynamic
-// "fields" configuration (cf: Db::fieldToTraits()), meaning that the
-// entries can be overriden in the configuration, but not
-// suppressed. 
-
-static map<string, FieldTraits> fldToTraits;
-static PTMutexInit o_fldToTraits_mutex;
 
 // A bogus fldToTraits key (bogus because not a real field) used to
 // retrieve the prefix used for specific filename searches (unsplit
 // filename, not "filename as 'filename:' field" searches)
 static const string keySysFilenamePrefix("rclUnsplitFN");
-// The prefix for regular "filename:" field searches.
-static const string cstr_fnAsFieldPrefix("XSFN");
-// The prefix for unsplit filename terms used with specific -f or
-// "File Name" GUI entries. There is a compile option to use the same prefix 
-// for both.
-// #define UNSPLIT_FN_PREFIX_SAME_AS_SPLIT
-#if defined(UNSPLIT_FN_PREFIX_SAME_AS_SPLIT)
-static const string cstr_fnUnsplitPrefix(cstr_fnAsFieldPrefix);
-#else
 static const string cstr_fnUnsplitPrefix("XSFS");
-#endif
-
-static void initFldToTraits() 
-{
-    PTMutexLocker locker(o_fldToTraits_mutex);
-    // As we perform non-locked testing of initialization, check again with
-    // the lock held
-    if (fldToTraits.size())
-	return;
-
-    // Can't remember why "abstract" is indexed without a prefix
-    // (result: it's indexed twice actually). Maybe I'll dare change
-    // this one day
-    fldToTraits[Doc::keyabs] = FieldTraits();
-
-    fldToTraits["ext"] = FieldTraits("XE");
-
-    fldToTraits[Doc::keyfn] = FieldTraits(cstr_fnAsFieldPrefix);
-    fldToTraits[keySysFilenamePrefix] = FieldTraits(cstr_fnUnsplitPrefix);
-
-    fldToTraits[cstr_caption] = FieldTraits("S");
-    fldToTraits[Doc::keytt] = FieldTraits("S");
-    fldToTraits["subject"] = FieldTraits("S");
-
-    fldToTraits[Doc::keyau] = FieldTraits("A");
-    fldToTraits["creator"] = FieldTraits("A");
-    fldToTraits["from"] = FieldTraits("A");
-
-    fldToTraits[Doc::keykw] = FieldTraits("K");
-    fldToTraits["keyword"] = FieldTraits("K");
-    fldToTraits["tag"] = FieldTraits("K");
-    fldToTraits["tags"] = FieldTraits("K");
-
-    fldToTraits["xapyear"] = FieldTraits("Y");
-    fldToTraits["xapyearmon"] = FieldTraits("M");
-    fldToTraits["xapdate"] = FieldTraits("D");
-    fldToTraits[Doc::keytp] = FieldTraits("T");
-}
 
 // Compute the unique term used to link documents to their origin. 
 // "Q" + external udi
@@ -171,6 +108,7 @@ static inline string make_uniterm(const string& udi)
     uniterm.append(udi);
     return uniterm;
 }
+
 // Compute parent term used to link documents to their parent document (if any)
 // "F" + parent external udi
 static inline string make_parentterm(const string& udi)
@@ -685,9 +623,6 @@ Db::Db(RclConfig *cfp)
       m_curtxtsz(0), m_flushtxtsz(0), m_occtxtsz(0), m_occFirstCheck(1),
       m_maxFsOccupPc(0), m_mode(Db::DbRO)
 {
-    if (!fldToTraits.size())
-	initFldToTraits();
-
     m_ndb = new Native(this);
     if (m_config) {
 	m_config->getConfParam("maxfsoccuppc", &m_maxFsOccupPc);
@@ -976,22 +911,12 @@ bool Db::isopen()
     return m_ndb->m_isopen;
 }
 
-// Try to translate field specification into field prefix.  We have a
-// default table used if translations are not in the config for some
-// reason (old config not updated ?). We use it only if the config
-// translation fails. Also we add in there fields which should be
-// indexed with no prefix (ie: abstract)
+// Translate a field name into its traits, as defined in the configuration.
 bool Db::fieldToTraits(const string& fld, const FieldTraits **ftpp)
 {
     if (m_config && m_config->getFieldTraits(fld, ftpp))
 	return true;
 
-    // No data in rclconfig? Check default values
-    map<string, FieldTraits>::const_iterator it = fldToTraits.find(fld);
-    if (it != fldToTraits.end()) {
-	*ftpp = &it->second;
-	return true;
-    }
     *ftpp = 0;
     return false;
 }
@@ -1310,9 +1235,10 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi,
     // Mime type
     newdocument.add_term("T" + doc.mimetype);
 
-    // Simple file name indexed unsplit for file name searches with a
-    // term prefix We also add a term for the filename extension if
-    // any.
+    // Simple file name indexed unsplit for specific "file name"
+    // searches. This is not the same as a filename: clause inside the
+    // query language.
+    // We also add a term for the filename extension if any.
     string utf8fn;
     if (doc.getmeta(Doc::keyfn, &utf8fn) && !utf8fn.empty()) {
 	string fn;
diff --git a/src/sampleconf/fields b/src/sampleconf/fields
index 50ad8155..018bc219 100644
--- a/src/sampleconf/fields
+++ b/src/sampleconf/fields
@@ -8,14 +8,17 @@
 #   - the field names MUST be all lowercase alphabetic ascii here. They can
 #     be anycased in the documents.
 
+[prefixes]
+
 #####################################################
 # This section defines what prefix the terms inside named fields will be
 # indexed with (in addition to prefix-less indexing for general search)
-# ALL prefixes MUST be all UPPERCASE. Extension prefixes begin with X 
+# ALL prefixes MUST be all UPPERCASE. 
 # 
-# The choice of field names is rather arbitrary. Use of any of the aliases
-# defined in the following section will yield exactly the same results,
-# (both for indexing and search).
+# The field names should be the canonic ones, not the aliases defined in
+# the following section. Don't change those which are predefined here:
+# quite a few are hard-coded in the C++ code. But you can add more (for
+# new fields emitted by filters).
 #
 # Fields can have two relevance boost factors defined, such as in:
 # caption = S ; wdfinc=10
@@ -27,22 +30,30 @@
 # caption-based field query (ie: caption:mytitle or title:mytitle) at query
 # time.
 
-[prefixes]
-
-# Native fields matching omega uses, which we index without an X first
-# letter. Don't change these. Caption is used for 'title' to keep a last
-# remnant of omega compatibility inside the data record. 
-# Also reserved/hardcoded: D(ate), M(onth), Y(ear), 
-#           F(parentid), Q(uniqueid), T(mime type)
-caption = S ; wdfinc = 10
+# The following ones are probably hard-coded in the C++ code and can't be
+# changed at all.
+# Also reserved: F(parentid), Q(uniqueid)
+title = S ; wdfinc = 10
 author = A
-keywords = K
+abstract =
+caption = S
+title = S ; wdfinc = 10
+subject = S
+author = A
+keywords= K
+xapyear = Y
+xapyearmon = M
+xapdate = D
+mtype = T
+rclUnsplitFN = XSFS
+filename = XSFN
+ext = XE
 
 # Extension examples. These are actually used by default by Recoll, you can
 # add your own to search for fields produced by the filters and not handled
 # by default. 
 # Some values are internally reserved by recoll: 
-#   XE (file ext), XP (for path elements), XSFN, XSFS, XXST, XXND, 
+#   XE (file ext), XP (for path elements), XSFN, XSFS, XXST, XXND, XXPG
 # Using XX was not a good idea. 
 #
 # I hereby commit to not using XY for Recoll:
@@ -50,6 +61,7 @@ keywords = K
 # myfield = XYMYPREF
 recipient = XTO
 
+[stored]
 ############################
 # Some fields are stored in the document data record inside the index and
 # can be returned in result lists. There is no necessity that stored fields
@@ -64,12 +76,12 @@ recipient = XTO
 # "rclaptg" is used for viewer specialization (depending on local config)
 # "rclbes" defines the backend type (ie normal fs, firefox cache). Should
 #   probably be hardcoded, don't remove it
-[stored]
 author=
 rclaptg=
 rclbes=
 filename=
 
+[aliases]
 ##########################
 # This section defines field names aliases or synonyms. Any right hand side
 # value will be turned into the lhs canonic name before further treatment
@@ -80,8 +92,6 @@ filename=
 #
 # Filters should only add canonic names to the meta array when indexing,
 # not aliases.
-
-[aliases]
 abstract = summary dc:summary description xesam:description
 author = creator dc:creator xesam:author xesam:creator from
 title = caption title dc:title subject
@@ -97,18 +107,11 @@ mtype = mime mimetype xesam:mimetype contenttype xesam:contenttype dc:format
 recipient = to xesam:recipient
 url = dc:identifier xesam:url
 
-#########################
-# This section defines a hierarchy for field names. Searching for a lhs
-# ancestor will be expanded to a search for itself and all rhs descendants
-# This is not used for now
-[specialisations]
-author = from
-
+[xattrtofields]
 ######################
 # Section to define translations from extended file attribute names to
 # field names. xattr use must be enabled at compile time for this to be
 # used. Enter translations as "xattrname = fieldname". Case matters.
-[xattrtofields]
 
 
 ########################