From 2d6e11c0aaea0f32823f903e9d44d0e5edcfe97e Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Tue, 28 Aug 2012 14:44:53 +0200 Subject: [PATCH] simplified field config a bit by moving some hard coded values from the c++ to the fields file --- src/rcldb/rcldb.cpp | 86 +++---------------------------------------- src/sampleconf/fields | 53 +++++++++++++------------- 2 files changed, 34 insertions(+), 105 deletions(-) diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index 8a00cb95..77a804c6 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -93,75 +93,12 @@ string version_string(){ // found in document) static const string cstr_syntAbs("?!#@"); -// Only ONE field name inside the index data record differs from the -// Rcl::Doc ones: caption<->title, for a remnant of compatibility with -// omega - -// Static/Default table for field->prefix/weight translation. -// This is logically const after initialization. Can't use a -// static object to init this as the static std::string objects may -// not be ready. -// -// This map is searched if a match is not found in the dynamic -// "fields" configuration (cf: Db::fieldToTraits()), meaning that the -// entries can be overriden in the configuration, but not -// suppressed. - -static map fldToTraits; -static PTMutexInit o_fldToTraits_mutex; // A bogus fldToTraits key (bogus because not a real field) used to // retrieve the prefix used for specific filename searches (unsplit // filename, not "filename as 'filename:' field" searches) static const string keySysFilenamePrefix("rclUnsplitFN"); -// The prefix for regular "filename:" field searches. -static const string cstr_fnAsFieldPrefix("XSFN"); -// The prefix for unsplit filename terms used with specific -f or -// "File Name" GUI entries. There is a compile option to use the same prefix -// for both. -// #define UNSPLIT_FN_PREFIX_SAME_AS_SPLIT -#if defined(UNSPLIT_FN_PREFIX_SAME_AS_SPLIT) -static const string cstr_fnUnsplitPrefix(cstr_fnAsFieldPrefix); -#else static const string cstr_fnUnsplitPrefix("XSFS"); -#endif - -static void initFldToTraits() -{ - PTMutexLocker locker(o_fldToTraits_mutex); - // As we perform non-locked testing of initialization, check again with - // the lock held - if (fldToTraits.size()) - return; - - // Can't remember why "abstract" is indexed without a prefix - // (result: it's indexed twice actually). Maybe I'll dare change - // this one day - fldToTraits[Doc::keyabs] = FieldTraits(); - - fldToTraits["ext"] = FieldTraits("XE"); - - fldToTraits[Doc::keyfn] = FieldTraits(cstr_fnAsFieldPrefix); - fldToTraits[keySysFilenamePrefix] = FieldTraits(cstr_fnUnsplitPrefix); - - fldToTraits[cstr_caption] = FieldTraits("S"); - fldToTraits[Doc::keytt] = FieldTraits("S"); - fldToTraits["subject"] = FieldTraits("S"); - - fldToTraits[Doc::keyau] = FieldTraits("A"); - fldToTraits["creator"] = FieldTraits("A"); - fldToTraits["from"] = FieldTraits("A"); - - fldToTraits[Doc::keykw] = FieldTraits("K"); - fldToTraits["keyword"] = FieldTraits("K"); - fldToTraits["tag"] = FieldTraits("K"); - fldToTraits["tags"] = FieldTraits("K"); - - fldToTraits["xapyear"] = FieldTraits("Y"); - fldToTraits["xapyearmon"] = FieldTraits("M"); - fldToTraits["xapdate"] = FieldTraits("D"); - fldToTraits[Doc::keytp] = FieldTraits("T"); -} // Compute the unique term used to link documents to their origin. // "Q" + external udi @@ -171,6 +108,7 @@ static inline string make_uniterm(const string& udi) uniterm.append(udi); return uniterm; } + // Compute parent term used to link documents to their parent document (if any) // "F" + parent external udi static inline string make_parentterm(const string& udi) @@ -685,9 +623,6 @@ Db::Db(RclConfig *cfp) m_curtxtsz(0), m_flushtxtsz(0), m_occtxtsz(0), m_occFirstCheck(1), m_maxFsOccupPc(0), m_mode(Db::DbRO) { - if (!fldToTraits.size()) - initFldToTraits(); - m_ndb = new Native(this); if (m_config) { m_config->getConfParam("maxfsoccuppc", &m_maxFsOccupPc); @@ -976,22 +911,12 @@ bool Db::isopen() return m_ndb->m_isopen; } -// Try to translate field specification into field prefix. We have a -// default table used if translations are not in the config for some -// reason (old config not updated ?). We use it only if the config -// translation fails. Also we add in there fields which should be -// indexed with no prefix (ie: abstract) +// Try to translate field specification into field prefix. bool Db::fieldToTraits(const string& fld, const FieldTraits **ftpp) { if (m_config && m_config->getFieldTraits(fld, ftpp)) return true; - // No data in rclconfig? Check default values - map::const_iterator it = fldToTraits.find(fld); - if (it != fldToTraits.end()) { - *ftpp = &it->second; - return true; - } *ftpp = 0; return false; } @@ -1310,9 +1235,10 @@ bool Db::addOrUpdate(const string &udi, const string &parent_udi, // Mime type newdocument.add_term("T" + doc.mimetype); - // Simple file name indexed unsplit for file name searches with a - // term prefix We also add a term for the filename extension if - // any. + // Simple file name indexed unsplit for specific "file name" + // searches. This is not the same as a filename: clause inside the + // query language. + // We also add a term for the filename extension if any. string utf8fn; if (doc.getmeta(Doc::keyfn, &utf8fn) && !utf8fn.empty()) { string fn; diff --git a/src/sampleconf/fields b/src/sampleconf/fields index 50ad8155..018bc219 100644 --- a/src/sampleconf/fields +++ b/src/sampleconf/fields @@ -8,14 +8,17 @@ # - the field names MUST be all lowercase alphabetic ascii here. They can # be anycased in the documents. +[prefixes] + ##################################################### # This section defines what prefix the terms inside named fields will be # indexed with (in addition to prefix-less indexing for general search) -# ALL prefixes MUST be all UPPERCASE. Extension prefixes begin with X +# ALL prefixes MUST be all UPPERCASE. # -# The choice of field names is rather arbitrary. Use of any of the aliases -# defined in the following section will yield exactly the same results, -# (both for indexing and search). +# The field names should be the canonic ones, not the aliases defined in +# the following section. Don't change those which are predefined here, +# quite a few are hard-coded in the c++. But you can add more (for new +# fields emitted by filters). # # Fields can have two relevance boost factors defined, such as in: # caption = S ; wdfinc=10 @@ -27,22 +30,30 @@ # caption-based field query (ie: caption:mytitle or title:mytitle) at query # time. -[prefixes] - -# Native fields matching omega uses, which we index without an X first -# letter. Don't change these. Caption is used for 'title' to keep a last -# remnant of omega compatibility inside the data record. -# Also reserved/hardcoded: D(ate), M(onth), Y(ear), -# F(parentid), Q(uniqueid), T(mime type) -caption = S ; wdfinc = 10 +# The following ones are probably hard-coded in the c code, can't change at +# all. +# Also reserved: F(parentid), Q(uniqueid) +title = S ; wdfinc = 10 author = A -keywords = K +abstract = +caption = S +title = S +subject = S +author = A +keywords= K +xapyear = Y +xapyearmon = M +xapdate = D +mtype = T +rclUnsplitFN = XSFS +filename = XSFN +ext = XE # Extension examples. These are actually used by default by Recoll, you can # add your own to search for fields produced by the filters and not handled # by default. # Some values are internally reserved by recoll: -# XE (file ext), XP (for path elements), XSFN, XSFS, XXST, XXND, +# XE (file ext), XP (for path elements), XSFN, XSFS, XXST, XXND, XXPG # Using XX was not a good idea. # # I hereby commit to not using XY for Recoll: @@ -50,6 +61,7 @@ keywords = K # myfield = XYMYPREF recipient = XTO +[stored] ############################ # Some fields are stored in the document data record inside the index and # can be returned in result lists. There is no necessity that stored fields @@ -64,12 +76,12 @@ recipient = XTO # "rclaptg" is used for viewer specialization (depending on local config) # "rclbes" defines the backend type (ie normal fs, firefox cache). Should # probably be hardcoded, don't remove it -[stored] author= rclaptg= rclbes= filename= +[aliases] ########################## # This section defines field names aliases or synonyms. Any right hand side # value will be turned into the lhs canonic name before further treatment @@ -80,8 +92,6 @@ filename= # # Filters should only add canonic names to the meta array when indexing, # not aliases. - -[aliases] abstract = summary dc:summary description xesam:description author = creator dc:creator xesam:author xesam:creator from title = caption title dc:title subject @@ -97,18 +107,11 @@ mtype = mime mimetype xesam:mimetype contenttype xesam:contenttype dc:format recipient = to xesam:recipient url = dc:identifier xesam:url -######################### -# This section defines a hierarchy for field names. Searching for a lhs -# ancestor will be expanded to a search for itself and all rhs descendants -# This is not used for now -[specialisations] -author = from - +[xattrtofields] ###################### # Section to define translations from extended file attribute names to # field names. xattr use must be enabled at compile time for this to be # used. Enter translations as "xattrname = fieldname". Case matters. -[xattrtofields] ########################