diff --git a/src/common/rclconfig.cpp b/src/common/rclconfig.cpp index 2719ee1d..5e235c2e 100644 --- a/src/common/rclconfig.cpp +++ b/src/common/rclconfig.cpp @@ -590,10 +590,8 @@ bool RclConfig::readFieldsConfig(const string& cnferrloc) } #endif - string ss; - if (m_fields->get("stored", ss, "stored")) { - list sl; - stringToStrings(ss, sl); + list sl = m_fields->getNames("stored"); + if (!sl.empty()) { for (list::const_iterator it = sl.begin(); it != sl.end(); it++) { string fld = fieldCanon(stringtolower(*it)); @@ -635,6 +633,8 @@ bool RclConfig::getFieldPrefix(const string& _fld, string &pfx) bool RclConfig::getFieldSpecialisations(const string& fld, list& children, bool top) { + if (m_fields == 0) + return false; string sclds; children.push_back(fld); if (m_fields->get(fld, sclds, "specialisations")) { @@ -682,6 +682,22 @@ string RclConfig::fieldCanon(const string& f) return fld; } +list RclConfig::getFieldSectNames(const string &sk, const char* patrn) +{ + if (m_fields == 0) + return list(); + return m_fields->getNames(sk, patrn); +} + +bool RclConfig::getFieldConfParam(const string &name, const string &sk, + string &value) +{ + if (m_fields == 0) + return false; + return m_fields->get(name, value, sk); +} + + string RclConfig::getMimeViewerDef(const string &mtype, const string& apptag) { LOGDEB(("RclConfig::getMimeViewerDef: mtype %s apptag %s\n", diff --git a/src/common/rclconfig.h b/src/common/rclconfig.h index d5d79f3c..cac1087a 100644 --- a/src/common/rclconfig.h +++ b/src/common/rclconfig.h @@ -195,7 +195,12 @@ class RclConfig { string fieldCanon(const string& fld); /** Get xattr name to field names translations */ const map& getXattrToField() {return m_xattrtofld;} - + /** Get value of a parameter inside the "fields" file. Only some filters + use this (ie: mh_mail). 
The information specific to a given filter + is typically stored in a separate section (ie: [mail]) */ + list getFieldSectNames(const string &sk, const char* = 0); + bool getFieldConfParam(const string &name, const string &sk, string &value); + /** mimeview: get/set external viewer exec string(s) for mimetype(s) */ string getMimeViewerDef(const string &mimetype, const string& apptag); bool getMimeViewerDefs(vector >&); diff --git a/src/doc/user/usermanual.sgml b/src/doc/user/usermanual.sgml index 9ac87968..2bcef666 100644 --- a/src/doc/user/usermanual.sgml +++ b/src/doc/user/usermanual.sgml @@ -772,9 +772,9 @@ fvwm &RCL; has no configured way to preview a given file type (which was indexed by name only), or no configured external editor for the file type. This can sometimes be adjusted simply by tweaking - the + the mimemap and - + mimeview configuration files (the latter can be modified with the user preferences dialog). @@ -2114,7 +2114,7 @@ application/x-chm = execm rclchm - Field data processing configuration + Field data processing Fields are named pieces of information in or about documents, like title, @@ -2148,19 +2148,16 @@ application/x-chm = execm rclchm - A field can be either or both indexed and stored. + A field can be either or both indexed and stored. This and + other aspects of fields handling are defined inside the + fields configuration file. + + You can find more information in the + section about the + fields file, or in comments inside the + file. - A field becomes indexed by having a prefix defined in - the [prefixes] section of the - fields file. See the comments in there for - details - A field becomes stored by appearing in - the [stored] section of the - fields file. - - See the comments inside the fields - for more details. @@ -3393,7 +3390,89 @@ skippedPaths = ~/somedir/∗.txt - + + The fields file + + This file contains information about dynamic fields handling + in &RCL;. 
Some very basic fields have hard-wired behaviour, + and, mostly, you should not change the original data inside the + fields file. But you can create custom fields + fitting your data and handle them just as if they were native + ones. + + The fields file has several sections, + which each define an aspect of fields processing. Quite often, + you'll have to modify several sections to obtain the desired + behaviour. + + We will only give a short description here; you should refer + to the comments inside the file for more detailed information. + + Field names should be lowercase alphabetic ASCII. + + + + + [prefixes] + A field becomes indexed (searchable) by having + a prefix defined in this section. + + + + [stored] + A field becomes stored (displayable inside + results) by having its name listed in this section (typically + with an empty value). + + + + [aliases] + This section defines lists of synonyms for the + canonical names used inside the [prefixes] + and [stored] sections. + + + + filter-specific sections + Some filters may need specific + configuration for handling fields. Only the mail message filter + currently has such a section (named + [mail]). It allows indexing arbitrary mail + headers in addition to the ones indexed by default. Other such + sections may appear in the future. + + + + + + Here follows a small example of a personal + fields + file. This would extract a specific mail header and + use it as a searchable field, with data displayable inside result + lists. (Side note: as the mail filter does no decoding on the values, + only plain ASCII headers can be indexed, and only the + first occurrence will be used in the case of multiple occurrence + headers). 
+ +[prefixes] +# Index mailmytag contents (with the given prefix) +mailmytag = XMTAG + +[stored] +# Store mailmytag inside the document data record +mailmytag = + +[mail] +# Extract the X-My-Tag mail header, and use it internally with the +# mailmytag field name +x-my-tag = mailmytag + + + + + + + The mimemap file mimemap specifies the @@ -3429,7 +3508,7 @@ skippedPaths = ~/somedir/∗.txt - + The mimeconf file mimeconf specifies how the @@ -3447,7 +3526,7 @@ skippedPaths = ~/somedir/∗.txt recoll.conf). - + The mimeview file mimeview specifies which programs @@ -3484,10 +3563,10 @@ skippedPaths = ~/somedir/∗.txt (which is set to use xdg-open by default). - + Examples of configuration adjustments - + Adding an external viewer for an non-indexed type Imagine that you have some kind of file which does not @@ -3532,7 +3611,7 @@ application/x-blobapp = blobviewer %f - + Adding indexing support for a new file type Let us now imagine that the above diff --git a/src/internfile/mh_mail.cpp b/src/internfile/mh_mail.cpp index d18f145d..f5c5d8e8 100644 --- a/src/internfile/mh_mail.cpp +++ b/src/internfile/mh_mail.cpp @@ -55,6 +55,22 @@ static const string cstr_title = "title"; static const string cstr_msgid = "msgid"; static const string cstr_abstract = "abstract"; +MimeHandlerMail::MimeHandlerMail(const string &mt) + : RecollFilter(mt), m_bincdoc(0), m_fd(-1), m_stream(0), m_idx(-1) +{ + + // Look for additional headers to be processed as per config: + list hdrnames = + RclConfig::getMainConfig()->getFieldSectNames("mail"); + if (hdrnames.empty()) + return; + for (list::const_iterator it = hdrnames.begin(); + it != hdrnames.end(); it++) { + (void)RclConfig::getMainConfig()-> + getFieldConfParam(*it, "mail", m_addProcdHdrs[*it]); + } +} + MimeHandlerMail::~MimeHandlerMail() { clear(); @@ -96,7 +112,6 @@ bool MimeHandlerMail::set_document_file(const string &fn) reason.c_str())); } - m_fd = open(fn.c_str(), 0); if (m_fd < 0) { LOGERR(("MimeHandlerMail::set_document_file: 
open(%s) errno %d\n", @@ -352,6 +367,21 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth) } text += string("Subject: ") + transcoded + string("\n"); } + + // Check for the presence of configured additional headers and possibly + // add them to the metadata (with appropriate field name). + if (!m_addProcdHdrs.empty()) { + for (map::const_iterator it = m_addProcdHdrs.begin(); + it != m_addProcdHdrs.end(); it++) { + if (!it->second.empty()) { + string hval; + if (doc->h.getFirstHeader(it->first, hi)) { + m_metaData[it->second] = hi.getValue(); + } + } + } + } + text += '\n'; m_startoftext = text.size(); LOGDEB2(("MimeHandlerMail::processMsg:ismultipart %d mime subtype '%s'\n", diff --git a/src/internfile/mh_mail.h b/src/internfile/mh_mail.h index 7b88eb13..2405ee7a 100644 --- a/src/internfile/mh_mail.h +++ b/src/internfile/mh_mail.h @@ -20,7 +20,9 @@ #include #include +#include using std::vector; +using std::map; #include "mimehandler.h" @@ -38,9 +40,7 @@ class MHMailAttach; */ class MimeHandlerMail : public RecollFilter { public: - MimeHandlerMail(const string &mt) - : RecollFilter(mt), m_bincdoc(0), m_fd(-1), m_stream(0), m_idx(-1) - {} + MimeHandlerMail(const string &mt); virtual ~MimeHandlerMail(); virtual bool set_document_file(const string& file_path); virtual bool set_document_string(const string& data); @@ -69,6 +69,8 @@ private: string::size_type m_startoftext; string m_subject; vector m_attachments; + // Additional headers to be process as per config + field name translation + map m_addProcdHdrs; }; class MHMailAttach { diff --git a/src/sampleconf/fields b/src/sampleconf/fields index 19b29700..b4567335 100644 --- a/src/sampleconf/fields +++ b/src/sampleconf/fields @@ -3,8 +3,8 @@ # author:Hemingway # # Important: -# - the field names MUST be all lowercase here. They can be anycased -# in the documents. +# - the field names MUST be all lowercase alphabetic ascii here. They can +# be anycased in the documents. 
##################################################### # This section defines what prefix the terms inside named fields will be @@ -43,7 +43,9 @@ recipient = XTO # "author" used to be stored by default, now set here as optional # "rclaptg" is used for viewer specialization (depending on local config) [stored] -stored = author rclaptg rclbes +author= +rclaptg= +rclbes= ########################## # This section defines field names aliases or synonyms. Any right hand side @@ -84,3 +86,18 @@ author = from # field names. xattr use must be enabled at compile time for this to be # used. Enter translations as "xattrname = fieldname". Case matters. [xattrtofields] + + +######################## +# Sections reserved for specific filters follow +# + +########################## +# Mail filter section. You can specify mail headers to be indexed +# in addition to the standard ones: (To:, Cc:, From:, Subject:, Date:, +# Message-Id:), along with the field name to be used. For this to be useful, +# the field name should also be listed in the [prefixes] and possibly the +# [stored] sections. +# +# [mail] +# x-my-tag = mymailtag