Added the possibility to extract arbitrary mail headers and use them as document fields. This forced an incompatible change in the format of the [stored] section inside the "fields" config file.

This commit is contained in:
Jean-Francois Dockes 2010-07-06 17:16:36 +02:00
parent 1b8ce2fc72
commit e6d5f72886
6 changed files with 181 additions and 32 deletions

View File

@ -590,10 +590,8 @@ bool RclConfig::readFieldsConfig(const string& cnferrloc)
}
#endif
string ss;
if (m_fields->get("stored", ss, "stored")) {
list<string> sl;
stringToStrings(ss, sl);
list<string> sl = m_fields->getNames("stored");
if (!sl.empty()) {
for (list<string>::const_iterator it = sl.begin();
it != sl.end(); it++) {
string fld = fieldCanon(stringtolower(*it));
@ -635,6 +633,8 @@ bool RclConfig::getFieldPrefix(const string& _fld, string &pfx)
bool RclConfig::getFieldSpecialisations(const string& fld,
list<string>& children, bool top)
{
if (m_fields == 0)
return false;
string sclds;
children.push_back(fld);
if (m_fields->get(fld, sclds, "specialisations")) {
@ -682,6 +682,22 @@ string RclConfig::fieldCanon(const string& f)
return fld;
}
// List the entry names found in section 'sk' of the "fields" configuration.
// 'patrn' is passed through unchanged to the underlying getNames() call.
// Returns an empty list when no fields configuration is loaded (m_fields
// is null).
list<string> RclConfig::getFieldSectNames(const string &sk, const char* patrn)
{
    return m_fields ? m_fields->getNames(sk, patrn) : list<string>();
}
// Fetch the value of parameter 'name' from section 'sk' of the "fields"
// configuration into 'value'. Returns false when no fields configuration
// is loaded (m_fields is null); otherwise returns the outcome of the
// underlying get() call.
bool RclConfig::getFieldConfParam(const string &name, const string &sk,
                                  string &value)
{
    return m_fields != 0 && m_fields->get(name, value, sk);
}
string RclConfig::getMimeViewerDef(const string &mtype, const string& apptag)
{
LOGDEB(("RclConfig::getMimeViewerDef: mtype %s apptag %s\n",

View File

@ -195,7 +195,12 @@ class RclConfig {
string fieldCanon(const string& fld);
/** Get xattr name to field names translations */
const map<string, string>& getXattrToField() {return m_xattrtofld;}
/** Access data inside the "fields" file: list the names in a section,
or get the value of a parameter. Only some filters use this
(e.g. mh_mail). The information specific to a given filter
is typically stored in a separate section (e.g. [mail]) */
list<string> getFieldSectNames(const string &sk, const char* = 0);
bool getFieldConfParam(const string &name, const string &sk, string &value);
/** mimeview: get/set external viewer exec string(s) for mimetype(s) */
string getMimeViewerDef(const string &mimetype, const string& apptag);
bool getMimeViewerDefs(vector<pair<string, string> >&);

View File

@ -772,9 +772,9 @@ fvwm
&RCL; has no configured way to preview a given file type (which
was indexed by name only), or no configured external editor for
the file type. This can sometimes be adjusted simply by tweaking
the <link linkend="rclinstall.config.mimemap">
the <link linkend="rcl.install.config.mimemap">
<filename>mimemap</filename></link> and
<link linkend="rclinstall.config.mimeview">
<link linkend="rcl.install.config.mimeview">
<filename>mimeview</filename></link> configuration files (the latter
can be modified with the user preferences dialog).</para>
@ -2114,7 +2114,7 @@ application/x-chm = execm rclchm
</sect1>
<sect1 id="rcl.program.fields">
<title>Field data processing configuration</title>
<title>Field data processing</title>
<para><literal>Fields</literal> are named pieces of information
in or about documents, like <literal>title</literal>,
@ -2148,19 +2148,16 @@ application/x-chm = execm rclchm
</itemizedlist>
<para>A field can be either or both indexed and stored.</para>
<para>A field can be either or both indexed and stored. This and
other aspects of fields handling are defined inside the
<filename>fields</filename> configuration file.</para>
<para>You can find more information in the
<link linkend="rcl.install.config.fields">section about the
<filename>fields</filename> file</link>, or in comments inside the
file.</para>
<para>A field becomes indexed by having a prefix defined in
the <literal>[prefixes]</literal> section of the
<filename>fields</filename> file. See the comments in there for
details</para>
<para>A field becomes stored by appearing in
the <literal>[stored]</literal> section of the
<filename>fields</filename> file.</para>
<para>See the comments inside the <filename>fields</filename>
for more details.</para>
</sect1>
@ -3393,7 +3390,89 @@ skippedPaths = ~/somedir/&lowast;.txt
</sect2>
<sect2 id="rclinstall.config.mimemap">
<sect2 id="rcl.install.config.fields">
<title>The fields file</title>
<para>This file contains information about dynamic fields handling
in &RCL;. Some very basic fields have hard-wired behaviour,
and, mostly, you should not change the original data inside the
<filename>fields</filename> file. But you can create custom fields
fitting your data and handle them just like they were native
ones.</para>
<para>The <filename>fields</filename> file has several sections,
which each define an aspect of fields processing. Quite often,
you'll have to modify several sections to obtain the desired
behaviour.</para>
<para>We will only give a short description here, you should refer
to the comments inside the file for more detailed information.</para>
<para>Field names should be lowercase alphabetic ASCII.</para>
<variablelist>
<varlistentry>
<term>[prefixes]</term>
<listitem><para>A field becomes indexed (searchable) by having
a prefix defined in this section.</para>
</listitem>
</varlistentry>
<varlistentry>
<term>[stored]</term>
<listitem><para>A field becomes stored (displayable inside
results) by having its name listed in this section (typically
with an empty value).</para>
</listitem>
</varlistentry>
<varlistentry>
<term>[aliases]</term>
<listitem><para>This section defines lists of synonyms for the
canonical names used inside the <literal>[prefixes]</literal>
and <literal>[stored]</literal> sections</para>
</listitem>
</varlistentry>
<varlistentry>
<term>filter-specific sections</term>
<listitem><para>Some filters may need specific
configuration for handling fields. Only the mail message filter
currently has such a section (named
<literal>[mail]</literal>). It allows indexing arbitrary mail
headers in addition to the ones indexed by default. Other such
sections may appear in the future.</para>
</listitem>
</varlistentry>
</variablelist>
<para>Here follows a small example of a personal
<filename>fields</filename>
file. This would extract a specific mail header and
use it as a searchable field, with data displayable inside result
lists. (Side note: as the mail filter does no decoding on the values,
only plain ASCII headers can be indexed, and only the
first occurrence will be used in the case of multiple occurrence
headers).
<programlisting>[prefixes]
# Index mailmytag contents (with the given prefix)
mailmytag = XMTAG
[stored]
# Store mailmytag inside the document data record
mailmytag =
[mail]
# Extract the X-My-Tag mail header, and use it internally with the
# mailmytag field name
x-my-tag = mailmytag
</programlisting>
</para>
</sect2>
<sect2 id="rcl.install.config.mimemap">
<title>The mimemap file</title>
<para><filename>mimemap</filename> specifies the
@ -3429,7 +3508,7 @@ skippedPaths = ~/somedir/&lowast;.txt
</sect2>
<sect2 id="rclinstall.config.mimeconf">
<sect2 id="rcl.install.config.mimeconf">
<title>The mimeconf file</title>
<para><filename>mimeconf</filename> specifies how the
@ -3447,7 +3526,7 @@ skippedPaths = ~/somedir/&lowast;.txt
<filename>recoll.conf</filename>).</para>
</sect2>
<sect2 id="rclinstall.config.mimeview">
<sect2 id="rcl.install.config.mimeview">
<title>The mimeview file</title>
<para><filename>mimeview</filename> specifies which programs
@ -3484,10 +3563,10 @@ skippedPaths = ~/somedir/&lowast;.txt
(which is set to use <command>xdg-open</command> by default).</para>
</sect2>
<sect2 id="rclinstall.config.examples">
<sect2 id="rcl.install.config.examples">
<title>Examples of configuration adjustments</title>
<sect3 id="rclinstall.config.examples.addview">
<sect3 id="rcl.install.config.examples.addview">
<title>Adding an external viewer for a non-indexed type</title>
<para>Imagine that you have some kind of file which does not
@ -3532,7 +3611,7 @@ application/x-blobapp = blobviewer %f
</sect3>
<sect3 id="rclinstall.config.examples.addindex">
<sect3 id="rcl.install.config.examples.addindex">
<title>Adding indexing support for a new file type</title>
<para>Let us now imagine that the above

View File

@ -55,6 +55,22 @@ static const string cstr_title = "title";
static const string cstr_msgid = "msgid";
static const string cstr_abstract = "abstract";
// Build a mail handler for MIME type 'mt'. Besides the base initialisation,
// this reads the "mail" section of the "fields" configuration: every entry
// name found there becomes a key in m_addProcdHdrs, and the entry's
// configured value is stored as the mapped value.
MimeHandlerMail::MimeHandlerMail(const string &mt)
    : RecollFilter(mt), m_bincdoc(0), m_fd(-1), m_stream(0), m_idx(-1)
{
    const string mailsect("mail");
    const list<string> names =
        RclConfig::getMainConfig()->getFieldSectNames(mailsect);

    // Nothing configured yields an empty list, and the loop does nothing.
    list<string>::const_iterator hdr = names.begin();
    while (hdr != names.end()) {
        // operator[] creates the map entry up front, so the header is
        // recorded whatever the outcome of the parameter lookup; the
        // lookup's return value is deliberately ignored.
        string &fieldname = m_addProcdHdrs[*hdr];
        (void)RclConfig::getMainConfig()->
            getFieldConfParam(*hdr, mailsect, fieldname);
        ++hdr;
    }
}
MimeHandlerMail::~MimeHandlerMail()
{
clear();
@ -96,7 +112,6 @@ bool MimeHandlerMail::set_document_file(const string &fn)
reason.c_str()));
}
m_fd = open(fn.c_str(), 0);
if (m_fd < 0) {
LOGERR(("MimeHandlerMail::set_document_file: open(%s) errno %d\n",
@ -352,6 +367,21 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
}
text += string("Subject: ") + transcoded + string("\n");
}
// Check for the presence of configured additional headers and possibly
// add them to the metadata (with appropriate field name).
if (!m_addProcdHdrs.empty()) {
for (map<string, string>::const_iterator it = m_addProcdHdrs.begin();
it != m_addProcdHdrs.end(); it++) {
if (!it->second.empty()) {
string hval;
if (doc->h.getFirstHeader(it->first, hi)) {
m_metaData[it->second] = hi.getValue();
}
}
}
}
text += '\n';
m_startoftext = text.size();
LOGDEB2(("MimeHandlerMail::processMsg:ismultipart %d mime subtype '%s'\n",

View File

@ -20,7 +20,9 @@
#include <sstream>
#include <vector>
#include <map>
using std::vector;
using std::map;
#include "mimehandler.h"
@ -38,9 +40,7 @@ class MHMailAttach;
*/
class MimeHandlerMail : public RecollFilter {
public:
MimeHandlerMail(const string &mt)
: RecollFilter(mt), m_bincdoc(0), m_fd(-1), m_stream(0), m_idx(-1)
{}
MimeHandlerMail(const string &mt);
virtual ~MimeHandlerMail();
virtual bool set_document_file(const string& file_path);
virtual bool set_document_string(const string& data);
@ -69,6 +69,8 @@ private:
string::size_type m_startoftext;
string m_subject;
vector<MHMailAttach *> m_attachments;
// Additional headers to be processed as per config + field name translation
map<string,string> m_addProcdHdrs;
};
class MHMailAttach {

View File

@ -3,8 +3,8 @@
# author:Hemingway
#
# Important:
# - the field names MUST be all lowercase here. They can be anycased
# in the documents.
# - the field names MUST be all lowercase alphabetic ascii here. They can
# be anycased in the documents.
#####################################################
# This section defines what prefix the terms inside named fields will be
@ -43,7 +43,9 @@ recipient = XTO
# "author" used to be stored by default, now set here as optional
# "rclaptg" is used for viewer specialization (depending on local config)
[stored]
stored = author rclaptg rclbes
author=
rclaptg=
rclbes=
##########################
# This section defines field names aliases or synonyms. Any right hand side
@ -84,3 +86,18 @@ author = from
# field names. xattr use must be enabled at compile time for this to be
# used. Enter translations as "xattrname = fieldname". Case matters.
[xattrtofields]
########################
# Sections reserved for specific filters follow
#
##########################
# Mail filter section. You can specify mail headers to be indexed
# in addition to the standard ones: (To:, Cc:, From:, Subject:, Date:,
# Message-Id:), along with the field name to be used. For this to be useful,
# the field name should also be listed in the [prefixes] and possibly the
# [stored] sections.
#
# [mail]
# x-my-tag = mymailtag