added the possibility to extract arbitrary mail headers and use them as document fields. This forced an incompatible change in the format of the [stored] section inside the "fields" config file
This commit is contained in:
parent
1b8ce2fc72
commit
e6d5f72886
@ -590,10 +590,8 @@ bool RclConfig::readFieldsConfig(const string& cnferrloc)
|
||||
}
|
||||
#endif
|
||||
|
||||
string ss;
|
||||
if (m_fields->get("stored", ss, "stored")) {
|
||||
list<string> sl;
|
||||
stringToStrings(ss, sl);
|
||||
list<string> sl = m_fields->getNames("stored");
|
||||
if (!sl.empty()) {
|
||||
for (list<string>::const_iterator it = sl.begin();
|
||||
it != sl.end(); it++) {
|
||||
string fld = fieldCanon(stringtolower(*it));
|
||||
@ -635,6 +633,8 @@ bool RclConfig::getFieldPrefix(const string& _fld, string &pfx)
|
||||
bool RclConfig::getFieldSpecialisations(const string& fld,
|
||||
list<string>& children, bool top)
|
||||
{
|
||||
if (m_fields == 0)
|
||||
return false;
|
||||
string sclds;
|
||||
children.push_back(fld);
|
||||
if (m_fields->get(fld, sclds, "specialisations")) {
|
||||
@ -682,6 +682,22 @@ string RclConfig::fieldCanon(const string& f)
|
||||
return fld;
|
||||
}
|
||||
|
||||
/**
 * Return the entry names found in section sk of the "fields" configuration.
 *
 * @param sk    name of the section to enumerate (e.g. "mail").
 * @param patrn optional pattern, passed through unchanged to the underlying
 *              getNames() call.
 * @return the names reported by the "fields" configuration object, or an
 *         empty list when no "fields" configuration is loaded.
 */
list<string> RclConfig::getFieldSectNames(const string &sk, const char* patrn)
{
    list<string> names;
    // Without a "fields" configuration object there is nothing to enumerate.
    if (m_fields != 0)
        names = m_fields->getNames(sk, patrn);
    return names;
}
|
||||
|
||||
bool RclConfig::getFieldConfParam(const string &name, const string &sk,
|
||||
string &value)
|
||||
{
|
||||
if (m_fields == 0)
|
||||
return false;
|
||||
return m_fields->get(name, value, sk);
|
||||
}
|
||||
|
||||
|
||||
string RclConfig::getMimeViewerDef(const string &mtype, const string& apptag)
|
||||
{
|
||||
LOGDEB(("RclConfig::getMimeViewerDef: mtype %s apptag %s\n",
|
||||
|
||||
@ -195,7 +195,12 @@ class RclConfig {
|
||||
string fieldCanon(const string& fld);
|
||||
/** Get xattr name to field names translations */
|
||||
const map<string, string>& getXattrToField() {return m_xattrtofld;}
|
||||
|
||||
/** Get value of a parameter inside the "fields" file. Only some filters
|
||||
use this (e.g. mh_mail). The information specific to a given filter
|
||||
is typically stored in a separate section (e.g. [mail]) */
|
||||
list<string> getFieldSectNames(const string &sk, const char* = 0);
|
||||
bool getFieldConfParam(const string &name, const string &sk, string &value);
|
||||
|
||||
/** mimeview: get/set external viewer exec string(s) for mimetype(s) */
|
||||
string getMimeViewerDef(const string &mimetype, const string& apptag);
|
||||
bool getMimeViewerDefs(vector<pair<string, string> >&);
|
||||
|
||||
@ -772,9 +772,9 @@ fvwm
|
||||
&RCL; has no configured way to preview a given file type (which
|
||||
was indexed by name only), or no configured external editor for
|
||||
the file type. This can sometimes be adjusted simply by tweaking
|
||||
the <link linkend="rclinstall.config.mimemap">
|
||||
the <link linkend="rcl.install.config.mimemap">
|
||||
<filename>mimemap</filename></link> and
|
||||
<link linkend="rclinstall.config.mimeview">
|
||||
<link linkend="rcl.install.config.mimeview">
|
||||
<filename>mimeview</filename></link> configuration files (the latter
|
||||
can be modified with the user preferences dialog).</para>
|
||||
|
||||
@ -2114,7 +2114,7 @@ application/x-chm = execm rclchm
|
||||
</sect1>
|
||||
|
||||
<sect1 id="rcl.program.fields">
|
||||
<title>Field data processing configuration</title>
|
||||
<title>Field data processing</title>
|
||||
|
||||
<para><literal>Fields</literal> are named pieces of information
|
||||
in or about documents, like <literal>title</literal>,
|
||||
@ -2148,19 +2148,16 @@ application/x-chm = execm rclchm
|
||||
|
||||
</itemizedlist>
|
||||
|
||||
<para>A field can be either or both indexed and stored.</para>
|
||||
<para>A field can be either or both indexed and stored. This and
|
||||
other aspects of field handling are defined inside the
|
||||
<filename>fields</filename> configuration file.</para>
|
||||
|
||||
<para>You can find more information in the
|
||||
<link linkend="rcl.install.config.fields">section about the
|
||||
<filename>fields</filename> file</link>, or in comments inside the
|
||||
file.</para>
|
||||
|
||||
<para>A field becomes indexed by having a prefix defined in
|
||||
the <literal>[prefixes]</literal> section of the
|
||||
<filename>fields</filename> file. See the comments in there for
|
||||
details</para>
|
||||
|
||||
<para>A field becomes stored by appearing in
|
||||
the <literal>[stored]</literal> section of the
|
||||
<filename>fields</filename> file.</para>
|
||||
|
||||
<para>See the comments inside the <filename>fields</filename>
|
||||
for more details.</para>
|
||||
</sect1>
|
||||
|
||||
|
||||
@ -3393,7 +3390,89 @@ skippedPaths = ~/somedir/∗.txt
|
||||
|
||||
</sect2>
|
||||
|
||||
<sect2 id="rclinstall.config.mimemap">
|
||||
<sect2 id="rcl.install.config.fields">
|
||||
<title>The fields file</title>
|
||||
|
||||
<para>This file contains information about dynamic fields handling
|
||||
in &RCL;. Some very basic fields have hard-wired behaviour,
|
||||
and, mostly, you should not change the original data inside the
|
||||
<filename>fields</filename> file. But you can create custom fields
|
||||
fitting your data and handle them just like they were native
|
||||
ones.</para>
|
||||
|
||||
<para>The <filename>fields</filename> file has several sections,
|
||||
which each define an aspect of fields processing. Quite often,
|
||||
you'll have to modify several sections to obtain the desired
|
||||
behaviour.</para>
|
||||
|
||||
<para>We will only give a short description here; you should refer
|
||||
to the comments inside the file for more detailed information.</para>
|
||||
|
||||
<para>Field names should be lowercase alphabetic ASCII.</para>
|
||||
|
||||
<variablelist>
|
||||
|
||||
<varlistentry>
|
||||
<term>[prefixes]</term>
|
||||
<listitem><para>A field becomes indexed (searchable) by having
|
||||
a prefix defined in this section.</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
<varlistentry>
|
||||
<term>[stored]</term>
|
||||
<listitem><para>A field becomes stored (displayable inside
|
||||
results) by having its name listed in this section (typically
|
||||
with an empty value).</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
<varlistentry>
|
||||
<term>[aliases]</term>
|
||||
<listitem><para>This section defines lists of synonyms for the
|
||||
canonical names used inside the <literal>[prefixes]</literal>
|
||||
and <literal>[stored]</literal> sections</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
<varlistentry>
|
||||
<term>filter-specific sections</term>
|
||||
<listitem><para>Some filters may need specific
|
||||
configuration for handling fields. Only the mail message filter
|
||||
currently has such a section (named
|
||||
<literal>[mail]</literal>). It allows indexing arbitrary mail
|
||||
headers in addition to the ones indexed by default. Other such
|
||||
sections may appear in the future.</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
|
||||
<para>Here follows a small example of a personal
|
||||
<filename>fields</filename>
|
||||
file. This would extract a specific mail header and
|
||||
use it as a searchable field, with data displayable inside result
|
||||
lists. (Side note: as the mail filter does no decoding on the values,
|
||||
only plain ASCII headers can be indexed, and only the
|
||||
first occurrence will be used in the case of multiple occurrence
|
||||
headers).
|
||||
|
||||
<programlisting>[prefixes]
|
||||
# Index mailmytag contents (with the given prefix)
|
||||
mailmytag = XMTAG
|
||||
|
||||
[stored]
|
||||
# Store mailmytag inside the document data record
|
||||
mailmytag =
|
||||
|
||||
[mail]
|
||||
# Extract the X-My-Tag mail header, and use it internally with the
|
||||
# mailmytag field name
|
||||
x-my-tag = mailmytag
|
||||
</programlisting>
|
||||
</para>
|
||||
|
||||
|
||||
</sect2>
|
||||
|
||||
<sect2 id="rcl.install.config.mimemap">
|
||||
<title>The mimemap file</title>
|
||||
|
||||
<para><filename>mimemap</filename> specifies the
|
||||
@ -3429,7 +3508,7 @@ skippedPaths = ~/somedir/∗.txt
|
||||
|
||||
</sect2>
|
||||
|
||||
<sect2 id="rclinstall.config.mimeconf">
|
||||
<sect2 id="rcl.install.config.mimeconf">
|
||||
<title>The mimeconf file</title>
|
||||
|
||||
<para><filename>mimeconf</filename> specifies how the
|
||||
@ -3447,7 +3526,7 @@ skippedPaths = ~/somedir/∗.txt
|
||||
<filename>recoll.conf</filename>).</para>
|
||||
|
||||
</sect2>
|
||||
<sect2 id="rclinstall.config.mimeview">
|
||||
<sect2 id="rcl.install.config.mimeview">
|
||||
<title>The mimeview file</title>
|
||||
|
||||
<para><filename>mimeview</filename> specifies which programs
|
||||
@ -3484,10 +3563,10 @@ skippedPaths = ~/somedir/∗.txt
|
||||
(which is set to use <command>xdg-open</command> by default).</para>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="rclinstall.config.examples">
|
||||
<sect2 id="rcl.install.config.examples">
|
||||
<title>Examples of configuration adjustments</title>
|
||||
|
||||
<sect3 id="rclinstall.config.examples.addview">
|
||||
<sect3 id="rcl.install.config.examples.addview">
|
||||
<title>Adding an external viewer for a non-indexed type</title>
|
||||
|
||||
<para>Imagine that you have some kind of file which does not
|
||||
@ -3532,7 +3611,7 @@ application/x-blobapp = blobviewer %f
|
||||
|
||||
</sect3>
|
||||
|
||||
<sect3 id="rclinstall.config.examples.addindex">
|
||||
<sect3 id="rcl.install.config.examples.addindex">
|
||||
<title>Adding indexing support for a new file type</title>
|
||||
|
||||
<para>Let us now imagine that the above
|
||||
|
||||
@ -55,6 +55,22 @@ static const string cstr_title = "title";
|
||||
static const string cstr_msgid = "msgid";
|
||||
static const string cstr_abstract = "abstract";
|
||||
|
||||
/**
 * Build a mail handler for the given MIME type and load the list of
 * additional mail headers to be processed.
 *
 * The entry names of the [mail] section of the "fields" configuration are
 * used as header names, and each entry's value as the field name under which
 * the header value is recorded. Both are kept in m_addProcdHdrs.
 */
MimeHandlerMail::MimeHandlerMail(const string &mt)
    : RecollFilter(mt), m_bincdoc(0), m_fd(-1), m_stream(0), m_idx(-1)
{
    // Names of the extra headers requested by the configuration. When there
    // are none the loop below does not execute and the map stays empty.
    const list<string> extraHeaders =
        RclConfig::getMainConfig()->getFieldSectNames("mail");

    list<string>::const_iterator hdr = extraHeaders.begin();
    while (hdr != extraHeaders.end()) {
        // operator[] creates the map slot for this header; the lookup then
        // writes the target field name straight into it. The lookup status
        // is deliberately ignored.
        string &fieldName = m_addProcdHdrs[*hdr];
        (void)RclConfig::getMainConfig()->
            getFieldConfParam(*hdr, "mail", fieldName);
        ++hdr;
    }
}
|
||||
|
||||
MimeHandlerMail::~MimeHandlerMail()
|
||||
{
|
||||
clear();
|
||||
@ -96,7 +112,6 @@ bool MimeHandlerMail::set_document_file(const string &fn)
|
||||
reason.c_str()));
|
||||
}
|
||||
|
||||
|
||||
m_fd = open(fn.c_str(), 0);
|
||||
if (m_fd < 0) {
|
||||
LOGERR(("MimeHandlerMail::set_document_file: open(%s) errno %d\n",
|
||||
@ -352,6 +367,21 @@ bool MimeHandlerMail::processMsg(Binc::MimePart *doc, int depth)
|
||||
}
|
||||
text += string("Subject: ") + transcoded + string("\n");
|
||||
}
|
||||
|
||||
// Check for the presence of configured additional headers and possibly
|
||||
// add them to the metadata (with appropriate field name).
|
||||
if (!m_addProcdHdrs.empty()) {
|
||||
for (map<string, string>::const_iterator it = m_addProcdHdrs.begin();
|
||||
it != m_addProcdHdrs.end(); it++) {
|
||||
if (!it->second.empty()) {
|
||||
string hval;
|
||||
if (doc->h.getFirstHeader(it->first, hi)) {
|
||||
m_metaData[it->second] = hi.getValue();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
text += '\n';
|
||||
m_startoftext = text.size();
|
||||
LOGDEB2(("MimeHandlerMail::processMsg:ismultipart %d mime subtype '%s'\n",
|
||||
|
||||
@ -20,7 +20,9 @@
|
||||
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
using std::vector;
|
||||
using std::map;
|
||||
|
||||
#include "mimehandler.h"
|
||||
|
||||
@ -38,9 +40,7 @@ class MHMailAttach;
|
||||
*/
|
||||
class MimeHandlerMail : public RecollFilter {
|
||||
public:
|
||||
MimeHandlerMail(const string &mt)
|
||||
: RecollFilter(mt), m_bincdoc(0), m_fd(-1), m_stream(0), m_idx(-1)
|
||||
{}
|
||||
MimeHandlerMail(const string &mt);
|
||||
virtual ~MimeHandlerMail();
|
||||
virtual bool set_document_file(const string& file_path);
|
||||
virtual bool set_document_string(const string& data);
|
||||
@ -69,6 +69,8 @@ private:
|
||||
string::size_type m_startoftext;
|
||||
string m_subject;
|
||||
vector<MHMailAttach *> m_attachments;
|
||||
// Additional headers to be processed as per config + field name translation
|
||||
map<string,string> m_addProcdHdrs;
|
||||
};
|
||||
|
||||
class MHMailAttach {
|
||||
|
||||
@ -3,8 +3,8 @@
|
||||
# author:Hemingway
|
||||
#
|
||||
# Important:
|
||||
# - the field names MUST be all lowercase here. They can be anycased
|
||||
# in the documents.
|
||||
# - the field names MUST be all lowercase alphabetic ascii here. They can
|
||||
# be anycased in the documents.
|
||||
|
||||
#####################################################
|
||||
# This section defines what prefix the terms inside named fields will be
|
||||
@ -43,7 +43,9 @@ recipient = XTO
|
||||
# "author" used to be stored by default, now set here as optional
|
||||
# "rclaptg" is used for viewer specialization (depending on local config)
|
||||
[stored]
|
||||
stored = author rclaptg rclbes
|
||||
author=
|
||||
rclaptg=
|
||||
rclbes=
|
||||
|
||||
##########################
|
||||
# This section defines field names aliases or synonyms. Any right hand side
|
||||
@ -84,3 +86,18 @@ author = from
|
||||
# field names. xattr use must be enabled at compile time for this to be
|
||||
# used. Enter translations as "xattrname = fieldname". Case matters.
|
||||
[xattrtofields]
|
||||
|
||||
|
||||
########################
|
||||
# Sections reserved for specific filters follow
|
||||
#
|
||||
|
||||
##########################
|
||||
# Mail filter section. You can specify mail headers to be indexed
|
||||
# in addition to the standard ones: (To:, Cc:, From:, Subject:, Date,
|
||||
# Message-Id), along with the field name to be used. For this to be useful,
|
||||
# the field name should also be listed in the [prefixes] and possibly the
|
||||
# [stored] sections.
|
||||
#
|
||||
# [mail]
|
||||
# x-my-tag = mymailtag
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user