add possibility to import tags from external app, e.g. tmsu

This commit is contained in:
Jean-Francois Dockes 2013-01-22 18:30:18 +01:00
parent fe2f32fc38
commit 4272939c3d
16 changed files with 221 additions and 82 deletions

View File

@ -410,28 +410,6 @@ const string& RclConfig::getDefCharset(bool filename) const
}
}
// Read the "localfields" parameter for the current key directory and
// merge the name/value pairs it defines into *tgt.
// Returns false if tgt is null or the parameter is not set, else true.
bool RclConfig::addLocalFields(map<string, string> *tgt) const
{
    LOGDEB0(("RclConfig::addLocalFields: keydir [%s]\n", m_keydir.c_str()));
    if (tgt == 0)
        return false;
    string spec;
    if (!getConfParam("localfields", spec))
        return false;

    // The entries are colon-separated (':' can't be escaped). Turn the
    // separators into newlines so that the text can be parsed by a ConfSimple
    for (string::iterator cp = spec.begin(); cp != spec.end(); ++cp) {
        if (*cp == ':')
            *cp = '\n';
    }

    // Copy every parsed entry into the target map
    ConfSimple parsed(spec, 1, true);
    const vector<string> names = parsed.getNames(cstr_null);
    for (vector<string>::size_type i = 0; i < names.size(); ++i) {
        const string& nm = names[i];
        parsed.get(nm, (*tgt)[nm]);
        LOGDEB(("RclConfig::addLocalFields: [%s] => [%s]\n",
                nm.c_str(), (*tgt)[nm].c_str()));
    }
    return true;
}
// Get all known document mime values. We get them from the mimeconf
// 'index' submap.
// It's quite possible that there are other mime types in the index
@ -630,15 +608,18 @@ bool RclConfig::valueSplitAttributes(const string& whole, string& value,
// Handle additional attributes. We substitute the semi-colons
// with newlines and use a ConfSimple
if (!attrstr.empty()) {
for (string::size_type i = 0; i < attrstr.size(); i++)
for (string::size_type i = 0; i < attrstr.size(); i++) {
if (attrstr[i] == ';')
attrstr[i] = '\n';
attrs = ConfSimple(attrstr);
}
attrs.reparse(attrstr);
} else {
attrs.clear();
}
return true;
}
bool RclConfig::getMissingHelperDesc(string& out) const
{
string fmiss = path_cat(getConfDir(), "missing");

View File

@ -163,9 +163,6 @@ class RclConfig {
Doesn't depend on the keydir */
vector<string> getDaemSkippedPaths() const;
/** conf: Add local fields to target dic */
bool addLocalFields(map<string, string> *tgt) const;
/**
* mimemap: Check if file name should be ignored because of suffix
*

View File

@ -4644,9 +4644,10 @@ unac_except_trans =
under a given directory. Typical usage would be to set an
"rclaptg" field, to be used in <filename>mimeview</filename> to
select a specific viewer. If several fields are to be set, they
should be separated with a colon (':') character (which there
is currently no way to escape). Ie:
<literal>localfields= rclaptg=gnus:other = val</literal>, then
should be separated with a semi-colon (';') character, which there
is currently no way to escape. Also note the initial semi-colon.
Example:
<literal>localfields= ;rclaptg=gnus;other = val</literal>, then
select a specific viewer with
<literal>mimetype|tag=...</literal> in
<filename>mimeview</filename>.</para>

View File

@ -29,7 +29,7 @@ subtreelist : $(SUBTREELIST_OBJS)
subtreelist.o : subtreelist.cpp
$(CXX) $(ALL_CXXFLAGS) -DTEST_SUBTREELIST -c subtreelist.cpp
MIMETYPE_OBJS= trmimetype.o $(BIGLIB)
MIMETYPE_OBJS= trmimetype.o
mimetype : $(MIMETYPE_OBJS)
$(CXX) $(ALL_CXXFLAGS) -o mimetype $(MIMETYPE_OBJS) \
$(LIBRECOLL) $(LIBICONV) $(LIBSYS)

View File

@ -45,6 +45,7 @@
#include "fileudi.h"
#include "cancelcheck.h"
#include "rclinit.h"
#include "execmd.h"
// When using extended attributes, we have to use the ctime.
// This is quite an expensive price to pay...
@ -71,12 +72,13 @@ extern void *FsIndexerDbUpdWorker(void*);
class InternfileTask {
public:
InternfileTask(const std::string &f, const struct stat *i_stp,
map<string,string> lfields)
: fn(f), statbuf(*i_stp), localfields(lfields)
map<string,string> lfields, vector<MDReaper> reapers)
: fn(f), statbuf(*i_stp), localfields(lfields), mdreapers(reapers)
{}
string fn;
struct stat statbuf;
map<string,string> localfields;
vector<MDReapers> mdreapers;
};
extern void *FsIndexerInternfileWorker(void*);
#endif // IDX_THREADS
@ -108,6 +110,7 @@ FsIndexer::FsIndexer(RclConfig *cnf, Rcl::Db *db, DbIxStatusUpdater *updfunc)
{
LOGDEB1(("FsIndexer::FsIndexer\n"));
m_havelocalfields = m_config->hasNameAnywhere("localfields");
m_havemdreapers = m_config->hasNameAnywhere("metadatacmds");
#ifdef IDX_THREADS
m_stableconfig = new RclConfig(*m_config);
@ -311,6 +314,9 @@ bool FsIndexer::indexFiles(list<string>& files, ConfIndexer::IxFlag flag)
m_config->setKeyDir(path_getfather(*it));
if (m_havelocalfields)
localfieldsfromconf();
if (m_havemdreapers)
mdreapersfromconf();
bool follow = false;
m_config->getConfParam("followLinks", &follow);
@ -396,21 +402,90 @@ out:
// Local fields can be set for fs subtrees in the configuration file.
// Refresh m_localfields from the "localfields" parameter as seen from the
// configuration's current key directory. The raw parameter string is
// cached in m_slocalfields so that nothing is re-parsed while it stays
// the same.
void FsIndexer::localfieldsfromconf()
{
    LOGDEB1(("FsIndexer::localfieldsfromconf\n"));

    string sfields;
    m_config->getConfParam("localfields", sfields);
    // compare() returns 0 on equality: same raw value, the map is current
    if (!sfields.compare(m_slocalfields))
        return;

    m_slocalfields = sfields;
    m_localfields.clear();
    if (sfields.empty())
        return;

    // The parameter uses the "value ; attr1 = val1 ; ..." syntax with an
    // empty value: only the attributes are of interest here.
    string value;
    ConfSimple attrs;
    m_config->valueSplitAttributes(sfields, value, attrs);
    vector<string> nmlst = attrs.getNames(cstr_null);
    for (vector<string>::const_iterator it = nmlst.begin();
         it != nmlst.end(); it++) {
        attrs.get(*it, m_localfields[*it]);
    }
}
//
void FsIndexer::setlocalfields(map<string, string> fields, Rcl::Doc& doc)
void FsIndexer::setlocalfields(const map<string, string>& fields, Rcl::Doc& doc)
{
for (map<string, string>::const_iterator it = fields.begin();
it != fields.end(); it++) {
it != fields.end(); it++) {
// Should local fields override those coming from the document
// ? I think not, but not too sure
// ? I think not, but not too sure. We could also chose to
// concatenate the values ?
if (doc.meta.find(it->second) == doc.meta.end()) {
doc.meta[it->first] = it->second;
}
}
}
}
// Metadata gathering commands
void FsIndexer::mdreapersfromconf()
{
LOGDEB1(("FsIndexer::mdreapersfromconf\n"));
string sreapers;
m_config->getConfParam("metadatacmds", sreapers);
if (!sreapers.compare(m_smdreapers))
return;
m_smdreapers = sreapers;
m_mdreapers.clear();
if (sreapers.empty())
return;
string value;
ConfSimple attrs;
m_config->valueSplitAttributes(sreapers, value, attrs);
vector<string> nmlst = attrs.getNames(cstr_null);
for (vector<string>::const_iterator it = nmlst.begin();
it != nmlst.end(); it++) {
MDReaper reaper;
reaper.fieldname = m_config->fieldCanon(*it);
string s;
attrs.get(*it, s);
stringToStrings(s, reaper.cmdv);
m_mdreapers.push_back(reaper);
}
}
void FsIndexer::reapmetadata(const vector<MDReaper>& reapers, const string& fn,
Rcl::Doc& doc)
{
map<char,string> smap = create_map<char, string>('f', fn);
for (vector<MDReaper>::const_iterator rp = reapers.begin();
rp != reapers.end(); rp++) {
vector<string> cmd;
for (vector<string>::const_iterator it = rp->cmdv.begin();
it != rp->cmdv.end(); it++) {
string s;
pcSubst(*it, s, smap);
cmd.push_back(s);
}
string output;
if (ExecCmd::backtick(cmd, output)) {
doc.meta[rp->fieldname] += string(" ") + output;
}
}
}
@ -515,6 +590,8 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp,
// Adjust local fields from config for this subtree
if (m_havelocalfields)
localfieldsfromconf();
if (m_havemdreapers)
mdreapersfromconf();
if (flg == FsTreeWalker::FtwDirReturn)
return FsTreeWalker::FtwOk;
@ -522,7 +599,8 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp,
#ifdef IDX_THREADS
if (m_haveInternQ) {
InternfileTask *tp = new InternfileTask(fn, stp, m_localfields);
InternfileTask *tp = new InternfileTask(fn, stp, m_localfields,
m_mdreapers);
if (m_iwqueue.put(tp)) {
return FsTreeWalker::FtwOk;
} else {
@ -531,14 +609,16 @@ FsIndexer::processone(const std::string &fn, const struct stat *stp,
}
#endif
return processonefile(m_config, m_tmpdir, fn, stp, m_localfields);
return processonefile(m_config, m_tmpdir, fn, stp, m_localfields,
m_mdreapers);
}
FsTreeWalker::Status
FsIndexer::processonefile(RclConfig *config, TempDir& tmpdir,
const std::string &fn, const struct stat *stp,
map<string, string> localfields)
const map<string, string>& localfields,
const vector<MDReaper>& mdreapers)
{
////////////////////
// Check db up to date ? Doing this before file type
@ -623,9 +703,13 @@ FsIndexer::processonefile(RclConfig *config, TempDir& tmpdir,
// We'll change the signature to ensure that the indexing will
// be retried every time.
// Internal access path for multi-document files
if (doc.ipath.empty())
// Internal access path for multi-document files. If empty, this is
// for the main file.
if (doc.ipath.empty()) {
hadNullIpath = true;
if (m_havemdreapers)
reapmetadata(mdreapers, fn, doc);
}
// Set file name, mod time and url if not done by filter
if (doc.fmtime.empty())
@ -708,7 +792,8 @@ FsIndexer::processonefile(RclConfig *config, TempDir& tmpdir,
fileDoc.url = cstr_fileu + fn;
if (m_havelocalfields)
setlocalfields(localfields, fileDoc);
if (m_havemdreapers)
reapmetadata(mdreapers, fn, fileDoc);
char cbuf[100];
sprintf(cbuf, OFFTPC, stp->st_size);
fileDoc.pcbytes = cbuf;

View File

@ -88,13 +88,24 @@ class FsIndexer : public FsTreeWalkerCB {
std::vector<std::string> m_tdl;
FIMissingStore *m_missing;
// The configuration can set attribute fields to be inherited by
// all files in a file system area. Ie: set "rclaptg = thunderbird"
// inside ~/.thunderbird. The boolean is set at init to avoid
// further wasteful processing if no local fields are set.
bool m_havelocalfields;
map<string, string> m_localfields;
string m_slocalfields;
map<string, string> m_localfields;
// Same idea with the metadata-gathering external commands,
// (e.g. used to reap tagging info: "tmsu tags %f")
/* Hold the description for an external metadata-gathering command.
 * One entry is built by mdreapersfromconf() for each attribute found in
 * the "metadatacmds" configuration parameter. */
struct MDReaper {
// Name of the document metadata field which receives the command output
string fieldname;
// Command and arguments, as split from the configured command line.
// reapmetadata() substitutes %f with the file name in each element
// before executing.
vector<string> cmdv;
};
bool m_havemdreapers;
string m_smdreapers;
vector<MDReaper> m_mdreapers;
#ifdef IDX_THREADS
friend void *FsIndexerDbUpdWorker(void*);
@ -109,11 +120,15 @@ class FsIndexer : public FsTreeWalkerCB {
bool init();
void localfieldsfromconf();
void setlocalfields(const map<string, string> flds, Rcl::Doc& doc);
void mdreapersfromconf();
void setlocalfields(const map<string, string>& flds, Rcl::Doc& doc);
void reapmetadata(const vector<MDReaper>& reapers, const string &fn,
Rcl::Doc& doc);
string getDbDir() {return m_config->getDbDir();}
FsTreeWalker::Status
processonefile(RclConfig *config, TempDir& tmpdir, const string &fn,
const struct stat *, map<string,string> localfields);
const struct stat *, const map<string,string>& localfields,
const vector<MDReaper>& mdreapers);
};
#endif /* _fsindexer_h_included_ */

View File

@ -23,10 +23,7 @@
#include <ctype.h>
#include <string>
#include <list>
#ifndef NO_NAMESPACES
using namespace std;
#endif /* NO_NAMESPACES */
#include "mimetype.h"
#include "debuglog.h"
@ -53,20 +50,13 @@ static string mimetypefromdata(const string &fn, bool usfc)
// First try the internal identifying routine
string mime = idFile(fn.c_str());
// Then exec 'file -i'
#ifdef USE_SYSTEM_FILE_COMMAND
if (usfc && mime.empty()) {
// Last resort: use "file -i"
vector<string> args;
args.push_back("-i");
args.push_back(fn);
ExecCmd ex;
vector<string> cmd = create_vector<string>(FILE_PROG) ("-i") (fn);
string result;
string cmd = FILE_PROG;
int status = ex.doexec(cmd, args, 0, &result);
if (status) {
LOGERR(("mimetypefromdata: doexec: status 0x%x\n", status));
if (!ExecCmd::backtick(cmd, result)) {
LOGERR(("mimetypefromdata: exec %s failed\n", FILE_PROG));
return string();
}
LOGDEB2(("mimetype: [%s] \"file\" output [%s]\n",
@ -160,7 +150,6 @@ string mimetype(const string &fn, const struct stat *stp,
}
#else // TEST->
#include <stdio.h>

View File

@ -832,8 +832,8 @@ bool Preview::loadDocInCurrentTab(const Rcl::Doc &idoc, int docnum)
////////////////////////////////////////////////////////////////////////
// Load and convert document
// idoc came out of the index data (main text and other fields missing).
// foc is the complete one what we are going to extract from storage.
// idoc came out of the index data (main text and some fields missing).
// fdoc is the complete one that we are going to extract from storage.
Rcl::Doc fdoc;
int status = 1;
LoadThread lthr(&status, fdoc, idoc);

View File

@ -277,19 +277,20 @@ snippetMaxPosWalk = 1000000
#defaultcharset = iso-8859-2
# You can set fields on all files of a specific fs area. (rclaptg can be
# used for application selection inside mimeview
# used for application selection inside mimeview).
# The syntax is the usual name = value ; attr1 = val1 ; ... with an empty
# value, so an initial semi-colon is needed.
#[/some/app/directory]
#localfields = rclaptg = someapp; otherfield = somevalue
#localfields = ; rclaptg = someapp; otherfield = somevalue
# Use app tag to enable using gnu info to open info files (as the subnodes
# are indexed as html, we'd use firefox on a temp file else. Set this on
# some known info storage places
[/usr/share/info]
localfields = rclaptg=gnuinfo
[/usr/local/share/info]
localfields = rclaptg=gnuinfo
[/usr/local/info]
localfields = rclaptg=gnuinfo
# It's also possible to execute external commands to gather external
# metadata, for example tmsu tags.
# There can be several entries, separated by semi-colons, each defining
# which field name the data goes into and the command to use. Don't forget the
# initial semi-colon. All the field names must be different. You can use
# aliases in the "field" file if necessary.
#[/some/area/of/the/fs]
#metadatacmds = ; tags = tmsu tags %f
[/usr/share/man]
followSymlinks = 1

View File

@ -146,6 +146,13 @@ ConfSimple::ConfSimple(int readonly, bool tildexp)
status = readonly ? STATUS_RO : STATUS_RW;
}
void ConfSimple::reparse(const string& d)
{
clear();
stringstream input(d, ios::in);
parseinput(input);
}
ConfSimple::ConfSimple(const string& d, int readonly, bool tildexp)
: dotildexpand(tildexp), m_fmtime(0), m_holdWrites(false)
{

View File

@ -158,6 +158,16 @@ public:
return true;
}
/** Clear, then reparse from string */
void reparse(const string& in);
/** Clear all content: empties both the recorded line order and the
 *  submaps. */
void clear()
{
    m_order.clear();
    m_submaps.clear();
}
/**
* Get value for named parameter, from specified subsection (looks in
* global space if sk is empty).

View File

@ -242,7 +242,7 @@ inline void ExecCmd::dochild(const string &cmd, const char **argv,
execve(cmd.c_str(), (char *const*)argv, (char *const*)envv);
// Hu ho
LOGERR(("ExecCmd::doexec: execvp(%s) failed. errno %d\n", cmd.c_str(),
LOGERR(("ExecCmd::doexec: execve(%s) failed. errno %d\n", cmd.c_str(),
errno));
_exit(127);
}
@ -634,6 +634,18 @@ bool ExecCmd::maybereap(int *status)
}
}
// Static
bool ExecCmd::backtick(const std::vector<std::string> cmd, std::string& out)
{
vector<string>::const_iterator it = cmd.begin();
it++;
vector<string> args(it, cmd.end());
ExecCmd mexec;
int status = mexec.doexec(*cmd.begin(), args, 0, &out);
return status == 0;
}
/// ReExec class methods ///////////////////////////////////////////////////
ReExec::ReExec(int argc, char *args[])
{
init(argc, args);
@ -747,18 +759,21 @@ void ReExec::reexec()
execvp(m_argv[0].c_str(), (char *const*)argv);
}
////////////////////////////////////////////////////////////////////
#else // TEST
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <string>
#include <iostream>
#include <vector>
#include "debuglog.h"
#include "cancelcheck.h"
using namespace std;
#include "debuglog.h"
#include "cancelcheck.h"
#include "execmd.h"
static int op_flags;

View File

@ -178,6 +178,14 @@ class ExecCmd {
*/
static bool which(const string& cmd, string& exe, const char* path = 0);
/**
* Execute command and return stdout output in a string
* @param cmd input: command and args
* @param out output: what the command printed
* @return true if exec status was 0
*/
static bool backtick(const std::vector<std::string> cmd, std::string& out);
friend class ExecCmdRsrc;
private:
static bool o_useVfork;

View File

@ -109,7 +109,7 @@ private:
typedef RefCntr<TempFileInternal> TempFile;
/// Temporary directory class
/// Temporary directory class. Recursively deleted by destructor.
class TempDir {
public:
TempDir();
@ -117,6 +117,7 @@ public:
const char *dirname() {return m_dirname.c_str();}
const string &getreason() {return m_reason;}
bool ok() {return !m_dirname.empty();}
/// Recursively delete contents but not self.
bool wipe();
private:
string m_dirname;

View File

@ -225,6 +225,28 @@ public:
return m_map;
}
};
// Helper for building a std::vector in a single expression by chaining
// calls, e.g.: vector<string> v = create_vector<string>("a")("b")("c");
template <typename T>
class create_vector
{
private:
    std::vector<T> m_vector;
public:
    // Start the vector with a first element
    create_vector(const T& val)
    {
        m_vector.push_back(val);
    }

    // Append one more element. Returns *this so that calls can be chained
    create_vector<T>& operator()(const T& val)
    {
        m_vector.push_back(val);
        return *this;
    }

    // Conversion to the vector built so far (returns a copy). Declared
    // const because it does not modify the builder, which also allows
    // converting a const create_vector.
    operator std::vector<T>() const
    {
        return m_vector;
    }
};
#ifndef MIN
#define MIN(A,B) (((A)<(B)) ? (A) : (B))

View File

@ -73,6 +73,13 @@
<p>Recoll 1.19 .... Changes documented from 1.18.1 to rev 3159</p>
<ul>
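<li>Changed format for the <literal>localfields</literal> configuration
parameter (used, for example, to set the rclaptg field). It was
colon-separated, and now uses the normal value/attributes syntax with an
empty value, like: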
<pre>
localfields = ; attr1 = val1 ; attr2 = val2
</pre>
</li>
<li>There are new GUI configuration options to run in "search
as you type" mode, and to disable the Qt auto-completion
inside the simple search string, which was often more confusing