xattrs: make them work with non-text files. Use ctime for up to date checks
This commit is contained in:
parent
ce2d3c0b24
commit
9b44f94629
@ -52,6 +52,14 @@ static char rcsid[] = "@(#$Id: indexer.cpp,v 1.71 2008-12-17 08:01:40 dockes Exp
|
|||||||
#include "rclaspell.h"
|
#include "rclaspell.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// When using extended attributes, we have to use the ctime.
|
||||||
|
// This is quite an expensive price to pay...
|
||||||
|
#ifdef RCL_USE_XATTR
|
||||||
|
#define RCL_STTIME st_ctime
|
||||||
|
#else
|
||||||
|
#define RCL_STTIME st_mtime
|
||||||
|
#endif // RCL_USE_XATTR
|
||||||
|
|
||||||
#ifndef NO_NAMESPACES
|
#ifndef NO_NAMESPACES
|
||||||
using namespace std;
|
using namespace std;
|
||||||
#endif /* NO_NAMESPACES */
|
#endif /* NO_NAMESPACES */
|
||||||
@ -391,11 +399,11 @@ DbIndexer::processone(const std::string &fn, const struct stat *stp,
|
|||||||
// without mime type will not be purged from the db, resulting
|
// without mime type will not be purged from the db, resulting
|
||||||
// in possible 'cannot intern file' messages at query time...
|
// in possible 'cannot intern file' messages at query time...
|
||||||
char cbuf[100];
|
char cbuf[100];
|
||||||
// Document signature. This is based on mtime and size and used
|
// Document signature. This is based on m/ctime and size and used
|
||||||
// for the uptodate check (the value computed here is checked
|
// for the uptodate check (the value computed here is checked
|
||||||
// against the stored one). Changing the computation forces a full
|
// against the stored one). Changing the computation forces a full
|
||||||
// reindex of course.
|
// reindex of course.
|
||||||
sprintf(cbuf, "%ld%ld", (long)stp->st_size, (long)stp->st_mtime);
|
sprintf(cbuf, "%ld%ld", (long)stp->st_size, (long)stp->RCL_STTIME);
|
||||||
string sig = cbuf;
|
string sig = cbuf;
|
||||||
string udi;
|
string udi;
|
||||||
make_udi(fn, "", udi);
|
make_udi(fn, "", udi);
|
||||||
@ -488,7 +496,7 @@ DbIndexer::processone(const std::string &fn, const struct stat *stp,
|
|||||||
// Document signature for up to date checks: concatenate mtime and
|
// Document signature for up to date checks: concatenate mtime and
|
||||||
// size. Note: looking for changes only, no need to parseback so no
|
// size. Note: looking for changes only, no need to parseback so no
|
||||||
// need for reversible formatting
|
// need for reversible formatting
|
||||||
sprintf(cbuf, "%ld%ld", (long)stp->st_size, (long)stp->st_mtime);
|
sprintf(cbuf, "%ld%ld", (long)stp->st_size, (long)stp->RCL_STTIME);
|
||||||
doc.sig = cbuf;
|
doc.sig = cbuf;
|
||||||
// If there was an error, ensure indexing will be
|
// If there was an error, ensure indexing will be
|
||||||
// retried. This is for the once missing, later installed
|
// retried. This is for the once missing, later installed
|
||||||
@ -533,7 +541,7 @@ DbIndexer::processone(const std::string &fn, const struct stat *stp,
|
|||||||
sprintf(cbuf, "%ld", (long)stp->st_size);
|
sprintf(cbuf, "%ld", (long)stp->st_size);
|
||||||
fileDoc.fbytes = cbuf;
|
fileDoc.fbytes = cbuf;
|
||||||
// Document signature for up to date checks.
|
// Document signature for up to date checks.
|
||||||
sprintf(cbuf, "%ld%ld", (long)stp->st_size, (long)stp->st_mtime);
|
sprintf(cbuf, "%ld%ld", (long)stp->st_size, (long)stp->RCL_STTIME);
|
||||||
fileDoc.sig = cbuf;
|
fileDoc.sig = cbuf;
|
||||||
if (!m_db.addOrUpdate(parent_udi, "", fileDoc))
|
if (!m_db.addOrUpdate(parent_udi, "", fileDoc))
|
||||||
return FsTreeWalker::FtwError;
|
return FsTreeWalker::FtwError;
|
||||||
|
|||||||
@ -44,6 +44,9 @@ using namespace std;
|
|||||||
#include "wipedir.h"
|
#include "wipedir.h"
|
||||||
#include "rclconfig.h"
|
#include "rclconfig.h"
|
||||||
#include "mh_html.h"
|
#include "mh_html.h"
|
||||||
|
#ifdef RCL_USE_XATTR
|
||||||
|
#include "pxattr.h"
|
||||||
|
#endif // RCL_USE_XATTR
|
||||||
|
|
||||||
// The internal path element separator. This can't be the same as the rcldb
|
// The internal path element separator. This can't be the same as the rcldb
|
||||||
// file to ipath separator : "|"
|
// file to ipath separator : "|"
|
||||||
@ -53,6 +56,32 @@ static const string stxtplain("text/plain");
|
|||||||
set<string> FileInterner::o_missingExternal;
|
set<string> FileInterner::o_missingExternal;
|
||||||
map<string, set<string> > FileInterner::o_typesForMissing;
|
map<string, set<string> > FileInterner::o_typesForMissing;
|
||||||
|
|
||||||
|
#ifdef RCL_USE_XATTR
|
||||||
|
void FileInterner::reapXAttrs(const string& path)
|
||||||
|
{
|
||||||
|
vector<string> xnames;
|
||||||
|
if (!pxattr::list(path, &xnames)) {
|
||||||
|
LOGERR(("FileInterner::reapXattrs: pxattr::list: errno %d\n", errno));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const map<string, string>& xtof = m_cfg->getXattrToField();
|
||||||
|
for (vector<string>::const_iterator it = xnames.begin();
|
||||||
|
it != xnames.end(); it++) {
|
||||||
|
map<string, string>::const_iterator mit;
|
||||||
|
if ((mit = xtof.find(*it)) != xtof.end()) {
|
||||||
|
string value;
|
||||||
|
if (!pxattr::get(path, *it, &value, pxattr::PXATTR_NOFOLLOW)) {
|
||||||
|
LOGERR(("FileInterner::reapXattrs: pxattr::get failed"
|
||||||
|
"for %s, errno %d\n", (*it).c_str(), errno));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Encode should we ?
|
||||||
|
m_XAttrsFields[mit->second] = value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif // RCL_USE_XATTR
|
||||||
|
|
||||||
// This is used when the user wants to retrieve a search result doc's parent
|
// This is used when the user wants to retrieve a search result doc's parent
|
||||||
// (ie message having a given attachment)
|
// (ie message having a given attachment)
|
||||||
bool FileInterner::getEnclosing(const string &url, const string &ipath,
|
bool FileInterner::getEnclosing(const string &url, const string &ipath,
|
||||||
@ -193,7 +222,8 @@ FileInterner::FileInterner(const string &f, const struct stat *stp,
|
|||||||
if (!df) {
|
if (!df) {
|
||||||
// No handler for this type, for now :( if indexallfilenames
|
// No handler for this type, for now :( if indexallfilenames
|
||||||
// is set in the config, this normally wont happen (we get mh_unknown)
|
// is set in the config, this normally wont happen (we get mh_unknown)
|
||||||
LOGERR(("FileInterner:: ignored: [%s] mime [%s]\n", f.c_str(), l_mime.c_str()));
|
LOGERR(("FileInterner:: ignored: [%s] mime [%s]\n",
|
||||||
|
f.c_str(), l_mime.c_str()));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
df->set_property(Dijon::Filter::OPERATING_MODE,
|
df->set_property(Dijon::Filter::OPERATING_MODE,
|
||||||
@ -201,6 +231,14 @@ FileInterner::FileInterner(const string &f, const struct stat *stp,
|
|||||||
|
|
||||||
string charset = m_cfg->getDefCharset();
|
string charset = m_cfg->getDefCharset();
|
||||||
df->set_property(Dijon::Filter::DEFAULT_CHARSET, charset);
|
df->set_property(Dijon::Filter::DEFAULT_CHARSET, charset);
|
||||||
|
|
||||||
|
#ifdef RCL_USE_XATTR
|
||||||
|
// Get fields computed from extended attributes. We use the
|
||||||
|
// original file, not the m_fn which may be the uncompressed temp
|
||||||
|
// file
|
||||||
|
reapXAttrs(f);
|
||||||
|
#endif //RCL_USE_XATTR
|
||||||
|
|
||||||
if (!df->set_document_file(m_fn)) {
|
if (!df->set_document_file(m_fn)) {
|
||||||
LOGERR(("FileInterner:: error parsing %s\n", m_fn.c_str()));
|
LOGERR(("FileInterner:: error parsing %s\n", m_fn.c_str()));
|
||||||
return;
|
return;
|
||||||
@ -363,18 +401,6 @@ bool FileInterner::dijontorcl(Rcl::Doc& doc)
|
|||||||
doc.meta[Rcl::Doc::keyabs] = doc.meta[keyds];
|
doc.meta[Rcl::Doc::keyabs] = doc.meta[keyds];
|
||||||
doc.meta.erase(keyds);
|
doc.meta.erase(keyds);
|
||||||
}
|
}
|
||||||
#ifdef RCL_USE_XATTR
|
|
||||||
// Finally set any data possibly coming out of the extended file attributes
|
|
||||||
// these override any values from inside the file.
|
|
||||||
RecollFilter *rf = dynamic_cast<RecollFilter*>(df);
|
|
||||||
if (rf != 0) {
|
|
||||||
const map<string, string>& ffa = rf->getFieldsFromAttrs();
|
|
||||||
for (map<string,string>::const_iterator it = ffa.begin();
|
|
||||||
it != ffa.end(); it++) {
|
|
||||||
doc.meta[it->first] = it->second;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif //RCL_USE_XATTR
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -388,6 +414,15 @@ void FileInterner::collectIpathAndMT(Rcl::Doc& doc, string& ipath) const
|
|||||||
{
|
{
|
||||||
bool hasipath = false;
|
bool hasipath = false;
|
||||||
|
|
||||||
|
#ifdef RCL_USE_XATTR
|
||||||
|
// Set fields from extended file attributes.
|
||||||
|
// These can be overridden by values from inside the file
|
||||||
|
for (map<string,string>::const_iterator it = m_XAttrsFields.begin();
|
||||||
|
it != m_XAttrsFields.end(); it++) {
|
||||||
|
doc.meta[it->first] = it->second;
|
||||||
|
}
|
||||||
|
#endif //RCL_USE_XATTR
|
||||||
|
|
||||||
// If there is no ipath stack, the mimetype is the one from the file
|
// If there is no ipath stack, the mimetype is the one from the file
|
||||||
doc.mimetype = m_mimetype;
|
doc.mimetype = m_mimetype;
|
||||||
|
|
||||||
@ -610,18 +645,17 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
breakloop:
|
breakloop:
|
||||||
|
|
||||||
if (m_handlers.empty()) {
|
if (m_handlers.empty()) {
|
||||||
LOGDEB(("FileInterner::internfile: conversion ended with no doc\n"));
|
LOGDEB(("FileInterner::internfile: conversion ended with no doc\n"));
|
||||||
return FIError;
|
return FIError;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If indexing compute ipath and significant mimetype.
|
// If indexing compute ipath and significant mimetype. ipath is
|
||||||
// ipath is returned through the parameter not doc.ipath We also
|
// returned through the parameter not doc.ipath We also retrieve
|
||||||
// retrieve some metadata fields from the ancesters (like date or
|
// some metadata fields from the ancestors (like date or
|
||||||
// author). This is useful for email attachments. The values will
|
// author). This is useful for email attachments. The values will
|
||||||
// be replaced by those found by dijontorcl if any, so the order
|
// be replaced by those internal to the document (by dijontorcl())
|
||||||
// of calls is important.
|
// if any, so the order of calls is important.
|
||||||
if (!m_forPreview)
|
if (!m_forPreview)
|
||||||
collectIpathAndMT(doc, ipath);
|
collectIpathAndMT(doc, ipath);
|
||||||
else
|
else
|
||||||
|
|||||||
@ -29,6 +29,9 @@ using std::set;
|
|||||||
|
|
||||||
#include "pathut.h"
|
#include "pathut.h"
|
||||||
#include "Filter.h"
|
#include "Filter.h"
|
||||||
|
// Beware: the class changes according to RCL_USE_XATTR, so any file
|
||||||
|
// including this needs autoconfig.h
|
||||||
|
#include "autoconfig.h"
|
||||||
|
|
||||||
class RclConfig;
|
class RclConfig;
|
||||||
namespace Rcl {
|
namespace Rcl {
|
||||||
@ -134,6 +137,14 @@ class FileInterner {
|
|||||||
// m_tdir and m_tfile are used only for decompressing input file if needed
|
// m_tdir and m_tfile are used only for decompressing input file if needed
|
||||||
const string& m_tdir;
|
const string& m_tdir;
|
||||||
string m_tfile;
|
string m_tfile;
|
||||||
|
#ifdef RCL_USE_XATTR
|
||||||
|
// Fields found in file extended attributes. This is kept here,
|
||||||
|
// not in the file-level handler because we are only interested in
|
||||||
|
// the top-level file, not any temp file necessitated by
|
||||||
|
// processing the internal doc hierarchy.
|
||||||
|
map<string, string> m_XAttrsFields;
|
||||||
|
#endif // RCL_USE_XATTR
|
||||||
|
|
||||||
// Filter stack, path to the current document from which we're
|
// Filter stack, path to the current document from which we're
|
||||||
// fetching subdocs
|
// fetching subdocs
|
||||||
vector<Dijon::Filter*> m_handlers;
|
vector<Dijon::Filter*> m_handlers;
|
||||||
@ -154,6 +165,9 @@ class FileInterner {
|
|||||||
int addHandler();
|
int addHandler();
|
||||||
void checkExternalMissing(const string& msg, const string& mt);
|
void checkExternalMissing(const string& msg, const string& mt);
|
||||||
void processNextDocError(Rcl::Doc &doc, string& ipath);
|
void processNextDocError(Rcl::Doc &doc, string& ipath);
|
||||||
|
#ifdef RCL_USE_XATTR
|
||||||
|
void reapXAttrs(const string& fn);
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -29,9 +29,6 @@ using namespace std;
|
|||||||
#include "debuglog.h"
|
#include "debuglog.h"
|
||||||
#include "rclconfig.h"
|
#include "rclconfig.h"
|
||||||
#include "smallut.h"
|
#include "smallut.h"
|
||||||
#ifdef RCL_USE_XATTR
|
|
||||||
#include "pxattr.h"
|
|
||||||
#endif // RCL_USE_XATTR
|
|
||||||
|
|
||||||
#include "mh_exec.h"
|
#include "mh_exec.h"
|
||||||
#include "mh_html.h"
|
#include "mh_html.h"
|
||||||
@ -40,43 +37,10 @@ using namespace std;
|
|||||||
#include "mh_text.h"
|
#include "mh_text.h"
|
||||||
#include "mh_unknown.h"
|
#include "mh_unknown.h"
|
||||||
|
|
||||||
// Common code for all docs that are a file (not subdocs). If extended
|
// Performance help: we use a pool of already known and created
|
||||||
// attributes support is enabled, fetch the data.
|
// handlers. There can be several instances for a given mime type
|
||||||
bool RecollFilter::set_document_file(const string& path)
|
// (think email attachment in email message: 2 rfc822 handlers are
|
||||||
{
|
// needed simultaneously)
|
||||||
#ifdef RCL_USE_XATTR
|
|
||||||
RclConfig* rclconfig = RclConfig::getMainConfig();
|
|
||||||
if (rclconfig == 0) {
|
|
||||||
LOGERR(("RecollFilter::set_document_file: no config\n"));
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
vector<string> xnames;
|
|
||||||
if (!pxattr::list(path, &xnames)) {
|
|
||||||
LOGERR(("xattrToMeta: pxattr::list failed, errno %d\n", errno));
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
const map<string, string>& xtof = rclconfig->getXattrToField();
|
|
||||||
for (vector<string>::const_iterator it = xnames.begin();
|
|
||||||
it != xnames.end(); it++) {
|
|
||||||
map<string, string>::const_iterator mit;
|
|
||||||
if ((mit = xtof.find(*it)) != xtof.end()) {
|
|
||||||
string value;
|
|
||||||
if (!pxattr::get(path, *it, &value, pxattr::PXATTR_NOFOLLOW)) {
|
|
||||||
LOGERR(("xattrToMeta: pxattr::get failed for %s, errno %d\n",
|
|
||||||
(*it).c_str(), errno));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
// Encode should we ?
|
|
||||||
m_fieldsFromAttrs[mit->second] = value;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif // RCL_USE_XATTR
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Pool of already known and created handlers. There can be several
|
|
||||||
// instances for a given mime type (think email attachment in email
|
|
||||||
// message: 2 rfc822 handlers are needed simulteanously)
|
|
||||||
static multimap<string, Dijon::Filter*> o_handlers;
|
static multimap<string, Dijon::Filter*> o_handlers;
|
||||||
|
|
||||||
/** For mime types set as "internal" in mimeconf:
|
/** For mime types set as "internal" in mimeconf:
|
||||||
|
|||||||
@ -23,10 +23,6 @@
|
|||||||
#include <list>
|
#include <list>
|
||||||
using std::string;
|
using std::string;
|
||||||
using std::list;
|
using std::list;
|
||||||
#ifdef RCL_USE_XATTR
|
|
||||||
#include <map>
|
|
||||||
using std::map;
|
|
||||||
#endif // RCL_USE_XATTR
|
|
||||||
|
|
||||||
#include <Filter.h>
|
#include <Filter.h>
|
||||||
|
|
||||||
@ -56,9 +52,10 @@ public:
|
|||||||
// We don't use this for now
|
// We don't use this for now
|
||||||
virtual bool set_document_uri(const std::string &) {return false;}
|
virtual bool set_document_uri(const std::string &) {return false;}
|
||||||
|
|
||||||
/// This does the extended attributes thing if enabled and should
|
/// This does nothing right now but should be called from the
|
||||||
/// be called from subclasses.
|
/// subclass method in case we need some common processing one day
|
||||||
virtual bool set_document_file(const string &file_path);
|
/// (was used for xattrs at some point).
|
||||||
|
virtual bool set_document_file(const string &file_path) {return true;}
|
||||||
|
|
||||||
// Default implementations
|
// Default implementations
|
||||||
virtual bool set_document_string(const std::string &) {return false;}
|
virtual bool set_document_string(const std::string &) {return false;}
|
||||||
@ -90,22 +87,13 @@ public:
|
|||||||
m_forPreview = m_havedoc = false;
|
m_forPreview = m_havedoc = false;
|
||||||
m_defcharset.clear();
|
m_defcharset.clear();
|
||||||
m_reason.clear();
|
m_reason.clear();
|
||||||
#ifdef RCL_USE_XATTR
|
|
||||||
m_fieldsFromAttrs.clear();
|
|
||||||
#endif // RCL_USE_XATTR
|
|
||||||
}
|
}
|
||||||
#ifdef RCL_USE_XATTR
|
|
||||||
const map<string, string>& getFieldsFromAttrs() {return m_fieldsFromAttrs;}
|
|
||||||
#endif // RCL_USE_XATTR
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
bool m_forPreview;
|
bool m_forPreview;
|
||||||
string m_defcharset;
|
string m_defcharset;
|
||||||
string m_reason;
|
string m_reason;
|
||||||
bool m_havedoc;
|
bool m_havedoc;
|
||||||
#ifdef RCL_USE_XATTR
|
|
||||||
map<string, string> m_fieldsFromAttrs;
|
|
||||||
#endif // RCL_USE_XATTR
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user