diff --git a/src/index/indexer.cpp b/src/index/indexer.cpp index fb9089d1..4db0f3af 100644 --- a/src/index/indexer.cpp +++ b/src/index/indexer.cpp @@ -52,6 +52,14 @@ static char rcsid[] = "@(#$Id: indexer.cpp,v 1.71 2008-12-17 08:01:40 dockes Exp #include "rclaspell.h" #endif +// When using extended attributes, we have to use the ctime. +// This is quite an expensive price to pay... +#ifdef RCL_USE_XATTR +#define RCL_STTIME st_ctime +#else +#define RCL_STTIME st_mtime +#endif // RCL_USE_XATTR + #ifndef NO_NAMESPACES using namespace std; #endif /* NO_NAMESPACES */ @@ -391,11 +399,11 @@ DbIndexer::processone(const std::string &fn, const struct stat *stp, // without mime type will not be purged from the db, resulting // in possible 'cannot intern file' messages at query time... char cbuf[100]; - // Document signature. This is based on mtime and size and used + // Document signature. This is based on m/ctime and size and used // for the uptodate check (the value computed here is checked // against the stored one). Changing the computation forces a full // reindex of course. - sprintf(cbuf, "%ld%ld", (long)stp->st_size, (long)stp->st_mtime); + sprintf(cbuf, "%ld%ld", (long)stp->st_size, (long)stp->RCL_STTIME); string sig = cbuf; string udi; make_udi(fn, "", udi); @@ -488,7 +496,7 @@ DbIndexer::processone(const std::string &fn, const struct stat *stp, // Document signature for up to date checks: concatenate mtime and // size. Note: looking for changes only, no need to parseback so no // need for reversible formatting - sprintf(cbuf, "%ld%ld", (long)stp->st_size, (long)stp->st_mtime); + sprintf(cbuf, "%ld%ld", (long)stp->st_size, (long)stp->RCL_STTIME); doc.sig = cbuf; // If there was an error, ensure indexing will be // retried. This is for the once missing, later installed @@ -533,7 +541,7 @@ DbIndexer::processone(const std::string &fn, const struct stat *stp, sprintf(cbuf, "%ld", (long)stp->st_size); fileDoc.fbytes = cbuf; // Document signature for up to date checks. 
- sprintf(cbuf, "%ld%ld", (long)stp->st_size, (long)stp->st_mtime); + sprintf(cbuf, "%ld%ld", (long)stp->st_size, (long)stp->RCL_STTIME); fileDoc.sig = cbuf; if (!m_db.addOrUpdate(parent_udi, "", fileDoc)) return FsTreeWalker::FtwError; diff --git a/src/internfile/internfile.cpp b/src/internfile/internfile.cpp index 024f475c..f931e9b6 100644 --- a/src/internfile/internfile.cpp +++ b/src/internfile/internfile.cpp @@ -44,6 +44,9 @@ using namespace std; #include "wipedir.h" #include "rclconfig.h" #include "mh_html.h" +#ifdef RCL_USE_XATTR +#include "pxattr.h" +#endif // RCL_USE_XATTR // The internal path element separator. This can't be the same as the rcldb // file to ipath separator : "|" @@ -53,6 +56,32 @@ static const string stxtplain("text/plain"); set FileInterner::o_missingExternal; map > FileInterner::o_typesForMissing; +#ifdef RCL_USE_XATTR +void FileInterner::reapXAttrs(const string& path) +{ + vector xnames; + if (!pxattr::list(path, &xnames)) { + LOGERR(("FileInterner::reapXattrs: pxattr::list: errno %d\n", errno)); + return; + } + const map& xtof = m_cfg->getXattrToField(); + for (vector::const_iterator it = xnames.begin(); + it != xnames.end(); it++) { + map::const_iterator mit; + if ((mit = xtof.find(*it)) != xtof.end()) { + string value; + if (!pxattr::get(path, *it, &value, pxattr::PXATTR_NOFOLLOW)) { + LOGERR(("FileInterner::reapXattrs: pxattr::get failed" + "for %s, errno %d\n", (*it).c_str(), errno)); + continue; + } + // Encode should we ? 
+ m_XAttrsFields[mit->second] = value; + } + } +} +#endif // RCL_USE_XATTR + // This is used when the user wants to retrieve a search result doc's parent // (ie message having a given attachment) bool FileInterner::getEnclosing(const string &url, const string &ipath, @@ -193,7 +222,8 @@ FileInterner::FileInterner(const string &f, const struct stat *stp, if (!df) { // No handler for this type, for now :( if indexallfilenames // is set in the config, this normally wont happen (we get mh_unknown) - LOGERR(("FileInterner:: ignored: [%s] mime [%s]\n", f.c_str(), l_mime.c_str())); + LOGERR(("FileInterner:: ignored: [%s] mime [%s]\n", + f.c_str(), l_mime.c_str())); return; } df->set_property(Dijon::Filter::OPERATING_MODE, @@ -201,6 +231,14 @@ FileInterner::FileInterner(const string &f, const struct stat *stp, string charset = m_cfg->getDefCharset(); df->set_property(Dijon::Filter::DEFAULT_CHARSET, charset); + +#ifdef RCL_USE_XATTR + // Get fields computed from extended attributes. We use the + // original file, not the m_fn which may be the uncompressed temp + // file + reapXAttrs(f); +#endif //RCL_USE_XATTR + if (!df->set_document_file(m_fn)) { LOGERR(("FileInterner:: error parsing %s\n", m_fn.c_str())); return; @@ -363,18 +401,6 @@ bool FileInterner::dijontorcl(Rcl::Doc& doc) doc.meta[Rcl::Doc::keyabs] = doc.meta[keyds]; doc.meta.erase(keyds); } -#ifdef RCL_USE_XATTR - // Finally set any data possibly coming out of the extended file attributes - // these override any values from inside the file. - RecollFilter *rf = dynamic_cast(df); - if (rf != 0) { - const map& ffa = rf->getFieldsFromAttrs(); - for (map::const_iterator it = ffa.begin(); - it != ffa.end(); it++) { - doc.meta[it->first] = it->second; - } - } -#endif //RCL_USE_XATTR return true; } @@ -388,6 +414,15 @@ void FileInterner::collectIpathAndMT(Rcl::Doc& doc, string& ipath) const { bool hasipath = false; +#ifdef RCL_USE_XATTR + // Set fields from extended file attributes. 
+ // These can be overridden by values from inside the file + for (map::const_iterator it = m_XAttrsFields.begin(); + it != m_XAttrsFields.end(); it++) { + doc.meta[it->first] = it->second; + } +#endif //RCL_USE_XATTR + // If there is no ipath stack, the mimetype is the one from the file doc.mimetype = m_mimetype; @@ -610,18 +645,17 @@ FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath) } } breakloop: - if (m_handlers.empty()) { LOGDEB(("FileInterner::internfile: conversion ended with no doc\n")); return FIError; } - // If indexing compute ipath and significant mimetype. - // ipath is returned through the parameter not doc.ipath We also - // retrieve some metadata fields from the ancesters (like date or + // If indexing compute ipath and significant mimetype. ipath is + // returned through the parameter not doc.ipath We also retrieve + // some metadata fields from the ancestors (like date or // author). This is useful for email attachments. The values will - // be replaced by those found by dijontorcl if any, so the order - // of calls is important. + // be replaced by those internal to the document (by dijontorcl()) + // if any, so the order of calls is important. if (!m_forPreview) collectIpathAndMT(doc, ipath); else diff --git a/src/internfile/internfile.h b/src/internfile/internfile.h index 857ea653..e09c9d68 100644 --- a/src/internfile/internfile.h +++ b/src/internfile/internfile.h @@ -29,6 +29,9 @@ using std::set; #include "pathut.h" #include "Filter.h" +// Beware: the class changes according to RCL_USE_XATTR, so any file +// including this needs autoconfig.h +#include "autoconfig.h" class RclConfig; namespace Rcl { @@ -134,6 +137,14 @@ class FileInterner { // m_tdir and m_tfile are used only for decompressing input file if needed const string& m_tdir; string m_tfile; +#ifdef RCL_USE_XATTR + // Fields found in file extended attributes. 
This is kept here, + // not in the file-level handler because we are only interested in + // the top-level file, not any temp file necessitated by + // processing the internal doc hierarchy. + map m_XAttrsFields; +#endif // RCL_USE_XATTR + // Filter stack, path to the current document from which we're // fetching subdocs vector m_handlers; @@ -154,6 +165,9 @@ class FileInterner { int addHandler(); void checkExternalMissing(const string& msg, const string& mt); void processNextDocError(Rcl::Doc &doc, string& ipath); +#ifdef RCL_USE_XATTR + void reapXAttrs(const string& fn); +#endif }; diff --git a/src/internfile/mimehandler.cpp b/src/internfile/mimehandler.cpp index f67fb87c..dff020dc 100644 --- a/src/internfile/mimehandler.cpp +++ b/src/internfile/mimehandler.cpp @@ -29,9 +29,6 @@ using namespace std; #include "debuglog.h" #include "rclconfig.h" #include "smallut.h" -#ifdef RCL_USE_XATTR -#include "pxattr.h" -#endif // RCL_USE_XATTR #include "mh_exec.h" #include "mh_html.h" @@ -40,43 +37,10 @@ using namespace std; #include "mh_text.h" #include "mh_unknown.h" -// Common code for all docs that are a file (not subdocs). If extended -// attributes support is enabled, fetch the data. -bool RecollFilter::set_document_file(const string& path) -{ -#ifdef RCL_USE_XATTR - RclConfig* rclconfig = RclConfig::getMainConfig(); - if (rclconfig == 0) { - LOGERR(("RecollFilter::set_document_file: no config\n")); - return false; - } - vector xnames; - if (!pxattr::list(path, &xnames)) { - LOGERR(("xattrToMeta: pxattr::list failed, errno %d\n", errno)); - return false; - } - const map& xtof = rclconfig->getXattrToField(); - for (vector::const_iterator it = xnames.begin(); - it != xnames.end(); it++) { - map::const_iterator mit; - if ((mit = xtof.find(*it)) != xtof.end()) { - string value; - if (!pxattr::get(path, *it, &value, pxattr::PXATTR_NOFOLLOW)) { - LOGERR(("xattrToMeta: pxattr::get failed for %s, errno %d\n", - (*it).c_str(), errno)); - continue; - } - // Encode should we ? 
- m_fieldsFromAttrs[mit->second] = value; - } - } -#endif // RCL_USE_XATTR - return true; -} - -// Pool of already known and created handlers. There can be several -// instances for a given mime type (think email attachment in email -// message: 2 rfc822 handlers are needed simulteanously) +// Performance help: we use a pool of already known and created +// handlers. There can be several instances for a given mime type +// (think email attachment in email message: 2 rfc822 handlers are +// needed simultaneously) static multimap o_handlers; /** For mime types set as "internal" in mimeconf: diff --git a/src/internfile/mimehandler.h b/src/internfile/mimehandler.h index a0a7d6b1..2cb36af9 100644 --- a/src/internfile/mimehandler.h +++ b/src/internfile/mimehandler.h @@ -23,10 +23,6 @@ #include using std::string; using std::list; -#ifdef RCL_USE_XATTR -#include -using std::map; -#endif // RCL_USE_XATTR #include @@ -56,9 +52,10 @@ public: // We don't use this for now virtual bool set_document_uri(const std::string &) {return false;} - /// This does the extended attributes thing if enabled and should - /// be called from subclasses. - virtual bool set_document_file(const string &file_path); + /// This does nothing right now but should be called from the + /// subclass method in case we need some common processing one day + /// (was used for xattrs at some point). + virtual bool set_document_file(const string &file_path) {return true;} // Default implementations virtual bool set_document_string(const std::string &) {return false;} @@ -90,22 +87,13 @@ public: m_forPreview = m_havedoc = false; m_defcharset.clear(); m_reason.clear(); -#ifdef RCL_USE_XATTR - m_fieldsFromAttrs.clear(); -#endif // RCL_USE_XATTR } -#ifdef RCL_USE_XATTR - const map& getFieldsFromAttrs() {return m_fieldsFromAttrs;} -#endif // RCL_USE_XATTR protected: bool m_forPreview; string m_defcharset; string m_reason; bool m_havedoc; -#ifdef RCL_USE_XATTR - map m_fieldsFromAttrs; -#endif // RCL_USE_XATTR }; /**