Ensure that GUI "Open" works with firefox/beagle cache data: use the backend-agnostic internfile interface in FileInterner::idocToFile

This commit is contained in:
Jean-Francois Dockes 2010-06-09 16:34:59 +02:00
parent a076380695
commit 162cb7fe98
2 changed files with 50 additions and 30 deletions

View File

@ -901,14 +901,6 @@ bool FileInterner::idocToFile(TempFile& otemp, const string& tofile,
{ {
LOGDEB(("FileInterner::idocToFile\n")); LOGDEB(("FileInterner::idocToFile\n"));
idoc.dump(); idoc.dump();
string fn = urltolocalpath(idoc.url);
string ipath = idoc.ipath;
string mtype = idoc.mimetype;
struct stat st;
if (stat(fn.c_str(), &st) < 0) {
LOGERR(("FileInterner::idocToFile: can't stat [%s]\n", fn.c_str()));
return false;
}
string tmpdir, reason; string tmpdir, reason;
if (!maketmpdir(tmpdir, reason)) if (!maketmpdir(tmpdir, reason))
@ -918,20 +910,32 @@ bool FileInterner::idocToFile(TempFile& otemp, const string& tofile,
// We set FIF_forPreview for consistency with the previous version // We set FIF_forPreview for consistency with the previous version
// which determined this by looking at mtype!=null. Probably // which determined this by looking at mtype!=null. Probably
// doesn't change anything in this case. // doesn't change anything in this case.
FileInterner interner(fn, &st, cnf, tmpdir, FIF_forPreview, &mtype); FileInterner interner(idoc, cnf, tmpdir, FIF_forPreview);
interner.setTargetMType(mtype); interner.setTargetMType(idoc.mimetype);
Rcl::Doc doc; Rcl::Doc doc;
string mipath = ipath; string mipath = idoc.ipath;
Status ret = interner.internfile(doc, mipath); Status ret = interner.internfile(doc, mipath);
if (ret == FileInterner::FIError) { if (ret == FileInterner::FIError) {
LOGERR(("FileInterner::idocToFile: internfile() failed\n")); LOGERR(("FileInterner::idocToFile: internfile() failed\n"));
return false; return false;
} }
// Specialcase text/html. This is to work around a bug that will
// get fixed some day: internfile initialisation does not check
// targetmtype, so that at least one conversion is always
// performed. A common case would be an "Open" on an html file
// (we'd end up with text/plain content). As the html version is
// saved in this case, use it.
if (!stringlowercmp("text/html", idoc.mimetype) &&
!interner.get_html().empty()) {
doc.text = interner.get_html();
doc.mimetype = "text/html";
}
string filename; string filename;
TempFile temp; TempFile temp;
if (tofile.empty()) { if (tofile.empty()) {
TempFile temp1(new TempFileInternal(cnf->getSuffixFromMimeType(mtype))); TempFile temp1(new TempFileInternal(cnf->getSuffixFromMimeType(idoc.mimetype)));
temp = temp1; temp = temp1;
if (!temp->ok()) { if (!temp->ok()) {
LOGERR(("FileInterner::idocToFile: cant create temporary file")); LOGERR(("FileInterner::idocToFile: cant create temporary file"));

View File

@ -41,11 +41,26 @@ class Doc;
struct stat; struct stat;
/** /**
* A class to convert a file into possibly multiple documents in internal * A class to convert data from a datastore (file-system, firefox
* representation. * history, etc.) into possibly one or several documents in internal
* representation, either for indexing or viewing at query time (gui preview).
* Things work a little differently when indexing or previewing:
* - When indexing, all data has to come from the datastore, and it is
* normally desired that all found subdocuments be returned (ie:
* all messages and attachments out of a single file mail folder)
* - When previewing, some data is taken from the index (ie: the mime type
* is already known), and a single document usually needs to be processed,
* so that the full doc identifier is passed in: high level url
* (ie: file path) and internal identifier: ipath, ie: message and
* attachment number.
*/ */
class FileInterner { class FileInterner {
public: public:
/// Operation modifier flags
enum Flags {FIF_none, FIF_forPreview, FIF_doUseInputMimetype};
/// Return values for internfile()
enum Status {FIError, FIDone, FIAgain};
/** /**
* Get immediate parent for document. * Get immediate parent for document.
* *
@ -55,14 +70,15 @@ class FileInterner {
*/ */
static bool getEnclosing(const string &url, const string &ipath, static bool getEnclosing(const string &url, const string &ipath,
string &eurl, string &eipath, string& udi); string &eurl, string &eipath, string& udi);
/// Operation modifier flags
enum Flags {FIF_none, FIF_forPreview, FIF_doUseInputMimetype}; /** Constructors take the initial step to preprocess the data object and
* create the top filter */
/** /**
* Identify and possibly decompress file, and create the top filter * Identify and possibly decompress file, and create the top filter.
* The mtype parameter is not always set (it is when the object is * - The mtype parameter is not always set (it is when the object is
* created for previewing a file). Filter output may be * created for previewing a file).
* different for previewing and indexing. * - Filter output may be different for previewing and indexing.
* *
* @param fn file name * @param fn file name
* @param stp pointer to updated stat struct. * @param stp pointer to updated stat struct.
@ -95,9 +111,6 @@ class FileInterner {
~FileInterner(); ~FileInterner();
/// Return values for internfile()
enum Status {FIError, FIDone, FIAgain};
/** /**
* Turn file or file part into Recoll document. * Turn file or file part into Recoll document.
* *
@ -106,14 +119,16 @@ class FileInterner {
* @param doc output document * @param doc output document
* @param ipath internal path. If set by caller, the specified subdoc will * @param ipath internal path. If set by caller, the specified subdoc will
* be returned. Else the next document according to current state will * be returned. Else the next document according to current state will
* be returned, and the internal path will be set. * be returned, and ipath will be set on output.
* @return FIError and FIDone are self-explanatory. If FIAgain is returned, * @return FIError and FIDone are self-explanatory. If FIAgain is returned,
* this is a multi-document file, with more subdocs, and internfile() * this is a multi-document file, with more subdocs, and internfile()
* should be called again to get the following one(s). * should be called again to get the following one(s).
*/ */
Status internfile(Rcl::Doc& doc, string &ipath); Status internfile(Rcl::Doc& doc, string &ipath);
/** Return the file's mimetype (useful for container files) */ /** Return the file's (top level object) mimetype (useful for
* container files)
*/
const string& getMimetype() {return m_mimetype;} const string& getMimetype() {return m_mimetype;}
/** We normally always return text/plain data. A caller can request /** We normally always return text/plain data. A caller can request
@ -122,7 +137,9 @@ class FileInterner {
*/ */
void setTargetMType(const string& tp) {m_targetMType = tp;} void setTargetMType(const string& tp) {m_targetMType = tp;}
/* In case we see an html version, it's set aside and can be recovered */ /** In case we see an html version while converting, it is set aside
* and can be recovered
*/
const string& get_html() {return m_html;} const string& get_html() {return m_html;}
/** Extract internal document into temporary file. /** Extract internal document into temporary file.
@ -133,9 +150,8 @@ class FileInterner {
* away magically). Only used if tofile.empty() * away magically). Only used if tofile.empty()
* @param tofile output file if not null * @param tofile output file if not null
* @param cnf The recoll config * @param cnf The recoll config
* @param fn The main document from which to extract * @param doc Doc data taken from the index. We use it to access the
* @param ipath The internal path to the subdoc * actual document (ie: use mtype, fn, ipath...).
* @param mtype The target mime type (we don't want to decode to text!)
*/ */
static bool idocToFile(TempFile& temp, const string& tofile, static bool idocToFile(TempFile& temp, const string& tofile,
RclConfig *cnf, const Rcl::Doc& doc); RclConfig *cnf, const Rcl::Doc& doc);