From 54b8bea942131e3351f6ff476c1a8c70dfdc3c1d Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Thu, 8 Aug 2019 13:44:41 +0200 Subject: [PATCH] mbox: new std::stream-based implementation of offset cache ok on windows.. --- src/internfile/mh_mbox.cpp | 698 +++++++++++++++++++------------------ src/internfile/mh_mbox.h | 21 +- 2 files changed, 362 insertions(+), 357 deletions(-) diff --git a/src/internfile/mh_mbox.cpp b/src/internfile/mh_mbox.cpp index 18d77e37..9148a92a 100644 --- a/src/internfile/mh_mbox.cpp +++ b/src/internfile/mh_mbox.cpp @@ -1,4 +1,4 @@ -/* Copyright (C) 2005 J.F.Dockes +/* Copyright (C) 2005-2019 J.F.Dockes * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -14,9 +14,10 @@ * Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ -#include "autoconfig.h" -#include +#include "autoconfig.h" +#define _FILE_OFFSET_BITS 64 + #include #include #include @@ -24,11 +25,11 @@ #include #include #include +#include #include "cstr.h" #include "mimehandler.h" #include "log.h" -#include "readfile.h" #include "mh_mbox.h" #include "smallut.h" #include "rclconfig.h" @@ -37,271 +38,10 @@ #include "pathut.h" using namespace std; -#ifdef _WIN32 -#define fseeko _fseeki64 -#define ftello _ftelli64 -#endif // Define maximum message size for safety. 100MB would seem reasonable static const unsigned int max_mbox_member_size = 100 * 1024 * 1024; -// Automatic fp closing -class FpKeeper { -public: - FpKeeper(FILE **fpp) - : m_fpp(fpp) {} - ~FpKeeper() { - if (m_fpp && *m_fpp) { - fclose(*m_fpp); - *m_fpp = 0; - } - } -private: - FILE **m_fpp; -}; - -static std::mutex o_mcache_mutex; - -/** - * Handles a cache for message numbers to offset translations. 
Permits direct - * accesses inside big folders instead of having to scan up to the right place - * - * Message offsets are saved to files stored under cfg(mboxcachedir), default - * confdir/mboxcache. Mbox files smaller than cfg(mboxcacheminmbs) are not - * cached. - * Cache files are named as the md5 of the file UDI, which is kept in - * the first block for possible collision detection. The 64 bits - * offsets for all message "From_" lines follow. The format is purely - * binary, values are not even byte-swapped to be proc-idependant. - */ - -#define M_o_b1size 1024 - -class MboxCache { -public: - MboxCache() - : m_ok(false), m_minfsize(0) { - // Can't access rclconfig here, we're a static object, would - // have to make sure it's initialized. - } - - ~MboxCache() {} - - int64_t get_offset(RclConfig *config, const string& udi, int msgnum) { - LOGDEB0("MboxCache::get_offsets: udi [" << udi << "] msgnum " - << msgnum << "\n"); - if (!ok(config)) { - LOGDEB0("MboxCache::get_offsets: init failed\n"); - return -1; - } - std::unique_lock locker(o_mcache_mutex); - string fn = makefilename(udi); - FILE *fp = 0; - if ((fp = fopen(fn.c_str(), "rb")) == 0) { - LOGSYSERR("MboxCache::get_offsets", "open", fn); - return -1; - } - FpKeeper keeper(&fp); - - char blk1[M_o_b1size]; - if (fread(blk1, M_o_b1size, 1, fp) != 1) { - LOGSYSERR("MboxCache::get_offsets", "read blk1", ""); - return -1; - } - ConfSimple cf(string(blk1, M_o_b1size)); - string fudi; - if (!cf.get("udi", fudi) || fudi.compare(udi)) { - LOGINFO("MboxCache::get_offset:badudi fn " << fn << " udi [" - << udi << "], fudi [" << fudi << "]\n"); - return -1; - } - LOGDEB1("MboxCache::get_offsets: reading offsets file at offs " - << cacheoffset(msgnum) << "\n"); - if (fseeko(fp, cacheoffset(msgnum), SEEK_SET) != 0) { - LOGSYSERR("MboxCache::get_offsets", "seek", - lltodecstr(cacheoffset(msgnum))); - return -1; - } - int64_t offset = -1; - size_t ret; - if ((ret = fread(&offset, sizeof(int64_t), 1, fp)) != 1) { - 
LOGSYSERR("MboxCache::get_offsets", "read", ""); - return -1; - } - LOGDEB0("MboxCache::get_offsets: ret " << offset << "\n"); - return offset; - } - - // Save array of offsets for a given file, designated by Udi - void put_offsets(RclConfig *config, const string& udi, int64_t fsize, - vector& offs) { - LOGDEB0("MboxCache::put_offsets: " << offs.size() << " offsets\n"); - if (!ok(config) || !maybemakedir()) - return; - if (fsize < m_minfsize) { - LOGDEB0("MboxCache::put_offsets: fsize " << fsize << " < minsize " << - m_minfsize << endl); - return; - } - std::unique_lock locker(o_mcache_mutex); - string fn = makefilename(udi); - FILE *fp; - if ((fp = fopen(fn.c_str(), "wb")) == 0) { - LOGSYSERR("MboxCache::put_offsets", "fopen", fn); - return; - } - FpKeeper keeper(&fp); - string blk1("udi="); - blk1.append(udi); - blk1.append(cstr_newline); - blk1.resize(M_o_b1size, 0); - if (fwrite(blk1.c_str(), M_o_b1size, 1, fp) != 1) { - LOGSYSERR("MboxCache::put_offsets", "fwrite blk1", ""); - return; - } - - for (const auto& off : offs) { - LOGDEB1("MboxCache::put_offsets: writing value " << off << - " at offset " << ftello(fp) << endl); - if (fwrite((char*)&off, sizeof(int64_t), 1, fp) != 1) { - LOGSYSERR("MboxCache::put_offsets", "fwrite", ""); - return; - } - } - } - - // Check state, possibly initialize - bool ok(RclConfig *config) { - std::unique_lock locker(o_mcache_mutex); - if (m_minfsize == -1) - return false; - if (!m_ok) { - int minmbs = 5; - config->getConfParam("mboxcacheminmbs", &minmbs); - if (minmbs < 0) { - // minmbs set to negative to disable cache - m_minfsize = -1; - return false; - } - m_minfsize = minmbs * 1000 * 1000; - - m_dir = config->getMboxcacheDir(); - m_ok = true; - } - return m_ok; - } - -private: - bool m_ok; - - // Place where we store things - string m_dir; - // Don't cache smaller files. If -1, don't do anything. 
- int64_t m_minfsize; - - // Create the cache directory if it does not exist - bool maybemakedir() { - if (!path_makepath(m_dir, 0700)) { - LOGSYSERR("MboxCache::maybemakedir", "path_makepath", m_dir); - return false; - } - return true; - } - // Compute file name from udi - string makefilename(const string& udi) { - string digest, xdigest; - MD5String(udi, digest); - MD5HexPrint(digest, xdigest); - return path_cat(m_dir, xdigest); - } - - // Compute offset in cache file for the mbox offset of msgnum - // Msgnums are from 1 - int64_t cacheoffset(int msgnum) { - return M_o_b1size + (msgnum-1) * sizeof(int64_t); - } -}; - -static class MboxCache o_mcache; - -static const string cstr_keyquirks("mhmboxquirks"); - -MimeHandlerMbox::~MimeHandlerMbox() -{ - clear(); -} - -void MimeHandlerMbox::clear_impl() -{ - m_fn.erase(); - if (m_vfp) { - fclose((FILE *)m_vfp); - m_vfp = 0; - } - m_msgnum = m_lineno = 0; - m_ipath.erase(); - m_offsets.clear(); -} - -bool MimeHandlerMbox::set_document_file_impl(const string& mt, const string &fn) -{ - LOGDEB("MimeHandlerMbox::set_document_file(" << fn << ")\n"); - m_fn = fn; - if (m_vfp) { - fclose((FILE *)m_vfp); - m_vfp = 0; - } - - m_vfp = fopen(fn.c_str(), "rb"); - if (m_vfp == 0) { - LOGSYSERR("MimeHandlerMail::set_document_file", "fopen rb", fn); - return false; - } -#if defined O_NOATIME && O_NOATIME != 0 - if (fcntl(fileno((FILE *)m_vfp), F_SETFL, O_NOATIME) < 0) { - // perror("fcntl"); - } -#endif - // Used to use ftell() here: no good beyond 2GB - m_fsize = path_filesize(fn); - m_havedoc = true; - m_offsets.clear(); - m_quirks = 0; - - // Check for location-based quirks: - string quirks; - if (m_config && m_config->getConfParam(cstr_keyquirks, quirks)) { - if (quirks == "tbird") { - LOGDEB("MimeHandlerMbox: setting quirks TBIRD\n"); - m_quirks |= MBOXQUIRK_TBIRD; - } - } - - // And double check for thunderbird - string tbirdmsf = fn + ".msf"; - if ((m_quirks&MBOXQUIRK_TBIRD) == 0 && path_exists(tbirdmsf)) { - 
LOGDEB("MimeHandlerMbox: detected unconfigured tbird mbox in " << fn << - "\n"); - m_quirks |= MBOXQUIRK_TBIRD; - } - - return true; -} - -#define LL 1024 -typedef char line_type[LL+10]; -static inline void stripendnl(line_type& line, int& ll) -{ - ll = int(strlen(line)); - while (ll > 0) { - if (line[ll-1] == '\n' || line[ll-1] == '\r') { - line[ll-1] = 0; - ll--; - } else - break; - } -} - // The mbox format uses lines beginning with 'From ' as separator. // Mailers are supposed to quote any other lines beginning with // 'From ', turning it into '>From '. This should make it easy to detect @@ -365,98 +105,377 @@ static const string miniTbirdFrom{"^From $"}; static SimpleRegexp fromregex(frompat, SimpleRegexp::SRE_NOSUB); static SimpleRegexp minifromregex(miniTbirdFrom, SimpleRegexp::SRE_NOSUB); +static std::mutex o_mcache_mutex; + +/** + * Handles a cache for message numbers to offset translations. Permits direct + * accesses inside big folders instead of having to scan up to the right place + * + * Message offsets are saved to files stored under cfg(mboxcachedir), default + * confdir/mboxcache. Mbox files smaller than cfg(mboxcacheminmbs) are not + * cached. + * Cache files are named as the md5 of the file UDI, which is kept in + * the first block for possible collision detection. The 64 bits + * offsets for all message "From_" lines follow. The format is purely + * binary, values are not even byte-swapped to be proc-idependant. + */ + +#define M_o_b1size 1024 + +class MboxCache { +public: + MboxCache() { + // Can't access rclconfig here, we're a static object, would + // have to make sure it's initialized. 
+    }
+
+    ~MboxCache() {}
+    // Cached mbox offset of message msgnum (from 1) for udi, or -1 if unavailable
+    int64_t get_offset(RclConfig *config, const string& udi, int msgnum) {
+        LOGDEB0("MboxCache::get_offsets: udi [" << udi << "] msgnum "
+                << msgnum << "\n");
+        if (!ok(config)) {
+            LOGDEB("MboxCache::get_offsets: init failed\n");
+            return -1;
+        }
+        std::unique_lock<std::mutex> locker(o_mcache_mutex);
+        string fn = makefilename(udi);
+        ifstream instream(fn.c_str(), std::ifstream::binary);
+        if (!instream.good()) {
+            LOGSYSERR("MboxCache::get_offsets", "open", fn);
+            return -1; // Not 'false': that converts to 0, a valid offset
+        }
+        char blk1[M_o_b1size];
+        instream.read(blk1, M_o_b1size);
+        if (!instream.good()) {
+            LOGSYSERR("MboxCache::get_offsets", "read blk1", "");
+            return -1;
+        }
+        ConfSimple cf(string(blk1, M_o_b1size));
+        string fudi;
+        if (!cf.get("udi", fudi) || fudi.compare(udi)) {
+            LOGINFO("MboxCache::get_offset:badudi fn " << fn << " udi ["
+                    << udi << "], fudi [" << fudi << "]\n");
+            return -1;
+        }
+        LOGDEB1("MboxCache::get_offsets: reading offsets file at offs "
+                << cacheoffset(msgnum) << "\n");
+        instream.seekg(cacheoffset(msgnum));
+        if (!instream.good()) {
+            LOGSYSERR("MboxCache::get_offsets", "seek",
+                      lltodecstr(cacheoffset(msgnum)));
+            return -1;
+        }
+        int64_t offset = -1;
+        instream.read((char*)&offset, sizeof(int64_t));
+        if (!instream.good()) {
+            LOGSYSERR("MboxCache::get_offsets", "read", "");
+            return -1;
+        }
+        LOGDEB0("MboxCache::get_offsets: ret " << offset << "\n");
+        return offset;
+    }
+
+    // Save the offsets for the file designated by udi. No-op if the cache is off or fsize < minimum
+    void put_offsets(RclConfig *config, const string& udi, int64_t fsize,
+                     vector<int64_t>& offs) {
+        LOGDEB0("MboxCache::put_offsets: " << offs.size() << " offsets\n");
+        if (!ok(config) || !maybemakedir())
+            return;
+        if (fsize < m_minfsize) {
+            LOGDEB0("MboxCache::put_offsets: fsize " << fsize << " < minsize "
+                    << m_minfsize << endl);
+            return;
+        }
+        std::unique_lock<std::mutex> locker(o_mcache_mutex);
+        string fn = makefilename(udi);
+        std::ofstream os(fn.c_str(), std::ios::out|std::ios::binary);
+        if (!os.good()) {
+            LOGSYSERR("MboxCache::put_offsets", "open", fn);
+            return;
+        }
+        string blk1("udi=");
+        blk1.append(udi);
+        blk1.append(cstr_newline);
+        blk1.resize(M_o_b1size, 0);
+        os.write(blk1.c_str(), M_o_b1size);
+        if (!os.good()) {
+            LOGSYSERR("MboxCache::put_offsets", "write blk1", "");
+            return;
+        }
+
+        for (const auto& off : offs) {
+            LOGDEB1("MboxCache::put_offsets: writing value " << off <<
+                    " at offset " << int64_t(os.tellp()) << endl);
+            os.write((char*)&off, sizeof(int64_t));
+            if (!os.good()) {
+                LOGSYSERR("MboxCache::put_offsets", "write", "");
+                return;
+            }
+        }
+        os.flush();
+        if (!os.good()) {
+            LOGSYSERR("MboxCache::put_offsets", "flush", "");
+            return;
+        }
+    }
+
+    // Check state, possibly initialize from config. False if mboxcacheminmbs < 0 (cache disabled)
+    bool ok(RclConfig *config) {
+        std::unique_lock<std::mutex> locker(o_mcache_mutex);
+        if (m_minfsize == -1)
+            return false;
+        if (!m_ok) {
+            int minmbs = 5;
+            config->getConfParam("mboxcacheminmbs", &minmbs);
+            if (minmbs < 0) {
+                // minmbs set to negative to disable cache
+                m_minfsize = -1;
+                return false;
+            }
+            m_minfsize = int64_t(minmbs) * 1000 * 1000; // widen first: no int overflow
+
+            m_dir = config->getMboxcacheDir();
+            m_ok = true;
+        }
+        return m_ok;
+    }
+
+private:
+    bool m_ok{false};
+    // Place where we store things
+    string m_dir;
+    // Don't cache smaller files. If -1, don't do anything.
+    int64_t m_minfsize{0};
+
+    // Create the cache directory if it does not exist
+    bool maybemakedir() {
+        if (!path_makepath(m_dir, 0700)) {
+            LOGSYSERR("MboxCache::maybemakedir", "path_makepath", m_dir);
+            return false;
+        }
+        return true;
+    }
+    // Compute file name from udi: hex MD5 of the udi, inside the cache directory
+    string makefilename(const string& udi) {
+        string digest, xdigest;
+        MD5String(udi, digest);
+        MD5HexPrint(digest, xdigest);
+        return path_cat(m_dir, xdigest);
+    }
+    // Compute offset in cache file for the mbox offset of msgnum
+    // Msgnums are from 1. The first M_o_b1size bytes hold the udi header block.
+    int64_t cacheoffset(int msgnum) {
+        return M_o_b1size + (msgnum-1) * sizeof(int64_t);
+    }
+};
+
+static class MboxCache o_mcache;
+
+static const string cstr_keyquirks("mhmboxquirks");
+
+enum Quirks {MBOXQUIRK_TBIRD=1};
+
+class MimeHandlerMbox::Internal {
+public:
+    Internal(MimeHandlerMbox *p) : pthis(p) {}
+    std::string fn;     // File name
+    std::string ipath;
+    ifstream instream;
+    int msgnum{0};      // Current message number in folder. Starts at 1
+    int64_t lineno{0};  // debug
+    int64_t fsize{0};
+    std::vector<int64_t> offsets;
+    int quirks{0};      // MBOXQUIRK_* bits. Initialized: it is tested with '&' and '|='
+    MimeHandlerMbox *pthis;
+    bool tryUseCache(int mtarg);
+};
+
+MimeHandlerMbox::MimeHandlerMbox(RclConfig *cnf, const std::string& id)
+    : RecollFilter(cnf, id)
+{
+    m = new Internal(this);
+}
+
+MimeHandlerMbox::~MimeHandlerMbox()
+{
+    if (m) {
+        clear();
+        delete m;
+    }
+}
+
+void MimeHandlerMbox::clear_impl()
+{
+    m->fn.erase();
+    m->ipath.erase();
+    m->instream = ifstream();
+    m->msgnum = m->lineno = m->fsize = 0;
+    m->offsets.clear();
+    m->quirks = 0;
+}
+
+bool MimeHandlerMbox::skip_to_document(const std::string& ipath) {
+    m->ipath = ipath;
+    return true;
+}
+
+bool MimeHandlerMbox::set_document_file_impl(const string& mt, const string &fn)
+{
+    LOGDEB("MimeHandlerMbox::set_document_file(" << fn << ")\n");
+    clear_impl();
+    m->fn = fn;
+    m->instream = ifstream(fn.c_str(), std::ifstream::binary);
+    if (!m->instream.good()) {
+        LOGSYSERR("MimeHandlerMbox::set_document_file", "ifstream", fn);
+        return false;
+    }
+
+    // TBD
+#if 0 && defined O_NOATIME && O_NOATIME != 0
+    if (fcntl(fileno((FILE *)m->vfp), F_SETFL, O_NOATIME) < 0) {
+        // perror("fcntl");
+    }
+#endif
+
+    m->fsize = path_filesize(fn);
+    m_havedoc = true;
+
+    // Check for location-based quirks:
+    string quirks;
+    if (m_config && m_config->getConfParam(cstr_keyquirks, quirks)) {
+        if (quirks == "tbird") {
+            LOGDEB("MimeHandlerMbox: setting quirks TBIRD\n");
+            m->quirks |= MBOXQUIRK_TBIRD;
+        }
+    }
+
+    // And double check for thunderbird
+    string tbirdmsf = fn + ".msf";
+    if (!(m->quirks & MBOXQUIRK_TBIRD) && path_exists(tbirdmsf)) {
+        LOGDEB("MimeHandlerMbox: detected unconf'd tbird mbox in "<< fn <<"\n");
+        m->quirks |= MBOXQUIRK_TBIRD;
+    }
+
+    return true;
+}
+// Try to position instream on message mtarg's From_ line using the offsets cache. True on success
+bool MimeHandlerMbox::Internal::tryUseCache(int mtarg)
+{
+    bool cachefound = false;
+    string line;
+    int64_t off;
+
+    LOGDEB0("MimeHandlerMbox::next_doc: mtarg " << mtarg << " m_udi[" <<
+            pthis->m_udi << "]\n");
+    if (pthis->m_udi.empty()) {
+        goto out;
+    }
+    if ((off = o_mcache.get_offset(pthis->m_config, pthis->m_udi, mtarg)) < 0) {
+        goto out;
+    }
+    instream.seekg(off);
+    if (!instream.good()) {
+        LOGSYSERR("tryUseCache", "seekg", "");
+        goto out;
+    }
+    getline(instream, line, '\n');
+    if (!instream.good()) {
+        LOGSYSERR("tryUseCache", "getline", "");
+        goto out;
+    }
+    LOGDEB1("MimeHandlerMbox::tryUseCache:getl ok. line:[" << line << "]\n");
+
+    if ((fromregex(line) ||
+         ((quirks & MBOXQUIRK_TBIRD) && minifromregex(line))) ) {
+        LOGDEB0("MimeHandlerMbox: Cache: From_ Ok\n");
+        instream.seekg(off);
+        msgnum = mtarg -1;
+        cachefound = true;
+    } else {
+        LOGDEB0("MimeHandlerMbox: cache: regex failed for [" << line << "]\n");
+    }
+
+out:
+    if (!cachefound) {
+        instream.clear(); // No cached result: scan. clear() first: seekg() does nothing on a failed stream
+ instream.seekg(0); + msgnum = 0; + } + return cachefound; +} + bool MimeHandlerMbox::next_document() { - if (m_vfp == 0) { + if (!m->instream.good()) { LOGERR("MimeHandlerMbox::next_document: not open\n"); return false; } if (!m_havedoc) { return false; } - FILE *fp = (FILE *)m_vfp; + int mtarg = 0; - if (!m_ipath.empty()) { - sscanf(m_ipath.c_str(), "%d", &mtarg); + if (!m->ipath.empty()) { + sscanf(m->ipath.c_str(), "%d", &mtarg); } else if (m_forPreview) { // Can't preview an mbox. LOGDEB("MimeHandlerMbox::next_document: can't preview folders!\n"); return false; } - LOGDEB0("MimeHandlerMbox::next_document: fn " << m_fn << ", msgnum " << - m_msgnum << " mtarg " << mtarg << " \n"); + LOGDEB0("MimeHandlerMbox::next_document: fn " << m->fn << ", msgnum " << + m->msgnum << " mtarg " << mtarg << " \n"); + if (mtarg == 0) mtarg = -1; - - // If we are called to retrieve a specific message, seek to bof - // (then scan up to the message). This is for the case where the - // same object is reused to fetch several messages (else the fp is - // just opened no need for a seek). We could also check if the - // current message number is lower than the requested one and - // avoid rereading the whole thing in this case. But I'm not sure - // we're ever used in this way (multiple retrieves on same - // object). So: + // If we are called to retrieve a specific message, try to use the + // offsets cache to try and position to the right header. 
bool storeoffsets = true; if (mtarg > 0) { - int64_t off; - line_type line; - LOGDEB0("MimeHandlerMbox::next_doc: mtarg " << mtarg << " m_udi[" << - m_udi << "]\n"); - if (!m_udi.empty()) { - LOGDEB("MimeHandlerMbox::next_doc: udi not empty\n"); - if ((off = o_mcache.get_offset(m_config, m_udi, mtarg)) >= 0) { - LOGDEB1("MimeHandlerMbox::next_doc: got offset " << off << - " from cache\n"); - if (fseeko(fp, off, SEEK_SET) >= 0) { - LOGDEB1("MimeHandlerMbox::next_doc: fseeko ok\n"); - if (fgets(line, LL, fp)) { - LOGDEB1("MimeHandlerMbox::next_doc: fgets ok. line:[" << - line << "]\n"); - if ((fromregex(line) || ((m_quirks & MBOXQUIRK_TBIRD) && - minifromregex(line))) ) { - LOGDEB0("MimeHandlerMbox: Cache: From_ Ok\n"); - fseeko(fp, off, SEEK_SET); - m_msgnum = mtarg -1; - storeoffsets = false; - } else { - LOGDEB0("MimeHandlerMbox: cache: regex failed\n"); - } - } - } - } - } - if (storeoffsets) { - // No cached result: scan. - fseek(fp, 0, SEEK_SET); - m_msgnum = 0; - } + storeoffsets = !m->tryUseCache(mtarg); } - off_t message_end = 0; + int64_t message_end = 0; bool iseof = false; bool hademptyline = true; string& msgtxt = m_metaData[cstr_dj_keycontent]; msgtxt.erase(); - line_type line; + string line; for (;;) { - message_end = ftello(fp); - if (!fgets(line, LL, fp)) { - LOGDEB2("MimeHandlerMbox:next: eof\n"); + message_end = m->instream.tellg(); + getline(m->instream, line, '\n'); + if (!m->instream.good()) { + ifstream::iostate st = m->instream.rdstate(); + if (st & std::ifstream::eofbit) { + LOGDEB0("MimeHandlerMbox:next: eof\n"); + } + if (st & std::ifstream::failbit) { + LOGDEB0("MimeHandlerMbox:next: fail\n"); + LOGSYSERR("MimeHandlerMbox:next:", "", ""); + } + if (st & std::ifstream::badbit) { + LOGDEB0("MimeHandlerMbox:next: bad\n"); + LOGSYSERR("MimeHandlerMbox:next:", "", ""); + } + if (st & std::ifstream::goodbit) { + LOGDEB0("MimeHandlerMbox:next: good\n"); + } + LOGDEB0("MimeHandlerMbox:next: eof at " << message_end << endl); iseof = true; - 
m_msgnum++; + m->msgnum++; break; } - m_lineno++; - int ll; - stripendnl(line, ll); + m->lineno++; + rtrimstring(line, "\r\n"); + int ll = line.size(); LOGDEB2("mhmbox:next: hadempty " << hademptyline << " lineno " << - m_lineno << " ll " << ll << " Line: [" << line << "]\n"); + m->lineno << " ll " << ll << " Line: [" << line << "]\n"); if (hademptyline) { if (ll > 0) { // Non-empty line with empty line flag set, reset flag // and check regex. - if (!(m_quirks & MBOXQUIRK_TBIRD)) { + if (!(m->quirks & MBOXQUIRK_TBIRD)) { // Tbird sometimes ommits the empty line, so avoid // resetting state (initially true) and hope for // the best @@ -466,18 +485,18 @@ bool MimeHandlerMbox::next_document() A LOT */ if (line[0] == 'F' && ( fromregex(line) || - ((m_quirks & MBOXQUIRK_TBIRD) && minifromregex(line))) + ((m->quirks & MBOXQUIRK_TBIRD) && minifromregex(line))) ) { - LOGDEB0("MimeHandlerMbox: msgnum " << m_msgnum << - ", From_ at line " << m_lineno << ": [" << line - << "]\n"); + LOGDEB1("MimeHandlerMbox: msgnum " << m->msgnum << + ", From_ at line " << m->lineno << " foffset " << + message_end << " line: [" << line << "]\n"); + if (storeoffsets) { - LOGDEB1("Pushing offset: " << message_end << endl); - m_offsets.push_back(message_end); + m->offsets.push_back(message_end); } - m_msgnum++; - if ((mtarg <= 0 && m_msgnum > 1) || - (mtarg > 0 && m_msgnum > mtarg)) { + m->msgnum++; + if ((mtarg <= 0 && m->msgnum > 1) || + (mtarg > 0 && m->msgnum > mtarg)) { // Got message, go do something with it break; } @@ -489,15 +508,14 @@ bool MimeHandlerMbox::next_document() hademptyline = true; } - if (mtarg <= 0 || m_msgnum == mtarg) { + if (mtarg <= 0 || m->msgnum == mtarg) { // Accumulate message lines - line[ll] = '\n'; - line[ll+1] = 0; + line += '\n'; msgtxt += line; if (msgtxt.size() > max_mbox_member_size) { LOGERR("mh_mbox: huge message (more than " << max_mbox_member_size/(1024*1024) << " MB) inside " << - m_fn << ", giving up\n"); + m->fn << ", giving up\n"); return false; } } 
@@ -505,16 +523,16 @@ bool MimeHandlerMbox::next_document() LOGDEB2("Message text length " << msgtxt.size() << "\n"); LOGDEB2("Message text: [" << msgtxt << "]\n"); char buf[20]; - // m_msgnum was incremented when hitting the next From_ or eof, so the data - // is for m_msgnum - 1 - sprintf(buf, "%d", m_msgnum - 1); + // m->msgnum was incremented when hitting the next From_ or eof, so the data + // is for m->msgnum - 1 + sprintf(buf, "%d", m->msgnum - 1); m_metaData[cstr_dj_keyipath] = buf; m_metaData[cstr_dj_keymt] = "message/rfc822"; if (iseof) { LOGDEB2("MimeHandlerMbox::next: eof hit\n"); m_havedoc = false; if (!m_udi.empty() && storeoffsets) { - o_mcache.put_offsets(m_config, m_udi, m_fsize, m_offsets); + o_mcache.put_offsets(m_config, m_udi, m->fsize, m->offsets); } } return msgtxt.empty() ? false : true; diff --git a/src/internfile/mh_mbox.h b/src/internfile/mh_mbox.h index a89521f3..7e48ac58 100644 --- a/src/internfile/mh_mbox.h +++ b/src/internfile/mh_mbox.h @@ -30,32 +30,19 @@ */ class MimeHandlerMbox : public RecollFilter { public: - MimeHandlerMbox(RclConfig *cnf, const std::string& id) - : RecollFilter(cnf, id), m_vfp(0), m_msgnum(0), - m_lineno(0), m_fsize(0) { - } + MimeHandlerMbox(RclConfig *cnf, const std::string& id); virtual ~MimeHandlerMbox(); virtual bool next_document() override; - virtual bool skip_to_document(const std::string& ipath) override{ - m_ipath = ipath; - return true; - } + virtual bool skip_to_document(const std::string& ipath) override; virtual void clear_impl() override; protected: virtual bool set_document_file_impl(const std::string&, const std::string&) override; + class Internal; private: - std::string m_fn; // File name - void *m_vfp; // File pointer for folder - int m_msgnum; // Current message number in folder. 
Starts at 1 - std::string m_ipath; - int64_t m_lineno; // debug - int64_t m_fsize; - std::vector m_offsets; - enum Quirks {MBOXQUIRK_TBIRD=1}; - int m_quirks; + Internal *m{nullptr}; }; #endif /* _MBOX_H_INCLUDED_ */