implemented maxmemberkb limit for multidoc (e.g. archive) members
This commit is contained in:
parent
1329265b7b
commit
29fe1e4927
@ -17,6 +17,11 @@ class RclExecM:
|
||||
def __init__(self):
    """Set up the filter's name, MIME type and archive member size limit.

    The limit is read from the RECOLL_FILTER_MAXMEMBERKB environment
    variable (a count of kilobytes) and stored, in bytes, in
    self.maxmembersize. A missing, non-numeric or zero value selects the
    built-in default of 50 * 1024 * 1024 bytes.
    """
    self.myname = os.path.basename(sys.argv[0])
    self.mimetype = ""
    # os.environ.get() returns None when the variable is not set, and
    # int(None) raises TypeError; a non-numeric string raises ValueError.
    # Fall back to the default instead of failing during construction.
    try:
        maxkb = int(os.environ.get("RECOLL_FILTER_MAXMEMBERKB", "0"))
    except (TypeError, ValueError):
        maxkb = 0
    if maxkb:
        self.maxmembersize = maxkb * 1024
    else:
        self.maxmembersize = 50 * 1024 * 1024
|
||||
|
||||
def rclog(self, s, doexit = 0, exitvalue = 1):
|
||||
print >> sys.stderr, "RCLMFILT:", self.myname, ":", s
|
||||
|
||||
@ -49,7 +49,7 @@ class RarExtractor:
|
||||
|
||||
if not isdir:
|
||||
try:
|
||||
if rarinfo.file_size > 50 * 1024 * 1024:
|
||||
if rarinfo.file_size > self.em.maxmembersize:
|
||||
self.em.rclog("extractone: entry %s size %d too big" %
|
||||
(ipath, rarinfo.file_size))
|
||||
docdata = ""
|
||||
|
||||
@ -24,7 +24,7 @@ class TarExtractor:
|
||||
docdata = ""
|
||||
try:
|
||||
info = self.tar.getmember(ipath)
|
||||
if info.size > 50 * 1024 * 1024:
|
||||
if info.size > self.em.maxmembersize:
|
||||
# skip
|
||||
docdata = ""
|
||||
self.em.rclog("extractone: entry %s size %d too big" %
|
||||
|
||||
@ -47,7 +47,7 @@ class ZipExtractor:
|
||||
try:
|
||||
info = self.zip.getinfo(ipath)
|
||||
# There could be a 4GB Iso in the zip. We have to set a limit
|
||||
if info.file_size > 50 * 1024*1024:
|
||||
if info.file_size > self.em.maxmembersize:
|
||||
self.em.rclog("extractone: entry %s size %d too big" %
|
||||
(ipath, info.file_size))
|
||||
docdata = ""
|
||||
|
||||
@ -73,6 +73,10 @@ bool MimeHandlerExec::next_document()
|
||||
LOGDEB(("MimeHandlerExec::next_document(): helper known missing\n"));
|
||||
return false;
|
||||
}
|
||||
|
||||
int filtermaxseconds = 900;
|
||||
m_config->getConfParam("filtermaxseconds", &filtermaxseconds);
|
||||
|
||||
if (params.empty()) {
|
||||
// Hu ho
|
||||
LOGERR(("MimeHandlerExec::mkDoc: empty params\n"));
|
||||
@ -80,15 +84,11 @@ bool MimeHandlerExec::next_document()
|
||||
return false;
|
||||
}
|
||||
|
||||
int filtermaxseconds = 900;
|
||||
m_config->getConfParam("filtermaxseconds", &filtermaxseconds);
|
||||
|
||||
// Command name
|
||||
string cmd = params.front();
|
||||
|
||||
// Build parameter vector: delete cmd name and add the file name
|
||||
vector<string>::iterator it = params.begin();
|
||||
vector<string>myparams(++it, params.end());
|
||||
vector<string>myparams(params.begin() + 1, params.end());
|
||||
myparams.push_back(m_fn);
|
||||
if (!m_ipath.empty())
|
||||
myparams.push_back(m_ipath);
|
||||
@ -147,13 +147,18 @@ void MimeHandlerExec::finaldetails()
|
||||
{
|
||||
m_metaData[cstr_dj_keyorigcharset] = m_dfltInputCharset;
|
||||
|
||||
// cfgFilterOutputCharset comes from the mimeconf filter definition line
|
||||
// cfgFilterOutputCharset comes from the mimeconf filter
|
||||
// definition line. If the value is "default", we use the charset
|
||||
// value defined in recoll.conf (which may vary depending on
|
||||
// directory)
|
||||
string& charset = m_metaData[cstr_dj_keycharset];
|
||||
charset = cfgFilterOutputCharset.empty() ? "UTF-8" : cfgFilterOutputCharset;
|
||||
if (!stringlowercmp("default", charset)) {
|
||||
charset = m_dfltInputCharset;
|
||||
}
|
||||
|
||||
// The output mime type is html except if defined otherwise in the filter
|
||||
// definition.
|
||||
string& mt = m_metaData[cstr_dj_keymt];
|
||||
mt = cfgFilterOutputMtype.empty() ? "text/html" :
|
||||
cfgFilterOutputMtype;
|
||||
|
||||
@ -50,13 +50,18 @@ bool MimeHandlerExecMultiple::startCmd()
|
||||
// Command name
|
||||
string cmd = params.front();
|
||||
|
||||
// Build parameter list: delete cmd name
|
||||
vector<string>::iterator it = params.begin();
|
||||
vector<string>myparams(++it, params.end());
|
||||
m_maxmemberkb = 50000;
|
||||
m_config->getConfParam("maxmemberkb", &m_maxmemberkb);
|
||||
ostringstream oss;
|
||||
oss << "RECOLL_FILTER_MAXMEMBERKB=" << m_maxmemberkb;
|
||||
m_cmd.putenv(oss.str());
|
||||
|
||||
// Start filter
|
||||
m_cmd.putenv(m_forPreview ? "RECOLL_FILTER_FORPREVIEW=yes" :
|
||||
"RECOLL_FILTER_FORPREVIEW=no");
|
||||
|
||||
// Build parameter list: delete cmd name
|
||||
vector<string>myparams(params.begin() + 1, params.end());
|
||||
|
||||
if (m_cmd.startExec(cmd, myparams, 1, 1) < 0) {
|
||||
m_reason = string("RECFILTERROR HELPERNOTFOUND ") + cmd;
|
||||
missingHelper = true;
|
||||
@ -116,6 +121,10 @@ bool MimeHandlerExecMultiple::readDataElement(string& name, string &data)
|
||||
return false;
|
||||
}
|
||||
LOGDEB1(("MHExecMultiple: got name [%s] len: %d\n", name.c_str(), len));
|
||||
if (len / 1024 > m_maxmemberkb) {
|
||||
LOGERR(("MHExecMultiple: data len > maxmemberkb\n"));
|
||||
return false;
|
||||
}
|
||||
|
||||
// Hack: check for 'Document:' and read directly the document data
|
||||
// to m_metaData[cstr_dj_keycontent] to avoid an extra copy of the bulky
|
||||
@ -297,7 +306,6 @@ bool MimeHandlerExecMultiple::next_document()
|
||||
(void)txtdcode("mh_execm");
|
||||
}
|
||||
|
||||
|
||||
if (eofnext_received)
|
||||
m_havedoc = false;
|
||||
|
||||
|
||||
@ -107,6 +107,7 @@ private:
|
||||
bool startCmd();
|
||||
bool readDataElement(string& name, string& data);
|
||||
bool m_filefirst;
|
||||
int m_maxmemberkb;
|
||||
};
|
||||
|
||||
#endif /* _MH_EXECM_H_INCLUDED_ */
|
||||
|
||||
@ -590,8 +590,10 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
|
||||
return;
|
||||
}
|
||||
|
||||
// We are dealing with an inline part of text/plain or text/html type
|
||||
|
||||
// We are dealing with an inline part of text/plain or text/html
|
||||
// type. There may be several such parts, which is why we don't
|
||||
// just return a text or html subdoc and let the filter stack
|
||||
// work: we want to concatenate them in place instead
|
||||
|
||||
LOGDEB2(("walkmime: final: body start offset %d, length %d\n",
|
||||
doc->getBodyStartOffset(), doc->getBodyLength()));
|
||||
|
||||
@ -159,6 +159,12 @@ indexallfilenames = 1
|
||||
#
|
||||
# indexedmimetypes = text/html application/pdf
|
||||
|
||||
#
|
||||
# Size limit for archive members, in kilobytes. This is passed to the filters in the
|
||||
# environment as RECOLL_FILTER_MAXMEMBERKB
|
||||
#
|
||||
maxmemberkb = 50000
|
||||
|
||||
# Size limit for compressed files. We need to decompress these in a
|
||||
# temporary directory for identification, which can be wasteful in some
|
||||
# cases. Limit the waste. Negative means no limit. 0 results in no
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user