implemented maxmemberkb limit for multidoc (e.g. archive) members
This commit is contained in:
parent
1329265b7b
commit
29fe1e4927
@ -17,6 +17,11 @@ class RclExecM:
|
|||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.myname = os.path.basename(sys.argv[0])
|
self.myname = os.path.basename(sys.argv[0])
|
||||||
self.mimetype = ""
|
self.mimetype = ""
|
||||||
|
self.maxmembersize = int(os.environ.get("RECOLL_FILTER_MAXMEMBERKB"))
|
||||||
|
if self.maxmembersize:
|
||||||
|
self.maxmembersize = self.maxmembersize * 1024
|
||||||
|
else:
|
||||||
|
self.maxmembersize = 50 * 1024 * 1024
|
||||||
|
|
||||||
def rclog(self, s, doexit = 0, exitvalue = 1):
|
def rclog(self, s, doexit = 0, exitvalue = 1):
|
||||||
print >> sys.stderr, "RCLMFILT:", self.myname, ":", s
|
print >> sys.stderr, "RCLMFILT:", self.myname, ":", s
|
||||||
|
|||||||
@ -49,7 +49,7 @@ class RarExtractor:
|
|||||||
|
|
||||||
if not isdir:
|
if not isdir:
|
||||||
try:
|
try:
|
||||||
if rarinfo.file_size > 50 * 1024 * 1024:
|
if rarinfo.file_size > self.em.maxmembersize:
|
||||||
self.em.rclog("extractone: entry %s size %d too big" %
|
self.em.rclog("extractone: entry %s size %d too big" %
|
||||||
(ipath, rarinfo.file_size))
|
(ipath, rarinfo.file_size))
|
||||||
docdata = ""
|
docdata = ""
|
||||||
|
|||||||
@ -24,7 +24,7 @@ class TarExtractor:
|
|||||||
docdata = ""
|
docdata = ""
|
||||||
try:
|
try:
|
||||||
info = self.tar.getmember(ipath)
|
info = self.tar.getmember(ipath)
|
||||||
if info.size > 50 * 1024 * 1024:
|
if info.size > self.em.maxmembersize:
|
||||||
# skip
|
# skip
|
||||||
docdata = ""
|
docdata = ""
|
||||||
self.em.rclog("extractone: entry %s size %d too big" %
|
self.em.rclog("extractone: entry %s size %d too big" %
|
||||||
|
|||||||
@ -47,7 +47,7 @@ class ZipExtractor:
|
|||||||
try:
|
try:
|
||||||
info = self.zip.getinfo(ipath)
|
info = self.zip.getinfo(ipath)
|
||||||
# There could be a 4GB Iso in the zip. We have to set a limit
|
# There could be a 4GB Iso in the zip. We have to set a limit
|
||||||
if info.file_size > 50 * 1024*1024:
|
if info.file_size > self.em.maxmembersize:
|
||||||
self.em.rclog("extractone: entry %s size %d too big" %
|
self.em.rclog("extractone: entry %s size %d too big" %
|
||||||
(ipath, info.file_size))
|
(ipath, info.file_size))
|
||||||
docdata = ""
|
docdata = ""
|
||||||
|
|||||||
@ -73,6 +73,10 @@ bool MimeHandlerExec::next_document()
|
|||||||
LOGDEB(("MimeHandlerExec::next_document(): helper known missing\n"));
|
LOGDEB(("MimeHandlerExec::next_document(): helper known missing\n"));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int filtermaxseconds = 900;
|
||||||
|
m_config->getConfParam("filtermaxseconds", &filtermaxseconds);
|
||||||
|
|
||||||
if (params.empty()) {
|
if (params.empty()) {
|
||||||
// Hu ho
|
// Hu ho
|
||||||
LOGERR(("MimeHandlerExec::mkDoc: empty params\n"));
|
LOGERR(("MimeHandlerExec::mkDoc: empty params\n"));
|
||||||
@ -80,15 +84,11 @@ bool MimeHandlerExec::next_document()
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
int filtermaxseconds = 900;
|
|
||||||
m_config->getConfParam("filtermaxseconds", &filtermaxseconds);
|
|
||||||
|
|
||||||
// Command name
|
// Command name
|
||||||
string cmd = params.front();
|
string cmd = params.front();
|
||||||
|
|
||||||
// Build parameter vector: delete cmd name and add the file name
|
// Build parameter vector: delete cmd name and add the file name
|
||||||
vector<string>::iterator it = params.begin();
|
vector<string>myparams(params.begin() + 1, params.end());
|
||||||
vector<string>myparams(++it, params.end());
|
|
||||||
myparams.push_back(m_fn);
|
myparams.push_back(m_fn);
|
||||||
if (!m_ipath.empty())
|
if (!m_ipath.empty())
|
||||||
myparams.push_back(m_ipath);
|
myparams.push_back(m_ipath);
|
||||||
@ -147,13 +147,18 @@ void MimeHandlerExec::finaldetails()
|
|||||||
{
|
{
|
||||||
m_metaData[cstr_dj_keyorigcharset] = m_dfltInputCharset;
|
m_metaData[cstr_dj_keyorigcharset] = m_dfltInputCharset;
|
||||||
|
|
||||||
// cfgFilterOutputCharset comes from the mimeconf filter definition line
|
// cfgFilterOutputCharset comes from the mimeconf filter
|
||||||
|
// definition line. If the value is "default", we use the charset
|
||||||
|
// value defined in recoll.conf (which may vary depending on
|
||||||
|
// directory)
|
||||||
string& charset = m_metaData[cstr_dj_keycharset];
|
string& charset = m_metaData[cstr_dj_keycharset];
|
||||||
charset = cfgFilterOutputCharset.empty() ? "UTF-8" : cfgFilterOutputCharset;
|
charset = cfgFilterOutputCharset.empty() ? "UTF-8" : cfgFilterOutputCharset;
|
||||||
if (!stringlowercmp("default", charset)) {
|
if (!stringlowercmp("default", charset)) {
|
||||||
charset = m_dfltInputCharset;
|
charset = m_dfltInputCharset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The output mime type is html except if defined otherwise in the filter
|
||||||
|
// definition.
|
||||||
string& mt = m_metaData[cstr_dj_keymt];
|
string& mt = m_metaData[cstr_dj_keymt];
|
||||||
mt = cfgFilterOutputMtype.empty() ? "text/html" :
|
mt = cfgFilterOutputMtype.empty() ? "text/html" :
|
||||||
cfgFilterOutputMtype;
|
cfgFilterOutputMtype;
|
||||||
|
|||||||
@ -50,13 +50,18 @@ bool MimeHandlerExecMultiple::startCmd()
|
|||||||
// Command name
|
// Command name
|
||||||
string cmd = params.front();
|
string cmd = params.front();
|
||||||
|
|
||||||
// Build parameter list: delete cmd name
|
m_maxmemberkb = 50000;
|
||||||
vector<string>::iterator it = params.begin();
|
m_config->getConfParam("maxmemberkb", &m_maxmemberkb);
|
||||||
vector<string>myparams(++it, params.end());
|
ostringstream oss;
|
||||||
|
oss << "RECOLL_FILTER_MAXMEMBERKB=" << m_maxmemberkb;
|
||||||
|
m_cmd.putenv(oss.str());
|
||||||
|
|
||||||
// Start filter
|
|
||||||
m_cmd.putenv(m_forPreview ? "RECOLL_FILTER_FORPREVIEW=yes" :
|
m_cmd.putenv(m_forPreview ? "RECOLL_FILTER_FORPREVIEW=yes" :
|
||||||
"RECOLL_FILTER_FORPREVIEW=no");
|
"RECOLL_FILTER_FORPREVIEW=no");
|
||||||
|
|
||||||
|
// Build parameter list: delete cmd name
|
||||||
|
vector<string>myparams(params.begin() + 1, params.end());
|
||||||
|
|
||||||
if (m_cmd.startExec(cmd, myparams, 1, 1) < 0) {
|
if (m_cmd.startExec(cmd, myparams, 1, 1) < 0) {
|
||||||
m_reason = string("RECFILTERROR HELPERNOTFOUND ") + cmd;
|
m_reason = string("RECFILTERROR HELPERNOTFOUND ") + cmd;
|
||||||
missingHelper = true;
|
missingHelper = true;
|
||||||
@ -116,6 +121,10 @@ bool MimeHandlerExecMultiple::readDataElement(string& name, string &data)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
LOGDEB1(("MHExecMultiple: got name [%s] len: %d\n", name.c_str(), len));
|
LOGDEB1(("MHExecMultiple: got name [%s] len: %d\n", name.c_str(), len));
|
||||||
|
if (len / 1024 > m_maxmemberkb) {
|
||||||
|
LOGERR(("MHExecMultiple: data len > maxmemberkb\n"));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
// Hack: check for 'Document:' and read directly the document data
|
// Hack: check for 'Document:' and read directly the document data
|
||||||
// to m_metaData[cstr_dj_keycontent] to avoid an extra copy of the bulky
|
// to m_metaData[cstr_dj_keycontent] to avoid an extra copy of the bulky
|
||||||
@ -297,7 +306,6 @@ bool MimeHandlerExecMultiple::next_document()
|
|||||||
(void)txtdcode("mh_execm");
|
(void)txtdcode("mh_execm");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if (eofnext_received)
|
if (eofnext_received)
|
||||||
m_havedoc = false;
|
m_havedoc = false;
|
||||||
|
|
||||||
|
|||||||
@ -107,6 +107,7 @@ private:
|
|||||||
bool startCmd();
|
bool startCmd();
|
||||||
bool readDataElement(string& name, string& data);
|
bool readDataElement(string& name, string& data);
|
||||||
bool m_filefirst;
|
bool m_filefirst;
|
||||||
|
int m_maxmemberkb;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* _MH_EXECM_H_INCLUDED_ */
|
#endif /* _MH_EXECM_H_INCLUDED_ */
|
||||||
|
|||||||
@ -590,8 +590,10 @@ void MimeHandlerMail::walkmime(Binc::MimePart* doc, int depth)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// We are dealing with an inline part of text/plain or text/html type
|
// We are dealing with an inline part of text/plain or text/html
|
||||||
|
// type. There may be several such parts, which is why we don't
|
||||||
|
// just return a text or html subdoc and let the filter stack
|
||||||
|
// work: we want to concatenate them in place instead
|
||||||
|
|
||||||
LOGDEB2(("walkmime: final: body start offset %d, length %d\n",
|
LOGDEB2(("walkmime: final: body start offset %d, length %d\n",
|
||||||
doc->getBodyStartOffset(), doc->getBodyLength()));
|
doc->getBodyStartOffset(), doc->getBodyLength()));
|
||||||
|
|||||||
@ -159,6 +159,12 @@ indexallfilenames = 1
|
|||||||
#
|
#
|
||||||
# indexedmimetypes = text/html application/pdf
|
# indexedmimetypes = text/html application/pdf
|
||||||
|
|
||||||
|
#
|
||||||
|
# Size limit for archive members, in kilobytes. This is passed to the filters in the
|
||||||
|
# environment as RECOLL_FILTER_MAXMEMBERKB
|
||||||
|
#
|
||||||
|
maxmemberkb = 50000
|
||||||
|
|
||||||
# Size limit for compressed files. We need to decompress these in a
|
# Size limit for compressed files. We need to decompress these in a
|
||||||
# temporary directory for identification, which can be wasteful in some
|
# temporary directory for identification, which can be wasteful in some
|
||||||
# cases. Limit the waste. Negative means no limit. 0 results in no
|
# cases. Limit the waste. Negative means no limit. 0 results in no
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user