Index: improve processing/rejection of binary files disguised as scripts (e.g. shar archives). Use "internal text/plain" instead of "exec rcltext" for script files so that normal text/plain processing is done (max size, splits). Reject text when the iconv error count exceeds 25% of its size.

This commit is contained in:
Jean-Francois Dockes 2011-03-01 08:39:30 +01:00
parent dbc7bc28da
commit 292859a3ac
3 changed files with 17 additions and 9 deletions

View File

@ -117,10 +117,15 @@ bool MimeHandlerText::next_document()
// this validates the encoding.
LOGDEB1(("MimeHandlerText::mkDoc: transcod from %s to utf-8\n",
m_dfltInputCharset.c_str()));
if (!transcode(m_text, m_metaData["content"], m_dfltInputCharset, "UTF-8")) {
int ecnt;
bool ret;
string& itext = m_metaData["content"];
if (!(ret=transcode(m_text, itext, m_dfltInputCharset, "UTF-8", &ecnt)) ||
ecnt > int(itext.size() / 4)) {
LOGERR(("MimeHandlerText::mkDoc: transcode to utf-8 failed "
"for charset [%s]\n", m_dfltInputCharset.c_str()));
m_metaData["content"].erase();
"for input charset [%s] ret %d ecnt %d\n",
m_dfltInputCharset.c_str(), ret, ecnt));
itext.erase();
return false;
}
m_metaData["origcharset"] = m_dfltInputCharset;

View File

@ -53,6 +53,7 @@ static multimap<string, Dijon::Filter*> o_handlers;
* create appropriate handler object. */
static Dijon::Filter *mhFactory(const string &mime)
{
LOGDEB2(("mhFactory(%s)\n", mime.c_str()));
string lmime(mime);
stringtolower(lmime);
if ("text/plain" == lmime) {
@ -123,7 +124,7 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs,
if (attrs.get("mimetype", value))
h->cfgFilterOutputMtype = stringtolower((const string&)value);
#if 1
#if 0
string scmd;
for (it = h->params.begin(); it != h->params.end(); it++) {
scmd += string("[") + *it + "] ";
@ -202,9 +203,11 @@ Dijon::Filter *getMimeHandler(const string &mtype, RclConfig *cfg,
// partly redundant with the localfields/rclaptg, but
// better and the latter will probably go away at some
// point in the future
LOGDEB2(("handlertype internal, cmdstr [%s]\n", cmdstr.c_str()));
if (!cmdstr.empty())
h = mhFactory(cmdstr);
h = mhFactory(mtype);
else
h = mhFactory(mtype);
goto out;
} else if (!stringlowercmp("dll", handlertype)) {
} else {

View File

@ -65,15 +65,15 @@ application/vnd.sun.xml.writer.global = exec rclsoff
application/vnd.sun.xml.writer.template = exec rclsoff
application/vnd.wordperfect = exec wpd2html;mimetype=text/html
application/x-abiword = exec rclabw
application/x-awk = exec rcltext; charset=default
application/x-awk = internal text/plain
application/x-dvi = exec rcldvi
application/x-flac = execm rclaudio
application/x-gnuinfo = execm rclinfo
application/x-kword = exec rclkwd
application/x-lyx = exec rcllyx
application/x-perl = exec rcltext; charset=default
application/x-perl = internal text/plain
application/x-scribus = exec rclscribus
application/x-shellscript = exec rcltext; charset=default
application/x-shellscript = internal text/plain
application/x-tex = exec rcltex
text/x-tex = exec rcltex
application/x-chm = execm rclchm
@ -103,7 +103,7 @@ text/x-man = exec rclman
text/x-purple-log = exec rclpurple
text/x-purple-html-log = internal text/html
text/x-python = exec rclpython
text/x-shellscript = internal
text/x-shellscript = internal text/plain
## #############################################
# Icons to be used in the result list if required by gui config