Index: improve processing/rejection for binary files disguised as scripts (e.g. shar archives). Use "internal text/plain" instead of "exec rcltext" for script files so that normal text/plain processing is done (max size, splits). Reject text if more than 25% iconv errors.

This commit is contained in:
Jean-Francois Dockes 2011-03-01 08:39:30 +01:00
parent dbc7bc28da
commit 292859a3ac
3 changed files with 17 additions and 9 deletions

View File

@ -117,10 +117,15 @@ bool MimeHandlerText::next_document()
// this validates the encoding. // this validates the encoding.
LOGDEB1(("MimeHandlerText::mkDoc: transcod from %s to utf-8\n", LOGDEB1(("MimeHandlerText::mkDoc: transcod from %s to utf-8\n",
m_dfltInputCharset.c_str())); m_dfltInputCharset.c_str()));
if (!transcode(m_text, m_metaData["content"], m_dfltInputCharset, "UTF-8")) { int ecnt;
bool ret;
string& itext = m_metaData["content"];
if (!(ret=transcode(m_text, itext, m_dfltInputCharset, "UTF-8", &ecnt)) ||
ecnt > int(itext.size() / 4)) {
LOGERR(("MimeHandlerText::mkDoc: transcode to utf-8 failed " LOGERR(("MimeHandlerText::mkDoc: transcode to utf-8 failed "
"for charset [%s]\n", m_dfltInputCharset.c_str())); "for input charset [%s] ret %d ecnt %d\n",
m_metaData["content"].erase(); m_dfltInputCharset.c_str(), ret, ecnt));
itext.erase();
return false; return false;
} }
m_metaData["origcharset"] = m_dfltInputCharset; m_metaData["origcharset"] = m_dfltInputCharset;

View File

@ -53,6 +53,7 @@ static multimap<string, Dijon::Filter*> o_handlers;
* create appropriate handler object. */ * create appropriate handler object. */
static Dijon::Filter *mhFactory(const string &mime) static Dijon::Filter *mhFactory(const string &mime)
{ {
LOGDEB2(("mhFactory(%s)\n", mime.c_str()));
string lmime(mime); string lmime(mime);
stringtolower(lmime); stringtolower(lmime);
if ("text/plain" == lmime) { if ("text/plain" == lmime) {
@ -123,7 +124,7 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs,
if (attrs.get("mimetype", value)) if (attrs.get("mimetype", value))
h->cfgFilterOutputMtype = stringtolower((const string&)value); h->cfgFilterOutputMtype = stringtolower((const string&)value);
#if 1 #if 0
string scmd; string scmd;
for (it = h->params.begin(); it != h->params.end(); it++) { for (it = h->params.begin(); it != h->params.end(); it++) {
scmd += string("[") + *it + "] "; scmd += string("[") + *it + "] ";
@ -202,9 +203,11 @@ Dijon::Filter *getMimeHandler(const string &mtype, RclConfig *cfg,
// partly redundant with the localfields/rclaptg, but // partly redundant with the localfields/rclaptg, but
// better and the latter will probably go away at some // better and the latter will probably go away at some
// point in the future // point in the future
LOGDEB2(("handlertype internal, cmdstr [%s]\n", cmdstr.c_str()));
if (!cmdstr.empty()) if (!cmdstr.empty())
h = mhFactory(cmdstr); h = mhFactory(cmdstr);
h = mhFactory(mtype); else
h = mhFactory(mtype);
goto out; goto out;
} else if (!stringlowercmp("dll", handlertype)) { } else if (!stringlowercmp("dll", handlertype)) {
} else { } else {

View File

@ -65,15 +65,15 @@ application/vnd.sun.xml.writer.global = exec rclsoff
application/vnd.sun.xml.writer.template = exec rclsoff application/vnd.sun.xml.writer.template = exec rclsoff
application/vnd.wordperfect = exec wpd2html;mimetype=text/html application/vnd.wordperfect = exec wpd2html;mimetype=text/html
application/x-abiword = exec rclabw application/x-abiword = exec rclabw
application/x-awk = exec rcltext; charset=default application/x-awk = internal text/plain
application/x-dvi = exec rcldvi application/x-dvi = exec rcldvi
application/x-flac = execm rclaudio application/x-flac = execm rclaudio
application/x-gnuinfo = execm rclinfo application/x-gnuinfo = execm rclinfo
application/x-kword = exec rclkwd application/x-kword = exec rclkwd
application/x-lyx = exec rcllyx application/x-lyx = exec rcllyx
application/x-perl = exec rcltext; charset=default application/x-perl = internal text/plain
application/x-scribus = exec rclscribus application/x-scribus = exec rclscribus
application/x-shellscript = exec rcltext; charset=default application/x-shellscript = internal text/plain
application/x-tex = exec rcltex application/x-tex = exec rcltex
text/x-tex = exec rcltex text/x-tex = exec rcltex
application/x-chm = execm rclchm application/x-chm = execm rclchm
@ -103,7 +103,7 @@ text/x-man = exec rclman
text/x-purple-log = exec rclpurple text/x-purple-log = exec rclpurple
text/x-purple-html-log = internal text/html text/x-purple-html-log = internal text/html
text/x-python = exec rclpython text/x-python = exec rclpython
text/x-shellscript = internal text/x-shellscript = internal text/plain
## ############################################# ## #############################################
# Icons to be used in the result list if required by gui config # Icons to be used in the result list if required by gui config