Index: improve processing/rejection of binary files disguised as scripts (e.g. shar archives). Use "internal text/plain" instead of "exec rcltext" for script files so that normal text/plain processing is done (max size, splits). Reject the text if the number of iconv errors exceeds 25% of the transcoded text size.
This commit is contained in:
parent
dbc7bc28da
commit
292859a3ac
@ -117,10 +117,15 @@ bool MimeHandlerText::next_document()
|
||||
// this validates the encoding.
|
||||
LOGDEB1(("MimeHandlerText::mkDoc: transcod from %s to utf-8\n",
|
||||
m_dfltInputCharset.c_str()));
|
||||
if (!transcode(m_text, m_metaData["content"], m_dfltInputCharset, "UTF-8")) {
|
||||
int ecnt;
|
||||
bool ret;
|
||||
string& itext = m_metaData["content"];
|
||||
if (!(ret=transcode(m_text, itext, m_dfltInputCharset, "UTF-8", &ecnt)) ||
|
||||
ecnt > int(itext.size() / 4)) {
|
||||
LOGERR(("MimeHandlerText::mkDoc: transcode to utf-8 failed "
|
||||
"for charset [%s]\n", m_dfltInputCharset.c_str()));
|
||||
m_metaData["content"].erase();
|
||||
"for input charset [%s] ret %d ecnt %d\n",
|
||||
m_dfltInputCharset.c_str(), ret, ecnt));
|
||||
itext.erase();
|
||||
return false;
|
||||
}
|
||||
m_metaData["origcharset"] = m_dfltInputCharset;
|
||||
|
||||
@ -53,6 +53,7 @@ static multimap<string, Dijon::Filter*> o_handlers;
|
||||
* create appropriate handler object. */
|
||||
static Dijon::Filter *mhFactory(const string &mime)
|
||||
{
|
||||
LOGDEB2(("mhFactory(%s)\n", mime.c_str()));
|
||||
string lmime(mime);
|
||||
stringtolower(lmime);
|
||||
if ("text/plain" == lmime) {
|
||||
@ -123,7 +124,7 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs,
|
||||
if (attrs.get("mimetype", value))
|
||||
h->cfgFilterOutputMtype = stringtolower((const string&)value);
|
||||
|
||||
#if 1
|
||||
#if 0
|
||||
string scmd;
|
||||
for (it = h->params.begin(); it != h->params.end(); it++) {
|
||||
scmd += string("[") + *it + "] ";
|
||||
@ -202,9 +203,11 @@ Dijon::Filter *getMimeHandler(const string &mtype, RclConfig *cfg,
|
||||
// partly redundant with the localfields/rclaptg, but
|
||||
// better and the latter will probably go away at some
|
||||
// point in the future
|
||||
LOGDEB2(("handlertype internal, cmdstr [%s]\n", cmdstr.c_str()));
|
||||
if (!cmdstr.empty())
|
||||
h = mhFactory(cmdstr);
|
||||
h = mhFactory(mtype);
|
||||
else
|
||||
h = mhFactory(mtype);
|
||||
goto out;
|
||||
} else if (!stringlowercmp("dll", handlertype)) {
|
||||
} else {
|
||||
|
||||
@ -65,15 +65,15 @@ application/vnd.sun.xml.writer.global = exec rclsoff
|
||||
application/vnd.sun.xml.writer.template = exec rclsoff
|
||||
application/vnd.wordperfect = exec wpd2html;mimetype=text/html
|
||||
application/x-abiword = exec rclabw
|
||||
application/x-awk = exec rcltext; charset=default
|
||||
application/x-awk = internal text/plain
|
||||
application/x-dvi = exec rcldvi
|
||||
application/x-flac = execm rclaudio
|
||||
application/x-gnuinfo = execm rclinfo
|
||||
application/x-kword = exec rclkwd
|
||||
application/x-lyx = exec rcllyx
|
||||
application/x-perl = exec rcltext; charset=default
|
||||
application/x-perl = internal text/plain
|
||||
application/x-scribus = exec rclscribus
|
||||
application/x-shellscript = exec rcltext; charset=default
|
||||
application/x-shellscript = internal text/plain
|
||||
application/x-tex = exec rcltex
|
||||
text/x-tex = exec rcltex
|
||||
application/x-chm = execm rclchm
|
||||
@ -103,7 +103,7 @@ text/x-man = exec rclman
|
||||
text/x-purple-log = exec rclpurple
|
||||
text/x-purple-html-log = internal text/html
|
||||
text/x-python = exec rclpython
|
||||
text/x-shellscript = internal
|
||||
text/x-shellscript = internal text/plain
|
||||
|
||||
## #############################################
|
||||
# Icons to be used in the result list if required by gui config
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user