Index: improve processing/rejection of binary files disguised as scripts (e.g. shar archives). Use "internal text/plain" instead of "exec rcltext" for script files so that normal text/plain processing is applied (max size, splits). Reject text if there are more than 25% iconv errors.
This commit is contained in:
parent
dbc7bc28da
commit
292859a3ac
@ -117,10 +117,15 @@ bool MimeHandlerText::next_document()
|
|||||||
// this validates the encoding.
|
// this validates the encoding.
|
||||||
LOGDEB1(("MimeHandlerText::mkDoc: transcod from %s to utf-8\n",
|
LOGDEB1(("MimeHandlerText::mkDoc: transcod from %s to utf-8\n",
|
||||||
m_dfltInputCharset.c_str()));
|
m_dfltInputCharset.c_str()));
|
||||||
if (!transcode(m_text, m_metaData["content"], m_dfltInputCharset, "UTF-8")) {
|
int ecnt;
|
||||||
|
bool ret;
|
||||||
|
string& itext = m_metaData["content"];
|
||||||
|
if (!(ret=transcode(m_text, itext, m_dfltInputCharset, "UTF-8", &ecnt)) ||
|
||||||
|
ecnt > int(itext.size() / 4)) {
|
||||||
LOGERR(("MimeHandlerText::mkDoc: transcode to utf-8 failed "
|
LOGERR(("MimeHandlerText::mkDoc: transcode to utf-8 failed "
|
||||||
"for charset [%s]\n", m_dfltInputCharset.c_str()));
|
"for input charset [%s] ret %d ecnt %d\n",
|
||||||
m_metaData["content"].erase();
|
m_dfltInputCharset.c_str(), ret, ecnt));
|
||||||
|
itext.erase();
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
m_metaData["origcharset"] = m_dfltInputCharset;
|
m_metaData["origcharset"] = m_dfltInputCharset;
|
||||||
|
|||||||
@ -53,6 +53,7 @@ static multimap<string, Dijon::Filter*> o_handlers;
|
|||||||
* create appropriate handler object. */
|
* create appropriate handler object. */
|
||||||
static Dijon::Filter *mhFactory(const string &mime)
|
static Dijon::Filter *mhFactory(const string &mime)
|
||||||
{
|
{
|
||||||
|
LOGDEB2(("mhFactory(%s)\n", mime.c_str()));
|
||||||
string lmime(mime);
|
string lmime(mime);
|
||||||
stringtolower(lmime);
|
stringtolower(lmime);
|
||||||
if ("text/plain" == lmime) {
|
if ("text/plain" == lmime) {
|
||||||
@ -123,7 +124,7 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs,
|
|||||||
if (attrs.get("mimetype", value))
|
if (attrs.get("mimetype", value))
|
||||||
h->cfgFilterOutputMtype = stringtolower((const string&)value);
|
h->cfgFilterOutputMtype = stringtolower((const string&)value);
|
||||||
|
|
||||||
#if 1
|
#if 0
|
||||||
string scmd;
|
string scmd;
|
||||||
for (it = h->params.begin(); it != h->params.end(); it++) {
|
for (it = h->params.begin(); it != h->params.end(); it++) {
|
||||||
scmd += string("[") + *it + "] ";
|
scmd += string("[") + *it + "] ";
|
||||||
@ -202,9 +203,11 @@ Dijon::Filter *getMimeHandler(const string &mtype, RclConfig *cfg,
|
|||||||
// partly redundant with the localfields/rclaptg, but
|
// partly redundant with the localfields/rclaptg, but
|
||||||
// better and the latter will probably go away at some
|
// better and the latter will probably go away at some
|
||||||
// point in the future
|
// point in the future
|
||||||
|
LOGDEB2(("handlertype internal, cmdstr [%s]\n", cmdstr.c_str()));
|
||||||
if (!cmdstr.empty())
|
if (!cmdstr.empty())
|
||||||
h = mhFactory(cmdstr);
|
h = mhFactory(cmdstr);
|
||||||
h = mhFactory(mtype);
|
else
|
||||||
|
h = mhFactory(mtype);
|
||||||
goto out;
|
goto out;
|
||||||
} else if (!stringlowercmp("dll", handlertype)) {
|
} else if (!stringlowercmp("dll", handlertype)) {
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@ -65,15 +65,15 @@ application/vnd.sun.xml.writer.global = exec rclsoff
|
|||||||
application/vnd.sun.xml.writer.template = exec rclsoff
|
application/vnd.sun.xml.writer.template = exec rclsoff
|
||||||
application/vnd.wordperfect = exec wpd2html;mimetype=text/html
|
application/vnd.wordperfect = exec wpd2html;mimetype=text/html
|
||||||
application/x-abiword = exec rclabw
|
application/x-abiword = exec rclabw
|
||||||
application/x-awk = exec rcltext; charset=default
|
application/x-awk = internal text/plain
|
||||||
application/x-dvi = exec rcldvi
|
application/x-dvi = exec rcldvi
|
||||||
application/x-flac = execm rclaudio
|
application/x-flac = execm rclaudio
|
||||||
application/x-gnuinfo = execm rclinfo
|
application/x-gnuinfo = execm rclinfo
|
||||||
application/x-kword = exec rclkwd
|
application/x-kword = exec rclkwd
|
||||||
application/x-lyx = exec rcllyx
|
application/x-lyx = exec rcllyx
|
||||||
application/x-perl = exec rcltext; charset=default
|
application/x-perl = internal text/plain
|
||||||
application/x-scribus = exec rclscribus
|
application/x-scribus = exec rclscribus
|
||||||
application/x-shellscript = exec rcltext; charset=default
|
application/x-shellscript = internal text/plain
|
||||||
application/x-tex = exec rcltex
|
application/x-tex = exec rcltex
|
||||||
text/x-tex = exec rcltex
|
text/x-tex = exec rcltex
|
||||||
application/x-chm = execm rclchm
|
application/x-chm = execm rclchm
|
||||||
@ -103,7 +103,7 @@ text/x-man = exec rclman
|
|||||||
text/x-purple-log = exec rclpurple
|
text/x-purple-log = exec rclpurple
|
||||||
text/x-purple-html-log = internal text/html
|
text/x-purple-html-log = internal text/html
|
||||||
text/x-python = exec rclpython
|
text/x-python = exec rclpython
|
||||||
text/x-shellscript = internal
|
text/x-shellscript = internal text/plain
|
||||||
|
|
||||||
## #############################################
|
## #############################################
|
||||||
# Icons to be used in the result list if required by gui config
|
# Icons to be used in the result list if required by gui config
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user