Indexing could crash on unexpected "file -i" output for some (binary) file names

This commit is contained in:
Jean-Francois Dockes 2011-10-13 19:33:38 +02:00
parent e8f63ec124
commit 85191eba16

View File

@ -69,18 +69,36 @@ static string mimetypefromdata(const string &fn, bool usfc)
LOGERR(("mimetypefromdata: doexec: status 0x%x\n", status));
return string();
}
// LOGDEB(("mimetypefromdata: %s [%s]\n", result.c_str(), fn.c_str()));
LOGDEB2(("mimetype: [%s] \"file\" output [%s]\n",
result.c_str(), fn.c_str()));
// The result of 'file' execution begins with the file name
// which may contain spaces. We happen to know its size, so
// strip it:
if (result.size() <= fn.size()) {
// The normal output from "file -i" looks like the following:
// thefilename.xxx: text/plain; charset=us-ascii
// Sometimes the semi-colon is missing like in:
// mimetype.cpp: text/x-c charset=us-ascii
// And sometimes we only get the mime type. This apparently happens
// when 'file' believes that the file name is binary
trimstring(result, " \t\n\r");
// If there is no colon and there is a slash, this is hopefully
// the mime type
if (result.find_first_of(":") == string::npos &&
result.find_first_of("/") != string::npos) {
return result;
}
// Else the result should begin with the file name. Get rid of it:
if (result.find(fn) != 0) {
// Garbage "file" output. Maybe the result of a charset
// conversion attempt?
LOGERR(("mimetype: can't interpret 'file' output: [%s]\n",
result.c_str()));
return string();
}
result = result.substr(fn.size());
// Now looks like ": text/plain; charset=us-ascii"
// Now should look like ": text/plain; charset=us-ascii"
// Split it, and take second field
list<string> res;
stringToStrings(result, res);
@ -88,10 +106,9 @@ static string mimetypefromdata(const string &fn, bool usfc)
return string();
list<string>::iterator it = res.begin();
mime = *++it;
// Remove possible punctuation at the end. Note that this mangles
// text/x-c++ if there is no semi-colon... handled in mimeconf :(
if (mime.length() > 0 && !isalpha(mime[mime.length() - 1]))
mime.erase(mime.length() -1);
// Remove possible semi-colon at the end
trimstring(mime, " \t;");
// File -i will sometimes return strange stuff (e.g. "very small file")
if(mime.find("/") == string::npos)
mime.clear();