indexing could crash on different "file -i" output for some (binary) file names

2011-10-13 19:33:38 +02:00 · 2011-10-13 19:33:38 +02:00 · 85191eba16
commit 85191eba16
parent e8f63ec124
1 changed files with 27 additions and 10 deletions
--- a/src/index/mimetype.cpp
+++ b/src/index/mimetype.cpp
@ -69,18 +69,36 @@ static string mimetypefromdata(const string &fn, bool usfc)
 	    LOGERR(("mimetypefromdata: doexec: status 0x%x\n", status));
 	    return string();
 	}
-	// LOGDEB(("mimetypefromdata: %s [%s]\n", result.c_str(), fn.c_str()));
+	LOGDEB2(("mimetype: [%s] \"file\" output [%s]\n", 
 		 result.c_str(), fn.c_str()));
 	// The normal output from "file -i" looks like the following:
 	//   thefilename.xxx: text/plain; charset=us-ascii
 	// Sometimes the semi-colon is missing like in:
 	//     mimetype.cpp: text/x-c charset=us-ascii
 	// And sometimes we only get the mime type. This apparently happens
 	// when 'file' believes that the file name is binary
-	// The result of 'file' execution begins with the file name
+	trimstring(result, " \t\n\r");
-	// which may contain spaces. We happen to know its size, so
+
-	// strip it:
+	// If there is no colon and there is a slash, this is hopefully
-	if (result.size() <= fn.size()) {
+	// the mime type
 	if (result.find_first_of(":") == string::npos && 
 	    result.find_first_of("/") != string::npos) {
 	    return result;
 	}
 	// Else the result should begin with the file name. Get rid of it:
 	if (result.find(fn) != 0) {
 	    // Garbage "file" output. Maybe the result of a charset
 	    // conversion attempt?
 	    LOGERR(("mimetype: can't interpret 'file' output: [%s]\n",
 		    result.c_str()));
 	    return string();
 	}
 	result = result.substr(fn.size());
-	// Now looks like ": text/plain; charset=us-ascii"
+
 	// Now should look like ": text/plain; charset=us-ascii"
 	// Split it, and take second field
 	list<string> res;
 	stringToStrings(result, res);
@ -88,10 +106,9 @@ static string mimetypefromdata(const string &fn, bool usfc)
 	    return string();
 	list<string>::iterator it = res.begin();
 	mime = *++it;
-	// Remove possible punctuation at the end. Note that this mangles 
+	// Remove possible semi-colon at the end
-	// text/x-c++ if there is no semi-colon... handled in mimeconf :(
+	trimstring(mime, " \t;");
-	if (mime.length() > 0 && !isalpha(mime[mime.length() - 1]))
+
 	    mime.erase(mime.length() -1);
 	// File -i will sometimes return strange stuff (ie: "very small file")
 	if(mime.find("/") == string::npos) 
 	    mime.clear();