replaced /usr/bin/file exec with internal code
This commit is contained in:
parent
1293f0d834
commit
11bb233ba5
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.8 2005-04-05 09:35:35 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.9 2005-04-07 09:05:39 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
@ -14,9 +14,16 @@ using std::list;
|
|||||||
#include "execmd.h"
|
#include "execmd.h"
|
||||||
#include "conftree.h"
|
#include "conftree.h"
|
||||||
#include "smallut.h"
|
#include "smallut.h"
|
||||||
|
#include "idfile.h"
|
||||||
|
|
||||||
|
// The system 'file' utility is not that great for us. For example it
|
||||||
|
// will mistake mail folders for simple text files if there is no
|
||||||
|
// 'Received' header, which would be the case, for example in a 'Sent'
|
||||||
|
// folder. Also "file -i" does not exist on all systems
|
||||||
static string mimetypefromdata(const string &fn)
|
static string mimetypefromdata(const string &fn)
|
||||||
{
|
{
|
||||||
|
string mime;
|
||||||
|
#ifdef USE_SYSTEM_FILE_UTILITY
|
||||||
list<string> args;
|
list<string> args;
|
||||||
|
|
||||||
args.push_back("-i");
|
args.push_back("-i");
|
||||||
@ -36,11 +43,13 @@ static string mimetypefromdata(const string &fn)
|
|||||||
return "";
|
return "";
|
||||||
list<string>::iterator it = res.begin();
|
list<string>::iterator it = res.begin();
|
||||||
it++;
|
it++;
|
||||||
string mime = *it;
|
mime = *it;
|
||||||
|
|
||||||
if (mime.length() > 0 && !isalpha(mime[mime.length() - 1]))
|
if (mime.length() > 0 && !isalpha(mime[mime.length() - 1]))
|
||||||
mime.erase(mime.length() -1);
|
mime.erase(mime.length() -1);
|
||||||
|
#else
|
||||||
|
mime = idFile(fn.c_str());
|
||||||
|
#endif
|
||||||
return mime;
|
return mime;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -64,8 +73,8 @@ string mimetype(const string &fn, ConfTree *mtypes)
|
|||||||
continue;
|
continue;
|
||||||
if (!stringicmp(fn.substr(fn.length() - it->length(),string::npos),
|
if (!stringicmp(fn.substr(fn.length() - it->length(),string::npos),
|
||||||
*it)) {
|
*it)) {
|
||||||
LOGINFO(("mimetype: fn %s in stoplist (%s)\n", fn.c_str(),
|
LOGDEB(("mimetype: fn %s in stoplist (%s)\n", fn.c_str(),
|
||||||
it->c_str()));
|
it->c_str()));
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -85,11 +94,6 @@ string mimetype(const string &fn, ConfTree *mtypes)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Look at file data ? Only when no suffix or always ?
|
// Look at file data ? Only when no suffix or always ?
|
||||||
// Also 'file' is not that great for us. For exemple it will
|
|
||||||
// mistake mail folders for simple text files if there is no 'Received'
|
|
||||||
// header, which would be the case, for exemple in a 'Sent' folder. Also
|
|
||||||
// I'm not sure that file -i exists on all systems
|
|
||||||
|
|
||||||
//if (suff.empty()) // causes problems with shifted files, like
|
//if (suff.empty()) // causes problems with shifted files, like
|
||||||
// messages.1, messages.2 etc...
|
// messages.1, messages.2 etc...
|
||||||
return mimetypefromdata(fn);
|
return mimetypefromdata(fn);
|
||||||
|
|||||||
@ -8,13 +8,13 @@ all: $(LIBS)
|
|||||||
|
|
||||||
OBJS = conftree.o csguess.o debuglog.o \
|
OBJS = conftree.o csguess.o debuglog.o \
|
||||||
execmd.o wipedir.o \
|
execmd.o wipedir.o \
|
||||||
fstreewalk.o html.o mail.o htmlparse.o indexer.o internfile.o \
|
fstreewalk.o html.o mail.o htmlparse.o idfile.o indexer.o internfile.o \
|
||||||
mimehandler.o mimeparse.o mimetype.o myhtmlparse.o pathut.o \
|
mimehandler.o mimeparse.o mimetype.o myhtmlparse.o pathut.o \
|
||||||
rclconfig.o rcldb.o rclinit.o readfile.o smallut.o \
|
rclconfig.o rcldb.o rclinit.o readfile.o smallut.o \
|
||||||
textsplit.o transcode.o \
|
textsplit.o transcode.o \
|
||||||
unacpp.o unac.o
|
unacpp.o unac.o
|
||||||
SRCS = ../utils/conftree.cpp ../index/csguess.cpp ../utils/debuglog.cpp \
|
SRCS = ../utils/conftree.cpp ../index/csguess.cpp ../utils/debuglog.cpp \
|
||||||
../utils/execmd.cpp ../utils/wipedir.cpp \
|
../utils/execmd.cpp ../utils/idfile.cpp ../utils/wipedir.cpp \
|
||||||
../utils/fstreewalk.cpp ../common/html.cpp ../common/mail.cpp \
|
../utils/fstreewalk.cpp ../common/html.cpp ../common/mail.cpp \
|
||||||
../common/htmlparse.cpp \
|
../common/htmlparse.cpp \
|
||||||
../index/indexer.cpp ../common/internfile.cpp \
|
../index/indexer.cpp ../common/internfile.cpp \
|
||||||
@ -48,6 +48,8 @@ html.o : ../common/html.cpp
|
|||||||
$(CXX) $(CXXFLAGS) -c $<
|
$(CXX) $(CXXFLAGS) -c $<
|
||||||
htmlparse.o : ../common/htmlparse.cpp
|
htmlparse.o : ../common/htmlparse.cpp
|
||||||
$(CXX) $(CXXFLAGS) -c $<
|
$(CXX) $(CXXFLAGS) -c $<
|
||||||
|
idfile.o : ../utils/idfile.cpp
|
||||||
|
$(CXX) $(CXXFLAGS) -c $<
|
||||||
indexer.o : ../index/indexer.cpp
|
indexer.o : ../index/indexer.cpp
|
||||||
$(CXX) $(CXXFLAGS) -c $<
|
$(CXX) $(CXXFLAGS) -c $<
|
||||||
internfile.o : ../common/internfile.cpp
|
internfile.o : ../common/internfile.cpp
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
# @(#$Id: mimemap,v 1.4 2005-04-05 09:35:35 dockes Exp $ (C) 2004 J.F.Dockes
|
# @(#$Id: mimemap,v 1.5 2005-04-07 09:05:39 dockes Exp $ (C) 2004 J.F.Dockes
|
||||||
|
|
||||||
# Recoll: associations of file name extensions to mime types
|
# Recoll: associations of file name extensions to mime types
|
||||||
.txt = text/plain
|
.txt = text/plain
|
||||||
@ -41,8 +41,8 @@
|
|||||||
# suffixes listed in there speeds up things quite a lot by avoiding
|
# suffixes listed in there speeds up things quite a lot by avoiding
|
||||||
# unneeded decompression or 'file' calls
|
# unneeded decompression or 'file' calls
|
||||||
recoll_noindex = .tar.gz .tgz .tar.bz2 .tbz \
|
recoll_noindex = .tar.gz .tgz .tar.bz2 .tbz \
|
||||||
.c .h .cpp .m4 .tcl .js .sh .pl .awk \
|
.c .h .cpp .m4 .tcl .js .sh .pl .awk \
|
||||||
.o .lib .dll .a \
|
.o .lib .dll .a \
|
||||||
.dat .bak .rdf .log .db .ini .gnm .gnumeric \
|
.dat .bak .rdf .log .db .ini .gnm .gnumeric .msf \
|
||||||
.jpg .gif .bmp .xpm
|
.jpg .gif .bmp .xpm
|
||||||
|
|
||||||
|
|||||||
@ -33,6 +33,13 @@ trtranscode.o : ../utils/transcode.cpp
|
|||||||
$(CXX) $(CXXFLAGS) -DTEST_TRANSCODE -c -o trtranscode.o \
|
$(CXX) $(CXXFLAGS) -DTEST_TRANSCODE -c -o trtranscode.o \
|
||||||
transcode.cpp
|
transcode.cpp
|
||||||
|
|
||||||
|
IDFILE_OBJS= tridfile.o $(BIGLIB)
|
||||||
|
idfile : $(IDFILE_OBJS)
|
||||||
|
$(CXX) $(CXXFLAGS) -o idfile $(IDFILE_OBJS) $(LIBICONV)
|
||||||
|
tridfile.o : ../utils/idfile.cpp
|
||||||
|
$(CXX) $(CXXFLAGS) -DTEST_IDFILE -c -o tridfile.o \
|
||||||
|
idfile.cpp
|
||||||
|
|
||||||
MIMEPARSE_OBJS= trmimeparse.o $(BIGLIB)
|
MIMEPARSE_OBJS= trmimeparse.o $(BIGLIB)
|
||||||
trmimeparse : $(MIMEPARSE_OBJS)
|
trmimeparse : $(MIMEPARSE_OBJS)
|
||||||
$(CXX) $(CXXFLAGS) -o trmimeparse $(MIMEPARSE_OBJS) $(LIBICONV)
|
$(CXX) $(CXXFLAGS) -o trmimeparse $(MIMEPARSE_OBJS) $(LIBICONV)
|
||||||
|
|||||||
127
src/utils/idfile.cpp
Normal file
127
src/utils/idfile.cpp
Normal file
@ -0,0 +1,127 @@
|
|||||||
|
#ifndef lint
|
||||||
|
static char rcsid[] = "@(#$Id: idfile.cpp,v 1.1 2005-04-07 09:05:39 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||||
|
#endif
|
||||||
|
#ifndef TEST_IDFILE
|
||||||
|
#include <unistd.h> // for access(2)
#include <ctype.h>
#include <string.h>  // strchr, strlen, strncmp
#include <strings.h> // strncasecmp

#include <fstream>
#include <sstream>
#include <string>

#include "debuglog.h"
#include "idfile.h"
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
|
||||||
|
// Header names which we take as hints that a file holds mail. Each entry
// includes the colon and the space which follow the name.
static const char *mailhs[] = {
    "From: ",
    "Received: ",
    "Message-Id: ",
    "To: ",
    "Date: ",
    "Subject: ",
    "Status: "
};

// Number of characters to compare for each entry of mailhs (same order).
// Must be kept in sync with mailhs by hand.
static const int mailhsl[] = {
    6,  // "From: "
    10, // "Received: "
    12, // "Message-Id: "
    4,  // "To: "
    6,  // "Date: "
    9,  // "Subject: "
    8   // "Status: "
};

// Number of entries in mailhs
static const int nmh = sizeof(mailhs) / sizeof(mailhs[0]);

// Number of matching lines needed to decide that a file is mail
const int wantnhead = 3;
|
||||||
|
|
||||||
|
// Try to identify the type of a file by looking at its first lines. Only
// mail files are recognized for now.
//
// @param fn path of the file to examine.
// @return "text/x-mail" if the first line begins with "From " and enough
//   mail header lines were seen, "message/rfc822" if enough mail header
//   lines were seen without such a first line, or an empty string if the
//   file could not be opened or read, has a line longer than 1000
//   characters, or does not look like mail.
string idFile(const char *fn)
{
    // Size of the line buffer (was a function-local #define, which leaked
    // a two-letter macro into the rest of the file).
    static const int linebufsz = 1024;
    // We give up on files having a line longer than this
    static const size_t maxlinelen = 1000;
    // The colon ending a header name must be found at most this far into
    // the line (hope no one comes up with a longer header name!)
    static const int maxcolonpos = 70;

    ifstream input;
    input.open(fn, ios::in);
    if (!input.is_open()) {
        LOGERR(("idFile: could not open [%s]\n", fn));
        return string("");
    }

    // Set if line 1 begins with "From ". This counts as one more hint,
    // and selects which mail type is returned.
    bool line1HasFrom = false;
    // Count of lines beginning with one of the known header names
    int lookslikemail = 0;

    // emacs VM sometimes inserts very long lines with continuations or
    // not (for folder information). This forces us to look at many
    // lines and long ones
    for (int lnum = 1; lnum < 200; lnum++) {
        char cline[linebufsz + 1];
        cline[linebufsz] = 0;
        input.getline(cline, linebufsz - 1);
        if (input.fail()) {
            if (input.bad()) {
                LOGERR(("idfile: error while reading [%s]\n", fn));
                return string("");
            }
            // Normally end of file. Note that getline() also sets failbit
            // when a line does not fit in the buffer: we stop scanning in
            // this case too, and decide from what was seen so far.
            break;
        }

        LOGDEB2(("idfile: lnum %d : [%s]\n", lnum, cline));

        // Check for a few things that can't be found in a mail file
        // (optimization to get a quick negative).

        // Header lines must begin with whitespace (continuation) or have
        // a colon near the beginning. Anything else, including an empty
        // line, ends the scan. The cast is needed because passing a
        // negative char value to isspace() is undefined behaviour.
        if (!isspace(static_cast<unsigned char>(cline[0]))) {
            const char *cp = strchr(cline, ':');
            if (cp == 0 || (cp - cline) > maxcolonpos) {
                LOGDEB2(("idfile: can't be mail header line: [%s]\n", cline));
                break;
            }
        }

        if (strlen(cline) > maxlinelen) {
            LOGDEB2(("idFile: Line too long\n"));
            return string("");
        }

        // "From " on the very first line: remember it and go look at the
        // next line.
        if (lnum == 1 && !strncmp("From ", cline, 5)) {
            line1HasFrom = true;
            continue;
        }

        // Does the line begin with one of the header names we know about?
        for (int i = 0; i < nmh; i++) {
            if (!strncasecmp(mailhs[i], cline, mailhsl[i])) {
                lookslikemail++;
                break;
            }
        }
        if (lookslikemail >= wantnhead)
            break;
    }

    if (line1HasFrom)
        lookslikemail++;

    if (lookslikemail >= wantnhead)
        return line1HasFrom ? string("text/x-mail") : string("message/rfc822");

    return string("");
}
|
||||||
|
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
#include "debuglog.h"
|
||||||
|
#include "idfile.h"
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
if (argc != 2) {
|
||||||
|
cerr << "Usage: idfile filename" << endl;
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
DebugLog::getdbl()->setloglevel(DEBDEB1);
|
||||||
|
DebugLog::setfilename("stderr");
|
||||||
|
string mime = idFile(argv[1]);
|
||||||
|
cout << argv[1] << " : " << mime << endl;
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
12
src/utils/idfile.h
Normal file
12
src/utils/idfile.h
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
#ifndef _IDFILE_H_INCLUDED_
|
||||||
|
#define _IDFILE_H_INCLUDED_
|
||||||
|
/* @(#$Id: idfile.h,v 1.1 2005-04-07 09:05:39 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
// Return mime type for file or empty string. The system's file utility does
|
||||||
|
// a bad job on mail folders. idFile only looks for mail file types for now,
|
||||||
|
// but this may change
|
||||||
|
// Current return values: "text/x-mail" when the first line of the file
// begins with "From " and enough mail header lines are found,
// "message/rfc822" when enough mail header lines are found without such a
// first line, and "" when the file can't be opened or read or is not
// recognized as mail.
extern std::string idFile(const char *fn);
|
||||||
|
|
||||||
|
#endif /* _IDFILE_H_INCLUDED_ */
|
||||||
Loading…
x
Reference in New Issue
Block a user