replaced /usr/bin/file exec with internal code
This commit is contained in:
parent
1293f0d834
commit
11bb233ba5
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.8 2005-04-05 09:35:35 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.9 2005-04-07 09:05:39 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
#include <ctype.h>
|
||||
@ -14,9 +14,16 @@ using std::list;
|
||||
#include "execmd.h"
|
||||
#include "conftree.h"
|
||||
#include "smallut.h"
|
||||
#include "idfile.h"
|
||||
|
||||
// The system 'file' utility is not that great for us. For example it
|
||||
// will mistake mail folders for simple text files if there is no
|
||||
// 'Received' header, which would be the case, for example in a 'Sent'
|
||||
// folder. Also "file -i" does not exist on all systems
|
||||
static string mimetypefromdata(const string &fn)
|
||||
{
|
||||
string mime;
|
||||
#ifdef USE_SYSTEM_FILE_UTILITY
|
||||
list<string> args;
|
||||
|
||||
args.push_back("-i");
|
||||
@ -36,11 +43,13 @@ static string mimetypefromdata(const string &fn)
|
||||
return "";
|
||||
list<string>::iterator it = res.begin();
|
||||
it++;
|
||||
string mime = *it;
|
||||
mime = *it;
|
||||
|
||||
if (mime.length() > 0 && !isalpha(mime[mime.length() - 1]))
|
||||
mime.erase(mime.length() -1);
|
||||
|
||||
#else
|
||||
mime = idFile(fn.c_str());
|
||||
#endif
|
||||
return mime;
|
||||
}
|
||||
|
||||
@ -64,8 +73,8 @@ string mimetype(const string &fn, ConfTree *mtypes)
|
||||
continue;
|
||||
if (!stringicmp(fn.substr(fn.length() - it->length(),string::npos),
|
||||
*it)) {
|
||||
LOGINFO(("mimetype: fn %s in stoplist (%s)\n", fn.c_str(),
|
||||
it->c_str()));
|
||||
LOGDEB(("mimetype: fn %s in stoplist (%s)\n", fn.c_str(),
|
||||
it->c_str()));
|
||||
return "";
|
||||
}
|
||||
}
|
||||
@ -85,11 +94,6 @@ string mimetype(const string &fn, ConfTree *mtypes)
|
||||
}
|
||||
|
||||
// Look at file data ? Only when no suffix or always ?
|
||||
// Also 'file' is not that great for us. For exemple it will
|
||||
// mistake mail folders for simple text files if there is no 'Received'
|
||||
// header, which would be the case, for exemple in a 'Sent' folder. Also
|
||||
// I'm not sure that file -i exists on all systems
|
||||
|
||||
//if (suff.empty()) // causes problems with shifted files, like
|
||||
// messages.1, messages.2 etc...
|
||||
return mimetypefromdata(fn);
|
||||
|
||||
@ -8,13 +8,13 @@ all: $(LIBS)
|
||||
|
||||
OBJS = conftree.o csguess.o debuglog.o \
|
||||
execmd.o wipedir.o \
|
||||
fstreewalk.o html.o mail.o htmlparse.o indexer.o internfile.o \
|
||||
fstreewalk.o html.o mail.o htmlparse.o idfile.o indexer.o internfile.o \
|
||||
mimehandler.o mimeparse.o mimetype.o myhtmlparse.o pathut.o \
|
||||
rclconfig.o rcldb.o rclinit.o readfile.o smallut.o \
|
||||
textsplit.o transcode.o \
|
||||
unacpp.o unac.o
|
||||
SRCS = ../utils/conftree.cpp ../index/csguess.cpp ../utils/debuglog.cpp \
|
||||
../utils/execmd.cpp ../utils/wipedir.cpp \
|
||||
../utils/execmd.cpp ../utils/idfile.cpp ../utils/wipedir.cpp \
|
||||
../utils/fstreewalk.cpp ../common/html.cpp ../common/mail.cpp \
|
||||
../common/htmlparse.cpp \
|
||||
../index/indexer.cpp ../common/internfile.cpp \
|
||||
@ -48,6 +48,8 @@ html.o : ../common/html.cpp
|
||||
$(CXX) $(CXXFLAGS) -c $<
|
||||
htmlparse.o : ../common/htmlparse.cpp
|
||||
$(CXX) $(CXXFLAGS) -c $<
|
||||
idfile.o : ../utils/idfile.cpp
|
||||
$(CXX) $(CXXFLAGS) -c $<
|
||||
indexer.o : ../index/indexer.cpp
|
||||
$(CXX) $(CXXFLAGS) -c $<
|
||||
internfile.o : ../common/internfile.cpp
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
# @(#$Id: mimemap,v 1.4 2005-04-05 09:35:35 dockes Exp $ (C) 2004 J.F.Dockes
|
||||
# @(#$Id: mimemap,v 1.5 2005-04-07 09:05:39 dockes Exp $ (C) 2004 J.F.Dockes
|
||||
|
||||
# Recoll: associations of file name extensions to mime types
|
||||
.txt = text/plain
|
||||
@ -41,8 +41,8 @@
|
||||
# suffixes listed in there speeds up things quite a lot by avoiding
|
||||
# unneeded decompression or 'file' calls
|
||||
recoll_noindex = .tar.gz .tgz .tar.bz2 .tbz \
|
||||
.c .h .cpp .m4 .tcl .js .sh .pl .awk \
|
||||
.o .lib .dll .a \
|
||||
.dat .bak .rdf .log .db .ini .gnm .gnumeric \
|
||||
.jpg .gif .bmp .xpm
|
||||
.c .h .cpp .m4 .tcl .js .sh .pl .awk \
|
||||
.o .lib .dll .a \
|
||||
.dat .bak .rdf .log .db .ini .gnm .gnumeric .msf \
|
||||
.jpg .gif .bmp .xpm
|
||||
|
||||
|
||||
@ -33,6 +33,13 @@ trtranscode.o : ../utils/transcode.cpp
|
||||
$(CXX) $(CXXFLAGS) -DTEST_TRANSCODE -c -o trtranscode.o \
|
||||
transcode.cpp
|
||||
|
||||
IDFILE_OBJS= tridfile.o $(BIGLIB)
|
||||
idfile : $(IDFILE_OBJS)
|
||||
$(CXX) $(CXXFLAGS) -o idfile $(IDFILE_OBJS) $(LIBICONV)
|
||||
tridfile.o : ../utils/idfile.cpp
|
||||
$(CXX) $(CXXFLAGS) -DTEST_IDFILE -c -o tridfile.o \
|
||||
idfile.cpp
|
||||
|
||||
MIMEPARSE_OBJS= trmimeparse.o $(BIGLIB)
|
||||
trmimeparse : $(MIMEPARSE_OBJS)
|
||||
$(CXX) $(CXXFLAGS) -o trmimeparse $(MIMEPARSE_OBJS) $(LIBICONV)
|
||||
|
||||
127
src/utils/idfile.cpp
Normal file
127
src/utils/idfile.cpp
Normal file
@ -0,0 +1,127 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: idfile.cpp,v 1.1 2005-04-07 09:05:39 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
#endif
|
||||
#ifndef TEST_IDFILE
|
||||
#include <unistd.h> // for access(2)
|
||||
#include <ctype.h>
|
||||
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
|
||||
#include "debuglog.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
|
||||
// Header field prefixes that idFile() looks for at the start of a line.
// Each entry keeps its trailing ": " so that the prefix comparison covers
// the complete field name.
static const char *mailhs[] = {
    "From: ",
    "Received: ",
    "Message-Id: ",
    "To: ",
    "Date: ",
    "Subject: ",
    "Status: "
};

// Length of each prefix in mailhs, in the same order (passed as the
// comparison length to strncasecmp()).
static const int mailhsl[] = {
    6,   // "From: "
    10,  // "Received: "
    12,  // "Message-Id: "
    4,   // "To: "
    6,   // "Date: "
    9,   // "Subject: "
    8    // "Status: "
};

// Number of entries in mailhs
static const int nmh = sizeof(mailhs) / sizeof(mailhs[0]);

// Score (count of matching lines) that idFile() requires before it
// reports a file as mail.
const int wantnhead = 3;
|
||||
|
||||
string idFile(const char *fn)
|
||||
{
|
||||
ifstream input;
|
||||
input.open(fn, ios::in);
|
||||
if (!input.is_open()) {
|
||||
LOGERR(("idFile: could not open [%s]\n", fn));
|
||||
return string("");
|
||||
}
|
||||
|
||||
bool line1HasFrom = false;
|
||||
int lookslikemail = 0;
|
||||
|
||||
// emacs VM sometimes inserts very long lines with continuations or
|
||||
// not (for folder information). This forces us to look at many
|
||||
// lines and long ones
|
||||
for (int lnum = 1; lnum < 200; lnum++) {
|
||||
|
||||
#define LL 1024
|
||||
char cline[LL+1];
|
||||
cline[LL] = 0;
|
||||
input.getline(cline, LL-1);
|
||||
if (input.fail()) {
|
||||
if (input.bad()) {
|
||||
LOGERR(("idfile: error while reading [%s]\n", fn));
|
||||
return string("");
|
||||
}
|
||||
// Must be eof ?
|
||||
break;
|
||||
}
|
||||
|
||||
LOGDEB2(("idfile: lnum %d : [%s]\n", lnum, cline));
|
||||
// Check for a few things that can't be found in a mail file,
|
||||
// (optimization to get a quick negative
|
||||
|
||||
// Lines must begin with whitespace or have a colon in the
|
||||
// first 50 chars (hope no one comes up with a longer header
|
||||
// name !
|
||||
if (!isspace(cline[0])) {
|
||||
char *cp = strchr(cline, ':');
|
||||
if (cp == 0 || (cp - cline) > 70) {
|
||||
LOGDEB2(("idfile: can't be mail header line: [%s]\n", cline));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int ll = strlen(cline);
|
||||
if (ll > 1000) {
|
||||
LOGDEB2(("idFile: Line too long\n"));
|
||||
return string("");
|
||||
}
|
||||
if (lnum == 1) {
|
||||
if (!strncmp("From ", cline, 5)) {
|
||||
line1HasFrom = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < nmh; i++) {
|
||||
if (!strncasecmp(mailhs[i], cline, mailhsl[i])) {
|
||||
//fprintf(stderr, "Got [%s]\n", mailhs[i]);
|
||||
lookslikemail++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (lookslikemail >= wantnhead)
|
||||
break;
|
||||
}
|
||||
if (line1HasFrom)
|
||||
lookslikemail++;
|
||||
|
||||
if (lookslikemail >= wantnhead)
|
||||
return line1HasFrom ? string("text/x-mail") : string("message/rfc822");
|
||||
|
||||
return string("");
|
||||
}
|
||||
|
||||
|
||||
#else
|
||||
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
using namespace std;
|
||||
|
||||
#include "debuglog.h"
|
||||
#include "idfile.h"
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
if (argc != 2) {
|
||||
cerr << "Usage: idfile filename" << endl;
|
||||
exit(1);
|
||||
}
|
||||
DebugLog::getdbl()->setloglevel(DEBDEB1);
|
||||
DebugLog::setfilename("stderr");
|
||||
string mime = idFile(argv[1]);
|
||||
cout << argv[1] << " : " << mime << endl;
|
||||
exit(0);
|
||||
}
|
||||
|
||||
#endif
|
||||
12
src/utils/idfile.h
Normal file
12
src/utils/idfile.h
Normal file
@ -0,0 +1,12 @@
|
||||
#ifndef _IDFILE_H_INCLUDED_
|
||||
#define _IDFILE_H_INCLUDED_
|
||||
/* @(#$Id: idfile.h,v 1.1 2005-04-07 09:05:39 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include <string>
|
||||
|
||||
// Return mime type for file or empty string. The system's file utility does
|
||||
// a bad job on mail folders. idFile only looks for mail file types for now,
|
||||
// but this may change
|
||||
extern std::string idFile(const char *fn);
|
||||
|
||||
#endif /* _IDFILE_H_INCLUDED_ */
|
||||
Loading…
x
Reference in New Issue
Block a user