add config parameter to decide if we use the file command as a final step of mimetype identification
This commit is contained in:
parent
52519d6d7c
commit
548a4c1a27
@ -3,7 +3,7 @@ include ../mk/sysconf
|
|||||||
BIGLIB = ../lib/librcl.a
|
BIGLIB = ../lib/librcl.a
|
||||||
MIMELIB = ../bincimapmime/libmime.a
|
MIMELIB = ../bincimapmime/libmime.a
|
||||||
|
|
||||||
PROGS = recollindex csguess
|
PROGS = recollindex csguess mimetype
|
||||||
all: $(PROGS)
|
all: $(PROGS)
|
||||||
|
|
||||||
RECOLLINDEX_OBJS= recollindex.o $(BIGLIB) $(MIMELIB)
|
RECOLLINDEX_OBJS= recollindex.o $(BIGLIB) $(MIMELIB)
|
||||||
@ -21,6 +21,14 @@ trcsguess.o : csguess.cpp
|
|||||||
$(CXX) $(CXXFLAGS) -DTEST_CSGUESS -c -o trcsguess.o \
|
$(CXX) $(CXXFLAGS) -DTEST_CSGUESS -c -o trcsguess.o \
|
||||||
csguess.cpp
|
csguess.cpp
|
||||||
|
|
||||||
|
MIMETYPE_OBJS= trmimetype.o $(BIGLIB)
|
||||||
|
mimetype : $(MIMETYPE_OBJS)
|
||||||
|
$(CXX) $(CXXFLAGS) -o mimetype $(MIMETYPE_OBJS) \
|
||||||
|
$(LIBICONV)
|
||||||
|
trmimetype.o : mimetype.cpp
|
||||||
|
$(CXX) $(CXXFLAGS) -DTEST_MIMETYPE -c -o trmimetype.o \
|
||||||
|
mimetype.cpp
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -f *.o $(PROGS)
|
rm -f *.o $(PROGS)
|
||||||
|
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.13 2005-11-05 14:40:50 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.14 2005-11-10 08:47:49 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
@ -15,7 +15,6 @@ static char rcsid[] = "@(#$Id: indexer.cpp,v 1.13 2005-11-05 14:40:50 dockes Exp
|
|||||||
#include "conftree.h"
|
#include "conftree.h"
|
||||||
#include "rclconfig.h"
|
#include "rclconfig.h"
|
||||||
#include "fstreewalk.h"
|
#include "fstreewalk.h"
|
||||||
#include "mimetype.h"
|
|
||||||
#include "rcldb.h"
|
#include "rcldb.h"
|
||||||
#include "readfile.h"
|
#include "readfile.h"
|
||||||
#include "indexer.h"
|
#include "indexer.h"
|
||||||
@ -32,11 +31,12 @@ using namespace std;
|
|||||||
#define deleteZ(X) {delete X;X = 0;}
|
#define deleteZ(X) {delete X;X = 0;}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/**
|
/// A class to index a list of top directories into one database.
|
||||||
* Bunch holder for data used while indexing a directory tree. This also the
|
///
|
||||||
* tree walker callback object (the processone method gets called for every
|
/// Inherits FsTreeWalkerCB so that its processone() method is
|
||||||
* file or directory).
|
/// called by the file-system tree walk code for each file and
|
||||||
*/
|
/// directory, and keeps all state used while indexing a
|
||||||
|
/// directory tree.
|
||||||
class DbIndexer : public FsTreeWalkerCB {
|
class DbIndexer : public FsTreeWalkerCB {
|
||||||
FsTreeWalker walker;
|
FsTreeWalker walker;
|
||||||
RclConfig *config;
|
RclConfig *config;
|
||||||
@ -45,9 +45,9 @@ class DbIndexer : public FsTreeWalkerCB {
|
|||||||
Rcl::Db db;
|
Rcl::Db db;
|
||||||
string tmpdir;
|
string tmpdir;
|
||||||
public:
|
public:
|
||||||
|
/// Constructor does nothing but store parameters
|
||||||
DbIndexer(RclConfig *cnf, const string &dbd, list<string> *top)
|
DbIndexer(RclConfig *cnf, const string &dbd, list<string> *top)
|
||||||
: config(cnf), dbdir(dbd), topdirs(top)
|
: config(cnf), dbdir(dbd), topdirs(top) {}
|
||||||
{ }
|
|
||||||
|
|
||||||
virtual ~DbIndexer() {
|
virtual ~DbIndexer() {
|
||||||
// Maybe clean up temporary directory
|
// Maybe clean up temporary directory
|
||||||
@ -60,19 +60,21 @@ class DbIndexer : public FsTreeWalkerCB {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Start indexing.
|
||||||
|
bool index();
|
||||||
|
|
||||||
|
/// Tree walker callback method
|
||||||
FsTreeWalker::Status
|
FsTreeWalker::Status
|
||||||
processone(const std::string &, const struct stat *, FsTreeWalker::CbFlag);
|
processone(const std::string &, const struct stat *, FsTreeWalker::CbFlag);
|
||||||
|
|
||||||
// The top level entry point.
|
|
||||||
bool index();
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
// Top level file system tree index method for updating a given database.
|
/// Top level file system tree index method for updating a given database.
|
||||||
//
|
///
|
||||||
// We create the temporary directory, open the database, then call a
|
/// We create the temporary directory, open the database, then call a
|
||||||
// file system walk for each top-level directory.
|
/// file system walk for each top-level directory.
|
||||||
// When walking is done, we create the stem databases and close the main db.
|
/// When walking is done, we create the stem databases and close the
|
||||||
|
/// main db.
|
||||||
bool DbIndexer::index()
|
bool DbIndexer::index()
|
||||||
{
|
{
|
||||||
string tdir;
|
string tdir;
|
||||||
@ -90,9 +92,13 @@ bool DbIndexer::index()
|
|||||||
it != topdirs->end(); it++) {
|
it != topdirs->end(); it++) {
|
||||||
LOGDEB(("DbIndexer::index: Indexing %s into %s\n", it->c_str(),
|
LOGDEB(("DbIndexer::index: Indexing %s into %s\n", it->c_str(),
|
||||||
dbdir.c_str()));
|
dbdir.c_str()));
|
||||||
|
|
||||||
|
// Set the current directory in config so that subsequent
|
||||||
|
// getConfParams() will get local values
|
||||||
config->setKeyDir(*it);
|
config->setKeyDir(*it);
|
||||||
|
|
||||||
// Set up skipped patterns for this subtree
|
// Set up skipped patterns for this subtree. This probably should be
|
||||||
|
// done in the directory change code in processone() instead.
|
||||||
{
|
{
|
||||||
walker.clearSkippedNames();
|
walker.clearSkippedNames();
|
||||||
string skipped;
|
string skipped;
|
||||||
@ -106,6 +112,7 @@ bool DbIndexer::index()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Walk the directory tree
|
||||||
if (walker.walk(*it, *this) != FsTreeWalker::FtwOk) {
|
if (walker.walk(*it, *this) != FsTreeWalker::FtwOk) {
|
||||||
LOGERR(("DbIndexer::index: error while indexing %s\n",
|
LOGERR(("DbIndexer::index: error while indexing %s\n",
|
||||||
it->c_str()));
|
it->c_str()));
|
||||||
@ -113,6 +120,9 @@ bool DbIndexer::index()
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Get rid of all database entries that don't exist in the
|
||||||
|
// filesystem anymore.
|
||||||
db.purge();
|
db.purge();
|
||||||
|
|
||||||
// Create stemming databases
|
// Create stemming databases
|
||||||
@ -135,22 +145,23 @@ bool DbIndexer::index()
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/// This method gets called for every file and directory found by the
|
||||||
* This function gets called for every file and directory found by the
|
/// tree walker.
|
||||||
* tree walker. It checks with the db if the file has changed and needs to
|
///
|
||||||
* be reindexed. If so, it calls internfile() which will identify the
|
/// It checks with the db if the file has changed and needs to be
|
||||||
* file type and call an appropriate handler to create documents in
|
/// reindexed. If so, it calls internfile() which will identify the
|
||||||
* internal form, which we then add to the database.
|
/// file type and call an appropriate handler to convert the document into
|
||||||
*
|
/// internal format, which we then add to the database.
|
||||||
* Accent and majuscule handling are performed by the db module when doing
|
///
|
||||||
* the actual indexing work. The Rcl::Doc created by internfile()
|
/// Accent and majuscule handling are performed by the db module when doing
|
||||||
contains pretty raw utf8 data.
|
/// the actual indexing work. The Rcl::Doc created by internfile()
|
||||||
*/
|
/// contains pretty raw utf8 data.
|
||||||
FsTreeWalker::Status
|
FsTreeWalker::Status
|
||||||
DbIndexer::processone(const std::string &fn, const struct stat *stp,
|
DbIndexer::processone(const std::string &fn, const struct stat *stp,
|
||||||
FsTreeWalker::CbFlag flg)
|
FsTreeWalker::CbFlag flg)
|
||||||
{
|
{
|
||||||
// If we're changing directories, possibly adjust parameters.
|
// If we're changing directories, possibly adjust parameters (set
|
||||||
|
// the current directory in configuration object)
|
||||||
if (flg == FsTreeWalker::FtwDirEnter ||
|
if (flg == FsTreeWalker::FtwDirEnter ||
|
||||||
flg == FsTreeWalker::FtwDirReturn) {
|
flg == FsTreeWalker::FtwDirReturn) {
|
||||||
config->setKeyDir(fn);
|
config->setKeyDir(fn);
|
||||||
@ -189,9 +200,13 @@ DbIndexer::processone(const std::string &fn, const struct stat *stp,
|
|||||||
return FsTreeWalker::FtwOk;
|
return FsTreeWalker::FtwOk;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////
|
||||||
|
// ConfIndexer methods: ConfIndexer is the top-level object, that can index
|
||||||
|
// multiple directories to multiple databases.
|
||||||
|
|
||||||
ConfIndexer::~ConfIndexer()
|
ConfIndexer::~ConfIndexer()
|
||||||
{
|
{
|
||||||
deleteZ(indexer);
|
deleteZ(dbindexer);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ConfIndexer::index()
|
bool ConfIndexer::index()
|
||||||
@ -245,12 +260,12 @@ bool ConfIndexer::index()
|
|||||||
//}
|
//}
|
||||||
//cout << endl;
|
//cout << endl;
|
||||||
|
|
||||||
indexer = new DbIndexer(config, dbit->first, &dbit->second);
|
dbindexer = new DbIndexer(config, dbit->first, &dbit->second);
|
||||||
if (!indexer->index()) {
|
if (!dbindexer->index()) {
|
||||||
deleteZ(indexer);
|
deleteZ(dbindexer);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
deleteZ(indexer);
|
deleteZ(dbindexer);
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,21 +1,31 @@
|
|||||||
#ifndef _INDEXER_H_INCLUDED_
|
#ifndef _INDEXER_H_INCLUDED_
|
||||||
#define _INDEXER_H_INCLUDED_
|
#define _INDEXER_H_INCLUDED_
|
||||||
/* @(#$Id: indexer.h,v 1.5 2005-03-17 15:35:49 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: indexer.h,v 1.6 2005-11-10 08:47:49 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
#include "rclconfig.h"
|
#include "rclconfig.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An internal class to process all directories indexed into the same database.
|
||||||
|
*/
|
||||||
class DbIndexer;
|
class DbIndexer;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The file system indexing object. Processes the configuration, then invokes
|
* The file system indexing object. Processes the configuration, then invokes
|
||||||
* file system walking to populate/update the database(s).
|
* file system walking to populate/update the database(s).
|
||||||
|
*
|
||||||
|
* Multiple top-level directories can be listed in the
|
||||||
|
* configuration. Each can be indexed to a different
|
||||||
|
* database. Directories are first grouped by database, then an
|
||||||
|
* internal class (DbIndexer) is used to process each group.
|
||||||
*/
|
*/
|
||||||
class ConfIndexer {
|
class ConfIndexer {
|
||||||
RclConfig *config;
|
RclConfig *config;
|
||||||
DbIndexer *indexer; // Internal object used to store opaque private data
|
DbIndexer *dbindexer; // Object to process directories for a given db
|
||||||
public:
|
public:
|
||||||
enum runStatus {IndexerOk, IndexerError};
|
enum runStatus {IndexerOk, IndexerError};
|
||||||
ConfIndexer(RclConfig *cnf) : config(cnf), indexer(0) {}
|
ConfIndexer(RclConfig *cnf) : config(cnf), dbindexer(0) {}
|
||||||
virtual ~ConfIndexer();
|
virtual ~ConfIndexer();
|
||||||
|
/** Worker function: does the actual indexing */
|
||||||
bool index();
|
bool index();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@ -1,12 +1,13 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.9 2005-04-07 09:05:39 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.10 2005-11-10 08:47:49 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef TEST_MIMETYPE
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
using std::string;
|
|
||||||
#include <list>
|
#include <list>
|
||||||
|
|
||||||
|
using std::string;
|
||||||
using std::list;
|
using std::list;
|
||||||
|
|
||||||
#include "mimetype.h"
|
#include "mimetype.h"
|
||||||
@ -16,44 +17,72 @@ using std::list;
|
|||||||
#include "smallut.h"
|
#include "smallut.h"
|
||||||
#include "idfile.h"
|
#include "idfile.h"
|
||||||
|
|
||||||
// The system 'file' utility is not that great for us. For exemple it
|
#define USE_SYSTEM_FILE_COMMAND
|
||||||
// will mistake mail folders for simple text files if there is no
|
|
||||||
// 'Received' header, which would be the case, for exemple in a 'Sent'
|
/// Identification of file from contents. This is called for files with
|
||||||
// folder. Also "file -i" does not exist on all systems
|
/// unrecognized extensions (none, or not known either for indexing or
|
||||||
static string mimetypefromdata(const string &fn)
|
/// stop list)
|
||||||
|
///
|
||||||
|
/// The system 'file' utility is not that great for us. For example it
|
||||||
|
/// will mistake mail folders for simple text files if there is no
|
||||||
|
/// 'Received' header, which would be the case, for example in a 'Sent'
|
||||||
|
/// folder. Also "file -i" does not exist on all systems, and it's
|
||||||
|
/// quite costly.
|
||||||
|
/// So we first call the internal file identifier, which currently
|
||||||
|
/// only knows about mail, but in which we can add the more
|
||||||
|
/// common/interesting file types.
|
||||||
|
/// As a last resort, if 'usfc' is set and nothing was identified, we execute 'file -i'
|
||||||
|
|
||||||
|
static string mimetypefromdata(const string &fn, bool usfc)
|
||||||
{
|
{
|
||||||
string mime;
|
string mime;
|
||||||
#ifdef USE_SYSTEM_FILE_UTILITY
|
|
||||||
list<string> args;
|
|
||||||
|
|
||||||
args.push_back("-i");
|
// In any case first try the internal identifier
|
||||||
args.push_back(fn);
|
|
||||||
ExecCmd ex;
|
|
||||||
string result;
|
|
||||||
string cmd = "file";
|
|
||||||
int status = ex.doexec(cmd, args, 0, &result);
|
|
||||||
if (status) {
|
|
||||||
LOGERR(("mimetypefromdata: doexec: status 0x%x\n", status));
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
// LOGDEB(("mimetypefromdata: %s [%s]\n", result.c_str(), fn.c_str()));
|
|
||||||
list<string> res;
|
|
||||||
ConfTree::stringToStrings(result, res);
|
|
||||||
if (res.size() <= 1)
|
|
||||||
return "";
|
|
||||||
list<string>::iterator it = res.begin();
|
|
||||||
it++;
|
|
||||||
mime = *it;
|
|
||||||
|
|
||||||
if (mime.length() > 0 && !isalpha(mime[mime.length() - 1]))
|
|
||||||
mime.erase(mime.length() -1);
|
|
||||||
#else
|
|
||||||
mime = idFile(fn.c_str());
|
mime = idFile(fn.c_str());
|
||||||
|
|
||||||
|
#ifdef USE_SYSTEM_FILE_COMMAND
|
||||||
|
if (usfc && mime == "") {
|
||||||
|
// Last resort: use "file -i"
|
||||||
|
list<string> args;
|
||||||
|
|
||||||
|
args.push_back("-i");
|
||||||
|
args.push_back(fn);
|
||||||
|
ExecCmd ex;
|
||||||
|
string result;
|
||||||
|
string cmd = "file";
|
||||||
|
int status = ex.doexec(cmd, args, 0, &result);
|
||||||
|
if (status) {
|
||||||
|
LOGERR(("mimetypefromdata: doexec: status 0x%x\n", status));
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
// LOGDEB(("mimetypefromdata: %s [%s]\n", result.c_str(), fn.c_str()));
|
||||||
|
|
||||||
|
// The result of 'file' execution begins with the file name
|
||||||
|
// which may contain spaces. We happen to know its size, so
|
||||||
|
// strip it:
|
||||||
|
result = result.substr(fn.size());
|
||||||
|
// Now looks like ": text/plain; charset=us-ascii"
|
||||||
|
// Split it, and take second field
|
||||||
|
list<string> res;
|
||||||
|
ConfTree::stringToStrings(result, res);
|
||||||
|
if (res.size() <= 1)
|
||||||
|
return "";
|
||||||
|
list<string>::iterator it = res.begin();
|
||||||
|
it++;
|
||||||
|
mime = *it;
|
||||||
|
// Remove possible punctuation at the end
|
||||||
|
if (mime.length() > 0 && !isalpha(mime[mime.length() - 1]))
|
||||||
|
mime.erase(mime.length() -1);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return mime;
|
return mime;
|
||||||
}
|
}
|
||||||
|
|
||||||
string mimetype(const string &fn, ConfTree *mtypes)
|
/// Guess mime type, first from suffix, then from file data. We also
|
||||||
|
/// have a list of suffixes that we don't touch at all (ie: .jpg,
|
||||||
|
/// etc...)
|
||||||
|
string mimetype(const string &fn, ConfTree *mtypes, bool usfc)
|
||||||
{
|
{
|
||||||
if (mtypes == 0)
|
if (mtypes == 0)
|
||||||
return "";
|
return "";
|
||||||
@ -94,35 +123,46 @@ string mimetype(const string &fn, ConfTree *mtypes)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Look at file data ? Only when no suffix or always ?
|
// Look at file data ? Only when no suffix or always ?
|
||||||
//if (suff.empty()) // causes problems with shifted files, like
|
#if 0
|
||||||
// messages.1, messages.2 etc...
|
// Don't do this only for empty suffixes: would cause problems
|
||||||
return mimetypefromdata(fn);
|
// with shifted files, like messages.1, messages.2 etc... And others too
|
||||||
|
if (suff.empty())
|
||||||
return "";
|
#endif
|
||||||
|
return mimetypefromdata(fn, usfc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef _TEST_MIMETYPE_
|
#else // TEST->
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
const char *tvec[] = {
|
|
||||||
"/toto/tutu",
|
#include "debuglog.h"
|
||||||
"/",
|
#include "rclconfig.h"
|
||||||
"toto.txt",
|
#include "rclinit.h"
|
||||||
"toto.TXT",
|
#include "mimetype.h"
|
||||||
"toto.C.txt",
|
|
||||||
"toto.C1",
|
|
||||||
"",
|
|
||||||
};
|
|
||||||
const int n = sizeof(tvec) / sizeof(char*);
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
int main(int argc, const char **argv)
|
int main(int argc, const char **argv)
|
||||||
{
|
{
|
||||||
map<string, string>mtypes;
|
string reason;
|
||||||
mtypes[".txt"] = "text/plain";
|
RclConfig *config = recollinit(0, 0, reason);
|
||||||
|
|
||||||
for (int i = 0; i < n; i++) {
|
if (config == 0 || !config->ok()) {
|
||||||
cout << tvec[i] << " -> " << mimetype(string(tvec[i]), mtypes) << endl;
|
string str = "Configuration problem: ";
|
||||||
|
str += reason;
|
||||||
|
fprintf(stderr, "%s\n", str.c_str());
|
||||||
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
while (--argc > 0) {
|
||||||
|
string filename = *++argv;
|
||||||
|
cout << filename << " -> " <<
|
||||||
|
mimetype(filename, config->getMimeMap(), true) << endl;
|
||||||
|
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
|
#endif // TEST
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
#ifndef _MIMETYPE_H_INCLUDED_
|
#ifndef _MIMETYPE_H_INCLUDED_
|
||||||
#define _MIMETYPE_H_INCLUDED_
|
#define _MIMETYPE_H_INCLUDED_
|
||||||
/* @(#$Id: mimetype.h,v 1.2 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: mimetype.h,v 1.3 2005-11-10 08:47:49 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include "conftree.h"
|
#include "conftree.h"
|
||||||
@ -11,7 +11,7 @@
|
|||||||
* This may imply more than matching the suffix, the name must be usable
|
* This may imply more than matching the suffix, the name must be usable
|
||||||
* to actually access file data.
|
* to actually access file data.
|
||||||
*/
|
*/
|
||||||
string mimetype(const std::string &filename, ConfTree *mtypes);
|
string mimetype(const std::string &filename, ConfTree *mtypes, bool usfc);
|
||||||
|
|
||||||
|
|
||||||
#endif /* _MIMETYPE_H_INCLUDED_ */
|
#endif /* _MIMETYPE_H_INCLUDED_ */
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: internfile.cpp,v 1.6 2005-11-08 21:02:55 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: internfile.cpp,v 1.7 2005-11-10 08:47:49 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
@ -86,7 +86,18 @@ FileInterner::FileInterner(const std::string &f, RclConfig *cnf,
|
|||||||
const string& td)
|
const string& td)
|
||||||
: fn(f), config(cnf), tdir(td), handler(0)
|
: fn(f), config(cnf), tdir(td), handler(0)
|
||||||
{
|
{
|
||||||
mime = mimetype(fn, config->getMimeMap());
|
// Note that we are actually going to access the file, so that it's ok
|
||||||
|
// to check this config variable at every call even if it can only change
|
||||||
|
// when we change directories
|
||||||
|
string usfc;
|
||||||
|
int usfci;
|
||||||
|
if (!cnf->getConfParam("usesystemfilecommand", usfc))
|
||||||
|
usfci = 0;
|
||||||
|
else
|
||||||
|
usfci = atoi(usfc.c_str()) ? 1 : 0;
|
||||||
|
LOGDEB1(("FileInterner::FileInterner: usfci now %d\n", usfci));
|
||||||
|
|
||||||
|
mime = mimetype(fn, config->getMimeMap(), usfci);
|
||||||
if (mime.empty()) {
|
if (mime.empty()) {
|
||||||
// No mime type: not listed in our map, or present in stop list
|
// No mime type: not listed in our map, or present in stop list
|
||||||
LOGDEB(("FileInterner::FileInterner: (no mime) [%s]\n", fn.c_str()));
|
LOGDEB(("FileInterner::FileInterner: (no mime) [%s]\n", fn.c_str()));
|
||||||
@ -104,7 +115,7 @@ FileInterner::FileInterner(const std::string &f, RclConfig *cnf,
|
|||||||
LOGDEB(("internfile: after ucomp: tdir %s, tfile %s\n",
|
LOGDEB(("internfile: after ucomp: tdir %s, tfile %s\n",
|
||||||
tdir.c_str(), tfile.c_str()));
|
tdir.c_str(), tfile.c_str()));
|
||||||
fn = tfile;
|
fn = tfile;
|
||||||
mime = mimetype(fn, config->getMimeMap());
|
mime = mimetype(fn, config->getMimeMap(), usfci);
|
||||||
if (mime.empty()) {
|
if (mime.empty()) {
|
||||||
// No mime type ?? pass on.
|
// No mime type ?? pass on.
|
||||||
LOGDEB(("internfile: (no mime) [%s]\n", fn.c_str()));
|
LOGDEB(("internfile: (no mime) [%s]\n", fn.c_str()));
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
# @(#$Id: recoll.conf,v 1.5 2005-10-20 11:38:53 dockes Exp $ (C) 2004 J.F.Dockes
|
# @(#$Id: recoll.conf,v 1.6 2005-11-10 08:47:49 dockes Exp $ (C) 2004 J.F.Dockes
|
||||||
|
|
||||||
# Recoll default configuration file. This should be copied to
|
# Recoll default configuration file. This should be copied to
|
||||||
# ~/.recoll/recoll.conf
|
# ~/.recoll/recoll.conf
|
||||||
@ -40,6 +40,11 @@ defaultlanguage = english
|
|||||||
# Guessing charsets usually does not work well
|
# Guessing charsets usually does not work well
|
||||||
guesscharset = 0
|
guesscharset = 0
|
||||||
|
|
||||||
|
# Should we use the system's 'file -i' command as a final step in file type
|
||||||
|
# identification ? This may be useful, but will usually cause the
|
||||||
|
# indexing of many bogus 'text' files
|
||||||
|
usesystemfilecommand = 1
|
||||||
|
|
||||||
# You could specify different parameters for a subdirectory like this. No
|
# You could specify different parameters for a subdirectory like this. No
|
||||||
# tilde substitution there for now, sorry:
|
# tilde substitution there for now, sorry:
|
||||||
#[/home/me/englishdocs/plain]
|
#[/home/me/englishdocs/plain]
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user