*** empty log message ***
This commit is contained in:
parent
50b927f65c
commit
68fb37b5f9
40
src/common/rclinit.cpp
Normal file
40
src/common/rclinit.cpp
Normal file
@ -0,0 +1,40 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: rclinit.cpp,v 1.1 2005-04-05 09:35:35 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <signal.h>
|
||||
|
||||
#include "debuglog.h"
|
||||
#include "rclconfig.h"
|
||||
|
||||
// Common start-up sequence: registers the exit handler, installs the
// signal handler, sets up logging and builds the configuration object.
//
// cleanup     is registered with atexit().
// sigcleanup  is installed for SIGHUP, SIGINT, SIGQUIT and SIGTERM, except
//             for those signals whose disposition was already SIG_IGN.
//
// Returns the RclConfig allocated here with new (never freed by this
// function). If the configuration cannot be built, a message is printed
// on stderr and the process exits with status 1.
RclConfig *recollinit(void (*cleanup)(void), void (*sigcleanup)(int))
{
    atexit(cleanup);

    // Signals handled by sigcleanup, processed in this order.
    static const int catchsigs[] = {SIGHUP, SIGINT, SIGQUIT, SIGTERM};
    const unsigned int ncatch = sizeof(catchsigs) / sizeof(catchsigs[0]);
    for (unsigned int i = 0; i < ncatch; i++) {
        // signal() hands back the previous disposition: only install our
        // handler when the signal was not being ignored before.
        if (signal(catchsigs[i], SIG_IGN) != SIG_IGN) {
            signal(catchsigs[i], sigcleanup);
        }
    }

    // Initial logging setup, in effect until the configuration values
    // (if any) are applied below.
    DebugLog::getdbl()->setloglevel(DEBDEB1);
    DebugLog::setfilename("stderr");

    RclConfig *config = new RclConfig;
    const bool usable = config && config->ok();
    if (!usable) {
        fprintf(stderr, "Config could not be built\n");
        exit(1);
    }

    // Optional override of the log destination.
    string logfilename;
    if (config->getConfParam(string("logfilename"), logfilename)) {
        DebugLog::setfilename(logfilename.c_str());
    }

    // Optional override of the log verbosity.
    string loglevel;
    if (config->getConfParam(string("loglevel"), loglevel)) {
        DebugLog::getdbl()->setloglevel(atoi(loglevel.c_str()));
    }

    return config;
}
|
||||
9
src/common/rclinit.h
Normal file
9
src/common/rclinit.h
Normal file
@ -0,0 +1,9 @@
|
||||
#ifndef _RCLINIT_H_INCLUDED_
|
||||
#define _RCLINIT_H_INCLUDED_
|
||||
/* @(#$Id: rclinit.h,v 1.1 2005-04-05 09:35:35 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
class RclConfig;
|
||||
|
||||
/*
 * Shared program initialisation.
 *
 * Registers 'cleanup' with atexit(), installs 'sigcleanup' as the handler
 * for SIGHUP, SIGINT, SIGQUIT and SIGTERM (signals that were already
 * ignored stay ignored), sets up the debug log (stderr at level DEBDEB1,
 * then the "logfilename" / "loglevel" configuration values when present)
 * and builds the configuration.
 *
 * Returns a pointer to a RclConfig allocated with new. If the
 * configuration cannot be built, prints a message on stderr and calls
 * exit(1) instead of returning.
 */
extern RclConfig *recollinit(void (*cleanup)(void), void (*sigcleanup)(int));
|
||||
|
||||
#endif /* _RCLINIT_H_INCLUDED_ */
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.7 2005-04-04 13:18:46 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.8 2005-04-05 09:35:35 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
#include <ctype.h>
|
||||
@ -13,6 +13,7 @@ using std::list;
|
||||
#include "debuglog.h"
|
||||
#include "execmd.h"
|
||||
#include "conftree.h"
|
||||
#include "smallut.h"
|
||||
|
||||
static string mimetypefromdata(const string &fn)
|
||||
{
|
||||
@ -61,8 +62,9 @@ string mimetype(const string &fn, ConfTree *mtypes)
|
||||
it != stoplist.end(); it++) {
|
||||
if (it->length() > fn.length())
|
||||
continue;
|
||||
if (!fn.compare(fn.length() - it->length(), string::npos, *it)) {
|
||||
LOGDEB1(("mimetype: fn %s in stoplist (%s)\n", fn.c_str(),
|
||||
if (!stringicmp(fn.substr(fn.length() - it->length(),string::npos),
|
||||
*it)) {
|
||||
LOGINFO(("mimetype: fn %s in stoplist (%s)\n", fn.c_str(),
|
||||
it->c_str()));
|
||||
return "";
|
||||
}
|
||||
@ -82,13 +84,16 @@ string mimetype(const string &fn, ConfTree *mtypes)
|
||||
return mtype;
|
||||
}
|
||||
|
||||
// Look at file data ? Only when no suffix or always
|
||||
// Look at file data ? Only when no suffix or always ?
|
||||
// Also 'file' is not that great for us. For example it will
|
||||
// mistake mail folders for simple text files if there is no 'Received'
|
||||
// header, which would be the case, for example, in a 'Sent' folder. Also
|
||||
// I'm not sure that file -i exists on all systems
|
||||
//if (suff.empty())
|
||||
return mimetypefromdata(fn);
|
||||
|
||||
//if (suff.empty()) // causes problems with shifted files, like
|
||||
// messages.1, messages.2 etc...
|
||||
return mimetypefromdata(fn);
|
||||
|
||||
return "";
|
||||
}
|
||||
|
||||
|
||||
@ -1,11 +1,12 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.9 2005-02-01 08:42:56 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.10 2005-04-05 09:35:35 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <signal.h>
|
||||
|
||||
#include "debuglog.h"
|
||||
#include "rclinit.h"
|
||||
#include "indexer.h"
|
||||
|
||||
ConfIndexer *indexer;
|
||||
@ -25,24 +26,9 @@ static void sigcleanup(int sig)
|
||||
|
||||
int main(int argc, const char **argv)
|
||||
{
|
||||
atexit(cleanup);
|
||||
if (signal(SIGHUP, SIG_IGN) != SIG_IGN)
|
||||
signal(SIGHUP, sigcleanup);
|
||||
if (signal(SIGINT, SIG_IGN) != SIG_IGN)
|
||||
signal(SIGINT, sigcleanup);
|
||||
if (signal(SIGQUIT, SIG_IGN) != SIG_IGN)
|
||||
signal(SIGQUIT, sigcleanup);
|
||||
if (signal(SIGTERM, SIG_IGN) != SIG_IGN)
|
||||
signal(SIGTERM, sigcleanup);
|
||||
RclConfig *config = recollinit(cleanup, sigcleanup);
|
||||
|
||||
DebugLog::getdbl()->setloglevel(DEBDEB1);
|
||||
DebugLog::setfilename("stderr");
|
||||
RclConfig config;
|
||||
if (!config.ok()) {
|
||||
fprintf(stderr, "Config could not be built\n");
|
||||
exit(1);
|
||||
}
|
||||
indexer = new ConfIndexer(&config);
|
||||
|
||||
indexer = new ConfIndexer(config);
|
||||
|
||||
exit(!indexer->index());
|
||||
}
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: internfile.cpp,v 1.4 2005-03-25 09:40:27 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: internfile.cpp,v 1.5 2005-04-05 09:35:35 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
@ -88,7 +88,7 @@ FileInterner::FileInterner(const std::string &f, RclConfig *cnf,
|
||||
{
|
||||
mime = mimetype(fn, config->getMimeMap());
|
||||
if (mime.empty()) {
|
||||
// No mime type: not listed in our map.
|
||||
// No mime type: not listed in our map, or present in stop list
|
||||
LOGDEB(("FileInterner::FileInterner: (no mime) [%s]\n", fn.c_str()));
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: mh_mail.cpp,v 1.3 2005-04-04 13:18:46 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: mh_mail.cpp,v 1.4 2005-04-05 09:35:35 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
@ -202,7 +202,7 @@ MimeHandlerMail::processone(const string &fn, Binc::MimeDocument& doc,
|
||||
doc.isMultipart(), doc.getSubType().c_str()));
|
||||
walkmime(conf, docout.text, doc, 0);
|
||||
|
||||
//LOGDEB(("MimeHandlerMail::processone: text: '%s'\n", docout.text.c_str()));
|
||||
// LOGDEB(("MimeHandlerMail::processone: text: '%s'\n", docout.text.c_str()));
|
||||
return MimeHandler::MHDone;
|
||||
}
|
||||
|
||||
@ -231,21 +231,27 @@ static void walkmime(RclConfig *cnf, string &out, Binc::MimePart& doc,
|
||||
} else if (!stringicmp("alternative", doc.getSubType())) {
|
||||
std::vector<Binc::MimePart>::iterator ittxt, ithtml;
|
||||
ittxt = ithtml = doc.members.end();
|
||||
for (it = doc.members.begin(); it != doc.members.end();it++) {
|
||||
int i = 1;
|
||||
for (it = doc.members.begin(); it != doc.members.end();it++, i++) {
|
||||
// Get and parse content-type header
|
||||
Binc::HeaderItem hi;
|
||||
if (!doc.h.getFirstHeader("Content-Type", hi))
|
||||
if (!it->h.getFirstHeader("Content-Type", hi)) {
|
||||
LOGDEB(("No content-type header for part %d\n", i));
|
||||
continue;
|
||||
}
|
||||
MimeHeaderValue content_type;
|
||||
parseMimeHeaderValue(hi.getValue(), content_type);
|
||||
LOGDEB2(("walkmime: C-type: %s\n",content_type.value.c_str()));
|
||||
if (!stringlowercmp("text/plain", content_type.value))
|
||||
ittxt = it;
|
||||
else if (!stringlowercmp("text/html", content_type.value))
|
||||
ithtml = it;
|
||||
}
|
||||
if (ittxt != doc.members.end()) {
|
||||
LOGDEB2(("walkmime: alternative: chose text/plain part\n"))
|
||||
walkmime(cnf, out, *ittxt, depth+1);
|
||||
} else if (ithtml != doc.members.end()) {
|
||||
LOGDEB2(("walkmime: alternative: chose text/html part\n"))
|
||||
walkmime(cnf, out, *ithtml, depth+1);
|
||||
}
|
||||
}
|
||||
@ -337,5 +343,6 @@ static void walkmime(RclConfig *cnf, string &out, Binc::MimePart& doc,
|
||||
}
|
||||
|
||||
out += string("\r\n") + transcoded;
|
||||
LOGDEB2(("walkmime: out now: [%s]\n", out.c_str()));
|
||||
}
|
||||
}
|
||||
|
||||
@ -10,7 +10,7 @@ OBJS = conftree.o csguess.o debuglog.o \
|
||||
execmd.o wipedir.o \
|
||||
fstreewalk.o html.o mail.o htmlparse.o indexer.o internfile.o \
|
||||
mimehandler.o mimeparse.o mimetype.o myhtmlparse.o pathut.o \
|
||||
rclconfig.o rcldb.o readfile.o smallut.o \
|
||||
rclconfig.o rcldb.o rclinit.o readfile.o smallut.o \
|
||||
textsplit.o transcode.o \
|
||||
unacpp.o unac.o
|
||||
SRCS = ../utils/conftree.cpp ../index/csguess.cpp ../utils/debuglog.cpp \
|
||||
@ -20,8 +20,8 @@ SRCS = ../utils/conftree.cpp ../index/csguess.cpp ../utils/debuglog.cpp \
|
||||
../index/indexer.cpp ../common/internfile.cpp \
|
||||
../common/mimehandler.cpp ../utils/mimeparse.cpp ../index/mimetype.cpp \
|
||||
../common/myhtmlparse.cpp ../utils/pathut.cpp \
|
||||
../common/rclconfig.cpp ../common/rcldb.cpp ../utils/readfile.cpp \
|
||||
../utils/smallut.cpp \
|
||||
../common/rclconfig.cpp ../common/rcldb.cpp ../common/rclinit.cpp \
|
||||
../utils/readfile.cpp ../utils/smallut.cpp \
|
||||
../common/textsplit.cpp ../utils/transcode.cpp \
|
||||
../common/unacpp.cpp ../unac/unac.c
|
||||
|
||||
@ -66,6 +66,8 @@ pathut.o : ../utils/pathut.cpp
|
||||
$(CXX) $(CXXFLAGS) -c $<
|
||||
rclconfig.o : ../common/rclconfig.cpp
|
||||
$(CXX) $(CXXFLAGS) -c $<
|
||||
rclinit.o : ../common/rclinit.cpp
|
||||
$(CXX) $(CXXFLAGS) -c $<
|
||||
rcldb.o : ../common/rcldb.cpp
|
||||
$(CXX) $(CXXFLAGS) -c $<
|
||||
readfile.o : ../utils/readfile.cpp
|
||||
|
||||
@ -12,6 +12,7 @@
|
||||
#include "recoll.h"
|
||||
#include "smallut.h"
|
||||
#include "wipedir.h"
|
||||
#include "rclinit.h"
|
||||
|
||||
RclConfig *rclconfig;
|
||||
Rcl::Db *rcldb;
|
||||
@ -33,10 +34,9 @@ void recollCleanup()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void sigcleanup(int)
|
||||
{
|
||||
fprintf(stderr, "sigcleanup\n");
|
||||
// fprintf(stderr, "sigcleanup\n");
|
||||
// Can't call exit from here, because the atexit cleanup does some
|
||||
// thread stuff that we can't do from signal context.
|
||||
// Just set a flag and let the watchdog timer do the work
|
||||
@ -54,23 +54,14 @@ int main( int argc, char ** argv )
|
||||
w.connect(timer, SIGNAL(timeout()), &w, SLOT(checkExit()));
|
||||
timer->start(100);
|
||||
|
||||
atexit(recollCleanup);
|
||||
if (signal(SIGHUP, SIG_IGN) != SIG_IGN)
|
||||
signal(SIGHUP, sigcleanup);
|
||||
if (signal(SIGINT, SIG_IGN) != SIG_IGN)
|
||||
signal(SIGINT, sigcleanup);
|
||||
if (signal(SIGQUIT, SIG_IGN) != SIG_IGN)
|
||||
signal(SIGQUIT, sigcleanup);
|
||||
if (signal(SIGTERM, SIG_IGN) != SIG_IGN)
|
||||
signal(SIGTERM, sigcleanup);
|
||||
rclconfig = recollinit(recollCleanup, sigcleanup);
|
||||
|
||||
|
||||
rclconfig = new RclConfig;
|
||||
if (!rclconfig || !rclconfig->ok()) {
|
||||
QMessageBox::critical(0, "Recoll",
|
||||
QString("Could not find configuration"));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
string dbdir;
|
||||
if (rclconfig->getConfParam(string("dbdir"), dbdir) == 0) {
|
||||
// Note: this will have to be replaced by a call to a
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.26 2005-04-04 13:18:46 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.27 2005-04-05 09:35:35 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <sys/stat.h>
|
||||
@ -317,9 +317,17 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc)
|
||||
splitter.text_to_words(noacc);
|
||||
|
||||
newdocument.add_term("T" + doc.mimetype);
|
||||
string pathterm = doc.ipath.empty() ?
|
||||
"P" + fn : "P" + fn + "|" + doc.ipath;
|
||||
|
||||
string pathterm = "P" + fn;
|
||||
newdocument.add_term(pathterm);
|
||||
|
||||
string uniterm;
|
||||
if (!doc.ipath.empty()) {
|
||||
uniterm = "Q" + fn + "|" + doc.ipath;
|
||||
newdocument.add_term(uniterm);
|
||||
}
|
||||
|
||||
|
||||
const char *fnc = fn.c_str();
|
||||
|
||||
// Document data record. omindex has the following nl separated fields:
|
||||
@ -342,7 +350,6 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc)
|
||||
LOGDEB1(("Newdocument data: %s\n", record.c_str()));
|
||||
newdocument.set_data(record);
|
||||
|
||||
|
||||
time_t mtime = atol(doc.mtime.c_str());
|
||||
struct tm *tm = localtime(&mtime);
|
||||
char buf[9];
|
||||
@ -360,7 +367,8 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc)
|
||||
// entry.
|
||||
try {
|
||||
Xapian::docid did =
|
||||
ndb->wdb.replace_document(pathterm, newdocument);
|
||||
ndb->wdb.replace_document(uniterm.empty() ? pathterm : uniterm,
|
||||
newdocument);
|
||||
if (did < ndb->updated.size()) {
|
||||
ndb->updated[did] = true;
|
||||
LOGDEB(("Rcl::Db::add: docid %d updated [%s , %s]\n", did, fnc,
|
||||
@ -385,33 +393,44 @@ bool Rcl::Db::needUpdate(const string &filename, const struct stat *stp)
|
||||
return false;
|
||||
Native *ndb = (Native *)pdata;
|
||||
|
||||
// If no document exists with this path, we do need to update
|
||||
string pathterm = "P" + filename;
|
||||
if (!ndb->wdb.term_exists(pathterm)) {
|
||||
pathterm += string("|") + "1";
|
||||
if (!ndb->wdb.term_exists(pathterm)) {
|
||||
return true;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Look for all documents with this path. Check the update time (once).
|
||||
// If the db is up to date, set the update flags for all documents
|
||||
Xapian::PostingIterator doc;
|
||||
try {
|
||||
Xapian::PostingIterator did = ndb->wdb.postlist_begin(pathterm);
|
||||
if (did == ndb->wdb.postlist_end(pathterm))
|
||||
return true;
|
||||
Xapian::Document doc = ndb->wdb.get_document(*did);
|
||||
string data = doc.get_data();
|
||||
const char *cp = strstr(data.c_str(), "mtime=");
|
||||
cp += 6;
|
||||
long mtime = atol(cp);
|
||||
if (mtime >= stp->st_mtime) {
|
||||
Xapian::PostingIterator did0 = ndb->wdb.postlist_begin(pathterm);
|
||||
for (Xapian::PostingIterator did = did0;
|
||||
did != ndb->wdb.postlist_end(pathterm); did++) {
|
||||
|
||||
Xapian::Document doc = ndb->wdb.get_document(*did);
|
||||
|
||||
// Check the date once. no need to look at the others if the
|
||||
// db needs updating.
|
||||
if (did == did0) {
|
||||
string data = doc.get_data();
|
||||
const char *cp = strstr(data.c_str(), "mtime=");
|
||||
cp += 6;
|
||||
long mtime = atol(cp);
|
||||
if (mtime < stp->st_mtime) {
|
||||
// Db is not up to date. Let's index the file
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Db is up to date. Make a note that this document exists.
|
||||
if (*did < ndb->updated.size())
|
||||
ndb->updated[*did] = true;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
} catch (...) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Compute name of stem db for given base database and language
|
||||
@ -582,7 +601,7 @@ bool Rcl::Db::purge()
|
||||
ndb->wdb.delete_document(did);
|
||||
LOGDEB(("Rcl::Db::purge: deleted document #%d\n", did));
|
||||
} catch (const Xapian::DocNotFoundError &) {
|
||||
LOGDEB(("Rcl::Db::purge: document #%d not found\n", did));
|
||||
LOGDEB2(("Rcl::Db::purge: document #%d not found\n", did));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
# @(#$Id: mimemap,v 1.3 2005-02-09 12:07:30 dockes Exp $ (C) 2004 J.F.Dockes
|
||||
# @(#$Id: mimemap,v 1.4 2005-04-05 09:35:35 dockes Exp $ (C) 2004 J.F.Dockes
|
||||
|
||||
# Recoll: associations of file name extensions to mime types
|
||||
.txt = text/plain
|
||||
@ -37,8 +37,12 @@
|
||||
.rtf = text/rtf
|
||||
|
||||
|
||||
# A list of stuff that we don't want to touch at all
|
||||
recoll_noindex = .tar.gz .tgz .tar.bz2 .tbz
|
||||
# A list of stuff that we don't want to touch at all (for now). Having the
|
||||
# suffixes listed in there speeds up things quite a lot by avoiding
|
||||
# unneeded decompression or 'file' calls
|
||||
recoll_noindex = .tar.gz .tgz .tar.bz2 .tbz \
|
||||
.c .h .cpp .m4 .tcl .js .sh .pl .awk \
|
||||
.o .lib .dll .a \
|
||||
.dat .bak .rdf .log .db .ini .gnm .gnumeric \
|
||||
.jpg .gif .bmp .xpm
|
||||
|
||||
[FILE]
|
||||
# This section is for future non-suffix-based extensions (i.e. detecting mail folders)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user