*** empty log message ***

This commit is contained in:
dockes 2005-04-05 09:35:35 +00:00
parent 50b927f65c
commit 68fb37b5f9
10 changed files with 137 additions and 74 deletions

40
src/common/rclinit.cpp Normal file
View File

@ -0,0 +1,40 @@
#ifndef lint
static char rcsid[] = "@(#$Id: rclinit.cpp,v 1.1 2005-04-05 09:35:35 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
#include <stdio.h>
#include <signal.h>
#include "debuglog.h"
#include "rclconfig.h"
/**
 * Common start-up sequence shared by the recoll programs.
 *
 * Registers the exit-time cleanup routine, installs the signal handler on
 * the usual termination signals, sends debug output to stderr at level
 * DEBDEB1, builds the configuration object, then lets the "logfilename"
 * and "loglevel" configuration parameters override the logging defaults.
 *
 * @param cleanup    routine registered with atexit().
 * @param sigcleanup handler installed for SIGHUP, SIGINT, SIGQUIT and
 *                   SIGTERM, except for those that were already ignored.
 * @return the configuration object allocated here with new; nothing in
 *         this function deletes it. Does not return if the configuration
 *         could not be built: a message is printed on stderr and the
 *         process exits with status 1.
 */
RclConfig *recollinit(void (*cleanup)(void), void (*sigcleanup)(int))
{
    atexit(cleanup);

    // Signals on which we want the caller's handler to run.
    static const int termsigs[] = {SIGHUP, SIGINT, SIGQUIT, SIGTERM};
    const unsigned int nsigs = sizeof(termsigs) / sizeof(termsigs[0]);
    for (unsigned int idx = 0; idx < nsigs; idx++) {
        // signal() hands back the previous disposition: a signal which
        // was already being ignored stays ignored, any other one gets
        // the handler.
        if (signal(termsigs[idx], SIG_IGN) == SIG_IGN)
            continue;
        signal(termsigs[idx], sigcleanup);
    }

    // Logging defaults, in effect while the configuration is being built.
    DebugLog::getdbl()->setloglevel(DEBDEB1);
    DebugLog::setfilename("stderr");

    RclConfig *config = new RclConfig;
    if (!config || !config->ok()) {
        fprintf(stderr, "Config could not be built\n");
        exit(1);
    }

    // Optional overrides from the configuration.
    string logfilename;
    if (config->getConfParam(string("logfilename"), logfilename))
        DebugLog::setfilename(logfilename.c_str());

    string loglevel;
    if (config->getConfParam(string("loglevel"), loglevel))
        DebugLog::getdbl()->setloglevel(atoi(loglevel.c_str()));

    return config;
}

9
src/common/rclinit.h Normal file
View File

@ -0,0 +1,9 @@
#ifndef _RCLINIT_H_INCLUDED_
#define _RCLINIT_H_INCLUDED_
/* @(#$Id: rclinit.h,v 1.1 2005-04-05 09:35:35 dockes Exp $ (C) 2004 J.F.Dockes */
class RclConfig;
/**
 * Common program initialisation: registers cleanup with atexit(), installs
 * sigcleanup as the handler for SIGHUP, SIGINT, SIGQUIT and SIGTERM (unless
 * the signal was already ignored), sets up debug logging and builds the
 * configuration.
 *
 * @param cleanup    routine to run at process exit.
 * @param sigcleanup signal handler for the termination signals.
 * @return the newly allocated configuration object. The process exits with
 *         status 1 if the configuration could not be built.
 */
extern RclConfig *recollinit(void (*cleanup)(void), void (*sigcleanup)(int));
#endif /* _RCLINIT_H_INCLUDED_ */

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.7 2005-04-04 13:18:46 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.8 2005-04-05 09:35:35 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
#include <ctype.h>
@ -13,6 +13,7 @@ using std::list;
#include "debuglog.h"
#include "execmd.h"
#include "conftree.h"
#include "smallut.h"
static string mimetypefromdata(const string &fn)
{
@ -61,8 +62,9 @@ string mimetype(const string &fn, ConfTree *mtypes)
it != stoplist.end(); it++) {
if (it->length() > fn.length())
continue;
if (!fn.compare(fn.length() - it->length(), string::npos, *it)) {
LOGDEB1(("mimetype: fn %s in stoplist (%s)\n", fn.c_str(),
if (!stringicmp(fn.substr(fn.length() - it->length(),string::npos),
*it)) {
LOGINFO(("mimetype: fn %s in stoplist (%s)\n", fn.c_str(),
it->c_str()));
return "";
}
@ -82,13 +84,16 @@ string mimetype(const string &fn, ConfTree *mtypes)
return mtype;
}
// Look at file data ? Only when no suffix or always
// Look at file data ? Only when no suffix or always ?
// Also 'file' is not that great for us. For example it will
// mistake mail folders for simple text files if there is no 'Received'
// header, which would be the case, for example in a 'Sent' folder. Also
// I'm not sure that file -i exists on all systems
//if (suff.empty())
//if (suff.empty()) // causes problems with shifted files, like
// messages.1, messages.2 etc...
return mimetypefromdata(fn);
return "";
}

View File

@ -1,11 +1,12 @@
#ifndef lint
static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.9 2005-02-01 08:42:56 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.10 2005-04-05 09:35:35 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
#include <stdio.h>
#include <signal.h>
#include "debuglog.h"
#include "rclinit.h"
#include "indexer.h"
ConfIndexer *indexer;
@ -25,24 +26,9 @@ static void sigcleanup(int sig)
int main(int argc, const char **argv)
{
atexit(cleanup);
if (signal(SIGHUP, SIG_IGN) != SIG_IGN)
signal(SIGHUP, sigcleanup);
if (signal(SIGINT, SIG_IGN) != SIG_IGN)
signal(SIGINT, sigcleanup);
if (signal(SIGQUIT, SIG_IGN) != SIG_IGN)
signal(SIGQUIT, sigcleanup);
if (signal(SIGTERM, SIG_IGN) != SIG_IGN)
signal(SIGTERM, sigcleanup);
RclConfig *config = recollinit(cleanup, sigcleanup);
DebugLog::getdbl()->setloglevel(DEBDEB1);
DebugLog::setfilename("stderr");
RclConfig config;
if (!config.ok()) {
fprintf(stderr, "Config could not be built\n");
exit(1);
}
indexer = new ConfIndexer(&config);
indexer = new ConfIndexer(config);
exit(!indexer->index());
}

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: internfile.cpp,v 1.4 2005-03-25 09:40:27 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: internfile.cpp,v 1.5 2005-04-05 09:35:35 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
#include <unistd.h>
#include <sys/types.h>
@ -88,7 +88,7 @@ FileInterner::FileInterner(const std::string &f, RclConfig *cnf,
{
mime = mimetype(fn, config->getMimeMap());
if (mime.empty()) {
// No mime type: not listed in our map.
// No mime type: not listed in our map, or present in stop list
LOGDEB(("FileInterner::FileInterner: (no mime) [%s]\n", fn.c_str()));
return;
}

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: mh_mail.cpp,v 1.3 2005-04-04 13:18:46 dockes Exp $ (C) 2005 J.F.Dockes";
static char rcsid[] = "@(#$Id: mh_mail.cpp,v 1.4 2005-04-05 09:35:35 dockes Exp $ (C) 2005 J.F.Dockes";
#endif
#include <stdio.h>
@ -231,21 +231,27 @@ static void walkmime(RclConfig *cnf, string &out, Binc::MimePart& doc,
} else if (!stringicmp("alternative", doc.getSubType())) {
std::vector<Binc::MimePart>::iterator ittxt, ithtml;
ittxt = ithtml = doc.members.end();
for (it = doc.members.begin(); it != doc.members.end();it++) {
int i = 1;
for (it = doc.members.begin(); it != doc.members.end();it++, i++) {
// Get and parse content-type header
Binc::HeaderItem hi;
if (!doc.h.getFirstHeader("Content-Type", hi))
if (!it->h.getFirstHeader("Content-Type", hi)) {
LOGDEB(("No content-type header for part %d\n", i));
continue;
}
MimeHeaderValue content_type;
parseMimeHeaderValue(hi.getValue(), content_type);
LOGDEB2(("walkmime: C-type: %s\n",content_type.value.c_str()));
if (!stringlowercmp("text/plain", content_type.value))
ittxt = it;
else if (!stringlowercmp("text/html", content_type.value))
ithtml = it;
}
if (ittxt != doc.members.end()) {
LOGDEB2(("walkmime: alternative: chose text/plain part\n"))
walkmime(cnf, out, *ittxt, depth+1);
} else if (ithtml != doc.members.end()) {
LOGDEB2(("walkmime: alternative: chose text/html part\n"))
walkmime(cnf, out, *ithtml, depth+1);
}
}
@ -337,5 +343,6 @@ static void walkmime(RclConfig *cnf, string &out, Binc::MimePart& doc,
}
out += string("\r\n") + transcoded;
LOGDEB2(("walkmime: out now: [%s]\n", out.c_str()));
}
}

View File

@ -10,7 +10,7 @@ OBJS = conftree.o csguess.o debuglog.o \
execmd.o wipedir.o \
fstreewalk.o html.o mail.o htmlparse.o indexer.o internfile.o \
mimehandler.o mimeparse.o mimetype.o myhtmlparse.o pathut.o \
rclconfig.o rcldb.o readfile.o smallut.o \
rclconfig.o rcldb.o rclinit.o readfile.o smallut.o \
textsplit.o transcode.o \
unacpp.o unac.o
SRCS = ../utils/conftree.cpp ../index/csguess.cpp ../utils/debuglog.cpp \
@ -20,8 +20,8 @@ SRCS = ../utils/conftree.cpp ../index/csguess.cpp ../utils/debuglog.cpp \
../index/indexer.cpp ../common/internfile.cpp \
../common/mimehandler.cpp ../utils/mimeparse.cpp ../index/mimetype.cpp \
../common/myhtmlparse.cpp ../utils/pathut.cpp \
../common/rclconfig.cpp ../common/rcldb.cpp ../utils/readfile.cpp \
../utils/smallut.cpp \
../common/rclconfig.cpp ../common/rcldb.cpp ../common/rclinit.cpp \
../utils/readfile.cpp ../utils/smallut.cpp \
../common/textsplit.cpp ../utils/transcode.cpp \
../common/unacpp.cpp ../unac/unac.c
@ -66,6 +66,8 @@ pathut.o : ../utils/pathut.cpp
$(CXX) $(CXXFLAGS) -c $<
rclconfig.o : ../common/rclconfig.cpp
$(CXX) $(CXXFLAGS) -c $<
rclinit.o : ../common/rclinit.cpp
$(CXX) $(CXXFLAGS) -c $<
rcldb.o : ../common/rcldb.cpp
$(CXX) $(CXXFLAGS) -c $<
readfile.o : ../utils/readfile.cpp

View File

@ -12,6 +12,7 @@
#include "recoll.h"
#include "smallut.h"
#include "wipedir.h"
#include "rclinit.h"
RclConfig *rclconfig;
Rcl::Db *rcldb;
@ -33,10 +34,9 @@ void recollCleanup()
}
}
static void sigcleanup(int)
{
fprintf(stderr, "sigcleanup\n");
// fprintf(stderr, "sigcleanup\n");
// Can't call exit from here, because the atexit cleanup does some
// thread stuff that we can't do from signal context.
// Just set a flag and let the watchdog timer do the work
@ -54,23 +54,14 @@ int main( int argc, char ** argv )
w.connect(timer, SIGNAL(timeout()), &w, SLOT(checkExit()));
timer->start(100);
atexit(recollCleanup);
if (signal(SIGHUP, SIG_IGN) != SIG_IGN)
signal(SIGHUP, sigcleanup);
if (signal(SIGINT, SIG_IGN) != SIG_IGN)
signal(SIGINT, sigcleanup);
if (signal(SIGQUIT, SIG_IGN) != SIG_IGN)
signal(SIGQUIT, sigcleanup);
if (signal(SIGTERM, SIG_IGN) != SIG_IGN)
signal(SIGTERM, sigcleanup);
rclconfig = recollinit(recollCleanup, sigcleanup);
rclconfig = new RclConfig;
if (!rclconfig || !rclconfig->ok()) {
QMessageBox::critical(0, "Recoll",
QString("Could not find configuration"));
exit(1);
}
string dbdir;
if (rclconfig->getConfParam(string("dbdir"), dbdir) == 0) {
// Note: this will have to be replaced by a call to a

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.26 2005-04-04 13:18:46 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.27 2005-04-05 09:35:35 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
#include <stdio.h>
#include <sys/stat.h>
@ -317,9 +317,17 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc)
splitter.text_to_words(noacc);
newdocument.add_term("T" + doc.mimetype);
string pathterm = doc.ipath.empty() ?
"P" + fn : "P" + fn + "|" + doc.ipath;
string pathterm = "P" + fn;
newdocument.add_term(pathterm);
string uniterm;
if (!doc.ipath.empty()) {
uniterm = "Q" + fn + "|" + doc.ipath;
newdocument.add_term(uniterm);
}
const char *fnc = fn.c_str();
// Document data record. omindex has the following nl separated fields:
@ -342,7 +350,6 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc)
LOGDEB1(("Newdocument data: %s\n", record.c_str()));
newdocument.set_data(record);
time_t mtime = atol(doc.mtime.c_str());
struct tm *tm = localtime(&mtime);
char buf[9];
@ -360,7 +367,8 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc)
// entry.
try {
Xapian::docid did =
ndb->wdb.replace_document(pathterm, newdocument);
ndb->wdb.replace_document(uniterm.empty() ? pathterm : uniterm,
newdocument);
if (did < ndb->updated.size()) {
ndb->updated[did] = true;
LOGDEB(("Rcl::Db::add: docid %d updated [%s , %s]\n", did, fnc,
@ -385,33 +393,44 @@ bool Rcl::Db::needUpdate(const string &filename, const struct stat *stp)
return false;
Native *ndb = (Native *)pdata;
// If no document exist with this path, we do need update
string pathterm = "P" + filename;
if (!ndb->wdb.term_exists(pathterm)) {
pathterm += string("|") + "1";
if (!ndb->wdb.term_exists(pathterm)) {
return true;
}
}
// Look for all documents with this path. Check the update time (once).
// If the db is up to date, set the update flags for all documents
Xapian::PostingIterator doc;
try {
Xapian::PostingIterator did = ndb->wdb.postlist_begin(pathterm);
if (did == ndb->wdb.postlist_end(pathterm))
return true;
Xapian::PostingIterator did0 = ndb->wdb.postlist_begin(pathterm);
for (Xapian::PostingIterator did = did0;
did != ndb->wdb.postlist_end(pathterm); did++) {
Xapian::Document doc = ndb->wdb.get_document(*did);
// Check the date once. no need to look at the others if the
// db needs updating.
if (did == did0) {
string data = doc.get_data();
const char *cp = strstr(data.c_str(), "mtime=");
cp += 6;
long mtime = atol(cp);
if (mtime >= stp->st_mtime) {
if (mtime < stp->st_mtime) {
// Db is not up to date. Let's index the file
return true;
}
}
// Db is up to date. Make a note that this document exists.
if (*did < ndb->updated.size())
ndb->updated[*did] = true;
return false;
}
} catch (...) {
return true;
}
return true;
return false;
}
/// Compute name of stem db for given base database and language
@ -582,7 +601,7 @@ bool Rcl::Db::purge()
ndb->wdb.delete_document(did);
LOGDEB(("Rcl::Db::purge: deleted document #%d\n", did));
} catch (const Xapian::DocNotFoundError &) {
LOGDEB(("Rcl::Db::purge: document #%d not found\n", did));
LOGDEB2(("Rcl::Db::purge: document #%d not found\n", did));
}
}
}

View File

@ -1,4 +1,4 @@
# @(#$Id: mimemap,v 1.3 2005-02-09 12:07:30 dockes Exp $ (C) 2004 J.F.Dockes
# @(#$Id: mimemap,v 1.4 2005-04-05 09:35:35 dockes Exp $ (C) 2004 J.F.Dockes
# Recoll: associations of file name extensions to mime types
.txt = text/plain
@ -37,8 +37,12 @@
.rtf = text/rtf
# A list of stuff that we don't want to touch at all
recoll_noindex = .tar.gz .tgz .tar.bz2 .tbz
# A list of stuff that we don't want to touch at all (for now). Having the
# suffixes listed in there speeds up things quite a lot by avoiding
# unneeded decompression or 'file' calls
recoll_noindex = .tar.gz .tgz .tar.bz2 .tbz \
.c .h .cpp .m4 .tcl .js .sh .pl .awk \
.o .lib .dll .a \
.dat .bak .rdf .log .db .ini .gnm .gnumeric \
.jpg .gif .bmp .xpm
[FILE]
# This section for future non suffix-based extension (ie detect mail folders)