first incarnation of indexing thread

This commit is contained in:
dockes 2005-01-31 14:31:10 +00:00
parent a9200df71a
commit a93e619960
18 changed files with 601 additions and 259 deletions

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.4 2005-01-25 14:37:20 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.5 2005-01-31 14:31:09 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
#include <iostream>
@ -9,22 +9,18 @@ static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.4 2005-01-25 14:37:20 dockes Ex
#include "conftree.h"
#include "debuglog.h"
static DebugLog debuglog;
DebugLog *dbl = &debuglog;
class loginitializer {
public:
loginitializer() {
dbl->setlogfilename("stderr");
dbl->setloglevel(10);
}
};
static loginitializer lgntlzr;
using namespace std;
RclConfig::RclConfig()
: m_ok(false), conf(0), mimemap(0), mimeconf(0)
{
static int loginit = 0;
if (!loginit) {
DebugLog::setfilename("stderr");
DebugLog::getdbl()->setloglevel(10);
loginit = 1;
}
const char *cp = getenv("RECOLL_CONFDIR");
if (cp) {
confdir = cp;

203
src/index/indexer.cpp Normal file
View File

@ -0,0 +1,203 @@
#ifndef lint
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.1 2005-01-31 14:31:09 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
#include <sys/stat.h>
#include <strings.h>
#include <iostream>
#include <list>
#include <map>
#include "pathut.h"
#include "conftree.h"
#include "rclconfig.h"
#include "fstreewalk.h"
#include "mimetype.h"
#include "rcldb.h"
#include "readfile.h"
#include "indexer.h"
#include "csguess.h"
#include "transcode.h"
#include "mimehandler.h"
#include "debuglog.h"
using namespace std;
#ifndef deleteZ
// Delete the object X points to, then reset X to 0 so that a later
// deleteZ()/delete on the same pointer is harmless. X is evaluated twice,
// so it must be a plain lvalue without side effects. The expansion is a
// braced block, not a single statement.
#define deleteZ(X) {delete X;X = 0;}
#endif
/**
 * State needed to index a set of directory trees into one database.
 *
 * One instance is created per database directory. The configuration and
 * the list of top directories are held by pointer (not copied), so both
 * must stay alive for as long as this object is used.
 */
class DbIndexer {
private:
    FsTreeWalker  walker;    // walks each top directory, calling indexfile()
    RclConfig    *config;    // configuration used for mime lookups / key dir
    string        dbdir;     // directory of the database being updated
    list<string> *topdirs;   // directories to index into dbdir
    Rcl::Db       db;        // the database, opened by index()

public:
    DbIndexer(RclConfig *rclcfg, const string &dbpath, list<string> *tops)
        : config(rclcfg), dbdir(dbpath), topdirs(tops)
    {
    }

    // The tree-walker callback needs access to config and db.
    friend FsTreeWalker::Status
    indexfile(void *, const std::string &, const struct stat *,
              FsTreeWalker::CbFlag);

    // Index all of topdirs into dbdir. Returns false on any error.
    bool index();
};
bool DbIndexer::index()
{
if (!db.open(dbdir, Rcl::Db::DbUpd)) {
LOGERR(("DbIndexer::index: error opening database in %s\n",
dbdir.c_str()));
return false;
}
for (list<string>::const_iterator it = topdirs->begin();
it != topdirs->end(); it++) {
LOGDEB(("DbIndexer::index: Indexing %s into %s\n", it->c_str(),
dbdir.c_str()));
if (walker.walk(*it, indexfile, this) != FsTreeWalker::FtwOk) {
LOGERR(("DbIndexer::index: error while indexing %s\n",
it->c_str()));
db.close();
return false;
}
}
db.purge();
if (!db.close()) {
LOGERR(("DbIndexer::index: error closing database in %s\n",
dbdir.c_str()));
return false;
}
return true;
}
/**
 * This function gets called for every file and directory found by the
 * tree walker. It checks with the db if the file has changed and needs to
 * be reindexed. If so, it calls an appropriate handler depending on the mime
 * type, which is responsible for populating an Rcl::Doc.
 * Accent and majuscule handling are performed by the db module when doing
 * the actual indexing work.
 *
 * @param cdata the DbIndexer driving the walk (passed as void *)
 * @param fn    path of the file or directory being visited
 * @param stp   stat data for fn; st_mtime is stored in the document
 * @param flg   tells whether we are entering/returning to a directory
 * @return FtwError only if the database update fails. Every other case
 *         (directory event, no mime type, no handler, up to date file,
 *         handler failure) returns FtwOk so that the walk goes on.
 */
FsTreeWalker::Status
indexfile(void *cdata, const std::string &fn, const struct stat *stp,
          FsTreeWalker::CbFlag flg)
{
    DbIndexer *me = (DbIndexer *)cdata;

    // If we're changing directories, possibly adjust parameters.
    if (flg == FsTreeWalker::FtwDirEnter ||
        flg == FsTreeWalker::FtwDirReturn) {
        me->config->setKeyDir(fn);
        return FsTreeWalker::FtwOk;
    }

    string mime = mimetype(fn, me->config->getMimeMap());
    if (mime.empty()) {
        // No mime type ?? pass on.
        LOGDEB(("indexfile: (no mime) [%s]\n", fn.c_str()));
        return FsTreeWalker::FtwOk;
    }

    // Look for appropriate handler
    MimeHandlerFunc fun = getMimeHandler(mime, me->config->getMimeConf());
    if (!fun) {
        // No handler for this type, for now :(
        LOGDEB(("indexfile: %s : no handler\n", mime.c_str()));
        return FsTreeWalker::FtwOk;
    }
    LOGDEB(("indexfile: %s [%s]\n", mime.c_str(), fn.c_str()));

    // Check db up to date ?
    if (!me->db.needUpdate(fn, stp))
        return FsTreeWalker::FtwOk;

    // Turn file into a document. The document has fields for title, body
    // etc., all text converted to utf8
    Rcl::Doc doc;
    if (!fun(me->config, fn, mime, doc))
        return FsTreeWalker::FtwOk;

    // Set up common fields:
    doc.mimetype = mime;
    // A 64 bit long needs up to 20 characters plus the terminating NUL:
    // size the buffer generously and bound the write.
    char ascdate[32];
    snprintf(ascdate, sizeof(ascdate), "%ld", long(stp->st_mtime));
    doc.mtime = ascdate;

    // Do database-specific work to update document data
    if (!me->db.add(fn, doc))
        return FsTreeWalker::FtwError;

    return FsTreeWalker::FtwOk;
}
// Release the per-database indexer if one is still allocated, and clear
// the pointer.
ConfIndexer::~ConfIndexer()
{
    delete indexer;
    indexer = 0;
}
bool ConfIndexer::index()
{
ConfTree *conf = config->getConfig();
// Retrieve the list of directories to be indexed.
string topdirs;
if (conf->get("topdirs", topdirs, "") == 0) {
LOGERR(("ConfIndexer::index: no top directories in configuration\n"));
return false;
}
// Group the directories by database: it is important that all
// directories for a database be indexed at once so that deleted
// file cleanup works
vector<string> tdl; // List of directories to be indexed
if (!ConfTree::stringToStrings(topdirs, tdl)) {
LOGERR(("ConfIndexer::index: parse error for directory list\n"));
return false;
}
vector<string>::iterator dirit;
map<string, list<string> > dbmap;
map<string, list<string> >::iterator dbit;
for (dirit = tdl.begin(); dirit != tdl.end(); dirit++) {
string db;
string dir = path_tildexpand(*dirit);
if (conf->get("dbdir", db, dir) == 0) {
LOGERR(("ConfIndexer::index: no database directory in "
"configuration for %s\n", dir.c_str()));
return false;
}
db = path_tildexpand(db);
dbit = dbmap.find(db);
if (dbit == dbmap.end()) {
list<string> l;
l.push_back(dir);
dbmap[db] = l;
} else {
dbit->second.push_back(dir);
}
}
for (dbit = dbmap.begin(); dbit != dbmap.end(); dbit++) {
//cout << dbit->first << " -> ";
//list<string>::const_iterator dit;
//for (dit = dbit->second.begin(); dit != dbit->second.end(); dit++) {
// cout << *dit << " ";
//}
//cout << endl;
indexer = new DbIndexer(config, dbit->first, &dbit->second);
if (!indexer->index()) {
deleteZ(indexer);
return false;
}
deleteZ(indexer);
}
return true;
}

View File

@ -1,18 +1,17 @@
#ifndef _INDEXER_H_INCLUDED_
#define _INDEXER_H_INCLUDED_
/* @(#$Id: indexer.h,v 1.3 2005-01-25 14:37:21 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: indexer.h,v 1.4 2005-01-31 14:31:09 dockes Exp $ (C) 2004 J.F.Dockes */
#include "rclconfig.h"
#if 0
class FsIndexer {
const ConfTree &conf;
class DbIndexer;
class ConfIndexer {
RclConfig *config;
DbIndexer *indexer;
public:
enum runStatus {IndexerOk, IndexerError};
Indexer(const ConfTree &cnf): conf(cnf) {}
virtual ~Indexer() {}
runStatus run() = 0;
ConfIndexer(RclConfig *cnf) : config(cnf), indexer(0) {}
virtual ~ConfIndexer();
bool index();
};
#endif
#endif /* _INDEXER_H_INCLUDED_ */

View File

@ -1,139 +1,13 @@
#ifndef lint
static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.7 2005-01-29 15:41:11 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.8 2005-01-31 14:31:09 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
#include <sys/stat.h>
#include <stdio.h>
#include <signal.h>
#include <strings.h>
#include <iostream>
#include <list>
#include <map>
#include "pathut.h"
#include "conftree.h"
#include "rclconfig.h"
#include "fstreewalk.h"
#include "mimetype.h"
#include "rcldb.h"
#include "readfile.h"
#include "indexer.h"
#include "csguess.h"
#include "transcode.h"
#include "mimehandler.h"
#include "debuglog.h"
using namespace std;
/**
* Bunch holder for data used while indexing a directory tree
*/
class DirIndexer {
FsTreeWalker walker;
RclConfig *config;
list<string> *topdirs;
string dbdir;
Rcl::Db db;
public:
DirIndexer(RclConfig *cnf, const string &dbd, list<string> *top)
: config(cnf), topdirs(top), dbdir(dbd)
{ }
friend FsTreeWalker::Status
indexfile(void *, const std::string &, const struct stat *,
FsTreeWalker::CbFlag);
bool index();
};
bool DirIndexer::index()
{
if (!db.open(dbdir, Rcl::Db::DbUpd)) {
LOGERR(("DirIndexer::index: error opening database in %s\n",
dbdir.c_str()));
return false;
}
for (list<string>::const_iterator it = topdirs->begin();
it != topdirs->end(); it++) {
LOGDEB(("DirIndexer::index: Indexing %s into %s\n", it->c_str(),
dbdir.c_str()));
if (walker.walk(*it, indexfile, this) != FsTreeWalker::FtwOk) {
LOGERR(("DirIndexer::index: error while indexing %s\n",
it->c_str()));
db.close();
return false;
}
}
db.purge();
if (!db.close()) {
LOGERR(("DirIndexer::index: error closing database in %s\n",
dbdir.c_str()));
return false;
}
return true;
}
/**
* This function gets called for every file and directory found by the
* tree walker. It checks with the db if the file has changed and needs to
* be reindexed. If so, it calls an appropriate handler depending on the mime
* type, which is responsible for populating an Rcl::Doc.
* Accent and majuscule handling are performed by the db module when doing
* the actual indexing work.
*/
FsTreeWalker::Status
indexfile(void *cdata, const std::string &fn, const struct stat *stp,
FsTreeWalker::CbFlag flg)
{
DirIndexer *me = (DirIndexer *)cdata;
if (flg == FsTreeWalker::FtwDirEnter ||
flg == FsTreeWalker::FtwDirReturn) {
me->config->setKeyDir(fn);
return FsTreeWalker::FtwOk;
}
string mime = mimetype(fn, me->config->getMimeMap());
if (mime.length() == 0) {
LOGDEB(("indexfile: (no mime) [%s]\n", fn.c_str()));
// No mime type ?? pass on.
return FsTreeWalker::FtwOk;
}
LOGDEB(("indexfile: %s [%s]\n", mime.c_str(), fn.c_str()));
// Look for appropriate handler
MimeHandlerFunc fun = getMimeHandler(mime, me->config->getMimeConf());
if (!fun) {
// No handler for this type, for now :(
return FsTreeWalker::FtwOk;
}
if (!me->db.needUpdate(fn, stp))
return FsTreeWalker::FtwOk;
// Turn file into a document. The document has fields for title, body
// etc., all text converted to utf8
Rcl::Doc doc;
if (!fun(me->config, fn, mime, doc))
return FsTreeWalker::FtwOk;
// Set up common fields:
doc.mimetype = mime;
char ascdate[20];
sprintf(ascdate, "%ld", long(stp->st_mtime));
doc.mtime = ascdate;
// Set up xapian document, add postings and misc fields,
// add to or update database.
if (!me->db.add(fn, doc))
return FsTreeWalker::FtwError;
return FsTreeWalker::FtwOk;
}
DirIndexer *indexer;
ConfIndexer *indexer;
static void cleanup()
{
@ -161,61 +35,11 @@ int main(int argc, const char **argv)
signal(SIGTERM, sigcleanup);
RclConfig config;
if (!config.ok())
cerr << "Config could not be built" << endl;
ConfTree *conf = config.getConfig();
// Retrieve the list of directories to be indexed.
string topdirs;
if (conf->get("topdirs", topdirs, "") == 0) {
cerr << "No top directories in configuration" << endl;
if (!config.ok()) {
fprintf(stderr, "Config could not be built\n");
exit(1);
}
// Group the directories by database: it is important that all
// directories for a database be indexed at once so that deleted
// file cleanup works
vector<string> tdl; // List of directories to be indexed
if (!ConfTree::stringToStrings(topdirs, tdl)) {
cerr << "Parse error for directory list" << endl;
exit(1);
}
vector<string>::iterator dirit;
map<string, list<string> > dbmap;
map<string, list<string> >::iterator dbit;
for (dirit = tdl.begin(); dirit != tdl.end(); dirit++) {
string db;
if (conf->get("dbdir", db, *dirit) == 0) {
cerr << "No database directory in configuration for "
<< *dirit << endl;
exit(1);
}
dbit = dbmap.find(db);
if (dbit == dbmap.end()) {
list<string> l;
l.push_back(*dirit);
dbmap[db] = l;
} else {
dbit->second.push_back(*dirit);
}
}
for (dbit = dbmap.begin(); dbit != dbmap.end(); dbit++) {
cout << dbit->first << " -> ";
list<string>::const_iterator dit;
for (dit = dbit->second.begin(); dit != dbit->second.end(); dit++) {
cout << *dit << " ";
}
cout << endl;
indexer = new DirIndexer(&config, dbit->first, &dbit->second);
if (!indexer->index()) {
delete indexer;
indexer = 0;
exit(1);
}
delete indexer;
indexer = 0;
}
indexer = new ConfIndexer(&config);
exit(!indexer->index());
}

View File

@ -8,13 +8,14 @@ LIBS = librcl.a
all: $(LIBS)
OBJS = conftree.o csguess.o debuglog.o \
fstreewalk.o html.o htmlparse.o \
fstreewalk.o html.o htmlparse.o indexer.o \
mimehandler.o mimeparse.o mimetype.o myhtmlparse.o pathut.o \
rclconfig.o rcldb.o readfile.o smallut.o \
textsplit.o transcode.o \
unacpp.o unac.o
SRCS = ../utils/conftree.cpp ../index/csguess.cpp ../utils/debuglog.cpp \
../utils/fstreewalk.cpp ../common/html.cpp ../common/htmlparse.cpp \
../index/indexer.cpp \
../common/mimehandler.cpp ../utils/mimeparse.cpp ../index/mimetype.cpp \
../common/myhtmlparse.cpp ../utils/pathut.cpp \
../common/rclconfig.cpp ../common/rcldb.cpp ../utils/readfile.cpp \
@ -41,6 +42,8 @@ html.o : ../common/html.cpp
$(CXX) $(CXXFLAGS) -c $<
htmlparse.o : ../common/htmlparse.cpp
$(CXX) $(CXXFLAGS) -c $<
indexer.o : ../index/indexer.cpp
$(CXX) $(CXXFLAGS) -c $<
mimehandler.o : ../common/mimehandler.cpp
$(CXX) $(CXXFLAGS) -c $<
mimeparse.o : ../utils/mimeparse.cpp

50
src/qtgui/idxthread.cpp Normal file
View File

@ -0,0 +1,50 @@
#include <stdio.h>
#include <qthread.h>
#include "indexer.h"
#include "debuglog.h"
// Background thread running the indexer on request. The indexer pointer
// is public so that start_idxthread() can set it before starting the
// thread; run() deletes it when asked to stop.
class IdxThread : public QThread {
public:
    ConfIndexer *indexer;

private:
    virtual void run();
};
// Control/status flags shared between the indexing thread and the rest of
// the program.
// NOTE(review): these are plain variables with no locking or atomic type,
// yet they are written on one thread and read on another — confirm this is
// acceptable for the target platforms.

// Set non-zero to request an indexing pass; IdxThread::run() resets it to
// 0 when the pass begins.
int startindexing;
// Reset to 0 when a pass begins, set to 1 by IdxThread::run() once
// ConfIndexer::index() has returned.
int indexingdone;
// Return value of the last ConfIndexer::index() call made by the thread.
bool indexingstatus;
// Set to 1 by stop_idxthread(); makes IdxThread::run() delete its indexer
// and return.
int stopidxthread;
// Thread body: poll the control flags every 100 ms. When startindexing is
// set, run one indexing pass and publish its result in indexingstatus /
// indexingdone. When stopidxthread is set, delete the indexer and exit.
void IdxThread::run()
{
    DebugLog::getdbl()->setloglevel(DEBDEB1);
    while (!stopidxthread) {
        if (startindexing) {
            // Clear the request and the previous results before starting.
            indexingdone = indexingstatus = startindexing = 0;
            fprintf(stderr, "Index thread :start index\n");
            indexingstatus = indexer->index();
            indexingdone = 1;
        }
        msleep(100);
    }
    delete indexer;
}
// The single, file-local indexing thread object. It is given its indexer
// and started by start_idxthread(), and waited for by stop_idxthread().
static IdxThread idxthread;
// Create the indexer for configuration cnf, hand it to the indexing
// thread and start the thread. The thread then waits for startindexing.
void start_idxthread(RclConfig *cnf)
{
    idxthread.indexer = new ConfIndexer(cnf);
    idxthread.start();
}
// Ask the indexing thread to stop and block until it has returned from
// run(). QThread::wait() returns as soon as the thread is finished (or
// at once if it was never started), instead of polling running() in
// one-second sleep() steps.
void stop_idxthread()
{
    stopidxthread = 1;
    idxthread.wait();
}

View File

@ -1,35 +1,54 @@
#include <signal.h>
#include <qapplication.h>
#include <qthread.h>
#include <qtimer.h>
#include <qmessagebox.h>
#include "recollmain.h"
#include "rcldb.h"
#include "rclconfig.h"
#include "pathut.h"
RclConfig *rclconfig;
Rcl::Db *rcldb;
static void cleanup()
extern void start_idxthread(RclConfig *cnf);
extern void stop_idxthread();
extern int startindexing;
void recollCleanup()
{
stop_idxthread();
delete rcldb;
rcldb = 0;
delete rclconfig;
rclconfig = 0;
}
int recollNeedsExit;
static void sigcleanup(int sig)
{
fprintf(stderr, "sigcleanup\n");
cleanup();
exit(1);
// Cant call exit from here, because the atexit cleanup does some
// thread stuff that we can't do from signal context.
// Just set a flag and let the watchdog timer do the work
recollNeedsExit = 1;
}
int main( int argc, char ** argv )
{
QApplication a( argc, argv );
QApplication a(argc, argv);
RecollMain w;
w.show();
a.connect( &a, SIGNAL( lastWindowClosed() ), &a, SLOT( quit() ) );
a.connect(&a, SIGNAL(lastWindowClosed()), &a, SLOT(quit()));
QTimer *timer = new QTimer(&a);
w.connect(timer, SIGNAL(timeout()), &w, SLOT(checkExit()));
timer->start(100);
atexit(cleanup);
atexit(recollCleanup);
if (signal(SIGHUP, SIG_IGN) != SIG_IGN)
signal(SIGHUP, sigcleanup);
if (signal(SIGINT, SIG_IGN) != SIG_IGN)
@ -54,14 +73,19 @@ int main( int argc, char ** argv )
QString("No db directory in configuration"));
exit(1);
}
dbdir = path_tildexpand(dbdir);
rcldb = new Rcl::Db;
if (!rcldb->open(dbdir, Rcl::Db::DbRO)) {
QMessageBox::critical(0, "Recoll",
QString("Could not open database in ") +
QString(dbdir));
exit(1);
startindexing = 1;
QMessageBox::information(0, "Recoll",
QString("Could not open database in ") +
QString(dbdir) + ". Starting indexation");
startindexing = 1;
}
start_idxthread(rclconfig);
return a.exec();
}

View File

@ -3,7 +3,8 @@ LANGUAGE = C++
CONFIG += qt warn_on release
SOURCES += main.cpp
SOURCES += main.cpp \
idxthread.cpp
FORMS = recollmain.ui

View File

@ -214,12 +214,6 @@
</action>
</actions>
<connections>
<connection>
<sender>fileExitAction</sender>
<signal>activated()</signal>
<receiver>RecollMain</receiver>
<slot>fileExit()</slot>
</connection>
<connection>
<sender>fileExitAction</sender>
<signal>activated()</signal>
@ -272,6 +266,7 @@
</variables>
<slots>
<slot>fileExit()</slot>
<slot>checkExit()</slot>
<slot>reslistTE_doubleClicked( int par, int car )</slot>
<slot>reslistTE_clicked( int par, int car )</slot>
<slot>queryText_returnPressed()</slot>

View File

@ -27,12 +27,19 @@
extern RclConfig *rclconfig;
extern Rcl::Db *rcldb;
extern void recollCleanup();
// Slot (declared in recollmain.ui): log the request and terminate the
// process. Also called by checkExit() when an exit has been requested.
void RecollMain::fileExit()
{
LOGDEB(("RecollMain: fileExit\n"));
// exit() runs the handlers registered with atexit() before terminating.
exit(0);
}
extern int recollNeedsExit;
// Slot: exit through fileExit() once recollNeedsExit has been set, do
// nothing otherwise.
void RecollMain::checkExit()
{
    if (!recollNeedsExit)
        return;
    fileExit();
}
static string plaintorich(const string &in)
{
@ -175,12 +182,30 @@ void RecollMain::reslistTE_clicked(int par, int car)
void RecollMain::queryText_returnPressed()
{
LOGDEB(("RecollMain::queryText_returnPressed()\n"));
if (!rcldb->isopen()) {
string dbdir;
if (rclconfig->getConfParam(string("dbdir"), dbdir) == 0) {
QMessageBox::critical(0, "Recoll",
QString("No db directory in configuration"));
exit(1);
}
dbdir = path_tildexpand(dbdir);
if (!rcldb->open(dbdir, Rcl::Db::DbRO)) {
QMessageBox::information(0, "Recoll",
QString("Could not open database in ") +
QString(dbdir) + " wait for indexing " +
"to complete?");
return;
}
}
reslist_current = -1;
reslist_winfirst = -1;
QCString u8 = queryText->text().utf8();
rcldb->setQuery(string((const char *)u8));
if (!rcldb->setQuery(string((const char *)u8)))
return;
listNextPB_clicked();
}
@ -218,13 +243,18 @@ void RecollMain::listNextPB_clicked()
Rcl::Doc doc;
doc.erase();
int percent;
if (!rcldb->getDoc(reslist_winfirst + i, doc, &percent))
break;
int resCnt = rcldb->getResCnt();
int last = MIN(resCnt, reslist_winfirst+respagesize);
if (i == 0) {
reslistTE->clear();
previewTextEdit->clear();
}
if (!rcldb->getDoc(reslist_winfirst + i, doc, &percent)) {
if (i == 0)
reslist_winfirst = -1;
break;
}
int resCnt = rcldb->getResCnt();
int last = MIN(resCnt, reslist_winfirst+respagesize);
if (i == 0) {
reslistTE->append("<qt><head></head><body><p>");
char line[80];
sprintf(line, "<p><b>Displaying results %d-%d out of %d</b><br>",
@ -276,3 +306,4 @@ void RecollMain::listNextPB_clicked()
reslist_winfirst = 0;
}
}

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: qtry.cpp,v 1.3 2005-01-26 11:47:27 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: qtry.cpp,v 1.4 2005-01-31 14:31:10 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
// Tests with the query interface
@ -15,6 +15,7 @@ static char rcsid[] = "@(#$Id: qtry.cpp,v 1.3 2005-01-26 11:47:27 dockes Exp $ (
#include "rcldb.h"
#include "transcode.h"
#include "mimehandler.h"
#include "pathut.h"
using namespace std;
@ -72,7 +73,7 @@ int main(int argc, char **argv)
cerr << "No database directory in configuration" << endl;
exit(1);
}
dbdir = path_tildexpand(dbdir);
Rcl::Db *rcldb = new Rcl::Db;
if (!rcldb->open(dbdir, Rcl::Db::DbRO)) {

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.13 2005-01-29 15:41:11 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.14 2005-01-31 14:31:09 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
#include <sys/stat.h>
@ -87,7 +87,7 @@ bool Rcl::Db::open(const string& dir, OpenMode mode)
LOGERR(("Rcl::Db::open: already open\n"));
return false;
}
string ermsg;
try {
switch (mode) {
case DbUpd:
@ -110,15 +110,16 @@ bool Rcl::Db::open(const string& dir, OpenMode mode)
ndb->isopen = true;
return true;
} catch (const Xapian::Error &e) {
cerr << "Exception: " << e.get_msg() << endl;
ermsg = e.get_msg();
} catch (const string &s) {
cerr << "Exception: " << s << endl;
ermsg = s;
} catch (const char *s) {
cerr << "Exception: " << s << endl;
ermsg = s;
} catch (...) {
cerr << "Caught unknown exception" << endl;
ermsg = "Caught unknown exception";
}
LOGERR(("Rcl::Db::open: got exception\n"));
LOGERR(("Rcl::Db::open: exception while opening '%s': %s\n",
dir.c_str(), ermsg.c_str()));
return false;
}
@ -156,6 +157,14 @@ bool Rcl::Db::close()
return false;
}
// Tell whether the database is currently open: false when there is no
// native data at all, else the native object's isopen flag.
bool Rcl::Db::isopen()
{
    Native *ndb = (Native *)pdata;
    return ndb != 0 && ndb->isopen;
}
// A small class to hold state while splitting text
class wsData {
public:
@ -366,6 +375,10 @@ static bool splitQCb(void *cdata, const std::string &term, int )
bool Rcl::Db::setQuery(const std::string &querystring)
{
LOGDEB(("Rcl::Db::setQuery: %s\n", querystring.c_str()));
Native *ndb = (Native *)pdata;
if (!ndb)
return false;
wsQData splitData;
TextSplit splitter(splitQCb, &splitData);
@ -375,7 +388,6 @@ bool Rcl::Db::setQuery(const std::string &querystring)
}
splitter.text_to_words(noacc);
Native *ndb = (Native *)pdata;
ndb->query = Xapian::Query(Xapian::Query::OP_OR, splitData.terms.begin(),
splitData.terms.end());

View File

@ -1,6 +1,6 @@
#ifndef _DB_H_INCLUDED_
#define _DB_H_INCLUDED_
/* @(#$Id: rcldb.h,v 1.7 2005-01-29 15:41:11 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: rcldb.h,v 1.8 2005-01-31 14:31:09 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string>
@ -61,6 +61,7 @@ class Db {
enum OpenMode {DbRO, DbUpd, DbTrunc};
bool open(const std::string &dbdir, OpenMode mode);
bool close();
bool isopen();
// Update-related functions
bool add(const std::string &filename, const Doc &doc);

View File

@ -1,5 +1,5 @@
CXXFLAGS = -I.
CXXFLAGS = -I. -g
BIGLIB = ../lib/librcl.a
@ -15,7 +15,7 @@ trfstreewalk.o : fstreewalk.cpp fstreewalk.h
PATHUT_OBJS= trpathut.o pathut.o
trpathut : $(PATHUT_OBJS)
$(CXX) -o trpathut $(PATHUT_OBJS)
$(CXX) $(CXXFLAGS) -o trpathut $(PATHUT_OBJS)
trpathut.o : pathut.cpp pathut.h
$(CXX) -o trpathut.o -c $(CXXFLAGS) \
-DTEST_PATHUT pathut.cpp

View File

@ -1,15 +1,17 @@
#ifndef lint
static char rcsid[] = "@(#$Id: pathut.cpp,v 1.2 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: pathut.cpp,v 1.3 2005-01-31 14:31:10 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
#ifndef TEST_PATHUT
#include <pwd.h>
#include <iostream>
#include "pathut.h"
using std::string;
std::string path_getfather(const std::string &s) {
std::string father = s;
string path_getfather(const string &s) {
string father = s;
// ??
if (father.empty())
@ -22,8 +24,8 @@ std::string path_getfather(const std::string &s) {
father.erase(father.length()-1);
}
std::string::size_type slp = father.rfind('/');
if (slp == std::string::npos)
string::size_type slp = father.rfind('/');
if (slp == string::npos)
return "./";
father.erase(slp);
@ -31,33 +33,57 @@ std::string path_getfather(const std::string &s) {
return father;
}
std::string path_getsimple(const std::string &s) {
std::string simple = s;
string path_getsimple(const string &s) {
string simple = s;
if (simple.empty())
return simple;
std::string::size_type slp = simple.rfind('/');
if (slp == std::string::npos)
string::size_type slp = simple.rfind('/');
if (slp == string::npos)
return simple;
simple.erase(0, slp+1);
return simple;
}
std::string path_home()
string path_home()
{
uid_t uid = getuid();
struct passwd *entry = getpwuid(uid);
if (entry == 0)
if (entry == 0) {
const char *cp = getenv("HOME");
if (cp)
return cp;
else
return "/";
}
std::string homedir = entry->pw_dir;
string homedir = entry->pw_dir;
path_catslash(homedir);
return homedir;
}
/**
 * Expand a leading '~' in a path.
 *
 *  - "~" alone becomes the value of path_home().
 *  - "~/rest" becomes the home directory followed by "rest".
 *  - "~user" and "~user/rest" have "~user" replaced by that user's home
 *    directory from the password database; if the user is unknown the
 *    input is returned unchanged.
 *  - Anything not starting with '~' (including the empty string) is
 *    returned unchanged.
 */
extern string path_tildexpand(const string &s)
{
    if (s.empty() || s[0] != '~')
        return s;
    string o = s;
    if (s.length() == 1) {
        o.replace(0, 1, path_home());
    } else if (s[1] == '/') {
        // We are dropping the "~/" pair, so the replacement must bring its
        // own separator. path_home() does not guarantee one (its $HOME
        // fallback returns the value as is): add it if missing.
        string home = path_home();
        if (home.empty() || home[home.length() - 1] != '/')
            home += '/';
        o.replace(0, 2, home);
    } else {
        // "~user[/...]": the user name runs up to the first '/' or the end
        string::size_type pos = s.find('/');
        string::size_type l = (pos == string::npos) ? s.length() - 1 : pos - 1;
        struct passwd *entry = getpwnam(s.substr(1, l).c_str());
        if (entry)
            o.replace(0, l + 1, entry->pw_dir);
    }
    return o;
}
#else // TEST_PATHUT
#include <iostream>
@ -71,15 +97,29 @@ const char *tstvec[] = {"", "/", "/dir", "/dir/", "/dir1/dir2",
"/dir/.c",
};
const string ttvec[] = {"/dir", "", "~", "~/sub", "~root", "~root/sub",
"~nosuch", "~nosuch/sub"};
int nttvec = sizeof(ttvec) / sizeof(string);
int main(int argc, const char **argv)
{
#if 0
for (int i = 0;i < sizeof(tstvec) / sizeof(char *); i++) {
cout << tstvec[i] << " FATHER " << path_getfather(tstvec[i]) << endl;
}
for (int i = 0;i < sizeof(tstvec) / sizeof(char *); i++) {
cout << tstvec[i] << " SIMPLE " << path_getsimple(tstvec[i]) << endl;
}
#endif
string s;
for (int i = 0; i < nttvec; i++) {
cout << "tildexp: '" << ttvec[i] << "' -> '" <<
path_tildexpand(ttvec[i]) << "'" << endl;
}
return 0;
}

View File

@ -1,6 +1,6 @@
#ifndef _PATHUT_H_INCLUDED_
#define _PATHUT_H_INCLUDED_
/* @(#$Id: pathut.h,v 1.2 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: pathut.h,v 1.3 2005-01-31 14:31:10 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string>
@ -16,5 +16,6 @@ inline void path_cat(std::string &s1, const std::string &s2) {
extern std::string path_getsimple(const std::string &s);
extern std::string path_getfather(const std::string &s);
extern std::string path_home();
extern std::string path_tildexpand(const std::string &s);
#endif /* _PATHUT_H_INCLUDED_ */

148
src/utils/smallut.cpp Normal file
View File

@ -0,0 +1,148 @@
#ifndef lint
static char rcsid[] = "@(#$Id: smallut.cpp,v 1.1 2005-01-31 14:31:10 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
#ifndef TEST_SMALLUT
#include <string>
#include "smallut.h"
#include <ctype.h>
#define MIN(A,B) ((A)<(B)?(A):(B))
int stringicmp(const string & s1, const string& s2)
{
string::const_iterator it1 = s1.begin();
string::const_iterator it2 = s2.begin();
int size1 = s1.length(), size2 = s2.length();
char c1, c2;
if (size1 > size2) {
while (it1 != s1.end()) {
c1 = ::toupper(*it1);
c2 = ::toupper(*it2);
if (c1 != c2) {
return c1 > c2 ? 1 : -1;
}
++it1; ++it2;
}
return size1 == size2 ? 0 : 1;
} else {
while (it2 != s2.end()) {
c1 = ::toupper(*it1);
c2 = ::toupper(*it2);
if (c1 != c2) {
return c1 > c2 ? 1 : -1;
}
++it1; ++it2;
}
return size1 == size2 ? 0 : -1;
}
}
// s1 is already lowercase
int stringlowercmp(const string & s1, const string& s2)
{
string::const_iterator it1 = s1.begin();
string::const_iterator it2 = s2.begin();
int size1 = s1.length(), size2 = s2.length();
char c2;
if (size1 > size2) {
while (it1 != s1.end()) {
c2 = ::tolower(*it2);
if (*it1 != c2) {
return *it1 > c2 ? 1 : -1;
}
++it1; ++it2;
}
return size1 == size2 ? 0 : 1;
} else {
while (it2 != s2.end()) {
c2 = ::tolower(*it2);
if (*it1 != c2) {
return *it1 > c2 ? 1 : -1;
}
++it1; ++it2;
}
return size1 == size2 ? 0 : -1;
}
}
// s1 is already uppercase
int stringuppercmp(const string & s1, const string& s2)
{
string::const_iterator it1 = s1.begin();
string::const_iterator it2 = s2.begin();
int size1 = s1.length(), size2 = s2.length();
char c2;
if (size1 > size2) {
while (it1 != s1.end()) {
c2 = ::toupper(*it2);
if (*it1 != c2) {
return *it1 > c2 ? 1 : -1;
}
++it1; ++it2;
}
return size1 == size2 ? 0 : 1;
} else {
while (it2 != s2.end()) {
c2 = ::toupper(*it2);
if (*it1 != c2) {
return *it1 > c2 ? 1 : -1;
}
++it1; ++it2;
}
return size1 == size2 ? 0 : -1;
}
}
#else
#include <string>
#include "smallut.h"
struct spair {
const char *s1;
const char *s2;
};
struct spair pairs[] = {
{"", ""},
{"", "a"},
{"a", ""},
{"a", "a"},
{"A", "a"},
{"a", "A"},
{"A", "A"},
{"12", "12"},
{"a", "ab"},
{"ab", "a"},
{"A", "Ab"},
{"a", "Ab"},
};
int npairs = sizeof(pairs) / sizeof(struct spair);
int main(int argc, char **argv)
{
for (int i = 0; i < npairs; i++) {
{
int c = stringicmp(pairs[i].s1, pairs[i].s2);
printf("'%s' %s '%s' ", pairs[i].s1,
c == 0 ? "==" : c < 0 ? "<" : ">", pairs[i].s2);
}
{
int cl = stringlowercmp(pairs[i].s1, pairs[i].s2);
printf("L '%s' %s '%s' ", pairs[i].s1,
cl == 0 ? "==" : cl < 0 ? "<" : ">", pairs[i].s2);
}
{
int cu = stringuppercmp(pairs[i].s1, pairs[i].s2);
printf("U '%s' %s '%s' ", pairs[i].s1,
cu == 0 ? "==" : cu < 0 ? "<" : ">", pairs[i].s2);
}
printf("\n");
}
}
#endif

13
src/utils/smallut.h Normal file
View File

@ -0,0 +1,13 @@
#ifndef _SMALLUT_H_INCLUDED_
#define _SMALLUT_H_INCLUDED_
/* @(#$Id: smallut.h,v 1.1 2005-01-31 14:31:10 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string>
using std::string;
// Case-insensitive string comparisons. All three return 0 when the strings
// are equal ignoring case, 1 when the first sorts after the second and -1
// when it sorts before.

// Both strings are case-folded (with toupper) before comparing.
extern int stringicmp(const string& s1, const string& s2);
// The first string is used as is (the caller guarantees it is lowercase);
// only s2 is lower-cased.
extern int stringlowercmp(const string& alreadylower, const string& s2);
// The first string is used as is (the caller guarantees it is uppercase);
// only s2 is upper-cased.
extern int stringuppercmp(const string& alreadyupper, const string& s2);
#endif /* _SMALLUT_H_INCLUDED_ */