diff --git a/src/common/Makefile b/src/common/Makefile index 240e0b2e..3fd41e9b 100644 --- a/src/common/Makefile +++ b/src/common/Makefile @@ -1,24 +1,22 @@ - +BIGLIB = ../lib/librcl.a CXXFLAGS = -Wall -g -I. -I../unac -I../utils -I/usr/local/include -CFLAGS = -g -I. -I../unac -I/usr/local/include -DUNAC_VERSION=\"1.0.7\" -PROGS = unacpp +PROGS = unacpp textsplit all: $(PROGS) -UNACPP_OBJS= trunacpp.o unacpp.o unac.o readfile.o +UNACPP_OBJS= trunacpp.o $(BIGLIB) unacpp : $(UNACPP_OBJS) $(CXX) $(CXXFLAGS) -o unacpp $(UNACPP_OBJS) \ -L/usr/local/lib -liconv - -unac.o : ../unac/unac.c ../unac/unac.h - $(CC) $(CFLAGS) -c -o unac.o ../unac/unac.c - -readfile.o : ../utils/readfile.cpp ../utils/readfile.h - $(CXX) $(CXXFLAGS) -c -o readfile.o ../utils/readfile.cpp - trunacpp.o : unacpp.cpp unacpp.h $(CXX) $(CXXFLAGS) -DTEST_UNACPP -c -o trunacpp.o unacpp.cpp +TEXTSPLIT_OBJS= trtextsplit.o $(BIGLIB) +textsplit : $(TEXTSPLIT_OBJS) + $(CXX) $(CXXFLAGS) -o textsplit $(TEXTSPLIT_OBJS) +trtextsplit.o : textsplit.cpp + $(CXX) $(CXXFLAGS) -DTEST_TEXTSPLIT -c -o trtextsplit.o \ + textsplit.cpp clean: rm -f *.o $(PROGS) diff --git a/src/common/rclconfig.cpp b/src/common/rclconfig.cpp index 463dbe6a..1468f633 100644 --- a/src/common/rclconfig.cpp +++ b/src/common/rclconfig.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.3 2005-01-24 13:17:58 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.4 2005-01-25 14:37:20 dockes Exp $ (C) 2004 J.F.Dockes"; #endif #include @@ -7,6 +7,18 @@ static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.3 2005-01-24 13:17:58 dockes Ex #include "rclconfig.h" #include "pathut.h" #include "conftree.h" +#include "debuglog.h" + +static DebugLog debuglog; +DebugLog *dbl = &debuglog; +class loginitializer { + public: + loginitializer() { + dbl->setlogfilename("stderr"); + dbl->setloglevel(10); + } +}; +static loginitializer lgntlzr; using namespace std; @@ -54,6 +66,7 @@ RclConfig::RclConfig() 
cerr << "No mime conf file" << endl; return; } + setKeyDir(string("")); // mimeconf->list(); m_ok = true; return; diff --git a/src/index/Makefile b/src/index/Makefile index 1970ecc9..9110a867 100644 --- a/src/index/Makefile +++ b/src/index/Makefile @@ -1,67 +1,23 @@ CXXFLAGS = -Wall -g -I. -I../utils -I../common -I/usr/local/include +BIGLIB = ../lib/librcl.a -PROGS = transcode textsplit idxdriver csguess +PROGS = idxdriver csguess all: $(PROGS) -IDXDRIVER_OBJS= idxdriver.o pathut.o conftree.o rclconfig.o fstreewalk.o \ - mimetype.o rcldb.o readfile.o transcode.o csguess.o \ - textsplit.o unac.o unacpp.o - +IDXDRIVER_OBJS= idxdriver.o $(BIGLIB) idxdriver : $(IDXDRIVER_OBJS) $(CXX) $(CXXFLAGS) -o idxdriver $(IDXDRIVER_OBJS) \ -L/usr/local/lib -lxapian -liconv -TEXTSPLIT_OBJS= trtextsplit.o textsplit.o readfile.o -textsplit : $(TEXTSPLIT_OBJS) - $(CXX) $(CXXFLAGS) -o textsplit $(TEXTSPLIT_OBJS) -trtextsplit.o : ../common/textsplit.cpp ../common/textsplit.h - $(CXX) $(CXXFLAGS) -DTEST_TEXTSPLIT -c -o trtextsplit.o \ - ../common/textsplit.cpp - -pathut.o : ../utils/pathut.cpp ../utils/pathut.h - $(CXX) $(CXXFLAGS) -c -o pathut.o ../utils/pathut.cpp -conftree.o : ../utils/conftree.cpp ../utils/conftree.h - $(CXX) $(CXXFLAGS) -c -o conftree.o ../utils/conftree.cpp -rclconfig.o : ../common/rclconfig.cpp ../common/rclconfig.h - $(CXX) $(CXXFLAGS) -c -o rclconfig.o ../common/rclconfig.cpp -fstreewalk.o : ../utils/fstreewalk.cpp ../utils/fstreewalk.h - $(CXX) $(CXXFLAGS) -c -o fstreewalk.o ../utils/fstreewalk.cpp -readfile.o : ../utils/readfile.cpp ../utils/readfile.h - $(CXX) $(CXXFLAGS) -c -o readfile.o ../utils/readfile.cpp - - -unacpp.o : ../common/unacpp.cpp ../common/unacpp.h - $(CXX) $(CXXFLAGS) -I../unac -c -o unacpp.o ../common/unacpp.cpp - - -rcldb.o : ../common/rcldb.cpp ../common/rcldb.h - $(CXX) $(CXXFLAGS) -c -o rcldb.o ../common/rcldb.cpp -textsplit.o : ../common/textsplit.cpp ../common/textsplit.h - $(CXX) $(CXXFLAGS) -c -o textsplit.o ../common/textsplit.cpp 
- -CSGUESS_OBJS= trcsguess.o csguess.o readfile.o +CSGUESS_OBJS= trcsguess.o $(BIGLIB) csguess : $(CSGUESS_OBJS) $(CXX) $(CXXFLAGS) -o csguess $(CSGUESS_OBJS) \ -L/usr/local/lib -liconv -trcsguess.o : csguess.cpp csguess.h +trcsguess.o : csguess.cpp $(CXX) $(CXXFLAGS) -DTEST_CSGUESS -c -o trcsguess.o \ csguess.cpp -TRANSCODE_OBJS= trtranscode.o transcode.o readfile.o -transcode : $(TRANSCODE_OBJS) - $(CXX) $(CXXFLAGS) -o transcode $(TRANSCODE_OBJS) \ - -L/usr/local/lib -liconv -trtranscode.o : transcode.cpp transcode.h - $(CXX) $(CXXFLAGS) -DTEST_TRANSCODE -c -o trtranscode.o \ - transcode.cpp - - -CFLAGS = -g -I. -I../unac -I/usr/local/include -DUNAC_VERSION=\"1.0.7\" -unac.o : ../unac/unac.c ../unac/unac.h - $(CC) $(CFLAGS) -c -o unac.o ../unac/unac.c - - clean: rm -f *.o $(PROGS) diff --git a/src/index/indexer.h b/src/index/indexer.h index 33be1c67..ff5f9237 100644 --- a/src/index/indexer.h +++ b/src/index/indexer.h @@ -1,14 +1,9 @@ #ifndef _INDEXER_H_INCLUDED_ #define _INDEXER_H_INCLUDED_ -/* @(#$Id: indexer.h,v 1.2 2004-12-15 15:00:37 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: indexer.h,v 1.3 2005-01-25 14:37:21 dockes Exp $ (C) 2004 J.F.Dockes */ #include "rclconfig.h" -/* Definition for document interner functions */ -typedef bool (*MimeHandlerFunc)(RclConfig *, const string &, - const string &, Rcl::Doc&); - - #if 0 class FsIndexer { const ConfTree &conf; diff --git a/src/index/recollindex.cpp b/src/index/recollindex.cpp index 53d843dd..fefcf0dd 100644 --- a/src/index/recollindex.cpp +++ b/src/index/recollindex.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.4 2004-12-17 13:01:01 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.5 2005-01-25 14:37:21 dockes Exp $ (C) 2004 J.F.Dockes"; #endif #include @@ -18,101 +18,11 @@ static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.4 2004-12-17 13:01:01 dockes #include "indexer.h" #include "csguess.h" #include "transcode.h" +#include 
"mimehandler.h" using namespace std; -bool textPlainToDoc(RclConfig *conf, const string &fn, - const string &mtype, Rcl::Doc &docout) -{ - string otext; - if (!file_to_string(fn, otext)) - return false; - - // Try to guess charset, then convert to utf-8, and fill document - // fields The charset guesser really doesnt work well in general - // and should be avoided (especially for short documents) - string charset; - if (conf->guesscharset) { - charset = csguess(otext, conf->defcharset); - } else - charset = conf->defcharset; - string utf8; - cerr << "textPlainToDoc: transcod from " << charset << " to UTF-8" - << endl; - - if (!transcode(otext, utf8, charset, "UTF-8")) { - cerr << "textPlainToDoc: transcode failed: charset '" << charset - << "' to UTF-8: "<< utf8 << endl; - otext.erase(); - return 0; - } - - Rcl::Doc out; - out.origcharset = charset; - out.text = utf8; - //out.text = otext; - docout = out; - cerr << utf8 << endl; - return true; -} - -// Map of mime types to internal interner functions. This could just as well -// be an if else if suite inside getMimeHandler(), but this is prettier ? -static map ihandlers; -// Static object to get the map to be initialized at program start. 
-class IHandler_Init { - public: - IHandler_Init() { - ihandlers["text/plain"] = textPlainToDoc; - // Add new associations here when needed - } -}; -static IHandler_Init ihandleriniter; - - -/** - * Return handler function for given mime type - */ -MimeHandlerFunc getMimeHandler(const std::string &mtype, ConfTree *mhandlers) -{ - // Return handler definition for mime type - string hs; - if (!mhandlers->get(mtype, hs, "")) - return 0; - - // Break definition into type and name - vector toks; - ConfTree::stringToStrings(hs, toks); - if (toks.size() < 1) { - cerr << "Bad mimeconf line for " << mtype << endl; - return 0; - } - - // Retrieve handler function according to type - if (!strcasecmp(toks[0].c_str(), "internal")) { - cerr << "Internal Handler" << endl; - map::const_iterator it = - ihandlers.find(mtype); - if (it == ihandlers.end()) { - cerr << "Internal handler not found for " << mtype << endl; - return 0; - } - cerr << "Got handler" << endl; - return it->second; - } else if (!strcasecmp(toks[0].c_str(), "dll")) { - if (toks.size() != 2) - return 0; - return 0; - } else if (!strcasecmp(toks[0].c_str(), "exec")) { - if (toks.size() != 2) - return 0; - return 0; - } else { - return 0; - } -} - /** * Bunch holder for data used while indexing a directory tree */ @@ -151,7 +61,11 @@ void DirIndexer::index() /** * This function gets called for every file and directory found by the - * tree walker. Adjust parameters and index files if/when needed. + * tree walker. It checks with the db if the file has changed and needs to + * be reindexed. If so, it calls an appropriate handler depending on the mime + * type, which is responsible for populating an Rcl::Doc. + * Accent and majuscule handling are performed by the db module when doing + * the actual indexing work. 
*/ FsTreeWalker::Status indexfile(void *cdata, const std::string &fn, const struct stat *stp, @@ -209,7 +123,6 @@ indexfile(void *cdata, const std::string &fn, const struct stat *stp, } - int main(int argc, const char **argv) { RclConfig *config = new RclConfig; diff --git a/src/lib/Makefile b/src/lib/Makefile new file mode 100644 index 00000000..c98709ba --- /dev/null +++ b/src/lib/Makefile @@ -0,0 +1,65 @@ +CXXFLAGS = -Wall -g -I. -I../index -I../utils -I../common \ + -I../unac -I/usr/local/include + +UNACCFLAGS = -g -I. -I../unac -I/usr/local/include -DUNAC_VERSION=\"1.0.7\" + + +LIBS = librcl.a +all: $(LIBS) + +OBJS = conftree.o csguess.o debuglog.o \ + fstreewalk.o \ + mimehandler.o mimetype.o pathut.o \ + rclconfig.o rcldb.o readfile.o \ + textsplit.o transcode.o \ + unacpp.o unac.o +SRCS = ../utils/conftree.cpp ../index/csguess.cpp ../utils/debuglog.cpp \ + ../utils/fstreewalk.cpp \ + ../common/mimehandler.cpp ../index/mimetype.cpp ../utils/pathut.cpp \ + ../common/rclconfig.cpp ../common/rcldb.cpp ../utils/readfile.cpp \ + ../common/textsplit.cpp ../utils/transcode.cpp \ + ../common/unacpp.cpp ../unac/unac.c + +librcl.a : $(OBJS) + ar ru librcl.a $(OBJS) + +unac.o : ../unac/unac.c ../unac/unac.h + $(CC) $(UNACCFLAGS) -c $< + +# $(CXX) $(CXXFLAGS) -c $< +conftree.o : ../utils/conftree.cpp + $(CXX) $(CXXFLAGS) -c $< +csguess.o : ../index/csguess.cpp + $(CXX) $(CXXFLAGS) -c $< +debuglog.o : ../utils/debuglog.cpp + $(CXX) $(CXXFLAGS) -c $< +fstreewalk.o : ../utils/fstreewalk.cpp + $(CXX) $(CXXFLAGS) -c $< +mimehandler.o : ../common/mimehandler.cpp + $(CXX) $(CXXFLAGS) -c $< +mimetype.o : ../index/mimetype.cpp + $(CXX) $(CXXFLAGS) -c $< +pathut.o : ../utils/pathut.cpp + $(CXX) $(CXXFLAGS) -c $< +rclconfig.o : ../common/rclconfig.cpp + $(CXX) $(CXXFLAGS) -c $< +rcldb.o : ../common/rcldb.cpp + $(CXX) $(CXXFLAGS) -c $< +readfile.o : ../utils/readfile.cpp + $(CXX) $(CXXFLAGS) -c $< +textsplit.o : ../common/textsplit.cpp + $(CXX) $(CXXFLAGS) -c $< +transcode.o : 
../utils/transcode.cpp + $(CXX) $(CXXFLAGS) -c $< +unacpp.o : ../common/unacpp.cpp + $(CXX) $(CXXFLAGS) -c $< + + +clean: + rm -f *.o $(LIBS) + +alldeps:depend +depend: + $(CXX) $(CXXFLAGS) -M $(SRCS) > alldeps +include alldeps + diff --git a/src/qtgui/main.cpp b/src/qtgui/main.cpp index d7988f8d..1c41a359 100644 --- a/src/qtgui/main.cpp +++ b/src/qtgui/main.cpp @@ -1,11 +1,68 @@ +#include #include -#include "form1.h" +#include +#include "recollmain.h" +#include "rcldb.h" +#include "rclconfig.h" + +RclConfig *rclconfig; +Rcl::Db *rcldb; + +static void cleanup() +{ + delete rcldb; + rcldb = 0; + delete rclconfig; + rclconfig = 0; +} +static void sigcleanup(int sig) +{ + fprintf(stderr, "sigcleanup\n"); + cleanup(); + exit(1); +} int main( int argc, char ** argv ) { QApplication a( argc, argv ); - Form1 w; + RecollMain w; w.show(); a.connect( &a, SIGNAL( lastWindowClosed() ), &a, SLOT( quit() ) ); + + rclconfig = new RclConfig; + if (!rclconfig || !rclconfig->ok()) { + QMessageBox::critical(0, "Recoll", + QString("Could not find configuration")); + exit(1); + } + string dbdir; + if (rclconfig->getConfParam(string("dbdir"), dbdir) == 0) { + // Note: this will have to be replaced by a call to a + // configuration buildin dialog for initial configuration + QMessageBox::critical(0, "Recoll", + QString("No db directory in configuration")); + exit(1); + } + + rcldb = new Rcl::Db; + + if (!rcldb->open(dbdir, Rcl::Db::DbRO)) { + QMessageBox::critical(0, "Recoll", + QString("Could not open database in ") + + QString(dbdir)); + exit(1); + } + atexit(cleanup); + if (signal(SIGHUP, SIG_IGN) != SIG_IGN) + signal(SIGHUP, sigcleanup); + if (signal(SIGINT, SIG_IGN) != SIG_IGN) + signal(SIGINT, sigcleanup); + if (signal(SIGQUIT, SIG_IGN) != SIG_IGN) + signal(SIGQUIT, sigcleanup); + if (signal(SIGTERM, SIG_IGN) != SIG_IGN) + signal(SIGTERM, sigcleanup); + + + return a.exec(); } diff --git a/src/qtgui/recoll.pro b/src/qtgui/recoll.pro new file mode 100644 index 00000000..84317e09 --- 
/dev/null +++ b/src/qtgui/recoll.pro @@ -0,0 +1,30 @@ +TEMPLATE = app +LANGUAGE = C++ + +CONFIG += qt warn_on release + +SOURCES += main.cpp + +FORMS = recollmain.ui + +IMAGES = images/filenew \ + images/fileopen \ + images/filesave \ + images/print \ + images/undo \ + images/redo \ + images/editcut \ + images/editcopy \ + images/editpaste \ + images/searchfind + +unix { + UI_DIR = .ui + MOC_DIR = .moc + OBJECTS_DIR = .obj + LIBS += ../lib/librcl.a -L/usr/local/lib -lxapian -liconv + INCLUDEPATH += ../common ../index ../query ../unac ../utils +} + + + diff --git a/src/qtgui/recollmain.ui b/src/qtgui/recollmain.ui new file mode 100644 index 00000000..96e29b15 --- /dev/null +++ b/src/qtgui/recollmain.ui @@ -0,0 +1,245 @@ + +RecollMain + + + RecollMain + + + + 0 + 0 + 774 + 619 + + + + recoll + + + + layout7 + + + + 11 + 11 + 752 + 41 + + + + + unnamed + + + + queryText + + + LineEditPanel + + + Sunken + + + + + Search + + + pushButton1 + + + + + spacer1 + + + Horizontal + + + Expanding + + + + 40 + 20 + + + + + + + + splitter9 + + + + 11 + 58 + 752 + 491 + + + + Horizontal + + + + resTextEdit + + + + + splitter8 + + + Vertical + + + + textEdit12 + + + + + textEdit13 + + + + + + + + MenuBar + + + + + + + + + + + + + + + + + + + fileExitAction + + + Exit + + + E&xit + + + + + + + + helpContentsAction + + + Contents + + + &Contents... + + + + + + + + helpIndexAction + + + Index + + + &Index... 
+ + + + + + + + helpAboutAction + + + About + + + &About + + + + + + + + + fileExitAction + activated() + RecollMain + fileExit() + + + helpIndexAction + activated() + RecollMain + helpIndex() + + + helpContentsAction + activated() + RecollMain + helpContents() + + + helpAboutAction + activated() + RecollMain + helpAbout() + + + resTextEdit + clicked(int,int) + RecollMain + resTextEdit_clicked(int,int) + + + resTextEdit + returnPressed() + RecollMain + resTextEdit_returnPressed() + + + fileExitAction + activated() + RecollMain + fileExit() + + + + recollmain.ui.h + + + fileExit() + helpIndex() + helpContents() + helpAbout() + resTextEdit_clicked( int par, int car ) + resTextEdit_returnPressed() + + + + diff --git a/src/qtgui/recollmain.ui.h b/src/qtgui/recollmain.ui.h new file mode 100644 index 00000000..90b56b36 --- /dev/null +++ b/src/qtgui/recollmain.ui.h @@ -0,0 +1,61 @@ +/**************************************************************************** +** ui.h extension file, included from the uic-generated form implementation. +** +** If you want to add, delete, or rename functions or slots, use +** Qt Designer to update this file, preserving your code. +** +** You should not define a constructor or destructor in this file. +** Instead, write your code in functions called init() and destroy(). +** These will automatically be called by the form's constructor and +** destructor. 
+*****************************************************************************/ + +void RecollMain::fileExit() +{ + exit(0); +} + + +void RecollMain::helpIndex() +{ + +} + + +void RecollMain::helpContents() +{ + +} + + +void RecollMain::helpAbout() +{ + +} + + +void RecollMain::resTextEdit_clicked( int par, int car ) +{ + fprintf(stderr, "Clicked at paragraph %d, char %d\n", par, car); +} + +#include "qfontdialog.h" + +#define BS 200000 +void RecollMain::resTextEdit_returnPressed() +{ + fprintf(stderr, "ReturnPressed()\n"); + resTextEdit->setFont( QFontDialog::getFont( 0, resTextEdit->font() ) ); + const char *fname = "utf8.txt"; + FILE *fp = fopen(fname, "r"); + if (fp) { + char buf[BS]; + memset(buf,0, sizeof(buf)); + int n = fread(buf, 1, BS-1, fp); + fclose(fp); + QString str = QString::fromUtf8(buf, n); + resTextEdit->setTextFormat(RichText); + resTextEdit->setText(str); + } + +} diff --git a/src/query/Makefile b/src/query/Makefile index 368483e0..24f82a43 100644 --- a/src/query/Makefile +++ b/src/query/Makefile @@ -1,49 +1,29 @@ CXXFLAGS = -Wall -g -I. 
-I../index -I../utils -I../common -I/usr/local/include +BIGLIB=../lib/librcl.a PROGS = qtry qxtry xadump all: $(PROGS) -XADUMP_OBJS= xadump.o transcode.o +XADUMP_OBJS= xadump.o $(BIGLIB) xadump : $(XADUMP_OBJS) $(CXX) $(CXXFLAGS) -o xadump $(XADUMP_OBJS) \ -L/usr/local/lib -lxapian -liconv -QXTRY_OBJS= qxtry.o transcode.o +QXTRY_OBJS= qxtry.o $(BIGLIB) qxtry : $(QXTRY_OBJS) $(CXX) $(CXXFLAGS) -o qxtry $(QXTRY_OBJS) \ -L/usr/local/lib -lxapian -liconv -QTRY_OBJS= qtry.o conftree.o rclconfig.o \ - rcldb.o transcode.o \ - textsplit.o unac.o unacpp.o pathut.o - +QTRY_OBJS= qtry.o $(BIGLIB) qtry : $(QTRY_OBJS) $(CXX) $(CXXFLAGS) -o qtry $(QTRY_OBJS) \ -L/usr/local/lib -lxapian -liconv -transcode.o : ../index/transcode.cpp ../index/transcode.h - $(CXX) $(CXXFLAGS) -c -o transcode.o ../index/transcode.cpp - -conftree.o : ../utils/conftree.cpp ../utils/conftree.h - $(CXX) $(CXXFLAGS) -c -o conftree.o ../utils/conftree.cpp -rclconfig.o : ../common/rclconfig.cpp ../common/rclconfig.h - $(CXX) $(CXXFLAGS) -c -o rclconfig.o ../common/rclconfig.cpp -unacpp.o : ../common/unacpp.cpp ../common/unacpp.h - $(CXX) $(CXXFLAGS) -I../unac -c -o unacpp.o ../common/unacpp.cpp -pathut.o : ../utils/pathut.cpp ../utils/pathut.h - $(CXX) $(CXXFLAGS) -c -o pathut.o ../utils/pathut.cpp -rcldb.o : ../common/rcldb.cpp ../common/rcldb.h - $(CXX) $(CXXFLAGS) -c -o rcldb.o ../common/rcldb.cpp -textsplit.o : ../common/textsplit.cpp ../common/textsplit.h - $(CXX) $(CXXFLAGS) -c -o textsplit.o ../common/textsplit.cpp - - -CFLAGS = -g -I. 
-I../unac -I/usr/local/include -DUNAC_VERSION=\"1.0.7\" -unac.o : ../unac/unac.c ../unac/unac.h - $(CC) $(CFLAGS) -c -o unac.o ../unac/unac.c - +$(BIGLIB): + cd ../lib && $(MAKE) + clean: rm -f *.o $(PROGS) alldeps:depend diff --git a/src/query/qtry.cpp b/src/query/qtry.cpp index d757e351..971dc0d0 100644 --- a/src/query/qtry.cpp +++ b/src/query/qtry.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: qtry.cpp,v 1.1 2005-01-24 13:17:58 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: qtry.cpp,v 1.2 2005-01-25 14:37:21 dockes Exp $ (C) 2004 J.F.Dockes"; #endif // Tests with the query interface @@ -14,6 +14,7 @@ static char rcsid[] = "@(#$Id: qtry.cpp,v 1.1 2005-01-24 13:17:58 dockes Exp $ ( #include "rclconfig.h" #include "rcldb.h" #include "transcode.h" +#include "mimehandler.h" using namespace std; @@ -86,7 +87,11 @@ int main(int argc, char **argv) db->setQuery(query); int i = 0; Rcl::Doc doc; - while (db->getDoc(i++, doc)) { + for (i=0;;i++) { + doc.erase(); + if (!db->getDoc(i, doc)) + break; + cout << "Url: " << doc.url << endl; cout << "Mimetype: " << doc.mimetype << endl; cout << "Mtime: " << doc.mtime << endl; @@ -96,8 +101,29 @@ int main(int argc, char **argv) cout << "Keywords: " << doc.keywords << endl; cout << "Abstract: " << doc.abstract << endl; cout << endl; - - doc.erase(); + + // Go to the file system to retrieve / convert the document text + // for preview: + + // Look for appropriate handler + MimeHandlerFunc fun = getMimeHandler(doc.mimetype, + config->getMimeConf()); + if (!fun) { + cout << "No mime handler !" << endl; + continue; + } + string fn = doc.url.substr(6, string::npos); + cout << "Filename: " << fn << endl; + + Rcl::Doc fdoc; + if (!fun(config, fn, doc.mimetype, fdoc)) { + cout << "Failed to convert/preview document!" 
<< endl; + continue; + } + string outencoding = "iso8859-1"; + string printable; + transcode(fdoc.text, printable, "UTF-8", outencoding); + cout << printable << endl; } delete db; cerr << "Exiting" << endl; diff --git a/src/query/xadump.cpp b/src/query/xadump.cpp index 89154c44..6b12a2f6 100644 --- a/src/query/xadump.cpp +++ b/src/query/xadump.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: xadump.cpp,v 1.2 2004-12-17 15:50:48 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: xadump.cpp,v 1.3 2005-01-25 14:37:21 dockes Exp $ (C) 2004 J.F.Dockes"; #endif #include @@ -17,7 +17,13 @@ using namespace std; static string thisprog; static string usage = - " -d -e " + " -d -e \n" + " -i docid -D : get document data for docid\n" + " -t term -E : term existence test\n" + " -t term -F : retrieve term frequency data\n" + " -t term -P : retrieve postings for term\n" + " -i docid -T : term list for doc docid\n" + " -T : list all terms\n" " \n\n" ; @@ -130,7 +136,6 @@ int main(int argc, char **argv) - } catch (const Xapian::Error &e) { cout << "Exception: " << e.get_msg() << endl; } catch (const string &s) { diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index d3dcda07..a6d1a3f0 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.6 2005-01-24 13:17:58 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.7 2005-01-25 14:37:21 dockes Exp $ (C) 2004 J.F.Dockes"; #endif #include @@ -15,11 +15,12 @@ using namespace std; #include "transcode.h" #include "unacpp.h" #include "conftree.h" +#include "debuglog.h" #include "xapian.h" -// Data for a xapian database. There could actually be 2 different ones for -// indexing or query as there is not much in common. +// Data for a xapian database. There could actually be 2 different +// ones for indexing or query as there is not much in common. 
class Native { public: bool isopen; @@ -42,30 +43,33 @@ Rcl::Db::Db() Rcl::Db::~Db() { - cerr << "Rcl::Db::~Db" << endl; + LOGDEB(("Rcl::Db::~Db\n")); if (pdata == 0) return; Native *ndb = (Native *)pdata; - cerr << "Db::~Db: isopen " << ndb->isopen << " iswritable " << - ndb->iswritable << endl; + LOGDEB(("Db::~Db: isopen %d iswritable %d\n", ndb->isopen, + ndb->iswritable)); + if (ndb->isopen == false) { + // Nothing to flush, but the Native holder must still be freed + delete ndb; + return; + } try { - // There is nothing to do for an ro db. - if (ndb->isopen == false || ndb->iswritable == false) { - cerr << "Deleting native database" << endl; - delete ndb; - return; - } - ndb->wdb.flush(); + LOGDEB(("Rcl::Db::~Db: deleting native database\n")); + if (ndb->iswritable == true) + ndb->wdb.flush(); delete ndb; + return; } catch (const Xapian::Error &e) { - cout << "Exception: " << e.get_msg() << endl; + cerr << "Exception: " << e.get_msg() << endl; } catch (const string &s) { - cout << "Exception: " << s << endl; + cerr << "Exception: " << s << endl; } catch (const char *s) { - cout << "Exception: " << s << endl; + cerr << "Exception: " << s << endl; } catch (...) 
{ - cout << "Caught unknown exception" << endl; + cerr << "Caught unknown exception" << endl; } + LOGERR(("Rcl::Db::~Db: got exception\n")); } bool Rcl::Db::open(const string& dir, OpenMode mode) @@ -73,8 +74,14 @@ bool Rcl::Db::open(const string& dir, OpenMode mode) if (pdata == 0) return false; Native *ndb = (Native *)pdata; - cerr << "Db::open: isopen " << ndb->isopen << " iswritable " << - ndb->iswritable << endl; + LOGDEB(("Db::open: isopen %d iswritable %d\n", ndb->isopen, + ndb->iswritable)); + + if (ndb->isopen) { + LOGERR(("Rcl::Db::open: already open\n")); + return false; + } + try { switch (mode) { case DbUpd: @@ -95,44 +102,46 @@ bool Rcl::Db::open(const string& dir, OpenMode mode) ndb->isopen = true; return true; } catch (const Xapian::Error &e) { - cout << "Exception: " << e.get_msg() << endl; + cerr << "Exception: " << e.get_msg() << endl; } catch (const string &s) { - cout << "Exception: " << s << endl; + cerr << "Exception: " << s << endl; } catch (const char *s) { - cout << "Exception: " << s << endl; + cerr << "Exception: " << s << endl; } catch (...) 
{ - cout << "Caught unknown exception" << endl; + cerr << "Caught unknown exception" << endl; } + LOGERR(("Rcl::Db::open: got exception\n")); return false; } +// Note: xapian has no close call, we delete and recreate the db bool Rcl::Db::close() { if (pdata == 0) return false; Native *ndb = (Native *)pdata; - cerr << "Db::open: isopen " << ndb->isopen << " iswritable " << - ndb->iswritable << endl; + LOGDEB(("Db::close(): isopen %d iswritable %d\n", ndb->isopen, + ndb->iswritable)); if (ndb->isopen == false) return true; try { - if (ndb->isopen == true && ndb->iswritable == true) { + if (ndb->iswritable == true) ndb->wdb.flush(); - } delete ndb; } catch (const Xapian::Error &e) { - cout << "Exception: " << e.get_msg() << endl; + cerr << "Exception: " << e.get_msg() << endl; return false; } catch (const string &s) { - cout << "Exception: " << s << endl; + cerr << "Exception: " << s << endl; return false; } catch (const char *s) { - cout << "Exception: " << s << endl; + cerr << "Exception: " << s << endl; return false; } catch (...) { - cout << "Caught unknown exception" << endl; + cerr << "Caught unknown exception" << endl; return false; } + pdata = new Native; if (pdata) return true; @@ -165,7 +174,7 @@ static bool splitCb(void *cdata, const std::string &term, int pos) data->curpos = pos; data->doc.add_posting(term, data->basepos + data->curpos, 1); } catch (...) { - cerr << "Error occurred during add_posting" << endl; + LOGERR(("Error occurred during add_posting\n")); return false; } return true; @@ -242,7 +251,7 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &doc) newdocument.add_term("T" + doc.mimetype); string pathterm = "P" + fn; newdocument.add_term(pathterm); - + const char *fnc = fn.c_str(); if (1 /*dupes == DUPE_replace*/) { // If this document has already been indexed, update the existing // entry. 
@@ -254,22 +263,22 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &doc) #if 0 if (did < updated.size()) { updated[did] = true; - //cout << "updated." << endl; + LOGDEB1(("%s updated\n", fnc)); } else { - //cout << "added." << endl; + LOGDEB1(("%s added\n", fnc)); } #endif } catch (...) { // FIXME: is this ever actually needed? ndb->wdb.add_document(newdocument); - //cout << "added (failed re-seek for duplicate)." << endl; + LOGDEB1(("%s added (failed re-seek for duplicate).\n", fnc)); } } else { try { ndb->wdb.add_document(newdocument); - // cout << "added." << endl; + LOGDEB1(("%s added\n", fnc)); } catch (...) { - cerr << "Got exception while adding doc" << endl; + LOGERR(("%s : Got exception while adding doc\n", fnc)); return false; } } @@ -293,7 +302,7 @@ bool Rcl::Db::needUpdate(const string &filename, const struct stat *stp) return true; Xapian::Document doc = ndb->wdb.get_document(*did); string data = doc.get_data(); - //cout << "DOCUMENT EXISTS " << data << endl; + //cerr << "DOCUMENT EXISTS " << data << endl; const char *cp = strstr(data.c_str(), "mtime="); cp += 6; long mtime = atol(cp); @@ -315,17 +324,10 @@ class wsQData { vector terms; }; -// Callback for the document to word splitting class during indexation +// Callback for the query-to-words splitting static bool splitQCb(void *cdata, const std::string &term, int ) { wsQData *data = (wsQData*)cdata; - - cerr << "splitQCb: term '" << term << "'" << endl; - cerr << "splitQCb: term length: " << term.length() << endl; - //string printable; - //transcode(term, printable, "UTF-8", "ISO8859-1"); - //cerr << "Adding " << printable << endl; - data->terms.push_back(term); return true; } @@ -339,13 +341,10 @@ bool Rcl::Db::setQuery(const std::string &querystring) if (!dumb_string(querystring, noacc)) { return false; } - // noacc = querystring; splitter.text_to_words(noacc); Native *ndb = (Native *)pdata; - // splitData.terms.resize(0); - // splitData.terms.push_back(string("le")); ndb->query = 
Xapian::Query(Xapian::Query::OP_OR, splitData.terms.begin(), splitData.terms.end()); @@ -354,15 +353,15 @@ bool Rcl::Db::setQuery(const std::string &querystring) bool Rcl::Db::getDoc(int i, Doc &doc) { - // cerr << "Rcl::Db::getDoc: " << i << endl; + LOGDEB1(("Rcl::Db::getDoc: %d\n", i)); Native *ndb = (Native *)pdata; Xapian::Enquire enquire(ndb->db); enquire.set_query(ndb->query); Xapian::MSet matches = enquire.get_mset(i, 1); - // cerr << "Query `" << ndb->query.get_description() << "'" << - // "Estimated results: " << matches.get_matches_lower_bound() << endl; + LOGDEB1(("Rcl::Db::getDoc: Query '%s' Estimated results: %d\n", + ndb->query.get_description().c_str(), matches.get_matches_lower_bound())); if (matches.empty()) return false; diff --git a/src/rcldb/rcldb.h b/src/rcldb/rcldb.h index ba3e456e..4f012a09 100644 --- a/src/rcldb/rcldb.h +++ b/src/rcldb/rcldb.h @@ -1,6 +1,6 @@ #ifndef _DB_H_INCLUDED_ #define _DB_H_INCLUDED_ -/* @(#$Id: rcldb.h,v 1.4 2005-01-24 13:17:58 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: rcldb.h,v 1.5 2005-01-25 14:37:21 dockes Exp $ (C) 2004 J.F.Dockes */ #include @@ -22,18 +22,18 @@ struct stat; namespace Rcl { /** - * Holder for document attributes and data + * Dumb bunch holder for document attributes and data */ class Doc { public: - string url; - string mimetype; - string mtime; // Modification time as decimal ascii - string origcharset; - string title; - string text; - string keywords; - string abstract; + std::string url; + std::string mimetype; + std::string mtime; // Modification time as decimal ascii + std::string origcharset; + std::string title; + std::string text; + std::string keywords; + std::string abstract; void erase() { url.erase(); mimetype.erase(); @@ -60,8 +60,8 @@ class Db { bool close(); // Update-related functions - bool add(const string &filename, const Doc &doc); - bool needUpdate(const string &filename, const struct stat *stp); + bool add(const std::string &filename, const Doc &doc); + bool needUpdate(const 
std::string &filename, const struct stat *stp); // Query-related functions diff --git a/src/utils/Makefile b/src/utils/Makefile index ab9ca605..1c0ae9bf 100644 --- a/src/utils/Makefile +++ b/src/utils/Makefile @@ -1,7 +1,9 @@ CXXFLAGS = -I. -PROGS = trfstreewalk pathut execmd +BIGLIB = ../lib/librcl.a + +PROGS = trfstreewalk trpathut execmd transcode all: $(PROGS) FSTREEWALK_OBJS= trfstreewalk.o fstreewalk.o pathut.o @@ -10,14 +12,23 @@ trfstreewalk : $(FSTREEWALK_OBJS) trfstreewalk.o : fstreewalk.cpp fstreewalk.h $(CXX) -o trfstreewalk.o -c $(CXXFLAGS) \ -DTEST_FSTREEWALK fstreewalk.cpp + PATHUT_OBJS= trpathut.o pathut.o trpathut : $(PATHUT_OBJS) $(CXX) -o trpathut $(PATHUT_OBJS) trpathut.o : pathut.cpp pathut.h $(CXX) -o trpathut.o -c $(CXXFLAGS) \ -DTEST_PATHUT pathut.cpp + execmd: pathut.o $(CXX) -o execmd $(CXXFLAGS) execmd.cpp pathut.o +TRANSCODE_OBJS= trtranscode.o $(BIGLIB) +transcode : $(TRANSCODE_OBJS) + $(CXX) $(CXXFLAGS) -o transcode $(TRANSCODE_OBJS) \ + -L/usr/local/lib -liconv +trtranscode.o : ../utils/transcode.cpp + $(CXX) $(CXXFLAGS) -DTEST_TRANSCODE -c -o trtranscode.o \ + transcode.cpp clean: rm -f *.o $(PROGS) diff --git a/src/utils/transcode.h b/src/utils/transcode.h index c059de25..e3f563e7 100644 --- a/src/utils/transcode.h +++ b/src/utils/transcode.h @@ -1,7 +1,9 @@ #ifndef _TRANSCODE_H_INCLUDED_ #define _TRANSCODE_H_INCLUDED_ -/* @(#$Id: transcode.h,v 1.1 2004-12-15 09:43:48 dockes Exp $ (C) 2004 J.F.Dockes */ - +/* @(#$Id: transcode.h,v 1.2 2005-01-25 14:37:21 dockes Exp $ (C) 2004 J.F.Dockes */ +/** + * A very minimal c++ized interface to iconv + */ #include extern bool transcode(const std::string &in, std::string &out,