*** empty log message ***
This commit is contained in:
parent
0786c283ef
commit
5ca462cdff
64
src/common/rclconfig.cpp
Normal file
64
src/common/rclconfig.cpp
Normal file
@ -0,0 +1,64 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.1 2004-12-14 17:50:28 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
#include <stdlib.h>

#include <iostream>
#include <new>

#include "rclconfig.h"
#include "pathut.h"
#include "conftree.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
/**
 * Placeholder accessor for a configuration tree.
 *
 * Not implemented yet. The body used to be empty, which made control fall
 * off the end of a non-void function (undefined behaviour); it now returns
 * an explicit null pointer.
 *
 * @return always 0 for now.
 */
ConfTree *getConfig()
{
    return 0;
}
|
||||
|
||||
/**
 * Locate the configuration directory and create the three configuration
 * trees (main configuration, mime map, mime conf).
 *
 * The directory is taken from the RECOLL_CONFDIR environment variable when
 * it is set, else it is path_home() with ".recoll/" appended.
 *
 * On any failure a message is printed on cerr and the constructor returns
 * early, leaving m_ok false. Trees created so far stay owned by the object
 * and are released by the destructor.
 */
RclConfig::RclConfig()
    : m_ok(false), conf(0), mimemap(0), mimeconf(0)
{
    const char *cp = getenv("RECOLL_CONFDIR");
    if (cp) {
        confdir = cp;
    } else {
        confdir = path_home();
        confdir += ".recoll/";
    }
    string cfilename = confdir;
    path_cat(cfilename, "recoll.conf");

    // Maybe we should try to open readonly here as, else, this will
    // casually create a configuration file.
    // Plain new reports failure by throwing std::bad_alloc and never
    // returns 0, so the nothrow form is needed for the check to be reachable.
    conf = new (std::nothrow) ConfTree(cfilename.c_str(), 0);
    if (conf == 0) {
        cerr << "No configuration" << endl;
        return;
    }

    // Mime map: file name comes from the main configuration, with a default
    string mimemapfile;
    if (!conf->get("mimemapfile", mimemapfile, "")) {
        mimemapfile = "mimemap";
    }
    string mpath = confdir;
    path_cat(mpath, mimemapfile);
    mimemap = new (std::nothrow) ConfTree(mpath.c_str());
    if (mimemap == 0) {
        cerr << "No mime map file" << endl;
        return;
    }

    // Mime conf: same scheme as the mime map
    string mimeconffile;
    if (!conf->get("mimeconffile", mimeconffile, "")) {
        mimeconffile = "mimeconf";
    }
    mpath = confdir;
    path_cat(mpath, mimeconffile);
    mimeconf = new (std::nothrow) ConfTree(mpath.c_str());
    if (mimeconf == 0) {
        cerr << "No mime conf file" << endl;
        return;
    }
    mimeconf->list();

    m_ok = true;
}
|
||||
45
src/common/rclconfig.h
Normal file
45
src/common/rclconfig.h
Normal file
@ -0,0 +1,45 @@
|
||||
#ifndef _RCLCONFIG_H_INCLUDED_
|
||||
#define _RCLCONFIG_H_INCLUDED_
|
||||
/* @(#$Id: rclconfig.h,v 1.1 2004-12-14 17:50:28 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include "conftree.h"
|
||||
|
||||
class RclConfig {
|
||||
int m_ok;
|
||||
string confdir; // Directory where the files are stored
|
||||
ConfTree *conf; // Parsed main configuration
|
||||
string keydir; // Current directory used for parameter fetches.
|
||||
string defcharset; // These are stored locally to avoid a config lookup
|
||||
string deflang; // each time.
|
||||
// Note: this will have to change if/when we support per directory maps
|
||||
ConfTree *mimemap;
|
||||
ConfTree *mimeconf;
|
||||
public:
|
||||
RclConfig();
|
||||
~RclConfig() {delete conf;delete mimemap;delete mimeconf;}
|
||||
bool ok() {return m_ok;}
|
||||
ConfTree *getConfig() {return m_ok ? conf : 0;}
|
||||
ConfTree *getMimeMap() {return m_ok ? mimemap : 0;}
|
||||
ConfTree *getMimeConf() {return m_ok ? mimeconf : 0;}
|
||||
bool getConfParam(const string &name, string &value)
|
||||
{
|
||||
if (conf == 0)
|
||||
return false;
|
||||
return conf->get(name, value, keydir);
|
||||
}
|
||||
const string &getDefCharset() {
|
||||
return defcharset;
|
||||
}
|
||||
const string &getDefLang() {
|
||||
return deflang;
|
||||
}
|
||||
void setKeyDir(const string &dir)
|
||||
{
|
||||
keydir = dir;
|
||||
conf->get("defaultcharset", defcharset, keydir);
|
||||
conf->get("defaultlanguage", deflang, keydir);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
#endif /* _RCLCONFIG_H_INCLUDED_ */
|
||||
@ -1,14 +1,33 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: textsplit.cpp,v 1.1 2004-12-13 15:42:16 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: textsplit.cpp,v 1.2 2004-12-14 17:49:11 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
#ifndef TEST_TEXTSPLIT
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
#include "textsplit.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
/**
|
||||
* Splitting a text into words. The code in this file will work with any
|
||||
* charset where the basic separators (.,- etc.) have their ascii values
|
||||
* (ok for UTF-8, ascii, iso8859* and quite a few others).
|
||||
*
|
||||
* We work in a way which would make it quite difficult to handle non-ascii
|
||||
* separator chars (en-dash,etc.). We would then need to actually parse the
|
||||
* utf-8 stream, and use a different way to classify the characters (instead
|
||||
* of a 256 slot array).
|
||||
*
|
||||
* We are also not using capitalization information.
|
||||
*/
|
||||
|
||||
// Character classes: we have three main groups, and then some chars
|
||||
// are their own class because they want special handling.
|
||||
// We have an array with 256 slots where we keep the character states.
|
||||
// The array could be fully static, but we use a small function to fill it
|
||||
// once.
|
||||
enum CharClass {LETTER=256, SPACE=257, DIGIT=258};
|
||||
static int charclasses[256];
|
||||
static void setcharclasses()
|
||||
@ -38,7 +57,7 @@ static void setcharclasses()
|
||||
init = 1;
|
||||
}
|
||||
|
||||
static void emitterm(string &w, int *posp, bool doerase = true)
|
||||
void TextSplit::emitterm(string &w, int pos, bool doerase = true)
|
||||
{
|
||||
// Maybe trim end of word. These are chars that we would keep inside
|
||||
// a word or span, but not at the end
|
||||
@ -55,22 +74,27 @@ static void emitterm(string &w, int *posp, bool doerase = true)
|
||||
}
|
||||
breakloop:
|
||||
if (w.length()) {
|
||||
if (posp)
|
||||
*posp++;
|
||||
cout << w << endl;
|
||||
if (termsink)
|
||||
termsink(cdata, w, pos);
|
||||
}
|
||||
if (doerase)
|
||||
w.erase();
|
||||
}
|
||||
|
||||
void text_to_words(const string &in)
|
||||
/*
|
||||
* We basically emit a word every time we see a separator, but some chars are
|
||||
* handled specially so that special cases, i.e., c++ and dockes@okyz.com etc.,
|
||||
* are handled properly.
|
||||
*/
|
||||
void TextSplit::text_to_words(const string &in)
|
||||
{
|
||||
setcharclasses();
|
||||
string span;
|
||||
string word;
|
||||
bool number = false;
|
||||
int pos = 0;
|
||||
int wordpos = 0;
|
||||
int spanpos = 0;
|
||||
|
||||
for (int i = 0; i < in.length(); i++) {
|
||||
int c = in[i];
|
||||
int cc = charclasses[c];
|
||||
@ -78,11 +102,13 @@ void text_to_words(const string &in)
|
||||
case SPACE:
|
||||
SPACE:
|
||||
if (word.length()) {
|
||||
if (span.length() != word.length())
|
||||
emitterm(span, &spanpos);
|
||||
emitterm(word, &pos);
|
||||
if (span.length() != word.length()) {
|
||||
emitterm(span, spanpos);
|
||||
}
|
||||
emitterm(word, wordpos++);
|
||||
number = false;
|
||||
}
|
||||
spanpos = wordpos;
|
||||
span.erase();
|
||||
break;
|
||||
case '-':
|
||||
@ -94,9 +120,10 @@ void text_to_words(const string &in)
|
||||
span += c;
|
||||
}
|
||||
} else {
|
||||
if (span.length() != word.length())
|
||||
emitterm(span, &spanpos, false);
|
||||
emitterm(word, &pos);
|
||||
if (span.length() != word.length()) {
|
||||
emitterm(span, spanpos, false);
|
||||
}
|
||||
emitterm(word, wordpos++);
|
||||
number = false;
|
||||
span += c;
|
||||
}
|
||||
@ -104,9 +131,10 @@ void text_to_words(const string &in)
|
||||
case '\'':
|
||||
case '@':
|
||||
if (word.length()) {
|
||||
if (span.length() != word.length())
|
||||
emitterm(span, &spanpos, false);
|
||||
emitterm(word, &pos);
|
||||
if (span.length() != word.length()) {
|
||||
emitterm(span, spanpos, false);
|
||||
}
|
||||
emitterm(word, wordpos++);
|
||||
number = false;
|
||||
} else
|
||||
word += c;
|
||||
@ -117,7 +145,7 @@ void text_to_words(const string &in)
|
||||
word += c;
|
||||
} else {
|
||||
if (word.length()) {
|
||||
emitterm(word, &pos);
|
||||
emitterm(word, wordpos++);
|
||||
number = false;
|
||||
} else
|
||||
word += c;
|
||||
@ -139,8 +167,8 @@ void text_to_words(const string &in)
|
||||
// if '-' is the last char before end of line, just
|
||||
// ignore the line change. This is the right thing to
|
||||
// do almost always. We'd then need a way to check if
|
||||
// the - was added as part of the sleep or was really there,
|
||||
// but this would need a dictionary.
|
||||
// the - was added as part of the word hyphenation, or was
|
||||
// there in the first place, but this would need a dictionary.
|
||||
} else {
|
||||
// Handle like a normal separator
|
||||
goto SPACE;
|
||||
@ -162,42 +190,35 @@ void text_to_words(const string &in)
|
||||
}
|
||||
if (word.length()) {
|
||||
if (span.length() != word.length())
|
||||
emitterm(span, &spanpos);
|
||||
emitterm(word, &pos);
|
||||
emitterm(span, spanpos);
|
||||
emitterm(word, wordpos);
|
||||
}
|
||||
}
|
||||
|
||||
#if 1 || TEST_TEXTSPLIT
|
||||
#else // TEST driver ->
|
||||
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
int
|
||||
file_to_string(const string &fn, string &data)
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "textsplit.h"
|
||||
#include "readfile.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
int termsink(void *, const string &term, int pos)
|
||||
{
|
||||
int fd = open(fn.c_str(), 0);
|
||||
if (fd < 0) {
|
||||
perror("open");
|
||||
return -1;
|
||||
}
|
||||
char buf[4096];
|
||||
for (;;) {
|
||||
int n = read(fd, buf, 4096);
|
||||
if (n < 0) {
|
||||
perror("read");
|
||||
close(fd);
|
||||
return -1;
|
||||
}
|
||||
if (n == 0)
|
||||
break;
|
||||
data.append(buf, n);
|
||||
}
|
||||
close(fd);
|
||||
cout << pos << " " << term << endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static string teststring =
|
||||
"jfd@okyz.com "
|
||||
"Ceci. Est;Oui 1.24 n@d @net .net t@v@c c# c++ -10 o'brien l'ami "
|
||||
"a 134 +134 -14 -1.5 +1.5 1.54e10 a"
|
||||
"@^#$(#$(*)"
|
||||
"one\n\rtwo\nthree-\nfour"
|
||||
"[olala][ululu]"
|
||||
@ -206,15 +227,16 @@ static string teststring =
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
TextSplit splitter(termsink, 0);
|
||||
if (argc == 2) {
|
||||
string data;
|
||||
if (file_to_string(argv[1], data) < 0)
|
||||
if (!file_to_string(argv[1], data))
|
||||
exit(1);
|
||||
text_to_words(data);
|
||||
splitter.text_to_words(data);
|
||||
} else {
|
||||
cout << teststring << endl; text_to_words(teststring);
|
||||
cout << teststring << endl;
|
||||
splitter.text_to_words(teststring);
|
||||
}
|
||||
|
||||
}
|
||||
#endif // TEST
|
||||
|
||||
|
||||
31
src/common/textsplit.h
Normal file
31
src/common/textsplit.h
Normal file
@ -0,0 +1,31 @@
|
||||
#ifndef _TEXTSPLIT_H_INCLUDED_
|
||||
#define _TEXTSPLIT_H_INCLUDED_
|
||||
/* @(#$Id: textsplit.h,v 1.1 2004-12-14 17:49:11 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include <string>
|
||||
|
||||
/**
|
||||
* Split text into words.
|
||||
* See comments at top of .cpp for more explanations.
|
||||
* This uses a callback function. It could be done with an iterator instead,
|
||||
* but it's much simpler this way...
|
||||
*/
|
||||
class TextSplit {
 public:
    /** Callback type: receives the client data, one term and its position */
    typedef int (*TermSink)(void *cdata, const std::string & term, int pos);

    /** Remember the callback and the client data it will be called with */
    TextSplit(TermSink t, void *c) : termsink(t), cdata(c) {}

    /** Split the input text, emitting words and positions */
    void text_to_words(const std::string &in);

 private:
    TermSink termsink; // Callback receiving the terms
    void *cdata;       // Opaque client data handed back to the callback

    void emitterm(std::string &term, int pos, bool doerase);
};
|
||||
|
||||
#endif /* _TEXTSPLIT_H_INCLUDED_ */
|
||||
23
src/index/indexer.h
Normal file
23
src/index/indexer.h
Normal file
@ -0,0 +1,23 @@
|
||||
#ifndef _INDEXER_H_INCLUDED_
|
||||
#define _INDEXER_H_INCLUDED_
|
||||
/* @(#$Id: indexer.h,v 1.1 2004-12-14 17:53:51 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include "rclconfig.h"
|
||||
|
||||
/* Definition for document interner functions */
|
||||
typedef Rcl::Doc* (*MimeHandlerFunc)(RclConfig *, const string &,
|
||||
const string &);
|
||||
|
||||
|
||||
#if 0
|
||||
class FsIndexer {
|
||||
const ConfTree &conf;
|
||||
public:
|
||||
enum runStatus {IndexerOk, IndexerError};
|
||||
Indexer(const ConfTree &cnf): conf(cnf) {}
|
||||
virtual ~Indexer() {}
|
||||
runStatus run() = 0;
|
||||
};
|
||||
#endif
|
||||
|
||||
#endif /* _INDEXER_H_INCLUDED_ */
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.1 2004-12-13 15:42:16 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.2 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
#include <ctype.h>
|
||||
@ -11,6 +11,9 @@ using std::string;
|
||||
|
||||
string mimetype(const string &filename, ConfTree *mtypes)
|
||||
{
|
||||
if (mtypes == 0)
|
||||
return "";
|
||||
|
||||
// If filename has a suffix and we find it in the map, we're done
|
||||
string::size_type dot = filename.find_last_of(".");
|
||||
if (dot != string::npos) {
|
||||
@ -26,6 +29,8 @@ string mimetype(const string &filename, ConfTree *mtypes)
|
||||
return "";
|
||||
}
|
||||
|
||||
|
||||
|
||||
#ifdef _TEST_MIMETYPE_
|
||||
#include <iostream>
|
||||
const char *tvec[] = {
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
#ifndef _MIMETYPE_H_INCLUDED_
|
||||
#define _MIMETYPE_H_INCLUDED_
|
||||
/* @(#$Id: mimetype.h,v 1.1 2004-12-13 15:42:16 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: mimetype.h,v 1.2 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include <string>
|
||||
#include "conftree.h"
|
||||
@ -13,4 +13,5 @@
|
||||
*/
|
||||
string mimetype(const std::string &filename, ConfTree *mtypes);
|
||||
|
||||
|
||||
#endif /* _MIMETYPE_H_INCLUDED_ */
|
||||
|
||||
@ -1,7 +1,9 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.1 2004-12-13 15:42:16 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.2 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
#include <strings.h>
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "pathut.h"
|
||||
@ -9,43 +11,156 @@ static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.1 2004-12-13 15:42:16 dockes
|
||||
#include "rclconfig.h"
|
||||
#include "fstreewalk.h"
|
||||
#include "mimetype.h"
|
||||
#include "rcldb.h"
|
||||
#include "readfile.h"
|
||||
#include "indexer.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
|
||||
/**
 * Internal handler for text/plain files.
 * Stub: no document is built yet, a null pointer is always returned.
 */
Rcl::Doc* textPlainToDoc(RclConfig *conf, const string &fn,
                         const string &mtype)
{
    Rcl::Doc *nodoc = 0;
    return nodoc;
}
|
||||
|
||||
static map<string, MimeHandlerFunc> ihandlers;
|
||||
class IHandler_Init {
|
||||
public:
|
||||
IHandler_Init() {
|
||||
ihandlers["text/plain"] = textPlainToDoc;
|
||||
}
|
||||
};
|
||||
static IHandler_Init ihandleriniter;
|
||||
|
||||
/**
|
||||
* Return handler function for given mime type
|
||||
*/
|
||||
/**
 * Return the handler function for the given mime type.
 *
 * The handler definition is looked up in mhandlers and split into words.
 * The first word selects the handler kind: "internal", "dll" or "exec"
 * (case-insensitive). Only internal handlers are implemented.
 *
 * @param mtype mime type to look up
 * @param mhandlers handler definitions; may be 0, e.g. when it comes from
 *   RclConfig::getMimeConf() on a configuration which is not ok.
 * @return the handler, or 0 if none can be found.
 */
MimeHandlerFunc getMimeHandler(const std::string &mtype, ConfTree *mhandlers)
{
    // Same protection as mimetype(): no table, no handler
    if (mhandlers == 0)
        return 0;

    // Return handler definition for mime type
    string hs;
    if (!mhandlers->get(mtype, hs, ""))
        return 0;

    // Break definition into type and name
    vector<string> toks;
    ConfTree::stringToStrings(hs, toks);
    if (toks.size() < 1) {
        cerr << "Bad mimeconf line for " << mtype << endl;
        return 0;
    }

    // Retrieve handler function according to type
    if (!strcasecmp(toks[0].c_str(), "internal")) {
        cerr << "Internal Handler" << endl;
        map<string, MimeHandlerFunc>::const_iterator it =
            ihandlers.find(mtype);
        if (it == ihandlers.end()) {
            cerr << "Internal handler not found for " << mtype << endl;
            return 0;
        }
        cerr << "Got handler" << endl;
        return it->second;
    } else if (!strcasecmp(toks[0].c_str(), "dll")) {
        // Not implemented: no handler is returned, whatever the definition
        if (toks.size() != 2)
            return 0;
        return 0;
    } else if (!strcasecmp(toks[0].c_str(), "exec")) {
        // Not implemented: no handler is returned, whatever the definition
        if (toks.size() != 2)
            return 0;
        return 0;
    } else {
        return 0;
    }
}
|
||||
|
||||
class DirIndexer {
|
||||
FsTreeWalker walker;
|
||||
RclConfig *config;
|
||||
string topdir;
|
||||
string dbdir;
|
||||
Rcl::Db db;
|
||||
public:
|
||||
DirIndexer(RclConfig *cnf, const string &top)
|
||||
: config(cnf), topdir(top)
|
||||
{
|
||||
}
|
||||
DirIndexer(RclConfig *cnf, const string &dbd, const string &top)
|
||||
: config(cnf), topdir(top), dbdir(dbd)
|
||||
{ }
|
||||
|
||||
friend FsTreeWalker::Status
|
||||
indexfile(void *, const std::string &, const struct stat *,
|
||||
FsTreeWalker::CbFlag);
|
||||
void index()
|
||||
{
|
||||
walker.walk(topdir, indexfile, this);
|
||||
}
|
||||
|
||||
void index();
|
||||
};
|
||||
|
||||
void DirIndexer::index()
|
||||
{
|
||||
#if 0
|
||||
if (!db.open(dbdir, Rcl::Db::DbUpd)) {
|
||||
cerr << "Error opening database in " << dbdir << " for " <<
|
||||
topdir << endl;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
walker.walk(topdir, indexfile, this);
|
||||
#if 0
|
||||
if (!db.close()) {
|
||||
cerr << "Error closing database in " << dbdir << " for " <<
|
||||
topdir << endl;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
FsTreeWalker::Status
|
||||
indexfile(void *cdata, const std::string &fn,
|
||||
const struct stat *stp, FsTreeWalker::CbFlag flg)
|
||||
indexfile(void *cdata, const std::string &fn, const struct stat *stp,
|
||||
FsTreeWalker::CbFlag flg)
|
||||
{
|
||||
DirIndexer *me = (DirIndexer *)cdata;
|
||||
if (flg == FsTreeWalker::FtwDirEnter || flg == FsTreeWalker::FtwDirReturn) {
|
||||
// Possibly adjust defaults
|
||||
|
||||
if (flg == FsTreeWalker::FtwDirEnter ||
|
||||
flg == FsTreeWalker::FtwDirReturn) {
|
||||
me->config->setKeyDir(fn);
|
||||
cout << "indexfile: [" << fn << "]" << endl;
|
||||
cout << " defcharset: " << me->config->getDefCharset()
|
||||
<< " deflang: " << me->config->getDefLang() << endl;
|
||||
|
||||
return FsTreeWalker::FtwOk;
|
||||
}
|
||||
string mtype = mimetype(fn, me->config->getMimeMap());
|
||||
if (mtype.length() > 0)
|
||||
cout << "indexfile: " << mtype << " " << fn << endl;
|
||||
else
|
||||
cout << "indexfile: " << "(nomime)" << " " << fn << endl;
|
||||
|
||||
string mime = mimetype(fn, me->config->getMimeMap());
|
||||
if (mime.length() == 0) {
|
||||
cout << "indexfile: " << "(no mime)" << " " << fn << endl;
|
||||
// No mime type ?? pass on.
|
||||
return FsTreeWalker::FtwOk;
|
||||
}
|
||||
|
||||
cout << "indexfile: " << mime << " " << fn << endl;
|
||||
|
||||
// Look for appropriate handler
|
||||
MimeHandlerFunc fun = getMimeHandler(mime, me->config->getMimeConf());
|
||||
if (!fun) {
|
||||
// No handler for this type, for now :(
|
||||
return FsTreeWalker::FtwOk;
|
||||
}
|
||||
|
||||
// Check if file has already been indexed, and has changed since
|
||||
// - Make path term,
|
||||
// - query db: postlist_begin->docid
|
||||
// - fetch doc (get_document(docid)
|
||||
// - check date field, maybe skip
|
||||
|
||||
// Turn file into a document. The document has fields for title, body
|
||||
// etc., all text converted to utf8
|
||||
Rcl::Doc *doc = fun(me->config, fn, mime);
|
||||
|
||||
#if 0
|
||||
// Set up xapian document, add postings and misc fields,
|
||||
// add to or update database.
|
||||
dbadd(doc);
|
||||
#endif
|
||||
|
||||
return FsTreeWalker::FtwOk;
|
||||
}
|
||||
|
||||
|
||||
@ -63,11 +178,18 @@ int main(int argc, const char **argv)
|
||||
cerr << "No top directories in configuration" << endl;
|
||||
exit(1);
|
||||
}
|
||||
list<string> tdl;
|
||||
vector<string> tdl;
|
||||
if (ConfTree::stringToStrings(topdirs, tdl)) {
|
||||
for (list<string>::iterator it = tdl.begin(); it != tdl.end(); it++) {
|
||||
cout << *it << endl;
|
||||
DirIndexer indexer(config, *it);
|
||||
for (int i = 0; i < tdl.size(); i++) {
|
||||
string topdir = tdl[i];
|
||||
cout << topdir << endl;
|
||||
string dbdir;
|
||||
if (conf->get("dbdir", dbdir, topdir) == 0) {
|
||||
cerr << "No database directory in configuration for "
|
||||
<< topdir << endl;
|
||||
exit(1);
|
||||
}
|
||||
DirIndexer indexer(config, dbdir, topdir);
|
||||
indexer.index();
|
||||
}
|
||||
}
|
||||
|
||||
121
src/rcldb/rcldb.cpp
Normal file
121
src/rcldb/rcldb.cpp
Normal file
@ -0,0 +1,121 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.1 2004-12-14 17:50:28 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
#include "rcldb.h"
|
||||
|
||||
#include "xapian.h"
|
||||
|
||||
// Data for a xapian database
|
||||
class Native {
|
||||
public:
|
||||
bool isopen;
|
||||
bool iswritable;
|
||||
class Xapian::Database db;
|
||||
class Xapian::WritableDatabase wdb;
|
||||
vector<bool> updated;
|
||||
|
||||
Native() : isopen(false), iswritable(false) {}
|
||||
|
||||
};
|
||||
|
||||
// Create the object with a fresh, closed, native data block
Rcl::Db::Db()
    : pdata(new Native)
{
}
|
||||
|
||||
/**
 * Flush pending changes if the database is open for writing, then release
 * the native data.
 *
 * Exceptions are reported on cout and never leave the destructor. The
 * native data is released even when the flush fails (it used to leak in
 * that case).
 */
Rcl::Db::~Db()
{
    Native *ndb = (Native *)pdata;
    if (ndb == 0)
        return;
    pdata = 0;

    try {
        // There is nothing to do for an ro db.
        if (ndb->isopen && ndb->iswritable)
            ndb->wdb.flush();
    } catch (const Xapian::Error &e) {
        cout << "Exception: " << e.get_msg() << endl;
    } catch (const string &s) {
        cout << "Exception: " << s << endl;
    } catch (const char *s) {
        cout << "Exception: " << s << endl;
    } catch (...) {
        cout << "Caught unknown exception" << endl;
    }

    // Done whatever the outcome of the flush, and still protected, as the
    // original delete was inside the try block.
    try {
        delete ndb;
    } catch (...) {
        cout << "Caught unknown exception" << endl;
    }
}
|
||||
|
||||
bool Rcl::Db::open(const string& dir, OpenMode mode)
|
||||
{
|
||||
if (pdata == 0)
|
||||
return false;
|
||||
Native *ndb = (Native *)pdata;
|
||||
try {
|
||||
switch (mode) {
|
||||
case DbUpd:
|
||||
ndb->wdb = Xapian::Auto::open(dir, Xapian::DB_CREATE_OR_OPEN);
|
||||
ndb->updated.resize(ndb->wdb.get_lastdocid() + 1);
|
||||
ndb->iswritable = true;
|
||||
break;
|
||||
case DbTrunc:
|
||||
ndb->wdb = Xapian::Auto::open(dir, Xapian::DB_CREATE_OR_OVERWRITE);
|
||||
ndb->iswritable = true;
|
||||
break;
|
||||
case DbRO:
|
||||
default:
|
||||
ndb->iswritable = false;
|
||||
cerr << "Not ready to open RO yet" << endl;
|
||||
exit(1);
|
||||
}
|
||||
ndb->isopen = true;
|
||||
return true;
|
||||
} catch (const Xapian::Error &e) {
|
||||
cout << "Exception: " << e.get_msg() << endl;
|
||||
} catch (const string &s) {
|
||||
cout << "Exception: " << s << endl;
|
||||
} catch (const char *s) {
|
||||
cout << "Exception: " << s << endl;
|
||||
} catch (...) {
|
||||
cout << "Caught unknown exception" << endl;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
bool Rcl::Db::close()
|
||||
{
|
||||
if (pdata == 0)
|
||||
return false;
|
||||
Native *ndb = (Native *)pdata;
|
||||
if (ndb->isopen == false)
|
||||
return true;
|
||||
try {
|
||||
if (ndb->isopen == true && ndb->iswritable == true) {
|
||||
ndb->wdb.flush();
|
||||
}
|
||||
delete ndb;
|
||||
} catch (const Xapian::Error &e) {
|
||||
cout << "Exception: " << e.get_msg() << endl;
|
||||
return false;
|
||||
} catch (const string &s) {
|
||||
cout << "Exception: " << s << endl;
|
||||
return false;
|
||||
} catch (const char *s) {
|
||||
cout << "Exception: " << s << endl;
|
||||
return false;
|
||||
} catch (...) {
|
||||
cout << "Caught unknown exception" << endl;
|
||||
return false;
|
||||
}
|
||||
pdata = new Native;
|
||||
if (pdata)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
32
src/rcldb/rcldb.h
Normal file
32
src/rcldb/rcldb.h
Normal file
@ -0,0 +1,32 @@
|
||||
#ifndef _DB_H_INCLUDED_
|
||||
#define _DB_H_INCLUDED_
|
||||
/* @(#$Id: rcldb.h,v 1.1 2004-12-14 17:50:28 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace Rcl {

/**
 * Wrapper class for the native database.
 * The native data is only reachable through an opaque pointer, so that
 * users of this header do not need the native database headers.
 */
class Db {
    void *pdata; // Opaque native database data
 public:
    Db();
    ~Db();
    enum OpenMode {DbRO, DbUpd, DbTrunc};
    /** Open the database in dbdir. @return true for ok, false otherwise */
    bool open(const std::string &dbdir, OpenMode mode);
    /** Close the database. @return true for ok, false otherwise */
    bool close();
};

/**
 * Text fields of a document.
 * The std:: qualification is explicit: this header only includes <string>
 * and must not depend on a using-declaration made by whoever includes it.
 */
class Doc {
 public:
    std::string title;
    std::string abstract;
    std::string keywords;
    std::string text;
};

}
|
||||
|
||||
#endif /* _DB_H_INCLUDED_ */
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: execmd.cpp,v 1.1 2004-12-12 08:58:12 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: execmd.cpp,v 1.2 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
#include <unistd.h>
|
||||
@ -14,6 +14,7 @@ static char rcsid[] = "@(#$Id: execmd.cpp,v 1.1 2004-12-12 08:58:12 dockes Exp $
|
||||
#include <iostream>
|
||||
|
||||
#include "execmd.h"
|
||||
#include "pathut.h"
|
||||
|
||||
using namespace std;
|
||||
#define MAX(A,B) (A>B?A:B)
|
||||
@ -152,7 +153,7 @@ ExecCmd::doexec(const string &cmd, const list<string> args,
|
||||
}
|
||||
|
||||
// Fill up argv
|
||||
argv[0] = cmd.c_str();
|
||||
argv[0] = path_getsimple(cmd).c_str();
|
||||
i = 1;
|
||||
for (it = args.begin(); it != args.end(); it++) {
|
||||
argv[i++] = it->c_str();
|
||||
@ -160,7 +161,7 @@ ExecCmd::doexec(const string &cmd, const list<string> args,
|
||||
argv[i] = 0;
|
||||
|
||||
#if 0
|
||||
{int i = 0;cerr << "cmd: " << cmd << endl << "ARGS:" << endl;
|
||||
{int i = 0;cerr << "cmd: " << cmd << endl << "ARGS: " << endl;
|
||||
while (argv[i]) cerr << argv[i++] << endl;}
|
||||
#endif
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: pathut.cpp,v 1.1 2004-12-10 18:13:14 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: pathut.cpp,v 1.2 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
#ifndef TEST_PATHUT
|
||||
@ -31,6 +31,20 @@ std::string path_getfather(const std::string &s) {
|
||||
return father;
|
||||
}
|
||||
|
||||
// Return the last element of a path: what follows the last '/', or the
// whole input if it holds no '/'. The result is empty for an empty input
// or for a path ending with '/'.
std::string path_getsimple(const std::string &s) {
    const std::string::size_type lastslash = s.rfind('/');
    if (lastslash == std::string::npos) {
        // No slash at all (this includes the empty string)
        return s;
    }
    return s.substr(lastslash + 1);
}
|
||||
|
||||
std::string path_home()
|
||||
{
|
||||
uid_t uid = getuid();
|
||||
@ -53,13 +67,18 @@ using namespace std;
|
||||
|
||||
const char *tstvec[] = {"", "/", "/dir", "/dir/", "/dir1/dir2",
|
||||
"/dir1/dir2",
|
||||
"./dir", "./dir1/", "dir", "../dir"};
|
||||
"./dir", "./dir1/", "dir", "../dir", "/dir/toto.c",
|
||||
"/dir/.c",
|
||||
};
|
||||
|
||||
int main(int argc, const char **argv)
|
||||
{
|
||||
|
||||
for (int i = 0;i < sizeof(tstvec) / sizeof(char *); i++) {
|
||||
cout << tstvec[i] << " -> " << path_getfather(tstvec[i]) << endl;
|
||||
cout << tstvec[i] << " FATHER " << path_getfather(tstvec[i]) << endl;
|
||||
}
|
||||
for (int i = 0;i < sizeof(tstvec) / sizeof(char *); i++) {
|
||||
cout << tstvec[i] << " SIMPLE " << path_getsimple(tstvec[i]) << endl;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
#ifndef _PATHUT_H_INCLUDED_
|
||||
#define _PATHUT_H_INCLUDED_
|
||||
/* @(#$Id: pathut.h,v 1.1 2004-12-10 18:13:14 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: pathut.h,v 1.2 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include <string>
|
||||
|
||||
@ -13,6 +13,7 @@ inline void path_cat(std::string &s1, const std::string &s2) {
|
||||
s1 += s2;
|
||||
}
|
||||
|
||||
extern std::string path_getsimple(const std::string &s);
|
||||
extern std::string path_getfather(const std::string &s);
|
||||
extern std::string path_home();
|
||||
|
||||
|
||||
49
src/utils/readfile.cpp
Normal file
49
src/utils/readfile.cpp
Normal file
@ -0,0 +1,49 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: readfile.cpp,v 1.1 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
#ifndef O_STREAMING
|
||||
#define O_STREAMING 0
|
||||
#endif
|
||||
#include <errno.h>
|
||||
|
||||
#include <string>
|
||||
using std::string;
|
||||
|
||||
#include "readfile.h"
|
||||
|
||||
/**
 * Read a whole file into a string.
 *
 * The file contents are appended to data, which is not cleared first. On
 * failure, data may hold what was read before the error.
 *
 * @param fn name of the file to read
 * @param data string to which the file contents are appended
 * @return true for ok, false if the file could not be opened, if a read
 *   failed, or if appending to data threw.
 */
bool file_to_string(const std::string &fn, std::string &data)
{
    int fd = open(fn.c_str(), O_RDONLY|O_STREAMING);
    if (fd < 0) {
        // perror("open");
        return false;
    }

    bool ret = false;
    char buf[4096];
    for (;;) {
        // read() returns ssize_t, not int
        ssize_t n = read(fd, buf, sizeof(buf));
        if (n < 0) {
            // Interrupted by a signal before any data was read: this is
            // not an error, just try again.
            if (errno == EINTR)
                continue;
            // perror("read");
            break;
        }
        if (n == 0) {
            // End of file: everything was read
            ret = true;
            break;
        }

        try {
            data.append(buf, n);
        } catch (...) {
            // fprintf(stderr, "file_to_string: out of memory\n");
            break;
        }
    }

    close(fd);
    return ret;
}
|
||||
13
src/utils/readfile.h
Normal file
13
src/utils/readfile.h
Normal file
@ -0,0 +1,13 @@
|
||||
#ifndef _READFILE_H_INCLUDED_
|
||||
#define _READFILE_H_INCLUDED_
|
||||
/* @(#$Id: readfile.h,v 1.1 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include <string>
|
||||
|
||||
/**
|
||||
* Read whole file into string.
|
||||
* @return true for ok, false otherwise
|
||||
*/
|
||||
bool file_to_string(const std::string &filename, std::string &data);
|
||||
|
||||
#endif /* _READFILE_H_INCLUDED_ */
|
||||
Loading…
x
Reference in New Issue
Block a user