*** empty log message ***
This commit is contained in:
parent
0786c283ef
commit
5ca462cdff
64
src/common/rclconfig.cpp
Normal file
64
src/common/rclconfig.cpp
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
#ifndef lint
|
||||||
|
static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.1 2004-12-14 17:50:28 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
#include "rclconfig.h"
|
||||||
|
#include "pathut.h"
|
||||||
|
#include "conftree.h"
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
/**
 * Placeholder accessor for a configuration tree.
 *
 * The body used to be empty, so control fell off the end of a function
 * declared to return a pointer, which is undefined behaviour. Until a real
 * implementation exists, explicitly report that there is no configuration.
 *
 * @return always 0.
 */
ConfTree *getConfig()
{
    return 0;
}
|
||||||
|
|
||||||
|
/**
 * Build the configuration object.
 *
 * The configuration directory is the value of the RECOLL_CONFDIR environment
 * variable when it is set; otherwise it is the string returned by
 * path_home() with ".recoll/" appended. Three ConfTree objects are then
 * created in sequence:
 *  - the main configuration, from "recoll.conf",
 *  - the mime map, whose file name is the "mimemapfile" parameter
 *    (default "mimemap"),
 *  - the mime conf, whose file name is the "mimeconffile" parameter
 *    (default "mimeconf").
 * m_ok is only set to true after all three have been created.
 *
 * NOTE(review): the paths are built by calling path_cat() and then using its
 * first argument, i.e. this assumes path_cat() appends in place -- confirm
 * against pathut.h.
 */
RclConfig::RclConfig()
    : m_ok(false), conf(0), mimemap(0), mimeconf(0)
{
    // Environment override first, per-user default otherwise.
    if (const char *envdir = getenv("RECOLL_CONFDIR")) {
        confdir = envdir;
    } else {
        confdir = path_home();
        confdir += ".recoll/";
    }

    // Main configuration file.
    // Maybe we should try to open readonly here as, else, this will
    // casually create a configuration file
    string mainpath = confdir;
    path_cat(mainpath, "recoll.conf");
    conf = new ConfTree(mainpath.c_str(), 0);
    if (!conf) {
        cerr << "No configuration" << endl;
        return;
    }

    // Mime map: file name is configurable, with a fixed fallback.
    string mapname;
    if (!conf->get("mimemapfile", mapname, ""))
        mapname = "mimemap";
    string auxpath = confdir;
    path_cat(auxpath, mapname);
    mimemap = new ConfTree(auxpath.c_str());
    if (!mimemap) {
        cerr << "No mime map file" << endl;
        return;
    }

    // Mime conf: same scheme as the mime map.
    string confname;
    if (!conf->get("mimeconffile", confname, ""))
        confname = "mimeconf";
    auxpath = confdir;
    path_cat(auxpath, confname);
    mimeconf = new ConfTree(auxpath.c_str());
    if (!mimeconf) {
        cerr << "No mime conf file" << endl;
        return;
    }
    mimeconf->list();

    // Everything was created: the object is usable.
    m_ok = true;
}
|
||||||
45
src/common/rclconfig.h
Normal file
45
src/common/rclconfig.h
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
#ifndef _RCLCONFIG_H_INCLUDED_
|
||||||
|
#define _RCLCONFIG_H_INCLUDED_
|
||||||
|
/* @(#$Id: rclconfig.h,v 1.1 2004-12-14 17:50:28 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
|
#include "conftree.h"
|
||||||
|
|
||||||
|
class RclConfig {
|
||||||
|
int m_ok;
|
||||||
|
string confdir; // Directory where the files are stored
|
||||||
|
ConfTree *conf; // Parsed main configuration
|
||||||
|
string keydir; // Current directory used for parameter fetches.
|
||||||
|
string defcharset; // These are stored locally to avoid a config lookup
|
||||||
|
string deflang; // each time.
|
||||||
|
// Note: this will have to change if/when we support per directory maps
|
||||||
|
ConfTree *mimemap;
|
||||||
|
ConfTree *mimeconf;
|
||||||
|
public:
|
||||||
|
RclConfig();
|
||||||
|
~RclConfig() {delete conf;delete mimemap;delete mimeconf;}
|
||||||
|
bool ok() {return m_ok;}
|
||||||
|
ConfTree *getConfig() {return m_ok ? conf : 0;}
|
||||||
|
ConfTree *getMimeMap() {return m_ok ? mimemap : 0;}
|
||||||
|
ConfTree *getMimeConf() {return m_ok ? mimeconf : 0;}
|
||||||
|
bool getConfParam(const string &name, string &value)
|
||||||
|
{
|
||||||
|
if (conf == 0)
|
||||||
|
return false;
|
||||||
|
return conf->get(name, value, keydir);
|
||||||
|
}
|
||||||
|
const string &getDefCharset() {
|
||||||
|
return defcharset;
|
||||||
|
}
|
||||||
|
const string &getDefLang() {
|
||||||
|
return deflang;
|
||||||
|
}
|
||||||
|
void setKeyDir(const string &dir)
|
||||||
|
{
|
||||||
|
keydir = dir;
|
||||||
|
conf->get("defaultcharset", defcharset, keydir);
|
||||||
|
conf->get("defaultlanguage", deflang, keydir);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
#endif /* _RCLCONFIG_H_INCLUDED_ */
|
||||||
@ -1,14 +1,33 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: textsplit.cpp,v 1.1 2004-12-13 15:42:16 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: textsplit.cpp,v 1.2 2004-12-14 17:49:11 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
|
#ifndef TEST_TEXTSPLIT
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
#include "textsplit.h"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Splitting a text into words. The code in this file will work with any
|
||||||
|
* charset where the basic separators (.,- etc.) have their ascii values
|
||||||
|
* (ok for UTF-8, ascii, iso8859* and quite a few others).
|
||||||
|
*
|
||||||
|
* We work in a way which would make it quite difficult to handle non-ascii
|
||||||
|
* separator chars (en-dash,etc.). We would then need to actually parse the
|
||||||
|
* utf-8 stream, and use a different way to classify the characters (instead
|
||||||
|
* of a 256 slot array).
|
||||||
|
*
|
||||||
|
* We are also not using capitalization information.
|
||||||
|
*/
|
||||||
|
|
||||||
// Character classes: we have three main groups, and then some chars
|
// Character classes: we have three main groups, and then some chars
|
||||||
// are their own class because they want special handling.
|
// are their own class because they want special handling.
|
||||||
|
// We have an array with 256 slots where we keep the character states.
|
||||||
|
// The array could be fully static, but we use a small function to fill it
|
||||||
|
// once.
|
||||||
enum CharClass {LETTER=256, SPACE=257, DIGIT=258};
|
enum CharClass {LETTER=256, SPACE=257, DIGIT=258};
|
||||||
static int charclasses[256];
|
static int charclasses[256];
|
||||||
static void setcharclasses()
|
static void setcharclasses()
|
||||||
@ -38,7 +57,7 @@ static void setcharclasses()
|
|||||||
init = 1;
|
init = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void emitterm(string &w, int *posp, bool doerase = true)
|
void TextSplit::emitterm(string &w, int pos, bool doerase = true)
|
||||||
{
|
{
|
||||||
// Maybe trim end of word. These are chars that we would keep inside
|
// Maybe trim end of word. These are chars that we would keep inside
|
||||||
// a word or span, but not at the end
|
// a word or span, but not at the end
|
||||||
@ -55,22 +74,27 @@ static void emitterm(string &w, int *posp, bool doerase = true)
|
|||||||
}
|
}
|
||||||
breakloop:
|
breakloop:
|
||||||
if (w.length()) {
|
if (w.length()) {
|
||||||
if (posp)
|
if (termsink)
|
||||||
*posp++;
|
termsink(cdata, w, pos);
|
||||||
cout << w << endl;
|
|
||||||
}
|
}
|
||||||
if (doerase)
|
if (doerase)
|
||||||
w.erase();
|
w.erase();
|
||||||
}
|
}
|
||||||
|
|
||||||
void text_to_words(const string &in)
|
/*
|
||||||
|
* We basically emit a word every time we see a separator, but some chars are
|
||||||
|
* handled specially so that special cases, ie, c++ and dockes@okyz.com etc,
|
||||||
|
* are handled properly,
|
||||||
|
*/
|
||||||
|
void TextSplit::text_to_words(const string &in)
|
||||||
{
|
{
|
||||||
setcharclasses();
|
setcharclasses();
|
||||||
string span;
|
string span;
|
||||||
string word;
|
string word;
|
||||||
bool number = false;
|
bool number = false;
|
||||||
int pos = 0;
|
int wordpos = 0;
|
||||||
int spanpos = 0;
|
int spanpos = 0;
|
||||||
|
|
||||||
for (int i = 0; i < in.length(); i++) {
|
for (int i = 0; i < in.length(); i++) {
|
||||||
int c = in[i];
|
int c = in[i];
|
||||||
int cc = charclasses[c];
|
int cc = charclasses[c];
|
||||||
@ -78,11 +102,13 @@ void text_to_words(const string &in)
|
|||||||
case SPACE:
|
case SPACE:
|
||||||
SPACE:
|
SPACE:
|
||||||
if (word.length()) {
|
if (word.length()) {
|
||||||
if (span.length() != word.length())
|
if (span.length() != word.length()) {
|
||||||
emitterm(span, &spanpos);
|
emitterm(span, spanpos);
|
||||||
emitterm(word, &pos);
|
}
|
||||||
|
emitterm(word, wordpos++);
|
||||||
number = false;
|
number = false;
|
||||||
}
|
}
|
||||||
|
spanpos = wordpos;
|
||||||
span.erase();
|
span.erase();
|
||||||
break;
|
break;
|
||||||
case '-':
|
case '-':
|
||||||
@ -94,9 +120,10 @@ void text_to_words(const string &in)
|
|||||||
span += c;
|
span += c;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (span.length() != word.length())
|
if (span.length() != word.length()) {
|
||||||
emitterm(span, &spanpos, false);
|
emitterm(span, spanpos, false);
|
||||||
emitterm(word, &pos);
|
}
|
||||||
|
emitterm(word, wordpos++);
|
||||||
number = false;
|
number = false;
|
||||||
span += c;
|
span += c;
|
||||||
}
|
}
|
||||||
@ -104,9 +131,10 @@ void text_to_words(const string &in)
|
|||||||
case '\'':
|
case '\'':
|
||||||
case '@':
|
case '@':
|
||||||
if (word.length()) {
|
if (word.length()) {
|
||||||
if (span.length() != word.length())
|
if (span.length() != word.length()) {
|
||||||
emitterm(span, &spanpos, false);
|
emitterm(span, spanpos, false);
|
||||||
emitterm(word, &pos);
|
}
|
||||||
|
emitterm(word, wordpos++);
|
||||||
number = false;
|
number = false;
|
||||||
} else
|
} else
|
||||||
word += c;
|
word += c;
|
||||||
@ -117,7 +145,7 @@ void text_to_words(const string &in)
|
|||||||
word += c;
|
word += c;
|
||||||
} else {
|
} else {
|
||||||
if (word.length()) {
|
if (word.length()) {
|
||||||
emitterm(word, &pos);
|
emitterm(word, wordpos++);
|
||||||
number = false;
|
number = false;
|
||||||
} else
|
} else
|
||||||
word += c;
|
word += c;
|
||||||
@ -139,8 +167,8 @@ void text_to_words(const string &in)
|
|||||||
// if '-' is the last char before end of line, just
|
// if '-' is the last char before end of line, just
|
||||||
// ignore the line change. This is the right thing to
|
// ignore the line change. This is the right thing to
|
||||||
// do almost always. We'd then need a way to check if
|
// do almost always. We'd then need a way to check if
|
||||||
// the - was added as part of the sleep or was really there,
|
// the - was added as part of the word hyphenation, or was
|
||||||
// but this would need a dictionary.
|
// there in the first place, but this would need a dictionary.
|
||||||
} else {
|
} else {
|
||||||
// Handle like a normal separator
|
// Handle like a normal separator
|
||||||
goto SPACE;
|
goto SPACE;
|
||||||
@ -162,42 +190,35 @@ void text_to_words(const string &in)
|
|||||||
}
|
}
|
||||||
if (word.length()) {
|
if (word.length()) {
|
||||||
if (span.length() != word.length())
|
if (span.length() != word.length())
|
||||||
emitterm(span, &spanpos);
|
emitterm(span, spanpos);
|
||||||
emitterm(word, &pos);
|
emitterm(word, wordpos);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 1 || TEST_TEXTSPLIT
|
#else // TEST driver ->
|
||||||
|
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
int
|
|
||||||
file_to_string(const string &fn, string &data)
|
#include <iostream>
|
||||||
|
|
||||||
|
#include "textsplit.h"
|
||||||
|
#include "readfile.h"
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
int termsink(void *, const string &term, int pos)
|
||||||
{
|
{
|
||||||
int fd = open(fn.c_str(), 0);
|
cout << pos << " " << term << endl;
|
||||||
if (fd < 0) {
|
|
||||||
perror("open");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
char buf[4096];
|
|
||||||
for (;;) {
|
|
||||||
int n = read(fd, buf, 4096);
|
|
||||||
if (n < 0) {
|
|
||||||
perror("read");
|
|
||||||
close(fd);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
if (n == 0)
|
|
||||||
break;
|
|
||||||
data.append(buf, n);
|
|
||||||
}
|
|
||||||
close(fd);
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static string teststring =
|
static string teststring =
|
||||||
"jfd@okyz.com "
|
"jfd@okyz.com "
|
||||||
"Ceci. Est;Oui 1.24 n@d @net .net t@v@c c# c++ -10 o'brien l'ami "
|
"Ceci. Est;Oui 1.24 n@d @net .net t@v@c c# c++ -10 o'brien l'ami "
|
||||||
|
"a 134 +134 -14 -1.5 +1.5 1.54e10 a"
|
||||||
"@^#$(#$(*)"
|
"@^#$(#$(*)"
|
||||||
"one\n\rtwo\nthree-\nfour"
|
"one\n\rtwo\nthree-\nfour"
|
||||||
"[olala][ululu]"
|
"[olala][ululu]"
|
||||||
@ -206,15 +227,16 @@ static string teststring =
|
|||||||
|
|
||||||
int main(int argc, char **argv)
|
int main(int argc, char **argv)
|
||||||
{
|
{
|
||||||
|
TextSplit splitter(termsink, 0);
|
||||||
if (argc == 2) {
|
if (argc == 2) {
|
||||||
string data;
|
string data;
|
||||||
if (file_to_string(argv[1], data) < 0)
|
if (!file_to_string(argv[1], data))
|
||||||
exit(1);
|
exit(1);
|
||||||
text_to_words(data);
|
splitter.text_to_words(data);
|
||||||
} else {
|
} else {
|
||||||
cout << teststring << endl; text_to_words(teststring);
|
cout << teststring << endl;
|
||||||
|
splitter.text_to_words(teststring);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
#endif // TEST
|
#endif // TEST
|
||||||
|
|
||||||
|
|||||||
31
src/common/textsplit.h
Normal file
31
src/common/textsplit.h
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
#ifndef _TEXTSPLIT_H_INCLUDED_
|
||||||
|
#define _TEXTSPLIT_H_INCLUDED_
|
||||||
|
/* @(#$Id: textsplit.h,v 1.1 2004-12-14 17:49:11 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Split text into words.
|
||||||
|
* See comments at top of .cpp for more explanations.
|
||||||
|
* This uses a callback function. It could be done with an iterator instead,
|
||||||
|
* but it's much simpler this way...
|
||||||
|
*/
|
||||||
|
class TextSplit {
 public:
    /**
     * Type of the function receiving the terms: it gets the client data
     * pointer given to the constructor, the term and its position.
     */
    typedef int (*TermSink)(void *cdata, const std::string & term, int pos);

    /**
     * Constructor: just store callback and client data
     */
    TextSplit(TermSink t, void *c)
        : termsink(t), cdata(c)
    {
    }

    /**
     * Split text, emit words and positions.
     */
    void text_to_words(const std::string &in);

 private:
    TermSink termsink;  // Term receiver
    void *cdata;        // Opaque pointer handed back to the term receiver

    // Hand one term over to the sink; implemented in the .cpp.
    void emitterm(std::string &term, int pos, bool doerase);
};
|
||||||
|
|
||||||
|
#endif /* _TEXTSPLIT_H_INCLUDED_ */
|
||||||
23
src/index/indexer.h
Normal file
23
src/index/indexer.h
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
#ifndef _INDEXER_H_INCLUDED_
|
||||||
|
#define _INDEXER_H_INCLUDED_
|
||||||
|
/* @(#$Id: indexer.h,v 1.1 2004-12-14 17:53:51 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
|
#include "rclconfig.h"
|
||||||
|
|
||||||
|
/* Definition for document interner functions */
|
||||||
|
typedef Rcl::Doc* (*MimeHandlerFunc)(RclConfig *, const string &,
|
||||||
|
const string &);
|
||||||
|
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
class FsIndexer {
|
||||||
|
const ConfTree &conf;
|
||||||
|
public:
|
||||||
|
enum runStatus {IndexerOk, IndexerError};
|
||||||
|
Indexer(const ConfTree &cnf): conf(cnf) {}
|
||||||
|
virtual ~Indexer() {}
|
||||||
|
runStatus run() = 0;
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* _INDEXER_H_INCLUDED_ */
|
||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.1 2004-12-13 15:42:16 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.2 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
@ -11,6 +11,9 @@ using std::string;
|
|||||||
|
|
||||||
string mimetype(const string &filename, ConfTree *mtypes)
|
string mimetype(const string &filename, ConfTree *mtypes)
|
||||||
{
|
{
|
||||||
|
if (mtypes == 0)
|
||||||
|
return "";
|
||||||
|
|
||||||
// If filename has a suffix and we find it in the map, we're done
|
// If filename has a suffix and we find it in the map, we're done
|
||||||
string::size_type dot = filename.find_last_of(".");
|
string::size_type dot = filename.find_last_of(".");
|
||||||
if (dot != string::npos) {
|
if (dot != string::npos) {
|
||||||
@ -26,6 +29,8 @@ string mimetype(const string &filename, ConfTree *mtypes)
|
|||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef _TEST_MIMETYPE_
|
#ifdef _TEST_MIMETYPE_
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
const char *tvec[] = {
|
const char *tvec[] = {
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
#ifndef _MIMETYPE_H_INCLUDED_
|
#ifndef _MIMETYPE_H_INCLUDED_
|
||||||
#define _MIMETYPE_H_INCLUDED_
|
#define _MIMETYPE_H_INCLUDED_
|
||||||
/* @(#$Id: mimetype.h,v 1.1 2004-12-13 15:42:16 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: mimetype.h,v 1.2 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include "conftree.h"
|
#include "conftree.h"
|
||||||
@ -13,4 +13,5 @@
|
|||||||
*/
|
*/
|
||||||
string mimetype(const std::string &filename, ConfTree *mtypes);
|
string mimetype(const std::string &filename, ConfTree *mtypes);
|
||||||
|
|
||||||
|
|
||||||
#endif /* _MIMETYPE_H_INCLUDED_ */
|
#endif /* _MIMETYPE_H_INCLUDED_ */
|
||||||
|
|||||||
@ -1,7 +1,9 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.1 2004-12-13 15:42:16 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.2 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include <strings.h>
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
#include "pathut.h"
|
#include "pathut.h"
|
||||||
@ -9,43 +11,156 @@ static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.1 2004-12-13 15:42:16 dockes
|
|||||||
#include "rclconfig.h"
|
#include "rclconfig.h"
|
||||||
#include "fstreewalk.h"
|
#include "fstreewalk.h"
|
||||||
#include "mimetype.h"
|
#include "mimetype.h"
|
||||||
|
#include "rcldb.h"
|
||||||
|
#include "readfile.h"
|
||||||
|
#include "indexer.h"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Internal handler registered for the "text/plain" mime type.
 *
 * Not implemented yet: the arguments are ignored and no document is
 * produced.
 *
 * @return always 0.
 */
Rcl::Doc* textPlainToDoc(RclConfig *conf, const string &fn,
                         const string &mtype)
{
    return 0;
}
|
||||||
|
|
||||||
|
static map<string, MimeHandlerFunc> ihandlers;
|
||||||
|
class IHandler_Init {
|
||||||
|
public:
|
||||||
|
IHandler_Init() {
|
||||||
|
ihandlers["text/plain"] = textPlainToDoc;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
static IHandler_Init ihandleriniter;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return handler function for given mime type
|
||||||
|
*/
|
||||||
|
MimeHandlerFunc getMimeHandler(const std::string &mtype, ConfTree *mhandlers)
|
||||||
|
{
|
||||||
|
// Return handler definition for mime type
|
||||||
|
string hs;
|
||||||
|
if (!mhandlers->get(mtype, hs, ""))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
// Break definition into type and name
|
||||||
|
vector<string> toks;
|
||||||
|
ConfTree::stringToStrings(hs, toks);
|
||||||
|
if (toks.size() < 1) {
|
||||||
|
cerr << "Bad mimeconf line for " << mtype << endl;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Retrieve handler function according to type
|
||||||
|
if (!strcasecmp(toks[0].c_str(), "internal")) {
|
||||||
|
cerr << "Internal Handler" << endl;
|
||||||
|
map<string, MimeHandlerFunc>::const_iterator it =
|
||||||
|
ihandlers.find(mtype);
|
||||||
|
if (it == ihandlers.end()) {
|
||||||
|
cerr << "Internal handler not found for " << mtype << endl;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
cerr << "Got handler" << endl;
|
||||||
|
return it->second;
|
||||||
|
} else if (!strcasecmp(toks[0].c_str(), "dll")) {
|
||||||
|
if (toks.size() != 2)
|
||||||
|
return 0;
|
||||||
|
return 0;
|
||||||
|
} else if (!strcasecmp(toks[0].c_str(), "exec")) {
|
||||||
|
if (toks.size() != 2)
|
||||||
|
return 0;
|
||||||
|
return 0;
|
||||||
|
} else {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
class DirIndexer {
|
class DirIndexer {
|
||||||
FsTreeWalker walker;
|
FsTreeWalker walker;
|
||||||
RclConfig *config;
|
RclConfig *config;
|
||||||
string topdir;
|
string topdir;
|
||||||
|
string dbdir;
|
||||||
|
Rcl::Db db;
|
||||||
public:
|
public:
|
||||||
DirIndexer(RclConfig *cnf, const string &top)
|
DirIndexer(RclConfig *cnf, const string &dbd, const string &top)
|
||||||
: config(cnf), topdir(top)
|
: config(cnf), topdir(top), dbdir(dbd)
|
||||||
{
|
{ }
|
||||||
}
|
|
||||||
friend FsTreeWalker::Status
|
friend FsTreeWalker::Status
|
||||||
indexfile(void *, const std::string &, const struct stat *,
|
indexfile(void *, const std::string &, const struct stat *,
|
||||||
FsTreeWalker::CbFlag);
|
FsTreeWalker::CbFlag);
|
||||||
void index()
|
|
||||||
{
|
void index();
|
||||||
walker.walk(topdir, indexfile, this);
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
void DirIndexer::index()
|
||||||
|
{
|
||||||
|
#if 0
|
||||||
|
if (!db.open(dbdir, Rcl::Db::DbUpd)) {
|
||||||
|
cerr << "Error opening database in " << dbdir << " for " <<
|
||||||
|
topdir << endl;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
walker.walk(topdir, indexfile, this);
|
||||||
|
#if 0
|
||||||
|
if (!db.close()) {
|
||||||
|
cerr << "Error closing database in " << dbdir << " for " <<
|
||||||
|
topdir << endl;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
FsTreeWalker::Status
|
FsTreeWalker::Status
|
||||||
indexfile(void *cdata, const std::string &fn,
|
indexfile(void *cdata, const std::string &fn, const struct stat *stp,
|
||||||
const struct stat *stp, FsTreeWalker::CbFlag flg)
|
FsTreeWalker::CbFlag flg)
|
||||||
{
|
{
|
||||||
DirIndexer *me = (DirIndexer *)cdata;
|
DirIndexer *me = (DirIndexer *)cdata;
|
||||||
if (flg == FsTreeWalker::FtwDirEnter || flg == FsTreeWalker::FtwDirReturn) {
|
|
||||||
// Possibly adjust defaults
|
if (flg == FsTreeWalker::FtwDirEnter ||
|
||||||
|
flg == FsTreeWalker::FtwDirReturn) {
|
||||||
|
me->config->setKeyDir(fn);
|
||||||
cout << "indexfile: [" << fn << "]" << endl;
|
cout << "indexfile: [" << fn << "]" << endl;
|
||||||
|
cout << " defcharset: " << me->config->getDefCharset()
|
||||||
|
<< " deflang: " << me->config->getDefLang() << endl;
|
||||||
|
|
||||||
return FsTreeWalker::FtwOk;
|
return FsTreeWalker::FtwOk;
|
||||||
}
|
}
|
||||||
string mtype = mimetype(fn, me->config->getMimeMap());
|
|
||||||
if (mtype.length() > 0)
|
|
||||||
cout << "indexfile: " << mtype << " " << fn << endl;
|
|
||||||
else
|
|
||||||
cout << "indexfile: " << "(nomime)" << " " << fn << endl;
|
|
||||||
|
|
||||||
|
string mime = mimetype(fn, me->config->getMimeMap());
|
||||||
|
if (mime.length() == 0) {
|
||||||
|
cout << "indexfile: " << "(no mime)" << " " << fn << endl;
|
||||||
|
// No mime type ?? pass on.
|
||||||
|
return FsTreeWalker::FtwOk;
|
||||||
|
}
|
||||||
|
|
||||||
|
cout << "indexfile: " << mime << " " << fn << endl;
|
||||||
|
|
||||||
|
// Look for appropriate handler
|
||||||
|
MimeHandlerFunc fun = getMimeHandler(mime, me->config->getMimeConf());
|
||||||
|
if (!fun) {
|
||||||
|
// No handler for this type, for now :(
|
||||||
|
return FsTreeWalker::FtwOk;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if file has already been indexed, and has changed since
|
||||||
|
// - Make path term,
|
||||||
|
// - query db: postlist_begin->docid
|
||||||
|
// - fetch doc (get_document(docid)
|
||||||
|
// - check date field, maybe skip
|
||||||
|
|
||||||
|
// Turn file into a document. The document has fields for title, body
|
||||||
|
// etc., all text converted to utf8
|
||||||
|
Rcl::Doc *doc = fun(me->config, fn, mime);
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
// Set up xapian document, add postings and misc fields,
|
||||||
|
// add to or update database.
|
||||||
|
dbadd(doc);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return FsTreeWalker::FtwOk;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -63,11 +178,18 @@ int main(int argc, const char **argv)
|
|||||||
cerr << "No top directories in configuration" << endl;
|
cerr << "No top directories in configuration" << endl;
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
list<string> tdl;
|
vector<string> tdl;
|
||||||
if (ConfTree::stringToStrings(topdirs, tdl)) {
|
if (ConfTree::stringToStrings(topdirs, tdl)) {
|
||||||
for (list<string>::iterator it = tdl.begin(); it != tdl.end(); it++) {
|
for (int i = 0; i < tdl.size(); i++) {
|
||||||
cout << *it << endl;
|
string topdir = tdl[i];
|
||||||
DirIndexer indexer(config, *it);
|
cout << topdir << endl;
|
||||||
|
string dbdir;
|
||||||
|
if (conf->get("dbdir", dbdir, topdir) == 0) {
|
||||||
|
cerr << "No database directory in configuration for "
|
||||||
|
<< topdir << endl;
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
DirIndexer indexer(config, dbdir, topdir);
|
||||||
indexer.index();
|
indexer.index();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
121
src/rcldb/rcldb.cpp
Normal file
121
src/rcldb/rcldb.cpp
Normal file
@ -0,0 +1,121 @@
|
|||||||
|
#ifndef lint
|
||||||
|
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.1 2004-12-14 17:50:28 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
#include "rcldb.h"
|
||||||
|
|
||||||
|
#include "xapian.h"
|
||||||
|
|
||||||
|
// Data for a xapian database
|
||||||
|
class Native {
|
||||||
|
public:
|
||||||
|
bool isopen;
|
||||||
|
bool iswritable;
|
||||||
|
class Xapian::Database db;
|
||||||
|
class Xapian::WritableDatabase wdb;
|
||||||
|
vector<bool> updated;
|
||||||
|
|
||||||
|
Native() : isopen(false), iswritable(false) {}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
// Create the object with a fresh, closed, native data block.
Rcl::Db::Db()
    : pdata(new Native)
{
}
|
||||||
|
|
||||||
|
/**
 * Flush pending changes if the database is open for writing, then release
 * the native data.
 *
 * The native data used to be deleted inside the same try block as the
 * flush, so it was leaked whenever the flush threw. The flush and the
 * release are now separate steps, and no exception leaves the destructor.
 */
Rcl::Db::~Db()
{
    if (pdata == 0)
	return;
    Native *ndb = (Native *)pdata;
    pdata = 0;

    try {
	// There is nothing to do for an ro db.
	if (ndb->isopen && ndb->iswritable)
	    ndb->wdb.flush();
    } catch (const Xapian::Error &e) {
	cout << "Exception: " << e.get_msg() << endl;
    } catch (const string &s) {
	cout << "Exception: " << s << endl;
    } catch (const char *s) {
	cout << "Exception: " << s << endl;
    } catch (...) {
	cout << "Caught unknown exception" << endl;
    }

    // Always release the native data, whatever happened to the flush.
    try {
	delete ndb;
    } catch (...) {
	cout << "Caught unknown exception" << endl;
    }
}
|
||||||
|
|
||||||
|
bool Rcl::Db::open(const string& dir, OpenMode mode)
|
||||||
|
{
|
||||||
|
if (pdata == 0)
|
||||||
|
return false;
|
||||||
|
Native *ndb = (Native *)pdata;
|
||||||
|
try {
|
||||||
|
switch (mode) {
|
||||||
|
case DbUpd:
|
||||||
|
ndb->wdb = Xapian::Auto::open(dir, Xapian::DB_CREATE_OR_OPEN);
|
||||||
|
ndb->updated.resize(ndb->wdb.get_lastdocid() + 1);
|
||||||
|
ndb->iswritable = true;
|
||||||
|
break;
|
||||||
|
case DbTrunc:
|
||||||
|
ndb->wdb = Xapian::Auto::open(dir, Xapian::DB_CREATE_OR_OVERWRITE);
|
||||||
|
ndb->iswritable = true;
|
||||||
|
break;
|
||||||
|
case DbRO:
|
||||||
|
default:
|
||||||
|
ndb->iswritable = false;
|
||||||
|
cerr << "Not ready to open RO yet" << endl;
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
ndb->isopen = true;
|
||||||
|
return true;
|
||||||
|
} catch (const Xapian::Error &e) {
|
||||||
|
cout << "Exception: " << e.get_msg() << endl;
|
||||||
|
} catch (const string &s) {
|
||||||
|
cout << "Exception: " << s << endl;
|
||||||
|
} catch (const char *s) {
|
||||||
|
cout << "Exception: " << s << endl;
|
||||||
|
} catch (...) {
|
||||||
|
cout << "Caught unknown exception" << endl;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
bool Rcl::Db::close()
|
||||||
|
{
|
||||||
|
if (pdata == 0)
|
||||||
|
return false;
|
||||||
|
Native *ndb = (Native *)pdata;
|
||||||
|
if (ndb->isopen == false)
|
||||||
|
return true;
|
||||||
|
try {
|
||||||
|
if (ndb->isopen == true && ndb->iswritable == true) {
|
||||||
|
ndb->wdb.flush();
|
||||||
|
}
|
||||||
|
delete ndb;
|
||||||
|
} catch (const Xapian::Error &e) {
|
||||||
|
cout << "Exception: " << e.get_msg() << endl;
|
||||||
|
return false;
|
||||||
|
} catch (const string &s) {
|
||||||
|
cout << "Exception: " << s << endl;
|
||||||
|
return false;
|
||||||
|
} catch (const char *s) {
|
||||||
|
cout << "Exception: " << s << endl;
|
||||||
|
return false;
|
||||||
|
} catch (...) {
|
||||||
|
cout << "Caught unknown exception" << endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
pdata = new Native;
|
||||||
|
if (pdata)
|
||||||
|
return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
32
src/rcldb/rcldb.h
Normal file
32
src/rcldb/rcldb.h
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
#ifndef _DB_H_INCLUDED_
|
||||||
|
#define _DB_H_INCLUDED_
|
||||||
|
/* @(#$Id: rcldb.h,v 1.1 2004-12-14 17:50:28 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
namespace Rcl {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Wrapper class for the native database.
|
||||||
|
*/
|
||||||
|
class Db {
    void *pdata;    // Opaque native data, created and released in the .cpp

    // The object owns the native data: a memberwise copy would result in
    // the same block being released twice. Copying is disabled (declared
    // private, never defined).
    Db(const Db &);
    Db& operator=(const Db &);
 public:
    Db();
    ~Db();

    /** How to open: read-only, update (create if needed), or truncate. */
    enum OpenMode {DbRO, DbUpd, DbTrunc};

    /**
     * Open the database stored in dbdir.
     * @return true on success.
     */
    bool open(const std::string &dbdir, OpenMode mode);

    /**
     * Close the database.
     * @return true on success.
     */
    bool close();
};
|
||||||
|
|
||||||
|
/**
 * Plain data holder for the text fields of a document.
 *
 * The members are declared with the qualified std::string name: this header
 * only includes <string> and has no using-declaration, so the bare 'string'
 * spelling only compiled when the including file happened to have a
 * 'using namespace std' in effect before the include.
 */
class Doc {
 public:
    std::string title;
    std::string abstract;
    std::string keywords;
    std::string text;
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* _DB_H_INCLUDED_ */
|
||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: execmd.cpp,v 1.1 2004-12-12 08:58:12 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: execmd.cpp,v 1.2 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
@ -14,6 +14,7 @@ static char rcsid[] = "@(#$Id: execmd.cpp,v 1.1 2004-12-12 08:58:12 dockes Exp $
|
|||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
#include "execmd.h"
|
#include "execmd.h"
|
||||||
|
#include "pathut.h"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
#define MAX(A,B) (A>B?A:B)
|
#define MAX(A,B) (A>B?A:B)
|
||||||
@ -152,7 +153,7 @@ ExecCmd::doexec(const string &cmd, const list<string> args,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Fill up argv
|
// Fill up argv
|
||||||
argv[0] = cmd.c_str();
|
argv[0] = path_getsimple(cmd).c_str();
|
||||||
i = 1;
|
i = 1;
|
||||||
for (it = args.begin(); it != args.end(); it++) {
|
for (it = args.begin(); it != args.end(); it++) {
|
||||||
argv[i++] = it->c_str();
|
argv[i++] = it->c_str();
|
||||||
@ -160,7 +161,7 @@ ExecCmd::doexec(const string &cmd, const list<string> args,
|
|||||||
argv[i] = 0;
|
argv[i] = 0;
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
{int i = 0;cerr << "cmd: " << cmd << endl << "ARGS:" << endl;
|
{int i = 0;cerr << "cmd: " << cmd << endl << "ARGS: " << endl;
|
||||||
while (argv[i]) cerr << argv[i++] << endl;}
|
while (argv[i]) cerr << argv[i++] << endl;}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: pathut.cpp,v 1.1 2004-12-10 18:13:14 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: pathut.cpp,v 1.2 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef TEST_PATHUT
|
#ifndef TEST_PATHUT
|
||||||
@ -31,6 +31,20 @@ std::string path_getfather(const std::string &s) {
|
|||||||
return father;
|
return father;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Return the last component of a path, i.e. everything that follows the
 * final '/'.
 *
 * A string holding no '/' at all (the empty string included) is returned
 * unchanged, and a path ending with '/' yields an empty string.
 */
std::string path_getsimple(const std::string &s) {
    const std::string::size_type lastslash = s.rfind('/');
    if (lastslash == std::string::npos) {
        // No directory part: the input already is the simple name
        return s;
    }
    // Keep only what comes after the last separator
    return s.substr(lastslash + 1);
}
|
||||||
|
|
||||||
std::string path_home()
|
std::string path_home()
|
||||||
{
|
{
|
||||||
uid_t uid = getuid();
|
uid_t uid = getuid();
|
||||||
@ -53,13 +67,18 @@ using namespace std;
|
|||||||
|
|
||||||
const char *tstvec[] = {"", "/", "/dir", "/dir/", "/dir1/dir2",
|
const char *tstvec[] = {"", "/", "/dir", "/dir/", "/dir1/dir2",
|
||||||
"/dir1/dir2",
|
"/dir1/dir2",
|
||||||
"./dir", "./dir1/", "dir", "../dir"};
|
"./dir", "./dir1/", "dir", "../dir", "/dir/toto.c",
|
||||||
|
"/dir/.c",
|
||||||
|
};
|
||||||
|
|
||||||
int main(int argc, const char **argv)
|
int main(int argc, const char **argv)
|
||||||
{
|
{
|
||||||
|
|
||||||
for (int i = 0;i < sizeof(tstvec) / sizeof(char *); i++) {
|
for (int i = 0;i < sizeof(tstvec) / sizeof(char *); i++) {
|
||||||
cout << tstvec[i] << " -> " << path_getfather(tstvec[i]) << endl;
|
cout << tstvec[i] << " FATHER " << path_getfather(tstvec[i]) << endl;
|
||||||
|
}
|
||||||
|
for (int i = 0;i < sizeof(tstvec) / sizeof(char *); i++) {
|
||||||
|
cout << tstvec[i] << " SIMPLE " << path_getsimple(tstvec[i]) << endl;
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
#ifndef _PATHUT_H_INCLUDED_
|
#ifndef _PATHUT_H_INCLUDED_
|
||||||
#define _PATHUT_H_INCLUDED_
|
#define _PATHUT_H_INCLUDED_
|
||||||
/* @(#$Id: pathut.h,v 1.1 2004-12-10 18:13:14 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: pathut.h,v 1.2 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
@ -13,6 +13,7 @@ inline void path_cat(std::string &s1, const std::string &s2) {
|
|||||||
s1 += s2;
|
s1 += s2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
extern std::string path_getsimple(const std::string &s);
|
||||||
extern std::string path_getfather(const std::string &s);
|
extern std::string path_getfather(const std::string &s);
|
||||||
extern std::string path_home();
|
extern std::string path_home();
|
||||||
|
|
||||||
|
|||||||
49
src/utils/readfile.cpp
Normal file
49
src/utils/readfile.cpp
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
#ifndef lint
|
||||||
|
static char rcsid[] = "@(#$Id: readfile.cpp,v 1.1 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#ifndef O_STREAMING
|
||||||
|
#define O_STREAMING 0
|
||||||
|
#endif
|
||||||
|
#include <errno.h>
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
using std::string;
|
||||||
|
|
||||||
|
#include "readfile.h"
|
||||||
|
|
||||||
|
/**
 * Read the whole contents of a file and append them to a string.
 *
 * @param fn   path of the file to read
 * @param data string the file contents are appended to. It is not cleared
 *             first, and on failure it keeps whatever was appended before
 *             the error occurred.
 * @return true if the file was opened and read up to end of file, false if
 *         open() or read() failed or if appending to data threw.
 */
bool file_to_string(const string &fn, string &data)
{
    int fd = open(fn.c_str(), O_RDONLY|O_STREAMING);
    if (fd < 0) {
        // perror("open");
        return false;
    }

    bool ret = false;
    char buf[4096];
    for (;;) {
        // read() returns ssize_t: keep the full width of the result
        ssize_t n = read(fd, buf, sizeof(buf));
        if (n < 0) {
            // A call interrupted by a signal transferred nothing and is
            // not a real error: just issue it again
            if (errno == EINTR)
                continue;
            // perror("read");
            break;
        }
        if (n == 0) {
            // End of file: everything was read
            ret = true;
            break;
        }

        try {
            data.append(buf, static_cast<string::size_type>(n));
        } catch (...) {
            // fprintf(stderr, "file_to_string: out of memory\n");
            break;
        }
    }

    // fd is always a valid descriptor here, whatever the way out of the loop
    close(fd);
    return ret;
}
|
||||||
13
src/utils/readfile.h
Normal file
13
src/utils/readfile.h
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
#ifndef _READFILE_H_INCLUDED_
|
||||||
|
#define _READFILE_H_INCLUDED_
|
||||||
|
/* @(#$Id: readfile.h,v 1.1 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read the whole contents of a file, appending them to the data string.
|
||||||
|
* @return true on success, false if the file could not be opened or read
|
||||||
|
*/
|
||||||
|
bool file_to_string(const std::string &filename, std::string &data);
|
||||||
|
|
||||||
|
#endif /* _READFILE_H_INCLUDED_ */
|
||||||
Loading…
x
Reference in New Issue
Block a user