warnings cleanup
This commit is contained in:
parent
91df3aef73
commit
a43ebc3716
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.1 2004-12-14 17:50:28 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.2 2004-12-15 15:00:36 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
@ -10,10 +10,6 @@ static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.1 2004-12-14 17:50:28 dockes Ex
|
|||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
ConfTree *getConfig()
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
RclConfig::RclConfig()
|
RclConfig::RclConfig()
|
||||||
: m_ok(false), conf(0), mimemap(0), mimeconf(0)
|
: m_ok(false), conf(0), mimemap(0), mimeconf(0)
|
||||||
{
|
{
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
#ifndef _RCLCONFIG_H_INCLUDED_
|
#ifndef _RCLCONFIG_H_INCLUDED_
|
||||||
#define _RCLCONFIG_H_INCLUDED_
|
#define _RCLCONFIG_H_INCLUDED_
|
||||||
/* @(#$Id: rclconfig.h,v 1.1 2004-12-14 17:50:28 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: rclconfig.h,v 1.2 2004-12-15 15:00:36 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
#include "conftree.h"
|
#include "conftree.h"
|
||||||
|
|
||||||
@ -9,18 +9,30 @@ class RclConfig {
|
|||||||
string confdir; // Directory where the files are stored
|
string confdir; // Directory where the files are stored
|
||||||
ConfTree *conf; // Parsed main configuration
|
ConfTree *conf; // Parsed main configuration
|
||||||
string keydir; // Current directory used for parameter fetches.
|
string keydir; // Current directory used for parameter fetches.
|
||||||
string defcharset; // These are stored locally to avoid a config lookup
|
|
||||||
string deflang; // each time.
|
|
||||||
// Note: this will have to change if/when we support per directory maps
|
// Note: this will have to change if/when we support per directory maps
|
||||||
ConfTree *mimemap;
|
ConfTree *mimemap;
|
||||||
ConfTree *mimeconf;
|
ConfTree *mimeconf;
|
||||||
public:
|
public:
|
||||||
|
// Let some parameters be accessed directly
|
||||||
|
string defcharset; // These are stored locally to avoid a config lookup
|
||||||
|
string deflang; // each time.
|
||||||
|
bool guesscharset;
|
||||||
|
|
||||||
RclConfig();
|
RclConfig();
|
||||||
~RclConfig() {delete conf;delete mimemap;delete mimeconf;}
|
~RclConfig() {delete conf;delete mimemap;delete mimeconf;}
|
||||||
bool ok() {return m_ok;}
|
bool ok() {return m_ok;}
|
||||||
ConfTree *getConfig() {return m_ok ? conf : 0;}
|
ConfTree *getConfig() {return m_ok ? conf : 0;}
|
||||||
ConfTree *getMimeMap() {return m_ok ? mimemap : 0;}
|
ConfTree *getMimeMap() {return m_ok ? mimemap : 0;}
|
||||||
ConfTree *getMimeConf() {return m_ok ? mimeconf : 0;}
|
ConfTree *getMimeConf() {return m_ok ? mimeconf : 0;}
|
||||||
|
void setKeyDir(const string &dir)
|
||||||
|
{
|
||||||
|
keydir = dir;
|
||||||
|
conf->get("defaultcharset", defcharset, keydir);
|
||||||
|
conf->get("defaultlanguage", deflang, keydir);
|
||||||
|
string str;
|
||||||
|
conf->get("guesscharset", deflang, str);
|
||||||
|
guesscharset = ConfTree::stringToBool(str);
|
||||||
|
}
|
||||||
bool getConfParam(const string &name, string &value)
|
bool getConfParam(const string &name, string &value)
|
||||||
{
|
{
|
||||||
if (conf == 0)
|
if (conf == 0)
|
||||||
@ -33,12 +45,6 @@ class RclConfig {
|
|||||||
const string &getDefLang() {
|
const string &getDefLang() {
|
||||||
return deflang;
|
return deflang;
|
||||||
}
|
}
|
||||||
void setKeyDir(const string &dir)
|
|
||||||
{
|
|
||||||
keydir = dir;
|
|
||||||
conf->get("defaultcharset", defcharset, keydir);
|
|
||||||
conf->get("defaultlanguage", deflang, keydir);
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: textsplit.cpp,v 1.2 2004-12-14 17:49:11 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: textsplit.cpp,v 1.3 2004-12-15 15:00:36 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
#ifndef TEST_TEXTSPLIT
|
#ifndef TEST_TEXTSPLIT
|
||||||
|
|
||||||
@ -35,24 +35,24 @@ static void setcharclasses()
|
|||||||
static int init = 0;
|
static int init = 0;
|
||||||
if (init)
|
if (init)
|
||||||
return;
|
return;
|
||||||
int i;
|
unsigned int i;
|
||||||
memset(charclasses, LETTER, sizeof(charclasses));
|
memset(charclasses, LETTER, sizeof(charclasses));
|
||||||
|
|
||||||
char digits[] = "0123456789";
|
char digits[] = "0123456789";
|
||||||
for (i = 0; i < sizeof(digits); i++)
|
for (i = 0; i < sizeof(digits); i++)
|
||||||
charclasses[digits[i]] = DIGIT;
|
charclasses[int(digits[i])] = DIGIT;
|
||||||
|
|
||||||
char blankspace[] = "\t\v\f ";
|
char blankspace[] = "\t\v\f ";
|
||||||
for (i = 0; i < sizeof(blankspace); i++)
|
for (i = 0; i < sizeof(blankspace); i++)
|
||||||
charclasses[blankspace[i]] = SPACE;
|
charclasses[int(blankspace[i])] = SPACE;
|
||||||
|
|
||||||
char seps[] = "!\"$%&()/<=>[\\]^{|}~:;,*";
|
char seps[] = "!\"$%&()/<=>[\\]^{|}~:;,*";
|
||||||
for (i = 0; i < sizeof(seps); i++)
|
for (i = 0; i < sizeof(seps); i++)
|
||||||
charclasses[seps[i]] = SPACE;
|
charclasses[int(seps[i])] = SPACE;
|
||||||
|
|
||||||
char special[] = ".@+-,#'\n\r";
|
char special[] = ".@+-,#'\n\r";
|
||||||
for (i = 0; i < sizeof(special); i++)
|
for (i = 0; i < sizeof(special); i++)
|
||||||
charclasses[special[i]] = special[i];
|
charclasses[int(special[i])] = special[i];
|
||||||
|
|
||||||
init = 1;
|
init = 1;
|
||||||
}
|
}
|
||||||
@ -95,7 +95,7 @@ void TextSplit::text_to_words(const string &in)
|
|||||||
int wordpos = 0;
|
int wordpos = 0;
|
||||||
int spanpos = 0;
|
int spanpos = 0;
|
||||||
|
|
||||||
for (int i = 0; i < in.length(); i++) {
|
for (unsigned int i = 0; i < in.length(); i++) {
|
||||||
int c = in[i];
|
int c = in[i];
|
||||||
int cc = charclasses[c];
|
int cc = charclasses[c];
|
||||||
switch (cc) {
|
switch (cc) {
|
||||||
@ -114,7 +114,7 @@ void TextSplit::text_to_words(const string &in)
|
|||||||
case '-':
|
case '-':
|
||||||
case '+':
|
case '+':
|
||||||
if (word.length() == 0) {
|
if (word.length() == 0) {
|
||||||
if (i < in.length() || charclasses[in[i+1]] == DIGIT) {
|
if (i < in.length() || charclasses[int(in[i+1])] == DIGIT) {
|
||||||
number = true;
|
number = true;
|
||||||
word += c;
|
word += c;
|
||||||
span += c;
|
span += c;
|
||||||
@ -155,7 +155,7 @@ void TextSplit::text_to_words(const string &in)
|
|||||||
case '#':
|
case '#':
|
||||||
// Keep it only at end of word...
|
// Keep it only at end of word...
|
||||||
if (word.length() > 0 &&
|
if (word.length() > 0 &&
|
||||||
(i == in.length() -1 || charclasses[in[i+1]] == SPACE)) {
|
(i == in.length() -1 || charclasses[int(in[i+1])] == SPACE)) {
|
||||||
word += c;
|
word += c;
|
||||||
span += c;
|
span += c;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,18 +1,41 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: csguess.cpp,v 1.1 2004-12-15 08:21:05 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: csguess.cpp,v 1.2 2004-12-15 15:00:37 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
// This code was converted from estraier / qdbm / myconf.c
|
|
||||||
|
#ifndef TEST_CSGUESS
|
||||||
|
|
||||||
|
// This code was converted from estraier / qdbm / myconf.c:
|
||||||
|
|
||||||
|
/**************************************************************************
|
||||||
|
* Copyright (C) 2000-2004 Mikio Hirabayashi
|
||||||
|
*
|
||||||
|
* This file is part of QDBM, Quick Database Manager.
|
||||||
|
*
|
||||||
|
* QDBM is free software; you can redistribute it and/or modify it under the
|
||||||
|
* terms of the GNU Lesser General Public License as published by the Free
|
||||||
|
* Software Foundation; either version 2.1 of the License or any later
|
||||||
|
* version. QDBM is distributed in the hope that it will be useful, but
|
||||||
|
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||||
|
* License for more details. You should have received a copy of the GNU
|
||||||
|
* Lesser General Public License along with QDBM; if not, write to the Free
|
||||||
|
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||||
|
* 02111-1307 USA.
|
||||||
|
* *********************************************************/
|
||||||
|
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
|
|
||||||
#include <iconv.h>
|
|
||||||
#include "csguess.h"
|
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <iostream>
|
||||||
using std::string;
|
using std::string;
|
||||||
|
|
||||||
|
#include <iconv.h>
|
||||||
|
|
||||||
|
#include "csguess.h"
|
||||||
|
|
||||||
// The values from estraier were 32768, 256, 0.001
|
// The values from estraier were 32768, 256, 0.001
|
||||||
const int ICONVCHECKSIZ = 4000;
|
const int ICONVCHECKSIZ = 32768;
|
||||||
const int ICONVMISSMAX = 10;
|
const int ICONVMISSMAX = 256;
|
||||||
const double ICONVALLWRAT = 0.001;
|
const double ICONVALLWRAT = 0.001;
|
||||||
|
|
||||||
// Try to transcode and count errors (for charset guessing)
|
// Try to transcode and count errors (for charset guessing)
|
||||||
@ -20,17 +43,18 @@ static int transcodeErrCnt(const char *ptr, int size,
|
|||||||
const char *icode, const char *ocode)
|
const char *icode, const char *ocode)
|
||||||
{
|
{
|
||||||
iconv_t ic;
|
iconv_t ic;
|
||||||
char obuf[ICONVCHECKSIZ], *wp, *rp;
|
char obuf[2*ICONVCHECKSIZ], *wp, *rp;
|
||||||
size_t isiz, osiz;
|
size_t isiz, osiz;
|
||||||
int miss;
|
int miss;
|
||||||
isiz = size;
|
isiz = size;
|
||||||
if((ic = iconv_open(ocode, icode)) == (iconv_t)-1) return ICONVMISSMAX;
|
if((ic = iconv_open(ocode, icode)) == (iconv_t)-1)
|
||||||
|
return size;
|
||||||
miss = 0;
|
miss = 0;
|
||||||
rp = (char *)ptr;
|
rp = (char *)ptr;
|
||||||
while(isiz > 0){
|
while(isiz > 0){
|
||||||
osiz = ICONVCHECKSIZ;
|
osiz = 2*ICONVCHECKSIZ;
|
||||||
wp = obuf;
|
wp = obuf;
|
||||||
if(iconv(ic, (const char **)&rp, &isiz, &wp, &osiz) == -1){
|
if(iconv(ic, (const char **)&rp, &isiz, &wp, &osiz) == (size_t)-1){
|
||||||
if(errno == EILSEQ || errno == EINVAL){
|
if(errno == EILSEQ || errno == EINVAL){
|
||||||
rp++;
|
rp++;
|
||||||
isiz--;
|
isiz--;
|
||||||
@ -38,17 +62,20 @@ static int transcodeErrCnt(const char *ptr, int size,
|
|||||||
if(miss >= ICONVMISSMAX)
|
if(miss >= ICONVMISSMAX)
|
||||||
break;
|
break;
|
||||||
} else {
|
} else {
|
||||||
|
miss = size;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if(iconv_close(ic) == -1)
|
if(iconv_close(ic) == -1)
|
||||||
return ICONVMISSMAX;
|
return size;
|
||||||
return miss;
|
return miss;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Try to guess character encoding. This could be optimized quite a
|
||||||
string csguess(const string &in)
|
// lot by avoiding the multiple passes on the document, to be done
|
||||||
|
// after usefulness is demonstrated...
|
||||||
|
string csguess(const string &in, const string &dflt)
|
||||||
{
|
{
|
||||||
const char *hypo;
|
const char *hypo;
|
||||||
int i, miss;
|
int i, miss;
|
||||||
@ -74,9 +101,10 @@ string csguess(const string &in)
|
|||||||
return "UTF-16LE";
|
return "UTF-16LE";
|
||||||
}
|
}
|
||||||
|
|
||||||
// Look for iso-2022 specific escape sequences. As iso-2022 begins
|
// Look for iso-2022 (rfc1468) specific escape sequences. As
|
||||||
// in ascii, these succeed fast for a japanese text, but are quite
|
// iso-2022 begins in ascii, and typically soon escapes, these
|
||||||
// expensive for any other
|
// succeed fast for a japanese text, but are quite expensive for
|
||||||
|
// any other
|
||||||
for (i = 0; i < size - 3; i++) {
|
for (i = 0; i < size - 3; i++) {
|
||||||
if (text[i] == 0x1b) {
|
if (text[i] == 0x1b) {
|
||||||
i++;
|
i++;
|
||||||
@ -131,5 +159,35 @@ string csguess(const string &in)
|
|||||||
if (!hypo && miss / (double)size <= ICONVALLWRAT)
|
if (!hypo && miss / (double)size <= ICONVALLWRAT)
|
||||||
hypo = "CP932";
|
hypo = "CP932";
|
||||||
|
|
||||||
return hypo ? hypo : "ISO-8859-1";
|
return hypo ? hypo : dflt;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#include <errno.h>
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
#include "readfile.h"
|
||||||
|
#include "csguess.h"
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
if (argc != 2) {
|
||||||
|
cerr << "Usage: trcsguess <filename> <default>" << endl;
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
const string filename = argv[1];
|
||||||
|
const string dflt = argv[2];
|
||||||
|
string text;
|
||||||
|
if (!file_to_string(filename, text)) {
|
||||||
|
cerr << "Couldnt read file, errno " << errno << endl;
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
cout << csguess(text, dflt) << endl;
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|||||||
@ -1,12 +1,13 @@
|
|||||||
#ifndef _CSGUESS_H_INCLUDED_
|
#ifndef _CSGUESS_H_INCLUDED_
|
||||||
#define _CSGUESS_H_INCLUDED_
|
#define _CSGUESS_H_INCLUDED_
|
||||||
/* @(#$Id: csguess.h,v 1.1 2004-12-15 08:21:05 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: csguess.h,v 1.2 2004-12-15 15:00:37 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
|
||||||
// Try to guess the character set. This might guess unicode encodings, and
|
// Try to guess the character set. This might guess unicode encodings, and
|
||||||
// some asian charsets, but has no chance, for example, of discriminating
|
// some asian charsets, but has no chance, for example, of discriminating
|
||||||
// between the different iso8859-xx charsets.
|
// between the different iso8859-xx charsets.
|
||||||
extern std::string csguess(const std::string &in);
|
extern std::string csguess(const std::string &in, const std::string &dflt);
|
||||||
|
|
||||||
#endif /* _CSGUESS_H_INCLUDED_ */
|
#endif /* _CSGUESS_H_INCLUDED_ */
|
||||||
|
|||||||
@ -1,12 +1,12 @@
|
|||||||
#ifndef _INDEXER_H_INCLUDED_
|
#ifndef _INDEXER_H_INCLUDED_
|
||||||
#define _INDEXER_H_INCLUDED_
|
#define _INDEXER_H_INCLUDED_
|
||||||
/* @(#$Id: indexer.h,v 1.1 2004-12-14 17:53:51 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: indexer.h,v 1.2 2004-12-15 15:00:37 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
#include "rclconfig.h"
|
#include "rclconfig.h"
|
||||||
|
|
||||||
/* Definition for document interner functions */
|
/* Definition for document interner functions */
|
||||||
typedef Rcl::Doc* (*MimeHandlerFunc)(RclConfig *, const string &,
|
typedef bool (*MimeHandlerFunc)(RclConfig *, const string &,
|
||||||
const string &);
|
const string &, Rcl::Doc&);
|
||||||
|
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.2 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.3 2004-12-15 15:00:37 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
@ -18,7 +18,7 @@ string mimetype(const string &filename, ConfTree *mtypes)
|
|||||||
string::size_type dot = filename.find_last_of(".");
|
string::size_type dot = filename.find_last_of(".");
|
||||||
if (dot != string::npos) {
|
if (dot != string::npos) {
|
||||||
string suff = filename.substr(dot);
|
string suff = filename.substr(dot);
|
||||||
for (int i = 0; i < suff.length(); i++)
|
for (unsigned int i = 0; i < suff.length(); i++)
|
||||||
suff[i] = tolower(suff[i]);
|
suff[i] = tolower(suff[i]);
|
||||||
|
|
||||||
string mtype;
|
string mtype;
|
||||||
|
|||||||
@ -1,7 +1,9 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.2 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.3 2004-12-15 15:00:37 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include <sys/stat.h>
|
||||||
|
|
||||||
#include <strings.h>
|
#include <strings.h>
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
@ -14,25 +16,50 @@ static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.2 2004-12-14 17:54:16 dockes
|
|||||||
#include "rcldb.h"
|
#include "rcldb.h"
|
||||||
#include "readfile.h"
|
#include "readfile.h"
|
||||||
#include "indexer.h"
|
#include "indexer.h"
|
||||||
|
#include "csguess.h"
|
||||||
|
#include "transcode.h"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
|
|
||||||
Rcl::Doc* textPlainToDoc(RclConfig *conf, const string &fn,
|
bool textPlainToDoc(RclConfig *conf, const string &fn,
|
||||||
const string &mtype)
|
const string &mtype, Rcl::Doc &docout)
|
||||||
{
|
{
|
||||||
return 0;
|
string otext;
|
||||||
|
if (!file_to_string(fn, otext))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Try to guess charset, then convert to utf-8, and fill document fields
|
||||||
|
string charset;
|
||||||
|
if (conf->guesscharset) {
|
||||||
|
charset = csguess(otext, conf->defcharset);
|
||||||
|
} else
|
||||||
|
charset = conf->defcharset;
|
||||||
|
string utf8;
|
||||||
|
if (transcode(otext, charset, utf8, "UTF-8"))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
Rcl::Doc out;
|
||||||
|
out.origcharset = charset;
|
||||||
|
out.text = utf8;
|
||||||
|
docout = out;
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Map of mime types to internal interner functions. This could just as well
|
||||||
|
// be an if else if suite inside getMimeHandler(), but this is prettier ?
|
||||||
static map<string, MimeHandlerFunc> ihandlers;
|
static map<string, MimeHandlerFunc> ihandlers;
|
||||||
|
// Static object to get the map to be initialized at program start.
|
||||||
class IHandler_Init {
|
class IHandler_Init {
|
||||||
public:
|
public:
|
||||||
IHandler_Init() {
|
IHandler_Init() {
|
||||||
ihandlers["text/plain"] = textPlainToDoc;
|
ihandlers["text/plain"] = textPlainToDoc;
|
||||||
|
// Add new associations here when needed
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
static IHandler_Init ihandleriniter;
|
static IHandler_Init ihandleriniter;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return handler function for given mime type
|
* Return handler function for given mime type
|
||||||
*/
|
*/
|
||||||
@ -75,6 +102,9 @@ MimeHandlerFunc getMimeHandler(const std::string &mtype, ConfTree *mhandlers)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Bunch holder for data used while indexing a directory tree
|
||||||
|
*/
|
||||||
class DirIndexer {
|
class DirIndexer {
|
||||||
FsTreeWalker walker;
|
FsTreeWalker walker;
|
||||||
RclConfig *config;
|
RclConfig *config;
|
||||||
@ -95,23 +125,23 @@ class DirIndexer {
|
|||||||
|
|
||||||
void DirIndexer::index()
|
void DirIndexer::index()
|
||||||
{
|
{
|
||||||
#if 0
|
|
||||||
if (!db.open(dbdir, Rcl::Db::DbUpd)) {
|
if (!db.open(dbdir, Rcl::Db::DbUpd)) {
|
||||||
cerr << "Error opening database in " << dbdir << " for " <<
|
cerr << "Error opening database in " << dbdir << " for " <<
|
||||||
topdir << endl;
|
topdir << endl;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
walker.walk(topdir, indexfile, this);
|
walker.walk(topdir, indexfile, this);
|
||||||
#if 0
|
|
||||||
if (!db.close()) {
|
if (!db.close()) {
|
||||||
cerr << "Error closing database in " << dbdir << " for " <<
|
cerr << "Error closing database in " << dbdir << " for " <<
|
||||||
topdir << endl;
|
topdir << endl;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This function gets called for every file and directory found by the
|
||||||
|
* tree walker. Adjust parameters and index files if/when needed.
|
||||||
|
*/
|
||||||
FsTreeWalker::Status
|
FsTreeWalker::Status
|
||||||
indexfile(void *cdata, const std::string &fn, const struct stat *stp,
|
indexfile(void *cdata, const std::string &fn, const struct stat *stp,
|
||||||
FsTreeWalker::CbFlag flg)
|
FsTreeWalker::CbFlag flg)
|
||||||
@ -144,26 +174,25 @@ indexfile(void *cdata, const std::string &fn, const struct stat *stp,
|
|||||||
return FsTreeWalker::FtwOk;
|
return FsTreeWalker::FtwOk;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if file has already been indexed, and has changed since
|
if (!me->db.needUpdate(fn, stp))
|
||||||
// - Make path term,
|
return FsTreeWalker::FtwOk;
|
||||||
// - query db: postlist_begin->docid
|
|
||||||
// - fetch doc (get_document(docid)
|
|
||||||
// - check date field, maybe skip
|
|
||||||
|
|
||||||
// Turn file into a document. The document has fields for title, body
|
// Turn file into a document. The document has fields for title, body
|
||||||
// etc., all text converted to utf8
|
// etc., all text converted to utf8
|
||||||
Rcl::Doc *doc = fun(me->config, fn, mime);
|
Rcl::Doc doc;
|
||||||
|
if (!fun(me->config, fn, mime, doc))
|
||||||
|
return FsTreeWalker::FtwOk;
|
||||||
|
|
||||||
#if 0
|
|
||||||
// Set up xapian document, add postings and misc fields,
|
// Set up xapian document, add postings and misc fields,
|
||||||
// add to or update database.
|
// add to or update database.
|
||||||
dbadd(doc);
|
if (!me->db.add(fn, doc))
|
||||||
#endif
|
return FsTreeWalker::FtwError;
|
||||||
|
|
||||||
return FsTreeWalker::FtwOk;
|
return FsTreeWalker::FtwOk;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int main(int argc, const char **argv)
|
int main(int argc, const char **argv)
|
||||||
{
|
{
|
||||||
RclConfig *config = new RclConfig;
|
RclConfig *config = new RclConfig;
|
||||||
@ -180,7 +209,7 @@ int main(int argc, const char **argv)
|
|||||||
}
|
}
|
||||||
vector<string> tdl;
|
vector<string> tdl;
|
||||||
if (ConfTree::stringToStrings(topdirs, tdl)) {
|
if (ConfTree::stringToStrings(topdirs, tdl)) {
|
||||||
for (int i = 0; i < tdl.size(); i++) {
|
for (unsigned int i = 0; i < tdl.size(); i++) {
|
||||||
string topdir = tdl[i];
|
string topdir = tdl[i];
|
||||||
cout << topdir << endl;
|
cout << topdir << endl;
|
||||||
string dbdir;
|
string dbdir;
|
||||||
|
|||||||
@ -1,7 +1,9 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.1 2004-12-14 17:50:28 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.2 2004-12-15 15:00:36 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include <sys/stat.h>
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
@ -27,7 +29,7 @@ class Native {
|
|||||||
|
|
||||||
Rcl::Db::Db()
|
Rcl::Db::Db()
|
||||||
{
|
{
|
||||||
pdata = new Native;
|
// pdata = new Native;
|
||||||
}
|
}
|
||||||
|
|
||||||
Rcl::Db::~Db()
|
Rcl::Db::~Db()
|
||||||
@ -56,6 +58,7 @@ Rcl::Db::~Db()
|
|||||||
|
|
||||||
bool Rcl::Db::open(const string& dir, OpenMode mode)
|
bool Rcl::Db::open(const string& dir, OpenMode mode)
|
||||||
{
|
{
|
||||||
|
return true;
|
||||||
if (pdata == 0)
|
if (pdata == 0)
|
||||||
return false;
|
return false;
|
||||||
Native *ndb = (Native *)pdata;
|
Native *ndb = (Native *)pdata;
|
||||||
@ -89,8 +92,10 @@ bool Rcl::Db::open(const string& dir, OpenMode mode)
|
|||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Rcl::Db::close()
|
bool Rcl::Db::close()
|
||||||
{
|
{
|
||||||
|
return true;
|
||||||
if (pdata == 0)
|
if (pdata == 0)
|
||||||
return false;
|
return false;
|
||||||
Native *ndb = (Native *)pdata;
|
Native *ndb = (Native *)pdata;
|
||||||
@ -119,3 +124,21 @@ bool Rcl::Db::close()
|
|||||||
return true;
|
return true;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool Rcl::Db::add(const string &fn, const Rcl::Doc &doc)
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool Rcl::Db::needUpdate(const string &filename, const struct stat *stp)
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
// TOBEDONE: Check if file has already been indexed, and has changed since
|
||||||
|
// - Make path term,
|
||||||
|
// - query db: postlist_begin->docid
|
||||||
|
// - fetch doc (get_document(docid)
|
||||||
|
// - check date field, maybe skip
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -1,11 +1,25 @@
|
|||||||
#ifndef _DB_H_INCLUDED_
|
#ifndef _DB_H_INCLUDED_
|
||||||
#define _DB_H_INCLUDED_
|
#define _DB_H_INCLUDED_
|
||||||
/* @(#$Id: rcldb.h,v 1.1 2004-12-14 17:50:28 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: rcldb.h,v 1.2 2004-12-15 15:00:36 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
struct stat;
|
||||||
|
|
||||||
namespace Rcl {
|
namespace Rcl {
|
||||||
|
|
||||||
|
/**
 * Holder for document attributes and data
 */
class Doc {
 public:
    // Members are std::-qualified: this header only includes <string> and
    // spells the type std::string elsewhere, so it must not depend on a
    // using-declaration made by whoever includes it.
    std::string origcharset;
    std::string title;
    std::string abstract;
    std::string keywords;
    std::string text;
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Wrapper class for the native database.
|
* Wrapper class for the native database.
|
||||||
*/
|
*/
|
||||||
@ -17,15 +31,10 @@ class Db {
|
|||||||
enum OpenMode {DbRO, DbUpd, DbTrunc};
|
enum OpenMode {DbRO, DbUpd, DbTrunc};
|
||||||
bool open(const std::string &dbdir, OpenMode mode);
|
bool open(const std::string &dbdir, OpenMode mode);
|
||||||
bool close();
|
bool close();
|
||||||
|
bool add(const string &filename, const Doc &doc);
|
||||||
|
bool needUpdate(const string &filename, const struct stat *stp);
|
||||||
};
|
};
|
||||||
|
|
||||||
class Doc {
|
|
||||||
public:
|
|
||||||
string title;
|
|
||||||
string abstract;
|
|
||||||
string keywords;
|
|
||||||
string text;
|
|
||||||
};
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: transcode.cpp,v 1.1 2004-12-15 09:43:48 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: transcode.cpp,v 1.2 2004-12-15 15:00:37 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef TEST_TRANSCODE
|
#ifndef TEST_TRANSCODE
|
||||||
@ -22,6 +22,7 @@ bool transcode(const string &in, string &out, const string &icode,
|
|||||||
bool ret = false;
|
bool ret = false;
|
||||||
const int OBSIZ = 8192;
|
const int OBSIZ = 8192;
|
||||||
char obuf[OBSIZ], *op;
|
char obuf[OBSIZ], *op;
|
||||||
|
bool icopen = false;
|
||||||
|
|
||||||
out.erase();
|
out.erase();
|
||||||
size_t isiz = in.length();
|
size_t isiz = in.length();
|
||||||
@ -33,12 +34,13 @@ bool transcode(const string &in, string &out, const string &icode,
|
|||||||
+ " -> " + ocode;
|
+ " -> " + ocode;
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
icopen = true;
|
||||||
|
|
||||||
while (isiz > 0) {
|
while (isiz > 0) {
|
||||||
size_t osiz;
|
size_t osiz;
|
||||||
op = obuf;
|
op = obuf;
|
||||||
osiz = OBSIZ;
|
osiz = OBSIZ;
|
||||||
if(iconv(ic, &ip, &isiz, &op, &osiz) == -1 && errno != E2BIG){
|
if(iconv(ic, &ip, &isiz, &op, &osiz) == (size_t)-1 && errno != E2BIG){
|
||||||
out.erase();
|
out.erase();
|
||||||
out = string("iconv failed for ") + icode + " -> " + ocode +
|
out = string("iconv failed for ") + icode + " -> " + ocode +
|
||||||
" : " + strerror(errno);
|
" : " + strerror(errno);
|
||||||
@ -53,8 +55,11 @@ bool transcode(const string &in, string &out, const string &icode,
|
|||||||
+ " -> " + ocode;
|
+ " -> " + ocode;
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
icopen = false;
|
||||||
ret = true;
|
ret = true;
|
||||||
error:
|
error:
|
||||||
|
if (icopen)
|
||||||
|
iconv_close(ic);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -100,7 +105,7 @@ int main(int argc, char **argv)
|
|||||||
perror("Open/create output");
|
perror("Open/create output");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
if (write(fd, out.c_str(), out.length()) != out.length()) {
|
if (write(fd, out.c_str(), out.length()) != (int)out.length()) {
|
||||||
perror("write");
|
perror("write");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user