warnings cleanup
This commit is contained in:
parent
91df3aef73
commit
a43ebc3716
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.1 2004-12-14 17:50:28 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.2 2004-12-15 15:00:36 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
#include <iostream>
|
||||
@ -10,10 +10,6 @@ static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.1 2004-12-14 17:50:28 dockes Ex
|
||||
|
||||
using namespace std;
|
||||
|
||||
ConfTree *getConfig()
|
||||
{
|
||||
}
|
||||
|
||||
RclConfig::RclConfig()
|
||||
: m_ok(false), conf(0), mimemap(0), mimeconf(0)
|
||||
{
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
#ifndef _RCLCONFIG_H_INCLUDED_
|
||||
#define _RCLCONFIG_H_INCLUDED_
|
||||
/* @(#$Id: rclconfig.h,v 1.1 2004-12-14 17:50:28 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: rclconfig.h,v 1.2 2004-12-15 15:00:36 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include "conftree.h"
|
||||
|
||||
@ -9,18 +9,30 @@ class RclConfig {
|
||||
string confdir; // Directory where the files are stored
|
||||
ConfTree *conf; // Parsed main configuration
|
||||
string keydir; // Current directory used for parameter fetches.
|
||||
string defcharset; // These are stored locally to avoid a config lookup
|
||||
string deflang; // each time.
|
||||
// Note: this will have to change if/when we support per directory maps
|
||||
ConfTree *mimemap;
|
||||
ConfTree *mimeconf;
|
||||
public:
|
||||
// Let some parameters be accessed directly
|
||||
string defcharset; // These are stored locally to avoid a config lookup
|
||||
string deflang; // each time.
|
||||
bool guesscharset;
|
||||
|
||||
RclConfig();
|
||||
~RclConfig() {delete conf;delete mimemap;delete mimeconf;}
|
||||
bool ok() {return m_ok;}
|
||||
ConfTree *getConfig() {return m_ok ? conf : 0;}
|
||||
ConfTree *getMimeMap() {return m_ok ? mimemap : 0;}
|
||||
ConfTree *getMimeConf() {return m_ok ? mimeconf : 0;}
|
||||
void setKeyDir(const string &dir)
|
||||
{
|
||||
keydir = dir;
|
||||
conf->get("defaultcharset", defcharset, keydir);
|
||||
conf->get("defaultlanguage", deflang, keydir);
|
||||
string str;
|
||||
conf->get("guesscharset", deflang, str);
|
||||
guesscharset = ConfTree::stringToBool(str);
|
||||
}
|
||||
bool getConfParam(const string &name, string &value)
|
||||
{
|
||||
if (conf == 0)
|
||||
@ -33,12 +45,6 @@ class RclConfig {
|
||||
const string &getDefLang() {
|
||||
return deflang;
|
||||
}
|
||||
void setKeyDir(const string &dir)
|
||||
{
|
||||
keydir = dir;
|
||||
conf->get("defaultcharset", defcharset, keydir);
|
||||
conf->get("defaultlanguage", deflang, keydir);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: textsplit.cpp,v 1.2 2004-12-14 17:49:11 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: textsplit.cpp,v 1.3 2004-12-15 15:00:36 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
#ifndef TEST_TEXTSPLIT
|
||||
|
||||
@ -35,24 +35,24 @@ static void setcharclasses()
|
||||
static int init = 0;
|
||||
if (init)
|
||||
return;
|
||||
int i;
|
||||
unsigned int i;
|
||||
memset(charclasses, LETTER, sizeof(charclasses));
|
||||
|
||||
char digits[] = "0123456789";
|
||||
for (i = 0; i < sizeof(digits); i++)
|
||||
charclasses[digits[i]] = DIGIT;
|
||||
charclasses[int(digits[i])] = DIGIT;
|
||||
|
||||
char blankspace[] = "\t\v\f ";
|
||||
for (i = 0; i < sizeof(blankspace); i++)
|
||||
charclasses[blankspace[i]] = SPACE;
|
||||
charclasses[int(blankspace[i])] = SPACE;
|
||||
|
||||
char seps[] = "!\"$%&()/<=>[\\]^{|}~:;,*";
|
||||
for (i = 0; i < sizeof(seps); i++)
|
||||
charclasses[seps[i]] = SPACE;
|
||||
charclasses[int(seps[i])] = SPACE;
|
||||
|
||||
char special[] = ".@+-,#'\n\r";
|
||||
for (i = 0; i < sizeof(special); i++)
|
||||
charclasses[special[i]] = special[i];
|
||||
charclasses[int(special[i])] = special[i];
|
||||
|
||||
init = 1;
|
||||
}
|
||||
@ -95,7 +95,7 @@ void TextSplit::text_to_words(const string &in)
|
||||
int wordpos = 0;
|
||||
int spanpos = 0;
|
||||
|
||||
for (int i = 0; i < in.length(); i++) {
|
||||
for (unsigned int i = 0; i < in.length(); i++) {
|
||||
int c = in[i];
|
||||
int cc = charclasses[c];
|
||||
switch (cc) {
|
||||
@ -114,7 +114,7 @@ void TextSplit::text_to_words(const string &in)
|
||||
case '-':
|
||||
case '+':
|
||||
if (word.length() == 0) {
|
||||
if (i < in.length() || charclasses[in[i+1]] == DIGIT) {
|
||||
if (i < in.length() || charclasses[int(in[i+1])] == DIGIT) {
|
||||
number = true;
|
||||
word += c;
|
||||
span += c;
|
||||
@ -155,7 +155,7 @@ void TextSplit::text_to_words(const string &in)
|
||||
case '#':
|
||||
// Keep it only at end of word...
|
||||
if (word.length() > 0 &&
|
||||
(i == in.length() -1 || charclasses[in[i+1]] == SPACE)) {
|
||||
(i == in.length() -1 || charclasses[int(in[i+1])] == SPACE)) {
|
||||
word += c;
|
||||
span += c;
|
||||
}
|
||||
|
||||
@ -1,18 +1,41 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: csguess.cpp,v 1.1 2004-12-15 08:21:05 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: csguess.cpp,v 1.2 2004-12-15 15:00:37 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
// This code was converted from estraier / qdbm / myconf.c
|
||||
|
||||
#ifndef TEST_CSGUESS
|
||||
|
||||
// This code was converted from estraier / qdbm / myconf.c:
|
||||
|
||||
/**************************************************************************
|
||||
* Copyright (C) 2000-2004 Mikio Hirabayashi
|
||||
*
|
||||
* This file is part of QDBM, Quick Database Manager.
|
||||
*
|
||||
* QDBM is free software; you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation; either version 2.1 of the License or any later
|
||||
* version. QDBM is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
* License for more details. You should have received a copy of the GNU
|
||||
* Lesser General Public License along with QDBM; if not, write to the Free
|
||||
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
* 02111-1307 USA.
|
||||
* *********************************************************/
|
||||
|
||||
#include <errno.h>
|
||||
|
||||
#include <iconv.h>
|
||||
#include "csguess.h"
|
||||
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
using std::string;
|
||||
|
||||
#include <iconv.h>
|
||||
|
||||
#include "csguess.h"
|
||||
|
||||
// The values from estraier were 32768, 256, 0.001
|
||||
const int ICONVCHECKSIZ = 4000;
|
||||
const int ICONVMISSMAX = 10;
|
||||
const int ICONVCHECKSIZ = 32768;
|
||||
const int ICONVMISSMAX = 256;
|
||||
const double ICONVALLWRAT = 0.001;
|
||||
|
||||
// Try to transcode and count errors (for charset guessing)
|
||||
@ -20,17 +43,18 @@ static int transcodeErrCnt(const char *ptr, int size,
|
||||
const char *icode, const char *ocode)
|
||||
{
|
||||
iconv_t ic;
|
||||
char obuf[ICONVCHECKSIZ], *wp, *rp;
|
||||
char obuf[2*ICONVCHECKSIZ], *wp, *rp;
|
||||
size_t isiz, osiz;
|
||||
int miss;
|
||||
isiz = size;
|
||||
if((ic = iconv_open(ocode, icode)) == (iconv_t)-1) return ICONVMISSMAX;
|
||||
if((ic = iconv_open(ocode, icode)) == (iconv_t)-1)
|
||||
return size;
|
||||
miss = 0;
|
||||
rp = (char *)ptr;
|
||||
while(isiz > 0){
|
||||
osiz = ICONVCHECKSIZ;
|
||||
osiz = 2*ICONVCHECKSIZ;
|
||||
wp = obuf;
|
||||
if(iconv(ic, (const char **)&rp, &isiz, &wp, &osiz) == -1){
|
||||
if(iconv(ic, (const char **)&rp, &isiz, &wp, &osiz) == (size_t)-1){
|
||||
if(errno == EILSEQ || errno == EINVAL){
|
||||
rp++;
|
||||
isiz--;
|
||||
@ -38,17 +62,20 @@ static int transcodeErrCnt(const char *ptr, int size,
|
||||
if(miss >= ICONVMISSMAX)
|
||||
break;
|
||||
} else {
|
||||
miss = size;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if(iconv_close(ic) == -1)
|
||||
return ICONVMISSMAX;
|
||||
return size;
|
||||
return miss;
|
||||
}
|
||||
|
||||
|
||||
string csguess(const string &in)
|
||||
// Try to guess character encoding. This could be optimized quite a
|
||||
// lot by avoiding the multiple passes on the document, to be done
|
||||
// after usefulness is demonstrated...
|
||||
string csguess(const string &in, const string &dflt)
|
||||
{
|
||||
const char *hypo;
|
||||
int i, miss;
|
||||
@ -74,9 +101,10 @@ string csguess(const string &in)
|
||||
return "UTF-16LE";
|
||||
}
|
||||
|
||||
// Look for iso-2022 specific escape sequences. As iso-2022 begins
|
||||
// in ascii, these succeed fast for a japanese text, but are quite
|
||||
// expensive for any other
|
||||
// Look for iso-2022 (rfc1468) specific escape sequences. As
|
||||
// iso-2022 begins in ascii, and typically soon escapes, these
|
||||
// succeed fast for a japanese text, but are quite expensive for
|
||||
// any other
|
||||
for (i = 0; i < size - 3; i++) {
|
||||
if (text[i] == 0x1b) {
|
||||
i++;
|
||||
@ -89,7 +117,7 @@ string csguess(const string &in)
|
||||
|
||||
// Try conversions from ascii and utf-8. These are unlikely to succeed
|
||||
// by mistake.
|
||||
if (transcodeErrCnt(text, size, "US-ASCII", "UTF-16BE") < 1)
|
||||
if (transcodeErrCnt(text, size, "US-ASCII", "UTF-16BE") < 1)
|
||||
return "US-ASCII";
|
||||
|
||||
if (transcodeErrCnt(text, size, "UTF-8", "UTF-16BE") < 1)
|
||||
@ -131,5 +159,35 @@ string csguess(const string &in)
|
||||
if (!hypo && miss / (double)size <= ICONVALLWRAT)
|
||||
hypo = "CP932";
|
||||
|
||||
return hypo ? hypo : "ISO-8859-1";
|
||||
return hypo ? hypo : dflt;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#include <errno.h>
|
||||
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
|
||||
using namespace std;
|
||||
|
||||
#include "readfile.h"
|
||||
#include "csguess.h"
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
if (argc != 2) {
|
||||
cerr << "Usage: trcsguess <filename> <default>" << endl;
|
||||
exit(1);
|
||||
}
|
||||
const string filename = argv[1];
|
||||
const string dflt = argv[2];
|
||||
string text;
|
||||
if (!file_to_string(filename, text)) {
|
||||
cerr << "Couldnt read file, errno " << errno << endl;
|
||||
exit(1);
|
||||
}
|
||||
cout << csguess(text, dflt) << endl;
|
||||
exit(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -1,12 +1,13 @@
|
||||
#ifndef _CSGUESS_H_INCLUDED_
|
||||
#define _CSGUESS_H_INCLUDED_
|
||||
/* @(#$Id: csguess.h,v 1.1 2004-12-15 08:21:05 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: csguess.h,v 1.2 2004-12-15 15:00:37 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include <string>
|
||||
|
||||
|
||||
// Try to guess the character set. This might guess unicode encodings, and
|
||||
// some asian charsets, but has no chance, for example, of discriminating
|
||||
// between the different iso8859-xx charsets.
|
||||
extern std::string csguess(const std::string &in);
|
||||
extern std::string csguess(const std::string &in, const std::string &dflt);
|
||||
|
||||
#endif /* _CSGUESS_H_INCLUDED_ */
|
||||
|
||||
@ -1,12 +1,12 @@
|
||||
#ifndef _INDEXER_H_INCLUDED_
|
||||
#define _INDEXER_H_INCLUDED_
|
||||
/* @(#$Id: indexer.h,v 1.1 2004-12-14 17:53:51 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: indexer.h,v 1.2 2004-12-15 15:00:37 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include "rclconfig.h"
|
||||
|
||||
/* Definition for document interner functions */
|
||||
typedef Rcl::Doc* (*MimeHandlerFunc)(RclConfig *, const string &,
|
||||
const string &);
|
||||
typedef bool (*MimeHandlerFunc)(RclConfig *, const string &,
|
||||
const string &, Rcl::Doc&);
|
||||
|
||||
|
||||
#if 0
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.2 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.3 2004-12-15 15:00:37 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
#include <ctype.h>
|
||||
@ -18,7 +18,7 @@ string mimetype(const string &filename, ConfTree *mtypes)
|
||||
string::size_type dot = filename.find_last_of(".");
|
||||
if (dot != string::npos) {
|
||||
string suff = filename.substr(dot);
|
||||
for (int i = 0; i < suff.length(); i++)
|
||||
for (unsigned int i = 0; i < suff.length(); i++)
|
||||
suff[i] = tolower(suff[i]);
|
||||
|
||||
string mtype;
|
||||
|
||||
@ -1,7 +1,9 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.2 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.3 2004-12-15 15:00:37 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include <strings.h>
|
||||
|
||||
#include <iostream>
|
||||
@ -14,25 +16,50 @@ static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.2 2004-12-14 17:54:16 dockes
|
||||
#include "rcldb.h"
|
||||
#include "readfile.h"
|
||||
#include "indexer.h"
|
||||
#include "csguess.h"
|
||||
#include "transcode.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
|
||||
Rcl::Doc* textPlainToDoc(RclConfig *conf, const string &fn,
|
||||
const string &mtype)
|
||||
bool textPlainToDoc(RclConfig *conf, const string &fn,
|
||||
const string &mtype, Rcl::Doc &docout)
|
||||
{
|
||||
return 0;
|
||||
string otext;
|
||||
if (!file_to_string(fn, otext))
|
||||
return false;
|
||||
|
||||
// Try to guess charset, then convert to utf-8, and fill document fields
|
||||
string charset;
|
||||
if (conf->guesscharset) {
|
||||
charset = csguess(otext, conf->defcharset);
|
||||
} else
|
||||
charset = conf->defcharset;
|
||||
string utf8;
|
||||
if (transcode(otext, charset, utf8, "UTF-8"))
|
||||
return 0;
|
||||
|
||||
Rcl::Doc out;
|
||||
out.origcharset = charset;
|
||||
out.text = utf8;
|
||||
docout = out;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Map of mime types to internal interner functions. This could just as well
|
||||
// be an if else if suite inside getMimeHandler(), but this is prettier ?
|
||||
static map<string, MimeHandlerFunc> ihandlers;
|
||||
// Static object to get the map to be initialized at program start.
|
||||
class IHandler_Init {
|
||||
public:
|
||||
IHandler_Init() {
|
||||
ihandlers["text/plain"] = textPlainToDoc;
|
||||
// Add new associations here when needed
|
||||
}
|
||||
};
|
||||
static IHandler_Init ihandleriniter;
|
||||
|
||||
|
||||
/**
|
||||
* Return handler function for given mime type
|
||||
*/
|
||||
@ -75,6 +102,9 @@ MimeHandlerFunc getMimeHandler(const std::string &mtype, ConfTree *mhandlers)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Bunch holder for data used while indexing a directory tree
|
||||
*/
|
||||
class DirIndexer {
|
||||
FsTreeWalker walker;
|
||||
RclConfig *config;
|
||||
@ -95,23 +125,23 @@ class DirIndexer {
|
||||
|
||||
void DirIndexer::index()
|
||||
{
|
||||
#if 0
|
||||
if (!db.open(dbdir, Rcl::Db::DbUpd)) {
|
||||
cerr << "Error opening database in " << dbdir << " for " <<
|
||||
topdir << endl;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
walker.walk(topdir, indexfile, this);
|
||||
#if 0
|
||||
if (!db.close()) {
|
||||
cerr << "Error closing database in " << dbdir << " for " <<
|
||||
topdir << endl;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* This function gets called for every file and directory found by the
|
||||
* tree walker. Adjust parameters and index files if/when needed.
|
||||
*/
|
||||
FsTreeWalker::Status
|
||||
indexfile(void *cdata, const std::string &fn, const struct stat *stp,
|
||||
FsTreeWalker::CbFlag flg)
|
||||
@ -144,26 +174,25 @@ indexfile(void *cdata, const std::string &fn, const struct stat *stp,
|
||||
return FsTreeWalker::FtwOk;
|
||||
}
|
||||
|
||||
// Check if file has already been indexed, and has changed since
|
||||
// - Make path term,
|
||||
// - query db: postlist_begin->docid
|
||||
// - fetch doc (get_document(docid)
|
||||
// - check date field, maybe skip
|
||||
if (!me->db.needUpdate(fn, stp))
|
||||
return FsTreeWalker::FtwOk;
|
||||
|
||||
// Turn file into a document. The document has fields for title, body
|
||||
// etc., all text converted to utf8
|
||||
Rcl::Doc *doc = fun(me->config, fn, mime);
|
||||
Rcl::Doc doc;
|
||||
if (!fun(me->config, fn, mime, doc))
|
||||
return FsTreeWalker::FtwOk;
|
||||
|
||||
#if 0
|
||||
// Set up xapian document, add postings and misc fields,
|
||||
// add to or update database.
|
||||
dbadd(doc);
|
||||
#endif
|
||||
if (!me->db.add(fn, doc))
|
||||
return FsTreeWalker::FtwError;
|
||||
|
||||
return FsTreeWalker::FtwOk;
|
||||
}
|
||||
|
||||
|
||||
|
||||
int main(int argc, const char **argv)
|
||||
{
|
||||
RclConfig *config = new RclConfig;
|
||||
@ -180,7 +209,7 @@ int main(int argc, const char **argv)
|
||||
}
|
||||
vector<string> tdl;
|
||||
if (ConfTree::stringToStrings(topdirs, tdl)) {
|
||||
for (int i = 0; i < tdl.size(); i++) {
|
||||
for (unsigned int i = 0; i < tdl.size(); i++) {
|
||||
string topdir = tdl[i];
|
||||
cout << topdir << endl;
|
||||
string dbdir;
|
||||
|
||||
@ -1,7 +1,9 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.1 2004-12-14 17:50:28 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.2 2004-12-15 15:00:36 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
@ -27,7 +29,7 @@ class Native {
|
||||
|
||||
// Construct a database wrapper with no native handle attached.
Rcl::Db::Db()
{
    // Allocation of the native object is currently disabled. Start from a
    // null handle so that the `pdata == 0` checks in open() and close()
    // test a defined value rather than an uninitialized pointer.
    pdata = 0;
    // pdata = new Native;
}
|
||||
|
||||
Rcl::Db::~Db()
|
||||
@ -56,6 +58,7 @@ Rcl::Db::~Db()
|
||||
|
||||
bool Rcl::Db::open(const string& dir, OpenMode mode)
|
||||
{
|
||||
return true;
|
||||
if (pdata == 0)
|
||||
return false;
|
||||
Native *ndb = (Native *)pdata;
|
||||
@ -89,8 +92,10 @@ bool Rcl::Db::open(const string& dir, OpenMode mode)
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool Rcl::Db::close()
|
||||
{
|
||||
return true;
|
||||
if (pdata == 0)
|
||||
return false;
|
||||
Native *ndb = (Native *)pdata;
|
||||
@ -119,3 +124,21 @@ bool Rcl::Db::close()
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Add the document built from file `fn` to the database.
// Currently a stub: both arguments are ignored and true is always returned.
bool Rcl::Db::add(const string &fn, const Rcl::Doc &doc)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Tell whether a file has to be (re)indexed.
 *
 * Placeholder implementation: both arguments are ignored and every file is
 * reported as needing an update.
 *
 * TOBEDONE: check if the file has already been indexed, and has changed
 * since:
 *  - make path term,
 *  - query db: postlist_begin->docid
 *  - fetch doc (get_document(docid))
 *  - check date field, maybe skip
 */
bool Rcl::Db::needUpdate(const string &filename, const struct stat *stp)
{
    return true;
}
|
||||
|
||||
|
||||
|
||||
@ -1,11 +1,25 @@
|
||||
#ifndef _DB_H_INCLUDED_
|
||||
#define _DB_H_INCLUDED_
|
||||
/* @(#$Id: rcldb.h,v 1.1 2004-12-14 17:50:28 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: rcldb.h,v 1.2 2004-12-15 15:00:36 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include <string>
|
||||
|
||||
struct stat;
|
||||
|
||||
namespace Rcl {
|
||||
|
||||
/**
|
||||
* Holder for document attributes and data
|
||||
*/
|
||||
// Plain value holder for one document's attributes and data. Members are
// spelled std::string so this header does not depend on the including file
// having already brought `string` into scope.
class Doc {
 public:
    std::string origcharset; // charset the text was transcoded from
    std::string title;
    std::string abstract;
    std::string keywords;
    std::string text;        // document text
};
|
||||
|
||||
/**
|
||||
* Wrapper class for the native database.
|
||||
*/
|
||||
@ -17,15 +31,10 @@ class Db {
|
||||
enum OpenMode {DbRO, DbUpd, DbTrunc};
|
||||
bool open(const std::string &dbdir, OpenMode mode);
|
||||
bool close();
|
||||
bool add(const string &filename, const Doc &doc);
|
||||
bool needUpdate(const string &filename, const struct stat *stp);
|
||||
};
|
||||
|
||||
class Doc {
|
||||
public:
|
||||
string title;
|
||||
string abstract;
|
||||
string keywords;
|
||||
string text;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: transcode.cpp,v 1.1 2004-12-15 09:43:48 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: transcode.cpp,v 1.2 2004-12-15 15:00:37 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
#ifndef TEST_TRANSCODE
|
||||
@ -22,6 +22,7 @@ bool transcode(const string &in, string &out, const string &icode,
|
||||
bool ret = false;
|
||||
const int OBSIZ = 8192;
|
||||
char obuf[OBSIZ], *op;
|
||||
bool icopen = false;
|
||||
|
||||
out.erase();
|
||||
size_t isiz = in.length();
|
||||
@ -33,12 +34,13 @@ bool transcode(const string &in, string &out, const string &icode,
|
||||
+ " -> " + ocode;
|
||||
goto error;
|
||||
}
|
||||
|
||||
icopen = true;
|
||||
|
||||
while (isiz > 0) {
|
||||
size_t osiz;
|
||||
op = obuf;
|
||||
osiz = OBSIZ;
|
||||
if(iconv(ic, &ip, &isiz, &op, &osiz) == -1 && errno != E2BIG){
|
||||
if(iconv(ic, &ip, &isiz, &op, &osiz) == (size_t)-1 && errno != E2BIG){
|
||||
out.erase();
|
||||
out = string("iconv failed for ") + icode + " -> " + ocode +
|
||||
" : " + strerror(errno);
|
||||
@ -53,8 +55,11 @@ bool transcode(const string &in, string &out, const string &icode,
|
||||
+ " -> " + ocode;
|
||||
goto error;
|
||||
}
|
||||
icopen = false;
|
||||
ret = true;
|
||||
error:
|
||||
if (icopen)
|
||||
iconv_close(ic);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -100,7 +105,7 @@ int main(int argc, char **argv)
|
||||
perror("Open/create output");
|
||||
exit(1);
|
||||
}
|
||||
if (write(fd, out.c_str(), out.length()) != out.length()) {
|
||||
if (write(fd, out.c_str(), out.length()) != (int)out.length()) {
|
||||
perror("write");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user