*** empty log message ***
This commit is contained in:
parent
ab473faa8c
commit
3c66f8e964
81
src/common/unacpp.cpp
Normal file
81
src/common/unacpp.cpp
Normal file
@ -0,0 +1,81 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: unacpp.cpp,v 1.1 2004-12-17 15:36:13 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
#ifndef TEST_UNACPP
|
||||
|
||||
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
using std::string;
|
||||
|
||||
#include "unacpp.h"
|
||||
#include "unac.h"
|
||||
|
||||
|
||||
bool unac_cpp(const std::string &in, std::string &out, const char *encoding)
|
||||
{
|
||||
char *cout = 0;
|
||||
size_t out_len;
|
||||
|
||||
if (unac_string(encoding, in.c_str(), in.length(), &cout, &out_len) < 0) {
|
||||
char cerrno[20];
|
||||
sprintf(cerrno, "%d", errno);
|
||||
out = string("unac_string failed, errno : ") + cerrno;
|
||||
return false;
|
||||
}
|
||||
out.assign(cout, out_len);
|
||||
return true;
|
||||
}
|
||||
|
||||
#else // not testing
|
||||
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <iostream>
|
||||
|
||||
using namespace std;
|
||||
|
||||
#include "unacpp.h"
|
||||
#include "readfile.h"
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
if (argc != 4) {
|
||||
cerr << "Usage: unacpp <encoding> <infile> <outfile>" << endl;
|
||||
exit(1);
|
||||
}
|
||||
const char *encoding = argv[1];
|
||||
string ifn = argv[2];
|
||||
const char *ofn = argv[3];
|
||||
|
||||
string odata;
|
||||
if (!file_to_string(ifn, odata)) {
|
||||
cerr << "file_to_string: " << odata << endl;
|
||||
exit(1);
|
||||
}
|
||||
string ndata;
|
||||
if (!unac_cpp(odata, ndata, encoding)) {
|
||||
cerr << "unac: " << ndata << endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
int fd = open(ofn, O_CREAT|O_EXCL|O_WRONLY, 0666);
|
||||
if (fd < 0) {
|
||||
cerr << "Open/Create " << ofn << " failed: " << strerror(errno)
|
||||
<< endl;
|
||||
exit(1);
|
||||
}
|
||||
if (write(fd, ndata.c_str(), ndata.length()) != (int)ndata.length()) {
|
||||
cerr << "Write(2) failed: " << strerror(errno) << endl;
|
||||
exit(1);
|
||||
}
|
||||
close(fd);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
#endif
|
||||
11
src/common/unacpp.h
Normal file
11
src/common/unacpp.h
Normal file
@ -0,0 +1,11 @@
|
||||
#ifndef _UNACPP_H_INCLUDED_
|
||||
#define _UNACPP_H_INCLUDED_
|
||||
/* @(#$Id: unacpp.h,v 1.1 2004-12-17 15:36:13 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include <string>
|
||||
|
||||
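// A small C++ wrapper for unac.c: calls unac_string() on 'in', interpreted
// in 'encoding', and stores the result in 'out'. Returns true on success.
// On failure, returns false and 'out' holds an error message.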
|
||||
extern bool unac_cpp(const std::string &in, std::string &out,
|
||||
const char *encoding = "UTF-8");
|
||||
|
||||
#endif /* _UNACPP_H_INCLUDED_ */
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.3 2004-12-17 13:01:01 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.4 2004-12-17 15:36:13 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
#include <sys/stat.h>
|
||||
@ -144,15 +144,17 @@ class wsData {
|
||||
bool splitCb(void *cdata, const std::string &term, int pos)
|
||||
{
|
||||
wsData *data = (wsData*)cdata;
|
||||
cerr << "splitCb: term " << term << endl;
|
||||
|
||||
// cerr << "splitCb: term " << term << endl;
|
||||
//string printable;
|
||||
//transcode(term, printable, "UTF-8", "ISO8859-1");
|
||||
//cerr << "Adding " << printable << endl;
|
||||
|
||||
try {
|
||||
// 1 is the value for wdfinc in index_text when called from omindex
|
||||
// TOBEDONE: check what this is used for
|
||||
data->curpos = pos;
|
||||
data->doc.add_posting(term, data->basepos + data->curpos, 1);
|
||||
string printable;
|
||||
transcode(term, printable, "UTF-8", "ISO8859-1");
|
||||
cerr << "Adding " << printable << endl;
|
||||
} catch (...) {
|
||||
cerr << "Error occurred during add_posting" << endl;
|
||||
return false;
|
||||
@ -202,41 +204,68 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &doc)
|
||||
splitter.text_to_words(doc.abstract);
|
||||
|
||||
newdocument.add_term("T" + doc.mimetype);
|
||||
newdocument.add_term("P" + fn);
|
||||
string pathterm = "P" + fn;
|
||||
newdocument.add_term(pathterm);
|
||||
|
||||
#if 0
|
||||
if (dupes == DUPE_replace) {
|
||||
if (1 /*dupes == DUPE_replace*/) {
|
||||
// If this document has already been indexed, update the existing
|
||||
// entry.
|
||||
try {
|
||||
Xapian::docid did = db.replace_document(urlterm, newdocument);
|
||||
Xapian::docid did = ndb->wdb.replace_document(pathterm,
|
||||
newdocument);
|
||||
#if 0
|
||||
if (did < updated.size()) {
|
||||
updated[did] = true;
|
||||
cout << "updated." << endl;
|
||||
//cout << "updated." << endl;
|
||||
} else {
|
||||
cout << "added." << endl;
|
||||
//cout << "added." << endl;
|
||||
}
|
||||
#endif
|
||||
} catch (...) {
|
||||
// FIXME: is this ever actually needed?
|
||||
db.add_document(newdocument);
|
||||
cout << "added (failed re-seek for duplicate)." << endl;
|
||||
ndb->wdb.add_document(newdocument);
|
||||
//cout << "added (failed re-seek for duplicate)." << endl;
|
||||
}
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
} else {
|
||||
try {
|
||||
ndb->wdb.add_document(newdocument);
|
||||
// cout << "added." << endl;
|
||||
} catch (...) {
|
||||
cerr << "Got exception while adding doc" << endl;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool Rcl::Db::needUpdate(const string &filename, const struct stat *stp)
|
||||
{
|
||||
if (pdata == 0)
|
||||
return false;
|
||||
Native *ndb = (Native *)pdata;
|
||||
|
||||
string pathterm = "P" + filename;
|
||||
if (!ndb->wdb.term_exists(pathterm))
|
||||
return true;
|
||||
Xapian::PostingIterator doc;
|
||||
try {
|
||||
Xapian::PostingIterator did = ndb->wdb.postlist_begin(pathterm);
|
||||
if (did == ndb->wdb.postlist_end(pathterm))
|
||||
return true;
|
||||
Xapian::Document doc = ndb->wdb.get_document(*did);
|
||||
string data = doc.get_data();
|
||||
//cout << "DOCUMENT EXISTS " << data << endl;
|
||||
const char *cp = strstr(data.c_str(), "mtime=");
|
||||
cp += 6;
|
||||
long mtime = atol(cp);
|
||||
if (mtime >= stp->st_mtime) {
|
||||
// cerr << "DOCUMENT UP TO DATE" << endl;
|
||||
return false;
|
||||
}
|
||||
} catch (...) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return true;
|
||||
// TOBEDONE: Check if file has already been indexed, and has changed since
|
||||
// - Make path term,
|
||||
// - query db: postlist_begin->docid
|
||||
// - fetch doc (get_document(docid)
|
||||
// - check date field, maybe skip
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user