Replaced path|ipath with a unique document id (udi) in the rcldb interface. Still depends on the udi structure for parent/child relationships
This commit is contained in:
parent
1dd66b5b1d
commit
3109a33f4a
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.66 2008-07-28 08:42:52 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.67 2008-07-28 12:24:15 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -46,6 +46,7 @@ static char rcsid[] = "@(#$Id: indexer.cpp,v 1.66 2008-07-28 08:42:52 dockes Exp
|
|||||||
#include "internfile.h"
|
#include "internfile.h"
|
||||||
#include "smallut.h"
|
#include "smallut.h"
|
||||||
#include "wipedir.h"
|
#include "wipedir.h"
|
||||||
|
#include "fileudi.h"
|
||||||
|
|
||||||
#ifdef RCL_USE_ASPELL
|
#ifdef RCL_USE_ASPELL
|
||||||
#include "rclaspell.h"
|
#include "rclaspell.h"
|
||||||
@ -335,7 +336,9 @@ bool DbIndexer::purgeFiles(const list<string> &filenames)
|
|||||||
|
|
||||||
list<string>::const_iterator it;
|
list<string>::const_iterator it;
|
||||||
for (it = filenames.begin(); it != filenames.end(); it++) {
|
for (it = filenames.begin(); it != filenames.end(); it++) {
|
||||||
if (!m_db.purgeFile(*it)) {
|
string udi;
|
||||||
|
make_udi(*it, "", udi);
|
||||||
|
if (!m_db.purgeFile(udi)) {
|
||||||
LOGERR(("DbIndexer::purgeFiles: Database error\n"));
|
LOGERR(("DbIndexer::purgeFiles: Database error\n"));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -390,7 +393,9 @@ DbIndexer::processone(const std::string &fn, const struct stat *stp,
|
|||||||
// Document signature
|
// Document signature
|
||||||
sprintf(cbuf, "%ld%ld", (long)stp->st_size, (long)stp->st_mtime);
|
sprintf(cbuf, "%ld%ld", (long)stp->st_size, (long)stp->st_mtime);
|
||||||
string sig = cbuf;
|
string sig = cbuf;
|
||||||
if (!m_db.needUpdate(fn, sig)) {
|
string udi;
|
||||||
|
make_udi(fn, "", udi);
|
||||||
|
if (!m_db.needUpdate(udi, sig)) {
|
||||||
LOGDEB(("processone: up to date: %s\n", fn.c_str()));
|
LOGDEB(("processone: up to date: %s\n", fn.c_str()));
|
||||||
if (m_updater) {
|
if (m_updater) {
|
||||||
m_updater->status.fn = fn;
|
m_updater->status.fn = fn;
|
||||||
@ -463,6 +468,7 @@ DbIndexer::processone(const std::string &fn, const struct stat *stp,
|
|||||||
hadNullIpath = true;
|
hadNullIpath = true;
|
||||||
else
|
else
|
||||||
doc.ipath = ipath;
|
doc.ipath = ipath;
|
||||||
|
doc.url = string("file://") + fn;
|
||||||
|
|
||||||
// Note that the filter may have its own idea of the file name
|
// Note that the filter may have its own idea of the file name
|
||||||
// (ie: mail attachment)
|
// (ie: mail attachment)
|
||||||
@ -479,7 +485,9 @@ DbIndexer::processone(const std::string &fn, const struct stat *stp,
|
|||||||
doc.sig = cbuf;
|
doc.sig = cbuf;
|
||||||
|
|
||||||
// Add document to database
|
// Add document to database
|
||||||
if (!m_db.add(fn, doc))
|
string udi;
|
||||||
|
make_udi(fn, ipath, udi);
|
||||||
|
if (!m_db.add(udi, doc))
|
||||||
return FsTreeWalker::FtwError;
|
return FsTreeWalker::FtwError;
|
||||||
|
|
||||||
// Tell what we are doing and check for interrupt request
|
// Tell what we are doing and check for interrupt request
|
||||||
@ -504,7 +512,7 @@ DbIndexer::processone(const std::string &fn, const struct stat *stp,
|
|||||||
fileDoc.fmtime = ascdate;
|
fileDoc.fmtime = ascdate;
|
||||||
fileDoc.utf8fn = utf8fn;
|
fileDoc.utf8fn = utf8fn;
|
||||||
fileDoc.mimetype = interner.getMimetype();
|
fileDoc.mimetype = interner.getMimetype();
|
||||||
|
fileDoc.url = string("file://") + fn;
|
||||||
|
|
||||||
char cbuf[100];
|
char cbuf[100];
|
||||||
sprintf(cbuf, "%ld", (long)stp->st_size);
|
sprintf(cbuf, "%ld", (long)stp->st_size);
|
||||||
@ -512,7 +520,9 @@ DbIndexer::processone(const std::string &fn, const struct stat *stp,
|
|||||||
// Document signature for up to date checks.
|
// Document signature for up to date checks.
|
||||||
sprintf(cbuf, "%ld%ld", (long)stp->st_size, (long)stp->st_mtime);
|
sprintf(cbuf, "%ld%ld", (long)stp->st_size, (long)stp->st_mtime);
|
||||||
fileDoc.sig = cbuf;
|
fileDoc.sig = cbuf;
|
||||||
if (!m_db.add(fn, fileDoc))
|
string udi;
|
||||||
|
make_udi(fn, "", udi);
|
||||||
|
if (!m_db.add(udi, fileDoc))
|
||||||
return FsTreeWalker::FtwError;
|
return FsTreeWalker::FtwError;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -8,8 +8,8 @@ LIBS = librcl.a
|
|||||||
|
|
||||||
all: $(LIBS)
|
all: $(LIBS)
|
||||||
|
|
||||||
OBJS = rclaspell.o rclconfig.o rclinit.o textsplit.o unacpp.o csguess.o indexer.o mimetype.o htmlparse.o myhtmlparse.o mimehandler.o internfile.o mh_exec.o mh_html.o mh_mail.o mh_mbox.o mh_text.o docseq.o docseqdb.o docseqhist.o history.o recollq.o sortseq.o wasastringtoquery.o wasatorcl.o pathhash.o rcldb.o rclquery.o searchdata.o stemdb.o stoplist.o base64.o conftree.o copyfile.o debuglog.o execmd.o fstreewalk.o idfile.o md5.o mimeparse.o pathut.o readfile.o smallut.o transcode.o wipedir.o x11mon.o
|
OBJS = rclaspell.o rclconfig.o rclinit.o textsplit.o unacpp.o csguess.o indexer.o mimetype.o htmlparse.o myhtmlparse.o mimehandler.o internfile.o mh_exec.o mh_html.o mh_mail.o mh_mbox.o mh_text.o docseq.o docseqdb.o docseqhist.o history.o recollq.o sortseq.o wasastringtoquery.o wasatorcl.o pathhash.o rcldb.o rclquery.o searchdata.o stemdb.o stoplist.o base64.o conftree.o copyfile.o debuglog.o execmd.o fstreewalk.o idfile.o fileudi.o md5.o mimeparse.o pathut.o readfile.o smallut.o transcode.o wipedir.o x11mon.o
|
||||||
DEPS = rclaspell.dep.stamp rclconfig.dep.stamp rclinit.dep.stamp textsplit.dep.stamp unacpp.dep.stamp csguess.dep.stamp indexer.dep.stamp mimetype.dep.stamp htmlparse.dep.stamp myhtmlparse.dep.stamp mimehandler.dep.stamp internfile.dep.stamp mh_exec.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp mh_mbox.dep.stamp mh_text.dep.stamp docseq.dep.stamp docseqdb.dep.stamp docseqhist.dep.stamp history.dep.stamp recollq.dep.stamp sortseq.dep.stamp wasastringtoquery.dep.stamp wasatorcl.dep.stamp pathhash.dep.stamp rcldb.dep.stamp rclquery.dep.stamp searchdata.dep.stamp stemdb.dep.stamp stoplist.dep.stamp base64.dep.stamp conftree.dep.stamp copyfile.dep.stamp debuglog.dep.stamp execmd.dep.stamp fstreewalk.dep.stamp idfile.dep.stamp md5.dep.stamp mimeparse.dep.stamp pathut.dep.stamp readfile.dep.stamp smallut.dep.stamp transcode.dep.stamp wipedir.dep.stamp x11mon.dep.stamp
|
DEPS = rclaspell.dep.stamp rclconfig.dep.stamp rclinit.dep.stamp textsplit.dep.stamp unacpp.dep.stamp csguess.dep.stamp indexer.dep.stamp mimetype.dep.stamp htmlparse.dep.stamp myhtmlparse.dep.stamp mimehandler.dep.stamp internfile.dep.stamp mh_exec.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp mh_mbox.dep.stamp mh_text.dep.stamp docseq.dep.stamp docseqdb.dep.stamp docseqhist.dep.stamp history.dep.stamp recollq.dep.stamp sortseq.dep.stamp wasastringtoquery.dep.stamp wasatorcl.dep.stamp pathhash.dep.stamp rcldb.dep.stamp rclquery.dep.stamp searchdata.dep.stamp stemdb.dep.stamp stoplist.dep.stamp base64.dep.stamp conftree.dep.stamp copyfile.dep.stamp debuglog.dep.stamp execmd.dep.stamp fstreewalk.dep.stamp idfile.dep.stamp fileudi.dep.stamp md5.dep.stamp mimeparse.dep.stamp pathut.dep.stamp readfile.dep.stamp smallut.dep.stamp transcode.dep.stamp wipedir.dep.stamp x11mon.dep.stamp
|
||||||
|
|
||||||
librcl.a : $(DEPS) $(OBJS) unac.o
|
librcl.a : $(DEPS) $(OBJS) unac.o
|
||||||
ar ru librcl.a $(OBJS) unac.o
|
ar ru librcl.a $(OBJS) unac.o
|
||||||
@ -93,6 +93,8 @@ fstreewalk.o : ../utils/fstreewalk.cpp
|
|||||||
$(CXX) $(ALL_CXXFLAGS) -c ../utils/fstreewalk.cpp
|
$(CXX) $(ALL_CXXFLAGS) -c ../utils/fstreewalk.cpp
|
||||||
idfile.o : ../utils/idfile.cpp
|
idfile.o : ../utils/idfile.cpp
|
||||||
$(CXX) $(ALL_CXXFLAGS) -c ../utils/idfile.cpp
|
$(CXX) $(ALL_CXXFLAGS) -c ../utils/idfile.cpp
|
||||||
|
fileudi.o : ../utils/fileudi.cpp
|
||||||
|
$(CXX) $(ALL_CXXFLAGS) -c ../utils/fileudi.cpp
|
||||||
md5.o : ../utils/md5.cpp
|
md5.o : ../utils/md5.cpp
|
||||||
$(CXX) $(ALL_CXXFLAGS) -c ../utils/md5.cpp
|
$(CXX) $(ALL_CXXFLAGS) -c ../utils/md5.cpp
|
||||||
mimeparse.o : ../utils/mimeparse.cpp
|
mimeparse.o : ../utils/mimeparse.cpp
|
||||||
@ -229,6 +231,9 @@ fstreewalk.dep.stamp : ../utils/fstreewalk.cpp
|
|||||||
idfile.dep.stamp : ../utils/idfile.cpp
|
idfile.dep.stamp : ../utils/idfile.cpp
|
||||||
$(CXX) -M $(ALL_CXXFLAGS) ../utils/idfile.cpp > idfile.dep
|
$(CXX) -M $(ALL_CXXFLAGS) ../utils/idfile.cpp > idfile.dep
|
||||||
touch idfile.dep.stamp
|
touch idfile.dep.stamp
|
||||||
|
fileudi.dep.stamp : ../utils/fileudi.cpp
|
||||||
|
$(CXX) -M $(ALL_CXXFLAGS) ../utils/fileudi.cpp > fileudi.dep
|
||||||
|
touch fileudi.dep.stamp
|
||||||
md5.dep.stamp : ../utils/md5.cpp
|
md5.dep.stamp : ../utils/md5.cpp
|
||||||
$(CXX) -M $(ALL_CXXFLAGS) ../utils/md5.cpp > md5.dep
|
$(CXX) -M $(ALL_CXXFLAGS) ../utils/md5.cpp > md5.dep
|
||||||
touch md5.dep.stamp
|
touch md5.dep.stamp
|
||||||
@ -291,6 +296,7 @@ include debuglog.dep
|
|||||||
include execmd.dep
|
include execmd.dep
|
||||||
include fstreewalk.dep
|
include fstreewalk.dep
|
||||||
include idfile.dep
|
include idfile.dep
|
||||||
|
include fileudi.dep
|
||||||
include md5.dep
|
include md5.dep
|
||||||
include mimeparse.dep
|
include mimeparse.dep
|
||||||
include pathut.dep
|
include pathut.dep
|
||||||
|
|||||||
@ -42,6 +42,7 @@ ${depth}/utils/debuglog.cpp \
|
|||||||
${depth}/utils/execmd.cpp \
|
${depth}/utils/execmd.cpp \
|
||||||
${depth}/utils/fstreewalk.cpp \
|
${depth}/utils/fstreewalk.cpp \
|
||||||
${depth}/utils/idfile.cpp \
|
${depth}/utils/idfile.cpp \
|
||||||
|
${depth}/utils/fileudi.cpp \
|
||||||
${depth}/utils/md5.cpp \
|
${depth}/utils/md5.cpp \
|
||||||
${depth}/utils/mimeparse.cpp \
|
${depth}/utils/mimeparse.cpp \
|
||||||
${depth}/utils/pathut.cpp \
|
${depth}/utils/pathut.cpp \
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: docseqhist.cpp,v 1.2 2007-12-13 06:58:21 dockes Exp $ (C) 2005 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: docseqhist.cpp,v 1.3 2008-07-28 12:24:15 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -23,6 +23,7 @@ static char rcsid[] = "@(#$Id: docseqhist.cpp,v 1.2 2007-12-13 06:58:21 dockes E
|
|||||||
|
|
||||||
#include "docseqhist.h"
|
#include "docseqhist.h"
|
||||||
#include "rcldb.h"
|
#include "rcldb.h"
|
||||||
|
#include "fileudi.h"
|
||||||
|
|
||||||
bool DocSequenceHistory::getDoc(int num, Rcl::Doc &doc, int *percent,
|
bool DocSequenceHistory::getDoc(int num, Rcl::Doc &doc, int *percent,
|
||||||
string *sh)
|
string *sh)
|
||||||
@ -58,7 +59,14 @@ bool DocSequenceHistory::getDoc(int num, Rcl::Doc &doc, int *percent,
|
|||||||
} else
|
} else
|
||||||
sh->erase();
|
sh->erase();
|
||||||
}
|
}
|
||||||
return m_db->getDoc(m_it->fn, m_it->ipath, doc, percent);
|
string udi;
|
||||||
|
make_udi(m_it->fn, m_it->ipath, udi);
|
||||||
|
bool ret = m_db->getDoc(udi, doc, percent);
|
||||||
|
if (!ret) {
|
||||||
|
doc.url = string("file://") + m_it->fn;
|
||||||
|
doc.ipath = m_it->ipath;
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
int DocSequenceHistory::getResCnt()
|
int DocSequenceHistory::getResCnt()
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: pathhash.cpp,v 1.5 2007-12-13 06:58:21 dockes Exp $ (C) 2005 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: pathhash.cpp,v 1.6 2008-07-28 12:24:15 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -30,6 +30,7 @@ using std::string;
|
|||||||
namespace Rcl {
|
namespace Rcl {
|
||||||
#endif /* NO_NAMESPACES */
|
#endif /* NO_NAMESPACES */
|
||||||
|
|
||||||
|
// Debug only
|
||||||
#ifdef PATHHASH_HEX
|
#ifdef PATHHASH_HEX
|
||||||
static void md5hexprint(const unsigned char hash[16], string &out)
|
static void md5hexprint(const unsigned char hash[16], string &out)
|
||||||
{
|
{
|
||||||
@ -69,7 +70,7 @@ void pathHash(const std::string &path, std::string &phash, unsigned int maxlen)
|
|||||||
path.length() - (maxlen - HASHLEN));
|
path.length() - (maxlen - HASHLEN));
|
||||||
MD5Final(chash, &ctx);
|
MD5Final(chash, &ctx);
|
||||||
|
|
||||||
#if 0
|
#ifdef PATHHASH_HEX
|
||||||
string hex;
|
string hex;
|
||||||
md5hexprint(chash, hex);
|
md5hexprint(chash, hex);
|
||||||
printf("hex [%s]\n", hex.c_str());
|
printf("hex [%s]\n", hex.c_str());
|
||||||
@ -83,7 +84,6 @@ void pathHash(const std::string &path, std::string &phash, unsigned int maxlen)
|
|||||||
// don't need as this won't ever be decoded. Resulting length is 22
|
// don't need as this won't ever be decoded. Resulting length is 22
|
||||||
hash.resize(hash.length() - 2);
|
hash.resize(hash.length() - 2);
|
||||||
|
|
||||||
|
|
||||||
// Truncate path and append hash
|
// Truncate path and append hash
|
||||||
phash = path.substr(0, maxlen - HASHLEN) + hash;
|
phash = path.substr(0, maxlen - HASHLEN) + hash;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.135 2008-07-28 08:42:52 dockes Exp $ (C) 2004 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.136 2008-07-28 12:24:15 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -80,32 +80,20 @@ namespace Rcl {
|
|||||||
// found in document)
|
// found in document)
|
||||||
const static string rclSyntAbs("?!#@");
|
const static string rclSyntAbs("?!#@");
|
||||||
|
|
||||||
// Maximum length for path terms stored for each document. We truncate
|
|
||||||
// longer paths and uniquize them by appending a hashed value. This
|
|
||||||
// is done to avoid xapian max term length limitations, not
|
|
||||||
// to gain space (we gain very little even with very short maxlens
|
|
||||||
// like 30) Note that Q terms add the ipath to this, and that the
|
|
||||||
// xapian max key length seems to be around 250.
|
|
||||||
// The value for PATHHASHLEN includes the length of the hash part.
|
|
||||||
#define PATHHASHLEN 150
|
|
||||||
|
|
||||||
// Compute the unique term used to link documents to their file-system source:
|
// Compute the unique term used to link documents to their file-system source:
|
||||||
// Hashed path + possible internal path
|
// Hashed path + possible internal path
|
||||||
static inline string make_uniterm(const string& fn, const string& ipath)
|
static inline string make_uniterm(const string& udi)
|
||||||
{
|
{
|
||||||
string hash;
|
string uniterm("Q");
|
||||||
pathHash(fn, hash, PATHHASHLEN);
|
uniterm.append(udi);
|
||||||
string s("Q");
|
return uniterm;
|
||||||
s.append(hash);
|
|
||||||
s.append("|");
|
|
||||||
s.append(ipath);
|
|
||||||
return s;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* See comment in class declaration: return all subdocuments of a
|
/* See comment in class declaration: return all subdocuments of a
|
||||||
* document given by its unique path id */
|
* document given by its unique path id */
|
||||||
bool Db::Native::subDocs(const string &uniterm, vector<Xapian::docid>& docids)
|
bool Db::Native::subDocs(const string &uniterm, vector<Xapian::docid>& docids)
|
||||||
{
|
{
|
||||||
|
LOGDEB2(("subDocs: [%s]\n", uniterm.c_str()));
|
||||||
docids.clear();
|
docids.clear();
|
||||||
|
|
||||||
string ermsg;
|
string ermsg;
|
||||||
@ -116,11 +104,11 @@ bool Db::Native::subDocs(const string &uniterm, vector<Xapian::docid>& docids)
|
|||||||
// Don't return the doc itself:
|
// Don't return the doc itself:
|
||||||
it++;
|
it++;
|
||||||
for (; it != db.allterms_end(); it++) {
|
for (; it != db.allterms_end(); it++) {
|
||||||
LOGDEB2(("Testing [%s]\n", (*it).c_str()));
|
LOGDEB2(("subDocs: testing [%s]\n", (*it).c_str()));
|
||||||
// If current term does not begin with uniterm or has
|
// If current term does not begin with uniterm or has
|
||||||
// another |, not the same file
|
// another |, not the same file
|
||||||
if ((*it).find(uniterm) != 0 ||
|
if ((*it).find(uniterm) != 0 ||
|
||||||
(*it).find_last_of("|") != uniterm.length() - 1)
|
(*it).find_last_of("|") != uniterm.length()-1)
|
||||||
break;
|
break;
|
||||||
docids.push_back(*(db.postlist_begin(*it)));
|
docids.push_back(*(db.postlist_begin(*it)));
|
||||||
}
|
}
|
||||||
@ -812,9 +800,9 @@ static const int MB = 1024 * 1024;
|
|||||||
// the title abstract and body and add special terms for file name,
|
// the title abstract and body and add special terms for file name,
|
||||||
// date, mime type ... , create the document data record (more
|
// date, mime type ... , create the document data record (more
|
||||||
// metadata), and update database
|
// metadata), and update database
|
||||||
bool Db::add(const string &fn, const Doc &idoc)
|
bool Db::add(const string &udi, const Doc &idoc)
|
||||||
{
|
{
|
||||||
LOGDEB1(("Db::add: fn %s\n", fn.c_str()));
|
LOGDEB1(("Db::add: udi %s\n", udi.c_str()));
|
||||||
if (m_ndb == 0)
|
if (m_ndb == 0)
|
||||||
return false;
|
return false;
|
||||||
static int first = 1;
|
static int first = 1;
|
||||||
@ -937,7 +925,7 @@ bool Db::add(const string &fn, const Doc &idoc)
|
|||||||
|
|
||||||
// Pathname/ipath unique term: this is used for file existence/uptodate
|
// Pathname/ipath unique term: this is used for file existence/uptodate
|
||||||
// checks, and unique id for the replace_document() call.
|
// checks, and unique id for the replace_document() call.
|
||||||
string uniterm = make_uniterm(fn, doc.ipath);
|
string uniterm = make_uniterm(udi);
|
||||||
newdocument.add_term(uniterm);
|
newdocument.add_term(uniterm);
|
||||||
|
|
||||||
// Dates etc...
|
// Dates etc...
|
||||||
@ -957,7 +945,7 @@ bool Db::add(const string &fn, const Doc &idoc)
|
|||||||
// - sample
|
// - sample
|
||||||
// - caption (title limited to 100 chars)
|
// - caption (title limited to 100 chars)
|
||||||
// - mime type
|
// - mime type
|
||||||
string record = "url=file://" + fn;
|
string record = "url=" + doc.url;
|
||||||
record += "\nmtype=" + doc.mimetype;
|
record += "\nmtype=" + doc.mimetype;
|
||||||
record += "\nfmtime=" + doc.fmtime;
|
record += "\nfmtime=" + doc.fmtime;
|
||||||
if (!doc.dmtime.empty()) {
|
if (!doc.dmtime.empty()) {
|
||||||
@ -992,7 +980,7 @@ bool Db::add(const string &fn, const Doc &idoc)
|
|||||||
LOGDEB1(("Newdocument data: %s\n", record.c_str()));
|
LOGDEB1(("Newdocument data: %s\n", record.c_str()));
|
||||||
newdocument.set_data(record);
|
newdocument.set_data(record);
|
||||||
|
|
||||||
const char *fnc = fn.c_str();
|
const char *fnc = udi.c_str();
|
||||||
string ermsg;
|
string ermsg;
|
||||||
|
|
||||||
// Add db entry or update existing entry:
|
// Add db entry or update existing entry:
|
||||||
@ -1001,11 +989,9 @@ bool Db::add(const string &fn, const Doc &idoc)
|
|||||||
m_ndb->wdb.replace_document(uniterm, newdocument);
|
m_ndb->wdb.replace_document(uniterm, newdocument);
|
||||||
if (did < updated.size()) {
|
if (did < updated.size()) {
|
||||||
updated[did] = true;
|
updated[did] = true;
|
||||||
LOGDEB(("Db::add: docid %d updated [%s , %s]\n", did, fnc,
|
LOGDEB(("Db::add: docid %d updated [%s]\n", did, fnc));
|
||||||
doc.ipath.c_str()));
|
|
||||||
} else {
|
} else {
|
||||||
LOGDEB(("Db::add: docid %d added [%s , %s]\n", did, fnc,
|
LOGDEB(("Db::add: docid %d added [%s]\n", did, fnc));
|
||||||
doc.ipath.c_str()));
|
|
||||||
}
|
}
|
||||||
} XCATCHERROR(ermsg);
|
} XCATCHERROR(ermsg);
|
||||||
|
|
||||||
@ -1044,13 +1030,13 @@ bool Db::add(const string &fn, const Doc &idoc)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test if given filename has changed since last indexed:
|
// Test if doc given by udi has changed since last indexed (test sigs)
|
||||||
bool Db::needUpdate(const string &filename, const string& sig)
|
bool Db::needUpdate(const string &udi, const string& sig)
|
||||||
{
|
{
|
||||||
if (m_ndb == 0)
|
if (m_ndb == 0)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
string uniterm = make_uniterm(filename, string());
|
string uniterm = make_uniterm(udi);
|
||||||
string ermsg;
|
string ermsg;
|
||||||
|
|
||||||
// We look up the document indexed by the uniterm. This is either
|
// We look up the document indexed by the uniterm. This is either
|
||||||
@ -1086,8 +1072,8 @@ bool Db::needUpdate(const string &filename, const string& sig)
|
|||||||
return true;
|
return true;
|
||||||
string osig = data.substr(i1, i2-i1);
|
string osig = data.substr(i1, i2-i1);
|
||||||
#endif
|
#endif
|
||||||
LOGDEB(("Db::needUpdate: oldsig [%s] new [%s]\n",
|
LOGDEB2(("Db::needUpdate: oldsig [%s] new [%s]\n",
|
||||||
osig.c_str(), sig.c_str()));
|
osig.c_str(), sig.c_str()));
|
||||||
// Compare new/old sig
|
// Compare new/old sig
|
||||||
if (sig != osig) {
|
if (sig != osig) {
|
||||||
LOGDEB(("Db::needUpdate:yes: olsig [%s] new [%s]\n",
|
LOGDEB(("Db::needUpdate:yes: olsig [%s] new [%s]\n",
|
||||||
@ -1222,14 +1208,14 @@ bool Db::purge()
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Delete document(s) for given filename */
|
/* Delete document(s) for given unique identifier (doc and descendents) */
|
||||||
bool Db::purgeFile(const string &fn)
|
bool Db::purgeFile(const string &udi)
|
||||||
{
|
{
|
||||||
LOGDEB(("Db:purgeFile: [%s]\n", fn.c_str()));
|
LOGDEB(("Db:purgeFile: [%s]\n", udi.c_str()));
|
||||||
if (m_ndb == 0)
|
if (m_ndb == 0)
|
||||||
return false;
|
return false;
|
||||||
Xapian::WritableDatabase db = m_ndb->wdb;
|
Xapian::WritableDatabase db = m_ndb->wdb;
|
||||||
string uniterm = make_uniterm(fn, string());
|
string uniterm = make_uniterm(udi);
|
||||||
string ermsg;
|
string ermsg;
|
||||||
try {
|
try {
|
||||||
Xapian::PostingIterator docid = db.postlist_begin(uniterm);
|
Xapian::PostingIterator docid = db.postlist_begin(uniterm);
|
||||||
@ -1528,21 +1514,18 @@ bool Db::makeDocAbstract(Doc &doc, Query *query, string& abstract)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Retrieve document defined by file name and internal path.
|
// Retrieve document defined by its unique document identifier (udi).
|
||||||
bool Db::getDoc(const string &fn, const string &ipath, Doc &doc, int *pc)
|
bool Db::getDoc(const string &udi, Doc &doc, int *pc)
|
||||||
{
|
{
|
||||||
LOGDEB(("Db:getDoc: [%s] (%d) [%s]\n", fn.c_str(), fn.length(),
|
LOGDEB(("Db:getDoc: [%s]\n", udi.c_str()));
|
||||||
ipath.c_str()));
|
|
||||||
if (m_ndb == 0)
|
if (m_ndb == 0)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// Initialize what we can in any case. If this is history, caller
|
// Initialize what we can in any case. If this is history, caller
|
||||||
// will make partial display in case of error
|
// will make partial display in case of error
|
||||||
doc.ipath = ipath;
|
|
||||||
doc.url = string("file://") + fn;
|
|
||||||
if (*pc)
|
if (*pc)
|
||||||
*pc = 100;
|
*pc = 100;
|
||||||
|
|
||||||
string uniterm = make_uniterm(fn, ipath);
|
string uniterm = make_uniterm(udi);
|
||||||
string ermsg;
|
string ermsg;
|
||||||
try {
|
try {
|
||||||
if (!m_ndb->db.term_exists(uniterm)) {
|
if (!m_ndb->db.term_exists(uniterm)) {
|
||||||
|
|||||||
@ -16,7 +16,7 @@
|
|||||||
*/
|
*/
|
||||||
#ifndef _DB_H_INCLUDED_
|
#ifndef _DB_H_INCLUDED_
|
||||||
#define _DB_H_INCLUDED_
|
#define _DB_H_INCLUDED_
|
||||||
/* @(#$Id: rcldb.h,v 1.57 2008-07-28 08:42:52 dockes Exp $ (C) 2004 J.F.Dockes */
|
/* @(#$Id: rcldb.h,v 1.58 2008-07-28 12:24:15 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <list>
|
#include <list>
|
||||||
@ -113,7 +113,7 @@ class Db {
|
|||||||
bool add(const string &udi, const Doc &doc);
|
bool add(const string &udi, const Doc &doc);
|
||||||
|
|
||||||
/** Delete document(s) for given UDI, including subdocs */
|
/** Delete document(s) for given UDI, including subdocs */
|
||||||
bool purgeFile(const string &fn);
|
bool purgeFile(const string &udi);
|
||||||
|
|
||||||
/** Remove documents that no longer exist in the file system. This
|
/** Remove documents that no longer exist in the file system. This
|
||||||
* depends on the update map, which is built during
|
* depends on the update map, which is built during
|
||||||
@ -161,7 +161,7 @@ class Db {
|
|||||||
|
|
||||||
/** Get document for given filename and ipath. Used by the 'history'
|
/** Get document for given unique document identifier (udi). Used by the 'history'
|
||||||
* feature (and nothing else?) */
|
* feature (and nothing else?) */
|
||||||
bool getDoc(const string &fn, const string &ipath, Doc &doc, int *percent);
|
bool getDoc(const string &udi, Doc &doc, int *percent);
|
||||||
|
|
||||||
/* The following are mainly for the aspell module */
|
/* The following are mainly for the aspell module */
|
||||||
/** Whole term list walking. */
|
/** Whole term list walking. */
|
||||||
|
|||||||
@ -16,7 +16,7 @@
|
|||||||
*/
|
*/
|
||||||
#ifndef _RCLDOC_H_INCLUDED_
|
#ifndef _RCLDOC_H_INCLUDED_
|
||||||
#define _RCLDOC_H_INCLUDED_
|
#define _RCLDOC_H_INCLUDED_
|
||||||
/* @(#$Id: rcldoc.h,v 1.4 2008-07-28 08:42:52 dockes Exp $ (C) 2006 J.F.Dockes */
|
/* @(#$Id: rcldoc.h,v 1.5 2008-07-28 12:24:15 dockes Exp $ (C) 2006 J.F.Dockes */
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <map>
|
#include <map>
|
||||||
@ -34,14 +34,21 @@ class Doc {
|
|||||||
public:
|
public:
|
||||||
// These fields potentially go into the document data record
|
// These fields potentially go into the document data record
|
||||||
// We indicate the routine that sets them up during indexing
|
// We indicate the routine that sets them up during indexing
|
||||||
string url; // This is just "file://" + binary filename.
|
|
||||||
// No transcoding: this is used to access files
|
// This is just "file://" + binary filename. No transcoding: this
|
||||||
// Computed from fn by Db::add
|
// is used to access files
|
||||||
string utf8fn; // Transcoded version of the simple file name for
|
// Index: computed from fn by Db::add caller. Query: from doc data.
|
||||||
// SFN-prefixed specific file name indexation
|
string url;
|
||||||
// Set by DbIndexer::processone
|
|
||||||
string ipath; // Internal path for multi-doc files. Ascii
|
// Transcoded version of the simple file name for SFN-prefixed
|
||||||
// Set by DbIndexer::processone
|
// specific file name indexation
|
||||||
|
// Index: set by DbIndexer::processone
|
||||||
|
string utf8fn;
|
||||||
|
|
||||||
|
// Internal path for multi-doc files. Ascii
|
||||||
|
// Set by DbIndexer::processone
|
||||||
|
string ipath;
|
||||||
|
|
||||||
string mimetype; // Set by FileInterner::internfile
|
string mimetype; // Set by FileInterner::internfile
|
||||||
string fmtime; // File modification time as decimal ascii unix time
|
string fmtime; // File modification time as decimal ascii unix time
|
||||||
// Set by DbIndexer::processone
|
// Set by DbIndexer::processone
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
depth = ..
|
depth = ..
|
||||||
include $(depth)/mk/sysconf
|
include $(depth)/mk/sysconf
|
||||||
|
|
||||||
PROGS = trconftree wipedir smallut trfstreewalk trpathut \
|
PROGS = trfileudi trconftree wipedir smallut trfstreewalk trpathut \
|
||||||
transcode trbase64 \
|
transcode trbase64 \
|
||||||
trmimeparse trexecmd utf8iter idfile
|
trmimeparse trexecmd utf8iter idfile
|
||||||
|
|
||||||
@ -24,6 +24,12 @@ trpathut : $(PATHUT_OBJS)
|
|||||||
trpathut.o : pathut.cpp pathut.h
|
trpathut.o : pathut.cpp pathut.h
|
||||||
$(CXX) -o trpathut.o -c $(ALL_CXXFLAGS) -DTEST_PATHUT pathut.cpp
|
$(CXX) -o trpathut.o -c $(ALL_CXXFLAGS) -DTEST_PATHUT pathut.cpp
|
||||||
|
|
||||||
|
FILEUDI_OBJS= trfileudi.o $(BIGLIB)
|
||||||
|
trfileudi : $(FILEUDI_OBJS)
|
||||||
|
$(CXX) $(ALL_CXXFLAGS) -o trfileudi $(FILEUDI_OBJS)
|
||||||
|
trfileudi.o : fileudi.cpp fileudi.h
|
||||||
|
$(CXX) -o trfileudi.o -c $(ALL_CXXFLAGS) -DTEST_FILEUDI fileudi.cpp
|
||||||
|
|
||||||
EXECMD_OBJS= trexecmd.o $(BIGLIB)
|
EXECMD_OBJS= trexecmd.o $(BIGLIB)
|
||||||
trexecmd : $(EXECMD_OBJS)
|
trexecmd : $(EXECMD_OBJS)
|
||||||
$(CXX) $(ALL_CXXFLAGS) -o trexecmd $(EXECMD_OBJS)
|
$(CXX) $(ALL_CXXFLAGS) -o trexecmd $(EXECMD_OBJS)
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
#ifndef lint
|
#ifndef lint
|
||||||
static char rcsid[] = "@(#$Id: fileudi.cpp,v 1.1 2008-07-28 10:20:20 dockes Exp $ (C) 2005 J.F.Dockes";
|
static char rcsid[] = "@(#$Id: fileudi.cpp,v 1.2 2008-07-28 12:24:15 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
@ -100,6 +100,7 @@ void pathHash(const std::string &path, std::string &phash, unsigned int maxlen)
|
|||||||
void make_udi(const string& fn, const string& ipath, string &udi)
|
void make_udi(const string& fn, const string& ipath, string &udi)
|
||||||
{
|
{
|
||||||
string s(fn);
|
string s(fn);
|
||||||
|
// Note that we append a "|" in all cases. Historical, could be removed
|
||||||
s.append("|");
|
s.append("|");
|
||||||
s.append(ipath);
|
s.append(ipath);
|
||||||
pathHash(s, udi, PATHHASHLEN);
|
pathHash(s, udi, PATHHASHLEN);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user