diff --git a/src/bincimapmime/mime-inputsource.h b/src/bincimapmime/mime-inputsource.h index da79760e..d05df546 100644 --- a/src/bincimapmime/mime-inputsource.h +++ b/src/bincimapmime/mime-inputsource.h @@ -33,6 +33,8 @@ #include #include +#include + namespace Binc { class MimeInputSource { @@ -40,9 +42,10 @@ namespace Binc { inline MimeInputSource(int fd, unsigned int start = 0); virtual inline ~MimeInputSource(void); - virtual inline bool fillInputBuffer(void); + virtual inline size_t fillRaw(char *raw, size_t nbytes); virtual inline void reset(void); + virtual inline bool fillInputBuffer(void); inline void seek(unsigned int offset); inline bool getChar(char *c); inline void ungetChar(void); @@ -77,10 +80,15 @@ namespace Binc { { } + inline size_t MimeInputSource::fillRaw(char *raw, size_t nbytes) + { + return read(fd, raw, nbytes); + } + inline bool MimeInputSource::fillInputBuffer(void) { char raw[4096]; - ssize_t nbytes = read(fd, raw, sizeof(raw)); + ssize_t nbytes = fillRaw(raw, 4096); if (nbytes <= 0) { // FIXME: If ferror(crlffile) we should log this. return false; @@ -159,8 +167,53 @@ namespace Binc { { return offset; } + + /////////////////////////////////// + class MimeInputSourceStream : public MimeInputSource { + public: + inline MimeInputSourceStream(istream& s, unsigned int start = 0); + virtual inline size_t fillRaw(char *raw, size_t nb); + virtual inline void reset(void); + private: + istream& s; + }; + + inline MimeInputSourceStream::MimeInputSourceStream(istream& si, + unsigned int start) + : MimeInputSource(-1, start), s(si) + { + } + + inline size_t MimeInputSourceStream::fillRaw(char *raw, size_t nb) + { + // Why can't streams tell how many characters were actually read + // when hitting eof ? + std::streampos st = s.tellg(); + s.seekg(0, ios_base::end); + std::streampos lst = s.tellg(); + s.seekg(st); + size_t nbytes = lst - st; + if (nbytes > nb) { + nbytes = nb; + } + if (nbytes <= 0) { + return (size_t)-1; + } + + s.read(raw, nbytes); + return nbytes; + } + + inline void MimeInputSourceStream::reset(void) + { + MimeInputSource::reset(); + s.seekg(0); + } + } + + extern Binc::MimeInputSource *mimeSource; #endif diff --git a/src/bincimapmime/mime-parsefull.cc b/src/bincimapmime/mime-parsefull.cc index 9adc2dc5..59288440 100644 --- a/src/bincimapmime/mime-parsefull.cc +++ b/src/bincimapmime/mime-parsefull.cc @@ -80,6 +80,35 @@ void Binc::MimeDocument::parseFull(int fd) const size = mimeSource->getOffset(); } +void Binc::MimeDocument::parseFull(istream& s) const +{ + if (allIsParsed) + return; + + allIsParsed = true; + + delete mimeSource; + mimeSource = new MimeInputSourceStream(s); + + headerstartoffsetcrlf = 0; + headerlength = 0; + bodystartoffsetcrlf = 0; + bodylength = 0; + size = 0; + messagerfc822 = false; + multipart = false; + + int bsize = 0; + string bound; + MimePart::parseFull(bound, bsize); + + // eat any trailing junk to get the correct size + char c; + while (mimeSource->getChar(&c)); + + size = mimeSource->getOffset(); +} + //------------------------------------------------------------------------ static bool parseOneHeaderLine(Binc::Header *header, unsigned int *nlines) { diff --git a/src/bincimapmime/mime-parseonlyheader.cc b/src/bincimapmime/mime-parseonlyheader.cc index 59fc4840..2ca41541 100644 --- a/src/bincimapmime/mime-parseonlyheader.cc +++ b/src/bincimapmime/mime-parseonlyheader.cc @@ -73,6 +73,29 @@ void Binc::MimeDocument::parseOnlyHeader(int fd) const MimePart::parseOnlyHeader(""); } +void Binc::MimeDocument::parseOnlyHeader(istream& s) const +{ + if (allIsParsed || headerIsParsed) + return; + + headerIsParsed = true; + + delete mimeSource; + mimeSource = new MimeInputSourceStream(s); + + headerstartoffsetcrlf = 0; + headerlength = 0; + bodystartoffsetcrlf = 0; + bodylength = 0; + messagerfc822 = false; + multipart = false; + + nlines = 0; + nbodylines = 0; + + MimePart::parseOnlyHeader(""); +} + //------------------------------------------------------------------------ int Binc::MimePart::parseOnlyHeader(const string &toboundary) const { diff --git a/src/bincimapmime/mime-printbody.cc b/src/bincimapmime/mime-printbody.cc index 89072061..e4c05e59 100644 --- a/src/bincimapmime/mime-printbody.cc +++ b/src/bincimapmime/mime-printbody.cc @@ -77,11 +77,18 @@ void Binc::MimePart::getBody(int fd, string &s, unsigned int startoffset, unsigned int length) const { + if (!mimeSource || mimeSource->getFileDescriptor() != fd) { delete mimeSource; mimeSource = new MimeInputSource(fd); } + getBody(s, startoffset, length); +} +void Binc::MimePart::getBody(string &s, + unsigned int startoffset, + unsigned int length) const +{ mimeSource->reset(); mimeSource->seek(bodystartoffsetcrlf + startoffset); diff --git a/src/bincimapmime/mime.h b/src/bincimapmime/mime.h index 1f79b376..51a82793 100644 --- a/src/bincimapmime/mime.h +++ b/src/bincimapmime/mime.h @@ -108,6 +108,7 @@ namespace Binc { void printBody(int fd, Binc::IODevice &output, unsigned int startoffset, unsigned int length) const; void getBody(int fd, std::string& s, unsigned int startoffset, unsigned int length) const; + void getBody(std::string& s, unsigned int startoffset, unsigned int length) const; void printHeader(int fd, Binc::IODevice &output, std::vector headers, bool includeheaders, unsigned int startoffset, unsigned int length, std::string &storage) const; void printDoc(int fd, Binc::IODevice &output, unsigned int startoffset, unsigned int length) const; virtual void clear(void) const; @@ -129,6 +130,8 @@ namespace Binc { public: void parseOnlyHeader(int fd) const; void parseFull(int fd) const; + void parseOnlyHeader(std::istream& s) const; + void parseFull(std::istream& s) const; void clear(void) const; inline bool isHeaderParsed(void) { return headerIsParsed; } diff --git a/src/bincimapmime/trbinc.cc b/src/bincimapmime/trbinc.cc index 2c970302..0596d7f0 100644 --- a/src/bincimapmime/trbinc.cc +++ b/src/bincimapmime/trbinc.cc @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid [] = "@(#$Id: trbinc.cc,v 1.1 2005-03-25 09:40:27 dockes Exp $ (C) 1994 CDKIT"; +static char rcsid [] = "@(#$Id: trbinc.cc,v 1.2 2005-03-31 10:04:07 dockes Exp $ (C) 1994 CDKIT"; #endif #include @@ -9,6 +9,10 @@ static char rcsid [] = "@(#$Id: trbinc.cc,v 1.1 2005-03-25 09:40:27 dockes Exp $ #include #include +#include + +using namespace std; + #include "mime.h" static char *thisprog; @@ -68,7 +72,27 @@ int main(int argc, char **argv) exit(1); } Binc::MimeDocument doc; + +#if 0 doc.parseFull(fd); +#else + char *cp; + int size = lseek(fd, 0, SEEK_END); + lseek(fd, 0, 0); + fprintf(stderr, "Size: %d\n", size); + cp = (char *)malloc(size); + if (cp==0) { + fprintf(stderr, "Malloc %d failed\n", size); + exit(1); + } + int n; + if ((n=read(fd, cp, size)) != size) { + fprintf(stderr, "Read failed: requested %d, got %d\n", size, n); + exit(1); + } + std::stringstream s(string(cp, size), ios::in); + doc.parseFull(s); +#endif if (!doc.isHeaderParsed() && !doc.isAllParsed()) { fprintf(stderr, "Parse error\n"); diff --git a/src/common/rclconfig.cpp b/src/common/rclconfig.cpp index c3473723..a9f9d7af 100644 --- a/src/common/rclconfig.cpp +++ b/src/common/rclconfig.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.6 2005-02-04 09:39:44 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.7 2005-03-31 10:04:07 dockes Exp $ (C) 2004 J.F.Dockes"; #endif #include @@ -51,20 +51,23 @@ RclConfig::RclConfig() cerr << "No mime map file" << endl; return; } + // mimemap->list(); + string mimeconffile; if (!conf->get("mimeconffile", mimeconffile, "")) { mimeconffile = "mimeconf"; } mpath = confdir; - path_cat(mpath, mimeconffile); mimeconf = new ConfTree(mpath.c_str()); if (mimeconf == 0) { cerr << "No mime conf file" << endl; return; } + // mimeconf->list(); + setKeyDir(string("")); - // mimeconf->list(); + m_ok = true; return; } diff --git a/src/internfile/mh_html.cpp b/src/internfile/mh_html.cpp index ccbbab23..a6c0cc82 100644 --- a/src/internfile/mh_html.cpp +++ b/src/internfile/mh_html.cpp @@ -56,7 +56,18 @@ MimeHandlerHtml::worker1(RclConfig *conf, const string &, const string& htext, const string &mtype, Rcl::Doc &docout) { - // Character set handling: + //LOGDEB(("textHtmlToDoc: htext: %s\n", htext.c_str())); + // Character set handling: the initial guessed charset depends on + // external factors: possible hint (ie mime charset in a mail + // message), charset guessing, or default configured charset. + string charset; + if (!charsethint.empty()) { + charset = charsethint; + if (conf->getGuessCharset()) { + charset = csguess(htext, conf->getDefCharset()); + } else + charset = conf->getDefCharset(); + } // - We first try to convert from the default configured charset // (which may depend of the current directory) to utf-8. If this @@ -64,12 +75,6 @@ MimeHandlerHtml::worker1(RclConfig *conf, const string &, // - During parsing, if we find a charset parameter, and it differs from // what we started with, we abort and restart with the parameter value // instead of the configuration one. - string charset; - if (conf->getGuessCharset()) { - charset = csguess(htext, conf->getDefCharset()); - } else - charset = conf->getDefCharset(); - LOGDEB(("textHtmlToDoc: charset before parsing: %s\n", charset.c_str())); MyHtmlParser pres; @@ -108,7 +113,7 @@ MimeHandlerHtml::worker1(RclConfig *conf, const string &, Rcl::Doc out; out.origcharset = charset; out.text = pres.dump; - // LOGDEB(("textHtmlToDoc: dump : %s\n", pres.dump.c_str())); + // LOGDEB(("textHtmlToDoc: dump : %s\n", pres.dump.c_str())); out.title = pres.title; out.keywords = pres.keywords; out.abstract = pres.sample; diff --git a/src/internfile/mh_html.h b/src/internfile/mh_html.h index 65f0d812..4c12f709 100644 --- a/src/internfile/mh_html.h +++ b/src/internfile/mh_html.h @@ -1,7 +1,8 @@ #ifndef _HTML_H_INCLUDED_ #define _HTML_H_INCLUDED_ -/* @(#$Id: mh_html.h,v 1.3 2005-03-25 09:40:27 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: mh_html.h,v 1.4 2005-03-31 10:04:07 dockes Exp $ (C) 2004 J.F.Dockes */ #include "mimehandler.h" +#include // Code to turn an html document into an internal one. There are 2 // interfaces, depending if we're working on a file, or on a @@ -11,6 +12,7 @@ // carry titles, abstracts, whatever) class MimeHandlerHtml : public MimeHandler { public: + std::string charsethint; virtual MimeHandler::Status worker(RclConfig *conf, const string &fn, const string &mtype, Rcl::Doc &docout, string&); virtual MimeHandler::Status worker1(RclConfig *conf, const string &fn, diff --git a/src/internfile/mh_mail.cpp b/src/internfile/mh_mail.cpp index f278109b..d2b797fd 100644 --- a/src/internfile/mh_mail.cpp +++ b/src/internfile/mh_mail.cpp @@ -1,11 +1,14 @@ #ifndef lint -static char rcsid[] = "@(#$Id: mh_mail.cpp,v 1.1 2005-03-25 09:40:27 dockes Exp $ (C) 2005 J.F.Dockes"; +static char rcsid[] = "@(#$Id: mh_mail.cpp,v 1.2 2005-03-31 10:04:07 dockes Exp $ (C) 2005 J.F.Dockes"; #endif +#include #include #include #include +#include +using std::stringstream; using std::map; #include "mimehandler.h" @@ -19,65 +22,183 @@ using std::map; #include "debuglog.h" #include "smallut.h" #include "mimeparse.h" +#include "html.h" + +// binc imap mime definitions +#include "mime.h" + +static void +walkmime(RclConfig *cnf, string &out, Binc::MimePart& doc, int depth); using namespace std; +MimeHandlerMail::~MimeHandlerMail() +{ + if (vfp) { + fclose((FILE *)vfp); + vfp = 0; + } +} + // We are called for two different file types: mbox-type folders -// holding multiple messages, and maildir-type files with one rfc822 -// message +// holding multiple messages, and maildir-type files with one message MimeHandler::Status MimeHandlerMail::worker(RclConfig *cnf, const string &fn, - const string &mtype, Rcl::Doc &docout, string&) + const string &mtype, Rcl::Doc &docout, string& ipath) { LOGDEB(("MimeHandlerMail::worker: %s [%s]\n", mtype.c_str(), fn.c_str())); conf = cnf; if (!stringlowercmp("message/rfc822", mtype)) { - return processone(fn, docout); + ipath = ""; + int fd; + if ((fd = open(fn.c_str(), 0)) < 0) { + LOGERR(("MimeHandlerMail::worker: open(%s) errno %d\n", + fn.c_str(), errno)); + return MimeHandler::MHError; + } + Binc::MimeDocument doc; + doc.parseFull(fd); + MimeHandler::Status ret = processone(fn, doc, docout); + close(fd); + return ret; } else if (!stringlowercmp("text/x-mail", mtype)) { - return MimeHandler::MHError; - } else + return processmbox(fn, docout, ipath); + } else // hu ho return MimeHandler::MHError; } +MimeHandler::Status +MimeHandlerMail::processmbox(const string &fn, Rcl::Doc &docout, string& ipath) +{ + int mtarg = 0; + if (ipath != "") { + sscanf(ipath.c_str(), "%d", &mtarg); + } + LOGDEB(("MimeHandlerMail::processmbox: fn %s, mtarg %d\n", fn.c_str(), + mtarg)); -#include "mime.h" + FILE *fp; + if (vfp) { + fp = (FILE *)vfp; + } else { + fp = fopen(fn.c_str(), "r"); + if (fp == 0) { + LOGERR(("MimeHandlerMail::processmbox: error opening %s\n", + fn.c_str())); + return MimeHandler::MHError; + } + vfp = fp; + } + if (mtarg > 0) { + fseek(fp, 0, SEEK_SET); + msgnum = 0; + } -const char *hnames[] = {"Subject", "Content-type"}; -int nh = sizeof(hnames) / sizeof(char *); + off_t start, end; + bool iseof = false; + do { + // Look for next 'From ' Line, start of message. Set start to + // line after this + char line[301]; + for (;;) { + if (!fgets(line, 300, fp)) { + // Eof hit while looking for 'From ' -> file done. We'd need + // another return code here + return MimeHandler::MHError; + } + + if (!strncmp("From ", line, 5)) { + start = ftello(fp); + break; + } + } + + // Look for next 'From ' line or eof, end of message (we let a + // spurious empty line in) + for (;;) { + end = ftello(fp); + if (!fgets(line, 300, fp) || !strncmp("From ", line, 5)) { + if (ferror(fp) || feof(fp)) + iseof = true; + break; + } + } + msgnum++; + LOGDEB(("MimeHandlerMail::processmbox: got msg %d\n", msgnum)); + fseek(fp, end, SEEK_SET); + } while (mtarg > 0 && msgnum < mtarg); + + + size_t size = end - start; + fseek(fp, start, SEEK_SET); + char *cp = (char *)malloc(size); + if (cp == 0) { + LOGERR(("MimeHandlerMail::processmbox: malloc(%d) failed\n", size)); + return MimeHandler::MHError; + } + if (fread(cp, 1, size, fp) != size) { + LOGERR(("MimeHandlerMail::processmbox: fread failed (errno %d)\n", + errno)); + free(cp); + return MimeHandler::MHError; + } + string msgbuf(cp, size); + free(cp); + stringstream s(msgbuf); + Binc::MimeDocument doc; + doc.parseFull(s); + MimeHandler::Status ret = processone(fn, doc, docout); + if (ret == MimeHandler::MHError) + return ret; + char buf[20]; + sprintf(buf, "%d", msgnum); + ipath = buf; + return iseof ? MimeHandler::MHDone : + (mtarg > 0) ? MimeHandler::MHDone : MimeHandler::MHAgain; +} -void walkmime(string &out, Binc::MimePart& doc, int fd, int depth); // Transform a single message into a document. The subject becomes the // title, and any simple body part with a content-type of text or html // and content-disposition inline gets concatenated as text. MimeHandler::Status -MimeHandlerMail::processone(const string &fn, Rcl::Doc &docout) +MimeHandlerMail::processone(const string &fn, Binc::MimeDocument& doc, + Rcl::Doc &docout) { - int fd; - if ((fd = open(fn.c_str(), 0)) < 0) { - LOGERR(("MimeHandlerMail::processone: open(%s) errno %d\n", - fn.c_str(), errno)); - return MimeHandler::MHError; - } - Binc::MimeDocument doc; - doc.parseFull(fd); - if (!doc.isHeaderParsed() && !doc.isAllParsed()) { - LOGERR(("MimeHandlerMail::processone: parse error for %s\n", + LOGERR(("MimeHandlerMail::processone: mime parse error for %s\n", fn.c_str())); - close(fd); return MimeHandler::MHError; } + + // Handle some headers. We should process rfc2047 encoding here + Binc::HeaderItem hi; + if (doc.h.getFirstHeader("Subject", hi)) { + docout.title = hi.getValue(); + } + if (doc.h.getFirstHeader("From", hi)) { + docout.text += string("From: ") + hi.getValue() + string("\n"); + } + if (doc.h.getFirstHeader("To", hi)) { + docout.text += string("To: ") + hi.getValue() + string("\n"); + } + if (doc.h.getFirstHeader("Date", hi)) { + docout.text += string("Date: ") + hi.getValue() + string("\n"); + } + LOGDEB(("MimeHandlerMail::processone: ismultipart %d mime subtype '%s'\n", doc.isMultipart(), doc.getSubType().c_str())); - walkmime(docout.text, doc, fd, 0); - close(fd); - LOGDEB(("MimeHandlerMail::processone: text: '%s'\n", docout.text.c_str())); - return MimeHandler::MHError; + walkmime(conf, docout.text, doc, 0); + + LOGDEB(("MimeHandlerMail::processone: text: '%s'\n", docout.text.c_str())); + return MimeHandler::MHDone; } -void walkmime(string &out, Binc::MimePart& doc, int fd, int depth) +// Recursively walk the message mime parts and concatenate all the +// inline html or text that we find anywhere. +static void walkmime(RclConfig *cnf, string &out, Binc::MimePart& doc, + int depth) { if (depth > 5) { LOGINFO(("walkmime: max depth exceeded\n")); @@ -88,12 +209,12 @@ void walkmime(string &out, Binc::MimePart& doc, int fd, int depth) LOGDEB(("walkmime: ismultipart %d subtype '%s'\n", doc.isMultipart(), doc.getSubType().c_str())); // We only handle alternative and mixed for now. For - // alternative, we look for a text/plain part, else html and process it - // For mixed, we process each part. + // alternative, we look for a text/plain part, else html and + // process it For mixed, we process each part. std::vector::iterator it; if (!stringicmp("mixed", doc.getSubType())) { for (it = doc.members.begin(); it != doc.members.end();it++) { - walkmime(out, *it, fd, depth+1); + walkmime(cnf, out, *it, depth+1); } } else if (!stringicmp("alternative", doc.getSubType())) { std::vector::iterator ittxt, ithtml; @@ -103,7 +224,17 @@ void walkmime(string &out, Binc::MimePart& doc, int fd, int depth) Binc::HeaderItem hi; if (!doc.h.getFirstHeader("Content-Type", hi)) continue; - LOGDEB(("walkmime:content-type: %s\n", hi.getValue().c_str())); + MimeHeaderValue content_type; + parseMimeHeaderValue(hi.getValue(), content_type); + if (!stringlowercmp("text/plain", content_type.value)) + ittxt = it; + else if (!stringlowercmp("text/html", content_type.value)) + ithtml = it; + } + if (ittxt != doc.members.end()) { + walkmime(cnf, out, *ittxt, depth+1); + } else if (ithtml != doc.members.end()) { + walkmime(cnf, out, *ithtml, depth+1); } } } else { @@ -149,30 +280,36 @@ void walkmime(string &out, Binc::MimePart& doc, int fd, int depth) LOGDEB(("walkmime: final: body start offset %d, length %d\n", doc.getBodyStartOffset(), doc.getBodyLength())); string body; - doc.getBody(fd, body, 0, doc.bodylength); + doc.getBody(body, 0, doc.bodylength); // Decode content transfer encoding - if (stringlowercmp("quoted-printable", content_disposition.value)) { + if (!stringlowercmp("quoted-printable", cte)) { string decoded; qp_decode(body, decoded); body = decoded; - } else if (stringlowercmp("base64", content_disposition.value)) { + } else if (!stringlowercmp("base64", cte)) { string decoded; base64_decode(body, decoded); body = decoded; } - // Transcode to utf-8 string transcoded; - if (!transcode(body, transcoded, charset, "UTF-8")) { - LOGERR(("walkmime: transcode failed from cs '%s' to UTF-8\n", - charset.c_str())); - transcoded = body; + if (!stringlowercmp("text/html", content_type.value)) { + MimeHandlerHtml mh; + Rcl::Doc hdoc; + mh.charsethint = charset; + mh.worker1(cnf, "", body, content_type.value, hdoc); + transcoded = hdoc.text; + } else { + // Transcode to utf-8 + if (!transcode(body, transcoded, charset, "UTF-8")) { + LOGERR(("walkmime: transcode failed from cs '%s' to UTF-8\n", + charset.c_str())); + transcoded = body; + } } out += string("\r\n") + transcoded; } } - - diff --git a/src/internfile/mh_mail.h b/src/internfile/mh_mail.h index 8be35be9..f7089636 100644 --- a/src/internfile/mh_mail.h +++ b/src/internfile/mh_mail.h @@ -1,14 +1,23 @@ #ifndef _MAIL_H_INCLUDED_ #define _MAIL_H_INCLUDED_ -/* @(#$Id: mh_mail.h,v 1.1 2005-03-25 09:40:27 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: mh_mail.h,v 1.2 2005-03-31 10:04:07 dockes Exp $ (C) 2004 J.F.Dockes */ #include "mimehandler.h" +namespace Binc { + class MimeDocument; +} // Code to turn a mail folder file into internal documents class MimeHandlerMail : public MimeHandler { + void *vfp; + int msgnum; RclConfig *conf; - MimeHandler::Status processone(const string &fn, Rcl::Doc &docout); + MimeHandler::Status processone(const string &fn, Binc::MimeDocument& doc, + Rcl::Doc &docout); + MimeHandler::Status processmbox(const string &fn, Rcl::Doc &docout, + string &ipath); public: - MimeHandlerMail() : conf(0) {} + MimeHandlerMail() : vfp(0), msgnum(0), conf(0) {} + virtual ~MimeHandlerMail(); virtual MimeHandler::Status worker(RclConfig *conf, const string &fn, const string &mtype, Rcl::Doc &docout, string& ipath); diff --git a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp index 35713cd2..318a2352 100644 --- a/src/rcldb/rcldb.cpp +++ b/src/rcldb/rcldb.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.24 2005-02-10 15:21:12 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.25 2005-03-31 10:04:07 dockes Exp $ (C) 2004 J.F.Dockes"; #endif #include #include @@ -316,7 +316,8 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc) splitter.text_to_words(noacc); newdocument.add_term("T" + doc.mimetype); - string pathterm = "P" + fn; + string pathterm = doc.ipath.empty() ? + "P" + fn : "P" + fn + "|" + doc.ipath; newdocument.add_term(pathterm); const char *fnc = fn.c_str(); @@ -332,6 +333,10 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc) record += "\ncaption=" + doc.title; record += "\nkeywords=" + doc.keywords; record += "\nabstract=" + doc.abstract; + if (!doc.ipath.empty()) { + record += "\nipath=" + doc.ipath; + } + record += "\n"; LOGDEB1(("Newdocument data: %s\n", record.c_str())); newdocument.set_data(record); @@ -357,9 +362,11 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc) ndb->wdb.replace_document(pathterm, newdocument); if (did < ndb->updated.size()) { ndb->updated[did] = true; - LOGDEB(("Rcl::Db::add: docid %d updated [%s]\n", did, fnc)); + LOGDEB(("Rcl::Db::add: docid %d updated [%s , %s]\n", did, fnc, + doc.ipath.c_str())); } else { - LOGDEB(("Rcl::Db::add: docid %d added [%s]\n", did, fnc)); + LOGDEB(("Rcl::Db::add: docid %d added [%s , %s]\n", did, fnc, + doc.ipath.c_str())); } } catch (...) { // FIXME: is this ever actually needed? @@ -378,8 +385,12 @@ bool Rcl::Db::needUpdate(const string &filename, const struct stat *stp) Native *ndb = (Native *)pdata; string pathterm = "P" + filename; - if (!ndb->wdb.term_exists(pathterm)) - return true; + if (!ndb->wdb.term_exists(pathterm)) { + pathterm += string("|") + "1"; + if (!ndb->wdb.term_exists(pathterm)) { + return true; + } + } Xapian::PostingIterator doc; try { Xapian::PostingIterator did = ndb->wdb.postlist_begin(pathterm); @@ -775,5 +786,6 @@ bool Rcl::Db::getDoc(int i, Doc &doc, int *percent) parms.get(string("caption"), doc.title); parms.get(string("keywords"), doc.keywords); parms.get(string("abstract"), doc.abstract); + parms.get(string("ipath"), doc.ipath); return true; }