*** empty log message ***
This commit is contained in:
parent
04b279dcd5
commit
50b927f65c
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.9 2005-03-25 09:40:27 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.10 2005-04-04 13:18:46 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <sys/stat.h>
|
||||
@ -89,6 +89,22 @@ bool DbIndexer::index()
|
||||
it != topdirs->end(); it++) {
|
||||
LOGDEB(("DbIndexer::index: Indexing %s into %s\n", it->c_str(),
|
||||
dbdir.c_str()));
|
||||
config->setKeyDir(*it);
|
||||
|
||||
// Set up skipped patterns for this subtree
|
||||
{
|
||||
walker.clearSkippedNames();
|
||||
string skipped;
|
||||
if (config->getConfParam("skippedNames", skipped)) {
|
||||
list<string> skpl;
|
||||
ConfTree::stringToStrings(skipped, skpl);
|
||||
list<string>::const_iterator it;
|
||||
for (it = skpl.begin(); it != skpl.end(); it++) {
|
||||
walker.addSkippedName(*it);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (walker.walk(*it, *this) != FsTreeWalker::FtwOk) {
|
||||
LOGERR(("DbIndexer::index: error while indexing %s\n",
|
||||
it->c_str()));
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.6 2005-03-25 09:40:27 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.7 2005-04-04 13:18:46 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
#include <ctype.h>
|
||||
@ -82,8 +82,12 @@ string mimetype(const string &fn, ConfTree *mtypes)
|
||||
return mtype;
|
||||
}
|
||||
|
||||
// Look at file data ? Only when no suffix
|
||||
if (suff.empty())
|
||||
// Look at file data ? Only when no suffix or always
|
||||
// Also 'file' is not that great for us. For example, it will
|
||||
// mistake mail folders for simple text files if there is no 'Received'
|
||||
// header, which would be the case, for example, in a 'Sent' folder. Also
|
||||
// I'm not sure that file -i exists on all systems
|
||||
//if (suff.empty())
|
||||
return mimetypefromdata(fn);
|
||||
return "";
|
||||
}
|
||||
|
||||
@ -63,11 +63,11 @@ MimeHandlerHtml::worker1(RclConfig *conf, const string &,
|
||||
string charset;
|
||||
if (!charsethint.empty()) {
|
||||
charset = charsethint;
|
||||
if (conf->getGuessCharset()) {
|
||||
} else if (conf->getGuessCharset()) {
|
||||
charset = csguess(htext, conf->getDefCharset());
|
||||
} else
|
||||
charset = conf->getDefCharset();
|
||||
}
|
||||
|
||||
|
||||
// - We first try to convert from the default configured charset
|
||||
// (which may depend on the current directory) to utf-8. If this
|
||||
@ -75,7 +75,7 @@ MimeHandlerHtml::worker1(RclConfig *conf, const string &,
|
||||
// - During parsing, if we find a charset parameter, and it differs from
|
||||
// what we started with, we abort and restart with the parameter value
|
||||
// instead of the configuration one.
|
||||
LOGDEB(("textHtmlToDoc: charset before parsing: %s\n", charset.c_str()));
|
||||
LOGDEB(("textHtmlToDoc: charset before parsing: [%s]\n", charset.c_str()));
|
||||
|
||||
MyHtmlParser pres;
|
||||
for (int pass = 0; pass < 2; pass++) {
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: mh_mail.cpp,v 1.2 2005-03-31 10:04:07 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: mh_mail.cpp,v 1.3 2005-04-04 13:18:46 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
@ -46,7 +46,7 @@ MimeHandler::Status
|
||||
MimeHandlerMail::worker(RclConfig *cnf, const string &fn,
|
||||
const string &mtype, Rcl::Doc &docout, string& ipath)
|
||||
{
|
||||
LOGDEB(("MimeHandlerMail::worker: %s [%s]\n", mtype.c_str(), fn.c_str()));
|
||||
LOGDEB2(("MimeHandlerMail::worker: %s [%s]\n", mtype.c_str(), fn.c_str()));
|
||||
conf = cnf;
|
||||
|
||||
if (!stringlowercmp("message/rfc822", mtype)) {
|
||||
@ -75,7 +75,7 @@ MimeHandlerMail::processmbox(const string &fn, Rcl::Doc &docout, string& ipath)
|
||||
if (ipath != "") {
|
||||
sscanf(ipath.c_str(), "%d", &mtarg);
|
||||
}
|
||||
LOGDEB(("MimeHandlerMail::processmbox: fn %s, mtarg %d\n", fn.c_str(),
|
||||
LOGDEB2(("MimeHandlerMail::processmbox: fn %s, mtarg %d\n", fn.c_str(),
|
||||
mtarg));
|
||||
|
||||
FILE *fp;
|
||||
@ -125,7 +125,6 @@ MimeHandlerMail::processmbox(const string &fn, Rcl::Doc &docout, string& ipath)
|
||||
}
|
||||
}
|
||||
msgnum++;
|
||||
LOGDEB(("MimeHandlerMail::processmbox: got msg %d\n", msgnum));
|
||||
fseek(fp, end, SEEK_SET);
|
||||
} while (mtarg > 0 && msgnum < mtarg);
|
||||
|
||||
@ -173,25 +172,37 @@ MimeHandlerMail::processone(const string &fn, Binc::MimeDocument& doc,
|
||||
}
|
||||
|
||||
// Handle some headers. We should process rfc2047 encoding here
|
||||
// Also there should be no 8bit chars, but there sometimes are. So
|
||||
// we transcode as if from iso-8859-1, which is better than
|
||||
// getting utf8 conversion errors later on
|
||||
Binc::HeaderItem hi;
|
||||
string transcoded;
|
||||
if (doc.h.getFirstHeader("Subject", hi)) {
|
||||
docout.title = hi.getValue();
|
||||
transcode(hi.getValue(), transcoded, "iso-8859-1", "UTF-8");
|
||||
docout.title = transcoded;
|
||||
}
|
||||
if (doc.h.getFirstHeader("From", hi)) {
|
||||
docout.text += string("From: ") + hi.getValue() + string("\n");
|
||||
transcode(hi.getValue(), transcoded, "iso-8859-1", "UTF-8");
|
||||
docout.text += string("From: ") + transcoded + string("\n");
|
||||
}
|
||||
if (doc.h.getFirstHeader("To", hi)) {
|
||||
docout.text += string("To: ") + hi.getValue() + string("\n");
|
||||
transcode(hi.getValue(), transcoded, "iso-8859-1", "UTF-8");
|
||||
docout.text += string("To: ") + transcoded + string("\n");
|
||||
}
|
||||
if (doc.h.getFirstHeader("Date", hi)) {
|
||||
docout.text += string("Date: ") + hi.getValue() + string("\n");
|
||||
transcode(hi.getValue(), transcoded, "iso-8859-1", "UTF-8");
|
||||
docout.text += string("Date: ") + transcoded + string("\n");
|
||||
}
|
||||
if (doc.h.getFirstHeader("Subject", hi)) {
|
||||
transcode(hi.getValue(), transcoded, "iso-8859-1", "UTF-8");
|
||||
docout.text += string("Subject: ") + transcoded + string("\n");
|
||||
}
|
||||
|
||||
LOGDEB(("MimeHandlerMail::processone: ismultipart %d mime subtype '%s'\n",
|
||||
LOGDEB2(("MimeHandlerMail::processone: ismultipart %d mime subtype '%s'\n",
|
||||
doc.isMultipart(), doc.getSubType().c_str()));
|
||||
walkmime(conf, docout.text, doc, 0);
|
||||
|
||||
LOGDEB(("MimeHandlerMail::processone: text: '%s'\n", docout.text.c_str()));
|
||||
//LOGDEB(("MimeHandlerMail::processone: text: '%s'\n", docout.text.c_str()));
|
||||
return MimeHandler::MHDone;
|
||||
}
|
||||
|
||||
@ -206,13 +217,14 @@ static void walkmime(RclConfig *cnf, string &out, Binc::MimePart& doc,
|
||||
}
|
||||
|
||||
if (doc.isMultipart()) {
|
||||
LOGDEB(("walkmime: ismultipart %d subtype '%s'\n",
|
||||
LOGDEB2(("walkmime: ismultipart %d subtype '%s'\n",
|
||||
doc.isMultipart(), doc.getSubType().c_str()));
|
||||
// We only handle alternative and mixed for now. For
|
||||
// We only handle alternative, related and mixed for now. For
|
||||
// alternative, we look for a text/plain part, else html and
|
||||
// process it For mixed, we process each part.
|
||||
// process it. For mixed and related, we process each part.
|
||||
std::vector<Binc::MimePart>::iterator it;
|
||||
if (!stringicmp("mixed", doc.getSubType())) {
|
||||
if (!stringicmp("mixed", doc.getSubType()) ||
|
||||
!stringicmp("related", doc.getSubType())) {
|
||||
for (it = doc.members.begin(); it != doc.members.end();it++) {
|
||||
walkmime(cnf, out, *it, depth+1);
|
||||
}
|
||||
@ -247,18 +259,32 @@ static void walkmime(RclConfig *cnf, string &out, Binc::MimePart& doc,
|
||||
if (doc.h.getFirstHeader("Content-Type", hi)) {
|
||||
ctt = hi.getValue();
|
||||
}
|
||||
LOGDEB(("walkmime:content-type: %s\n", ctt.c_str()));
|
||||
LOGDEB2(("walkmime:content-type: %s\n", ctt.c_str()));
|
||||
MimeHeaderValue content_type;
|
||||
parseMimeHeaderValue(ctt, content_type);
|
||||
if (stringlowercmp("text/plain", content_type.value) &&
|
||||
stringlowercmp("text/html", content_type.value)) {
|
||||
return;
|
||||
}
|
||||
string charset = "us-ascii";
|
||||
|
||||
// Normally the default charset is us-ascii. But it happens that
|
||||
// 8 bit chars exist in a message that is stated as us-ascii. E.g. the
|
||||
// mailer used by yahoo support ('KANA') does this. We could convert
|
||||
// to iso-8859 only if the transfer-encoding is 8 bit, or test for
|
||||
// actual 8 bit chars, but what the heck, let's use 8859-1 as default
|
||||
string charset = "iso-8859-1";
|
||||
map<string,string>::const_iterator it;
|
||||
it = content_type.params.find(string("charset"));
|
||||
if (it != content_type.params.end())
|
||||
charset = it->second;
|
||||
if (charset.empty() ||
|
||||
!stringlowercmp("us-ascii", charset) ||
|
||||
!stringlowercmp("default", charset) ||
|
||||
!stringlowercmp("x-user-defined", charset) ||
|
||||
!stringlowercmp("x-unknown", charset) ||
|
||||
!stringlowercmp("unknown", charset) ) {
|
||||
charset = "iso-8859-1";
|
||||
}
|
||||
|
||||
// Content disposition
|
||||
string ctd = "inline";
|
||||
@ -277,7 +303,7 @@ static void walkmime(RclConfig *cnf, string &out, Binc::MimePart& doc,
|
||||
cte = hi.getValue();
|
||||
}
|
||||
|
||||
LOGDEB(("walkmime: final: body start offset %d, length %d\n",
|
||||
LOGDEB2(("walkmime: final: body start offset %d, length %d\n",
|
||||
doc.getBodyStartOffset(), doc.getBodyLength()));
|
||||
string body;
|
||||
doc.getBody(body, 0, doc.bodylength);
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.25 2005-03-31 10:04:07 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.26 2005-04-04 13:18:46 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <sys/stat.h>
|
||||
@ -216,7 +216,8 @@ bool Rcl::dumb_string(const string &in, string &out)
|
||||
return true;
|
||||
if (!unac_cpp(in, inter)) {
|
||||
LOGERR(("dumb_string: unac_cpp failed for %s\n", in.c_str()));
|
||||
return false;
|
||||
// Ok, no need to stop the whole show
|
||||
inter = "";
|
||||
}
|
||||
out.reserve(inter.length());
|
||||
for (unsigned int i = 0; i < inter.length(); i++) {
|
||||
@ -268,7 +269,7 @@ truncate_to_word(string & input, string::size_type maxlen)
|
||||
|
||||
bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc)
|
||||
{
|
||||
LOGDEB(("Rcl::Db::add: fn %s\n", fn.c_str()));
|
||||
LOGDEB1(("Rcl::Db::add: fn %s\n", fn.c_str()));
|
||||
if (pdata == 0)
|
||||
return false;
|
||||
Native *ndb = (Native *)pdata;
|
||||
@ -288,7 +289,7 @@ bool Rcl::Db::add(const string &fn, const Rcl::Doc &idoc)
|
||||
TextSplit splitter(&splitData);
|
||||
|
||||
string noacc;
|
||||
if (!unac_cpp(doc.title, noacc)) {
|
||||
if (!dumb_string(doc.title, noacc)) {
|
||||
LOGERR(("Rcl::Db::add: unac failed\n"));
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
# @(#$Id: mimeconf,v 1.5 2005-03-25 09:40:28 dockes Exp $ (C) 2004 J.F.Dockes
|
||||
# @(#$Id: mimeconf,v 1.6 2005-04-04 13:18:47 dockes Exp $ (C) 2004 J.F.Dockes
|
||||
|
||||
# Recoll : associations of mime types to processing filters.
|
||||
# There are different sections for decompression, 'interning' for indexing
|
||||
@ -49,7 +49,8 @@ application/vnd.sun.xml.writer.template = exec rclsoff
|
||||
# External viewers, launched when you double-click a result entry
|
||||
[view]
|
||||
text/plain = xemacs %f
|
||||
text/html = firefox -remote "openFile(%u)"
|
||||
#text/html = firefox -remote "openFile(%u)"
|
||||
text/html = firefox %u
|
||||
application/pdf = xpdf %f
|
||||
application/postscript = gv %f
|
||||
application/msword = openoffice-1.1.3-swriter %f
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: fstreewalk.cpp,v 1.3 2005-02-10 15:21:12 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: fstreewalk.cpp,v 1.4 2005-04-04 13:18:47 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
#ifndef TEST_FSTREEWALK
|
||||
@ -7,8 +7,10 @@ static char rcsid[] = "@(#$Id: fstreewalk.cpp,v 1.3 2005-02-10 15:21:12 dockes E
|
||||
#include <dirent.h>
|
||||
#include <sys/stat.h>
|
||||
#include <errno.h>
|
||||
#include <fnmatch.h>
|
||||
|
||||
#include <sstream>
|
||||
#include <list>
|
||||
|
||||
#include "debuglog.h"
|
||||
#include "pathut.h"
|
||||
@ -19,6 +21,7 @@ using namespace std;
|
||||
class FsTreeWalker::Internal {
|
||||
Options options;
|
||||
stringstream reason;
|
||||
list<string> skippedNames;
|
||||
int errors;
|
||||
void logsyserr(const char *call, const string ¶m)
|
||||
{
|
||||
@ -53,6 +56,18 @@ int FsTreeWalker::getErrCnt()
|
||||
return data->errors;
|
||||
}
|
||||
|
||||
bool FsTreeWalker::addSkippedName(const string& pattern)
|
||||
{
|
||||
data->skippedNames.push_back(pattern);
|
||||
return true;
|
||||
}
|
||||
|
||||
void FsTreeWalker::clearSkippedNames()
|
||||
{
|
||||
data->skippedNames.clear();
|
||||
}
|
||||
|
||||
|
||||
FsTreeWalker::Status FsTreeWalker::walk(const string &top,
|
||||
FsTreeWalkerCB& cb)
|
||||
{
|
||||
@ -94,10 +109,23 @@ FsTreeWalker::Status FsTreeWalker::walk(const string &top,
|
||||
|
||||
struct dirent *ent;
|
||||
while ((ent = readdir(d)) != 0) {
|
||||
// We do process hidden files for now
|
||||
// We do process hidden files for now, only skip . and ..
|
||||
if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, ".."))
|
||||
continue;
|
||||
|
||||
if (!data->skippedNames.empty()) {
|
||||
list<string>::const_iterator it;
|
||||
for (it = data->skippedNames.begin();
|
||||
it != data->skippedNames.end(); it++) {
|
||||
if (fnmatch(it->c_str(), ent->d_name, 0) == 0) {
|
||||
//fprintf(stderr,
|
||||
//"Skipping [%s] because of pattern match\n", ent->d_name);
|
||||
goto skip;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
string fn = top;
|
||||
path_cat(fn, ent->d_name);
|
||||
|
||||
@ -125,6 +153,10 @@ FsTreeWalker::Status FsTreeWalker::walk(const string &top,
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
skip: ;
|
||||
|
||||
// We skip other file types (devices etc...)
|
||||
}
|
||||
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
#ifndef _FSTREEWALK_H_INCLUDED_
|
||||
#define _FSTREEWALK_H_INCLUDED_
|
||||
/* @(#$Id: fstreewalk.h,v 1.2 2005-02-10 15:21:12 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: fstreewalk.h,v 1.3 2005-04-04 13:18:47 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include <string>
|
||||
|
||||
@ -22,6 +22,14 @@ class FsTreeWalker {
|
||||
Status walk(const std::string &dir, FsTreeWalkerCB& cb);
|
||||
std::string getReason();
|
||||
int getErrCnt();
|
||||
bool addSkippedName(const std::string &pattern); // Add a pattern
|
||||
// for directory
|
||||
// entries (file
|
||||
// or dir) to be
|
||||
// ignored (ie:
|
||||
// #* , *~)
|
||||
void clearSkippedNames(); // Clear all patterns
|
||||
|
||||
private:
|
||||
class Internal;
|
||||
Internal *data;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user