improve transcode error printing
This commit is contained in:
parent
441820d1ef
commit
c5ebe00247
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.55 2007-05-22 07:40:00 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.56 2007-05-30 12:31:19 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -394,8 +394,14 @@ DbIndexer::processone(const std::string &fn, const struct stat *stp,
|
||||
// If this fails, the file name won't be indexed, no big deal
|
||||
// Note that we used to do the full path here, but I ended up believing
|
||||
// that it made more sense to use only the file name
|
||||
string utf8fn;
|
||||
transcode(path_getsimple(fn), utf8fn, charset, "UTF-8");
|
||||
string utf8fn; int ercnt;
|
||||
if (!transcode(path_getsimple(fn), utf8fn, charset, "UTF-8", &ercnt)) {
|
||||
LOGERR(("processone: fn transcode failure from [%s] to UTF-8: %s\n",
|
||||
charset.c_str(), path_getsimple(fn).c_str()));
|
||||
} else if (ercnt) {
|
||||
LOGDEB(("processone: fn transcode %d errors from [%s] to UTF-8: %s\n",
|
||||
ercnt, charset.c_str(), path_getsimple(fn).c_str()));
|
||||
}
|
||||
|
||||
FileInterner::Status fis = FileInterner::FIAgain;
|
||||
bool hadNullIpath = false;
|
||||
|
||||
@ -49,6 +49,7 @@ bool MimeHandlerHtml::set_document_file(const string &fn)
|
||||
LOGINFO(("textHtmlToDoc: cant read: %s\n", fn.c_str()));
|
||||
return false;
|
||||
}
|
||||
m_filename = fn;
|
||||
return set_document_string(otext);
|
||||
}
|
||||
|
||||
@ -64,8 +65,13 @@ bool MimeHandlerHtml::next_document()
|
||||
if (m_havedoc == false)
|
||||
return false;
|
||||
m_havedoc = false;
|
||||
// If set_doc(fn), take note of file name.
|
||||
string fn = m_filename;
|
||||
m_filename.erase();
|
||||
|
||||
string charset = m_defcharset;
|
||||
LOGDEB(("textHtmlToDoc: next_document. defcharset: %s\n",charset.c_str()));
|
||||
LOGDEB(("textHtmlToDoc: next_document. defcharset: %s\n",
|
||||
charset.c_str()));
|
||||
|
||||
// - We first try to convert from the default configured charset
|
||||
// (which may depend on the current directory) to utf-8. If this
|
||||
@ -82,13 +88,23 @@ bool MimeHandlerHtml::next_document()
|
||||
LOGDEB(("Html::mkDoc: pass %d\n", pass));
|
||||
MyHtmlParser p;
|
||||
// Try transcoding. If it fails, use original text.
|
||||
if (!transcode(m_html, transcoded, charset, "UTF-8")) {
|
||||
LOGERR(("textHtmlToDoc: transcode failed from cs '%s' to UTF-8\n",
|
||||
charset.c_str()));
|
||||
int ecnt;
|
||||
if (!transcode(m_html, transcoded, charset, "UTF-8", &ecnt)) {
|
||||
LOGDEB(("textHtmlToDoc: transcode failed from cs '%s' to UTF-8 for"
|
||||
"[%s]", charset.c_str(), fn.empty()?"unknown":fn.c_str()));
|
||||
transcoded = m_html;
|
||||
// We don't know the charset, at all
|
||||
p.ocharset = p.charset = charset = "";
|
||||
} else {
|
||||
if (ecnt) {
|
||||
if (pass == 0) {
|
||||
LOGDEB(("textHtmlToDoc: init transcode had %d errors for "
|
||||
"[%s]", ecnt, fn.empty()?"unknown":fn.c_str()));
|
||||
} else {
|
||||
LOGERR(("textHtmlToDoc: final transcode had %d errors for "
|
||||
"[%s]", ecnt, fn.empty()?"unknown":fn.c_str()));
|
||||
}
|
||||
}
|
||||
// ocharset has the putative source charset, transcoded is now
|
||||
// in utf-8
|
||||
p.ocharset = charset;
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
*/
|
||||
#ifndef _HTML_H_INCLUDED_
|
||||
#define _HTML_H_INCLUDED_
|
||||
/* @(#$Id: mh_html.h,v 1.9 2006-12-16 15:39:54 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: mh_html.h,v 1.10 2007-05-30 12:31:19 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include <string>
|
||||
|
||||
@ -38,6 +38,7 @@ class MimeHandlerHtml : public RecollFilter {
|
||||
}
|
||||
virtual bool next_document();
|
||||
private:
|
||||
string m_filename;
|
||||
string m_html;
|
||||
};
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: plaintorich.cpp,v 1.21 2007-05-23 09:19:48 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: plaintorich.cpp,v 1.22 2007-05-30 12:31:19 dockes Exp $ (C) 2005 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -39,7 +39,6 @@ using std::set;
|
||||
#include "debuglog.h"
|
||||
#include "textsplit.h"
|
||||
#include "utf8iter.h"
|
||||
#include "transcode.h"
|
||||
#include "smallut.h"
|
||||
#include "plaintorich.h"
|
||||
#include "cancelcheck.h"
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: xadump.cpp,v 1.14 2007-01-13 14:41:40 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: xadump.cpp,v 1.15 2007-05-30 12:31:19 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -24,8 +24,6 @@ static char rcsid[] = "@(#$Id: xadump.cpp,v 1.14 2007-01-13 14:41:40 dockes Exp
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "transcode.h"
|
||||
|
||||
#ifndef NO_NAMESPACES
|
||||
using namespace std;
|
||||
#endif /* NO_NAMESPACES */
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: transcode.cpp,v 1.9 2006-11-20 15:29:08 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: transcode.cpp,v 1.10 2007-05-30 12:31:19 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
/*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
@ -100,9 +100,8 @@ bool transcode(const string &in, string &out, const string &icode,
|
||||
error:
|
||||
if (icopen)
|
||||
iconv_close(ic);
|
||||
//fprintf(stderr, "TRANSCODE OUT:\n%s\n", out.c_str());
|
||||
if (mecnt)
|
||||
LOGINFO(("transcode: [%s]->[%s] %d errors\n",
|
||||
LOGDEB(("transcode: [%s]->[%s] %d errors\n",
|
||||
icode.c_str(), ocode.c_str(), mecnt));
|
||||
if (ecnt)
|
||||
*ecnt = mecnt;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user