got rid of unused csguess module

This commit is contained in:
Jean-Francois Dockes 2012-04-06 15:14:01 +02:00
parent 1e96ee6df6
commit 9f402d33cb
10 changed files with 3 additions and 265 deletions

View File

@ -28,14 +28,6 @@ subtreelist : $(SUBTREELIST_OBJS)
# subtreelist.o: subtreelist.cpp compiled with TEST_SUBTREELIST defined
# (presumably this selects an embedded test main, as TEST_CSGUESS does for
# csguess.cpp -- TODO confirm).
subtreelist.o : subtreelist.cpp
$(CXX) $(ALL_CXXFLAGS) -DTEST_SUBTREELIST -c subtreelist.cpp
# csguess: stand-alone program linked from trcsguess.o, $(BIGLIB) and
# $(LIBICONV).
CSGUESS_OBJS= trcsguess.o $(BIGLIB)
csguess : $(CSGUESS_OBJS)
$(CXX) $(ALL_CXXFLAGS) -o csguess $(CSGUESS_OBJS) \
$(LIBICONV)
# trcsguess.o: csguess.cpp compiled with TEST_CSGUESS defined. The object gets
# a distinct name (-o trcsguess.o) instead of the default csguess.o.
trcsguess.o : csguess.cpp
$(CXX) $(ALL_CXXFLAGS) -DTEST_CSGUESS -c -o trcsguess.o \
csguess.cpp
MIMETYPE_OBJS= trmimetype.o $(BIGLIB)
mimetype : $(MIMETYPE_OBJS)
$(CXX) $(ALL_CXXFLAGS) -o mimetype $(MIMETYPE_OBJS) \

View File

@ -1,215 +0,0 @@
/* Copyright (C) 2004 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifndef TEST_CSGUESS
// This code was converted from estraier / qdbm / myconf.c:
/**************************************************************************
* Copyright (C) 2000-2004 Mikio Hirabayashi
*
* This file is part of QDBM, Quick Database Manager.
*
* QDBM is free software; you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation; either version 2.1 of the License or any later
* version. QDBM is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
* License for more details. You should have received a copy of the GNU
* Lesser General Public License along with QDBM; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
* 02111-1307 USA.
* *********************************************************/
#include <errno.h>
#include <cstring>
#include <iostream>
#ifndef NO_NAMESPACES
using std::string;
#endif /* NO_NAMESPACES */
#include <iconv.h>
#include "csguess.h"
#include "autoconfig.h"
#ifdef RCL_ICONV_INBUF_CONST
#define ICV_P2_TYPE const char**
#else
#define ICV_P2_TYPE char**
#endif
// The values from estraier were 32768, 256, 0.001
const int ICONVCHECKSIZ = 32768;
const int ICONVMISSMAX = 256;
const double ICONVALLWRAT = 0.001;
// Try to transcode and count errors (for charset guessing).
//
// Converts the first 'size' bytes at 'ptr' from charset 'icode' to charset
// 'ocode' with iconv(). The converted output is discarded; only the input
// bytes that had to be skipped because they could not be converted are
// counted.
//
// Returns the number of skipped bytes. Counting stops as soon as
// ICONVMISSMAX is reached. Returns 'size' (meaning "everything failed") if
// the converter cannot be opened or closed, or if iconv() fails with an
// unexpected error. Returns 0 if 'size' is not positive.
static int transcodeErrCnt(const char *ptr, int size,
                           const char *icode, const char *ocode)
{
    iconv_t ic;
    char obuf[2*ICONVCHECKSIZ], *wp, *rp;
    size_t isiz, osiz;
    int miss;

    // Nothing to convert. This also keeps a negative size from being turned
    // into a huge unsigned byte count by the assignment below.
    if (size <= 0)
        return 0;
    isiz = size;
    if ((ic = iconv_open(ocode, icode)) == (iconv_t)-1)
        return size;
    miss = 0;
    // iconv() wants a non-const input pointer on some platforms; the input
    // is never written to.
    rp = (char *)ptr;
    while (isiz > 0) {
        // The output is thrown away: restart at the beginning of the
        // buffer on every pass.
        osiz = sizeof(obuf);
        wp = obuf;
        if (iconv(ic, (ICV_P2_TYPE)&rp, &isiz, &wp, &osiz) == (size_t)-1) {
            if (errno == EILSEQ || errno == EINVAL) {
                // Invalid or truncated sequence: skip one input byte and
                // count it as a miss.
                rp++;
                isiz--;
                miss++;
                if (miss >= ICONVMISSMAX)
                    break;
            } else if (errno == E2BIG && osiz < sizeof(obuf)) {
                // The output buffer filled up. This is not a transcoding
                // error: go on with a fresh buffer. The osiz test makes sure
                // that some output was produced, so the loop cannot spin.
                continue;
            } else {
                miss = size;
                break;
            }
        }
    }
    if (iconv_close(ic) == -1)
        return size;
    return miss;
}
// Try to guess character encoding. This could be optimized quite a
// lot by avoiding the multiple passes on the document, to be done
// after usefulness is demonstrated...
string csguess(const string &in, const string &dflt)
{
const char *hypo;
int i, miss;
const char *text = in.c_str();
bool cr = false;
int size = in.length();
if (size > ICONVCHECKSIZ)
size = ICONVCHECKSIZ;
// UTF-16 with normal prefix ?
if (size >= 2 && (!memcmp(text, "\xfe\xff", 2) ||
!memcmp(text, "\xff\xfe", 2)))
return "UTF-16";
// If we find a zero at an appropriate position, guess it's UTF-16
// anyway. This is a quite expensive test for other texts as we'll
// have to scan the whole thing.
for (i = 0; i < size - 1; i += 2) {
if (text[i] == 0 && text[i + 1] != 0)
return "UTF-16BE";
if (text[i + 1] == 0 && text[i] != 0)
return "UTF-16LE";
}
// Look for iso-2022 (rfc1468) specific escape sequences. As
// iso-2022 begins in ascii, and typically soon escapes, these
// succeed fast for a japanese text, but are quite expensive for
// any other
for (i = 0; i < size - 3; i++) {
if (text[i] == 0x1b) {
i++;
if (text[i] == '(' && strchr("BJHI", text[i + 1]))
return "ISO-2022-JP";
if (text[i] == '$' && strchr("@B(", text[i + 1]))
return "ISO-2022-JP";
}
}
// Try conversions from ascii and utf-8. These are unlikely to succeed
// by mistake.
if (transcodeErrCnt(text, size, "US-ASCII", "UTF-16BE") < 1)
return "US-ASCII";
if (transcodeErrCnt(text, size, "UTF-8", "UTF-16BE") < 1)
return "UTF-8";
hypo = 0;
for (i = 0; i < size; i++) {
if (text[i] == 0xd) {
cr = true;
break;
}
}
if (cr) {
if ((miss = transcodeErrCnt(text, size, "Shift_JIS", "EUC-JP")) < 1)
return "Shift_JIS";
if (!hypo && miss / (double)size <= ICONVALLWRAT)
hypo = "Shift_JIS";
if ((miss = transcodeErrCnt(text, size, "EUC-JP", "UTF-16BE")) < 1)
return "EUC-JP";
if (!hypo && miss / (double)size <= ICONVALLWRAT)
hypo = "EUC-JP";
} else {
if ((miss = transcodeErrCnt(text, size, "EUC-JP", "UTF-16BE")) < 1)
return "EUC-JP";
if (!hypo && miss / (double)size <= ICONVALLWRAT)
hypo = "EUC-JP";
if ((miss = transcodeErrCnt(text, size, "Shift_JIS", "EUC-JP")) < 1)
return "Shift_JIS";
if (!hypo && miss / (double)size <= ICONVALLWRAT)
hypo = "Shift_JIS";
}
if ((miss = transcodeErrCnt(text, size, "UTF-8", "UTF-16BE")) < 1)
return "UTF-8";
if (!hypo && miss / (double)size <= ICONVALLWRAT)
hypo = "UTF-8";
if ((miss = transcodeErrCnt(text, size, "CP932", "UTF-16BE")) < 1)
return "CP932";
if (!hypo && miss / (double)size <= ICONVALLWRAT)
hypo = "CP932";
return hypo ? hypo : dflt;
}
#else
#include <errno.h>
#include <cstdlib>
#include <string>
#include <iostream>
using namespace std;
#include "readfile.h"
#include "csguess.h"
// Test driver: reads the file named by argv[1], runs csguess() on its
// contents with argv[2] as the default charset, and prints the result.
// Exits with status 1 on a usage or read error, 0 otherwise.
int main(int argc, char **argv)
{
    // Both arguments are mandatory: argv[2] is read below, so exactly two
    // arguments after the program name are required (argc == 3).
    if (argc != 3) {
        cerr << "Usage: trcsguess <filename> <default>" << endl;
        exit(1);
    }
    const string filename = argv[1];
    const string dflt = argv[2];
    string text;
    if (!file_to_string(filename, text)) {
        cerr << "Couldnt read file, errno " << errno << endl;
        exit(1);
    }
    cout << csguess(text, dflt) << endl;
    exit(0);
}
#endif

View File

@ -1,28 +0,0 @@
/* Copyright (C) 2004 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifndef _CSGUESS_H_INCLUDED_
#define _CSGUESS_H_INCLUDED_
#include <string>
// Try to guess the character set of the text in 'in'. This might guess
// unicode encodings, and some asian charsets, but has no chance, for
// example, of discriminating between the different iso8859-xx charsets.
// Returns the name of the guessed charset, or 'dflt' if no guess could be
// made.
extern std::string csguess(const std::string &in, const std::string &dflt);
#endif /* _CSGUESS_H_INCLUDED_ */

View File

@ -39,7 +39,6 @@
#include "readfile.h"
#include "indexer.h"
#include "fsindexer.h"
#include "csguess.h"
#include "transcode.h"
#include "debuglog.h"
#include "internfile.h"

View File

@ -18,7 +18,6 @@
#include "cstr.h"
#include "mimehandler.h"
#include "debuglog.h"
#include "csguess.h"
#include "readfile.h"
#include "transcode.h"
#include "mimeparse.h"

View File

@ -29,7 +29,6 @@ using namespace std;
#include "cstr.h"
#include "mh_text.h"
#include "csguess.h"
#include "debuglog.h"
#include "readfile.h"
#include "md5.h"

View File

@ -6,8 +6,8 @@ LIBS = librcl.a
all: $(LIBS)
OBJS = rclaspell.o beaglequeuecache.o cstr.o rclconfig.o rclinit.o textsplit.o unacpp.o beaglequeue.o csguess.o fsindexer.o indexer.o mimetype.o subtreelist.o htmlparse.o myhtmlparse.o mimehandler.o internfile.o mh_exec.o mh_execm.o mh_html.o mh_mail.o mh_mbox.o mh_text.o txtdcode.o docseq.o docseqdb.o docseqhist.o filtseq.o dynconf.o plaintorich.o recollq.o reslistpager.o sortseq.o wasastringtoquery.o wasatorcl.o rcldb.o rcldoc.o rclquery.o searchdata.o stemdb.o stoplist.o base64.o circache.o closefrom.o conftree.o copyfile.o debuglog.o ecrontab.o execmd.o fstreewalk.o idfile.o fileudi.o md5.o mimeparse.o netcon.o pathut.o pxattr.o rclionice.o readfile.o smallut.o transcode.o wipedir.o x11mon.o mime-getpart.o mime-parsefull.o mime-parseonlyheader.o mime-printbody.o mime-printdoc.o mime-printheader.o mime.o convert.o iodevice.o iofactory.o
DEPS = rclaspell.dep.stamp beaglequeuecache.dep.stamp cstr.dep.stamp rclconfig.dep.stamp rclinit.dep.stamp textsplit.dep.stamp unacpp.dep.stamp beaglequeue.dep.stamp csguess.dep.stamp fsindexer.dep.stamp indexer.dep.stamp mimetype.dep.stamp subtreelist.dep.stamp htmlparse.dep.stamp myhtmlparse.dep.stamp mimehandler.dep.stamp internfile.dep.stamp mh_exec.dep.stamp mh_execm.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp mh_mbox.dep.stamp mh_text.dep.stamp txtdcode.dep.stamp docseq.dep.stamp docseqdb.dep.stamp docseqhist.dep.stamp filtseq.dep.stamp dynconf.dep.stamp plaintorich.dep.stamp recollq.dep.stamp reslistpager.dep.stamp sortseq.dep.stamp wasastringtoquery.dep.stamp wasatorcl.dep.stamp rcldb.dep.stamp rcldoc.dep.stamp rclquery.dep.stamp searchdata.dep.stamp stemdb.dep.stamp stoplist.dep.stamp base64.dep.stamp circache.dep.stamp closefrom.dep.stamp conftree.dep.stamp copyfile.dep.stamp debuglog.dep.stamp ecrontab.dep.stamp execmd.dep.stamp fstreewalk.dep.stamp idfile.dep.stamp fileudi.dep.stamp md5.dep.stamp mimeparse.dep.stamp netcon.dep.stamp pathut.dep.stamp pxattr.dep.stamp rclionice.dep.stamp readfile.dep.stamp smallut.dep.stamp transcode.dep.stamp wipedir.dep.stamp x11mon.dep.stamp mime-getpart.dep.stamp mime-parsefull.dep.stamp mime-parseonlyheader.dep.stamp mime-printbody.dep.stamp mime-printdoc.dep.stamp mime-printheader.dep.stamp mime.dep.stamp convert.dep.stamp iodevice.dep.stamp iofactory.dep.stamp
OBJS = rclaspell.o beaglequeuecache.o cstr.o rclconfig.o rclinit.o textsplit.o unacpp.o beaglequeue.o fsindexer.o indexer.o mimetype.o subtreelist.o htmlparse.o myhtmlparse.o mimehandler.o internfile.o mh_exec.o mh_execm.o mh_html.o mh_mail.o mh_mbox.o mh_text.o txtdcode.o docseq.o docseqdb.o docseqhist.o filtseq.o dynconf.o plaintorich.o recollq.o reslistpager.o sortseq.o wasastringtoquery.o wasatorcl.o rcldb.o rcldoc.o rclquery.o searchdata.o stemdb.o stoplist.o base64.o circache.o closefrom.o conftree.o copyfile.o debuglog.o ecrontab.o execmd.o fstreewalk.o idfile.o fileudi.o md5.o mimeparse.o netcon.o pathut.o pxattr.o rclionice.o readfile.o smallut.o transcode.o wipedir.o x11mon.o mime-getpart.o mime-parsefull.o mime-parseonlyheader.o mime-printbody.o mime-printdoc.o mime-printheader.o mime.o convert.o iodevice.o iofactory.o
DEPS = rclaspell.dep.stamp beaglequeuecache.dep.stamp cstr.dep.stamp rclconfig.dep.stamp rclinit.dep.stamp textsplit.dep.stamp unacpp.dep.stamp beaglequeue.dep.stamp fsindexer.dep.stamp indexer.dep.stamp mimetype.dep.stamp subtreelist.dep.stamp htmlparse.dep.stamp myhtmlparse.dep.stamp mimehandler.dep.stamp internfile.dep.stamp mh_exec.dep.stamp mh_execm.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp mh_mbox.dep.stamp mh_text.dep.stamp txtdcode.dep.stamp docseq.dep.stamp docseqdb.dep.stamp docseqhist.dep.stamp filtseq.dep.stamp dynconf.dep.stamp plaintorich.dep.stamp recollq.dep.stamp reslistpager.dep.stamp sortseq.dep.stamp wasastringtoquery.dep.stamp wasatorcl.dep.stamp rcldb.dep.stamp rcldoc.dep.stamp rclquery.dep.stamp searchdata.dep.stamp stemdb.dep.stamp stoplist.dep.stamp base64.dep.stamp circache.dep.stamp closefrom.dep.stamp conftree.dep.stamp copyfile.dep.stamp debuglog.dep.stamp ecrontab.dep.stamp execmd.dep.stamp fstreewalk.dep.stamp idfile.dep.stamp fileudi.dep.stamp md5.dep.stamp mimeparse.dep.stamp netcon.dep.stamp pathut.dep.stamp pxattr.dep.stamp rclionice.dep.stamp readfile.dep.stamp smallut.dep.stamp transcode.dep.stamp wipedir.dep.stamp x11mon.dep.stamp mime-getpart.dep.stamp mime-parsefull.dep.stamp mime-parseonlyheader.dep.stamp mime-printbody.dep.stamp mime-printdoc.dep.stamp mime-printheader.dep.stamp mime.dep.stamp convert.dep.stamp iodevice.dep.stamp iofactory.dep.stamp
# Static library. Requires the dependency stamps, all objects and unac.o,
# then archives $(OBJS) and unac.o into librcl.a.
librcl.a : $(DEPS) $(OBJS) unac.o
ar ru librcl.a $(OBJS) unac.o
@ -31,8 +31,6 @@ unacpp.o : ../common/unacpp.cpp $(depth)/mk/localdefs
$(CXX) $(ALL_CXXFLAGS) -c ../common/unacpp.cpp
# Per-object compile rules. Each object lists $(depth)/mk/localdefs as a
# prerequisite in addition to its source file, so a change to localdefs
# makes the object out of date.
beaglequeue.o : ../index/beaglequeue.cpp $(depth)/mk/localdefs
$(CXX) $(ALL_CXXFLAGS) -c ../index/beaglequeue.cpp
csguess.o : ../index/csguess.cpp $(depth)/mk/localdefs
$(CXX) $(ALL_CXXFLAGS) -c ../index/csguess.cpp
fsindexer.o : ../index/fsindexer.cpp $(depth)/mk/localdefs
$(CXX) $(ALL_CXXFLAGS) -c ../index/fsindexer.cpp
indexer.o : ../index/indexer.cpp $(depth)/mk/localdefs
@ -191,9 +189,6 @@ unacpp.dep.stamp : ../common/unacpp.cpp $(depth)/mk/localdefs
# Dependency generation. Each rule writes the compiler's -M output for one
# source file to <name>.dep, then touches <name>.dep.stamp, which is the
# actual make target.
beaglequeue.dep.stamp : ../index/beaglequeue.cpp $(depth)/mk/localdefs
$(CXX) -M $(ALL_CXXFLAGS) ../index/beaglequeue.cpp > beaglequeue.dep
touch beaglequeue.dep.stamp
csguess.dep.stamp : ../index/csguess.cpp $(depth)/mk/localdefs
$(CXX) -M $(ALL_CXXFLAGS) ../index/csguess.cpp > csguess.dep
touch csguess.dep.stamp
fsindexer.dep.stamp : ../index/fsindexer.cpp $(depth)/mk/localdefs
$(CXX) -M $(ALL_CXXFLAGS) ../index/fsindexer.cpp > fsindexer.dep
touch fsindexer.dep.stamp
@ -364,7 +359,6 @@ include rclinit.dep
include textsplit.dep
include unacpp.dep
include beaglequeue.dep
include csguess.dep
include fsindexer.dep
include indexer.dep
include mimetype.dep

View File

@ -12,7 +12,6 @@ ${depth}/common/rclinit.cpp \
${depth}/common/textsplit.cpp \
${depth}/common/unacpp.cpp \
${depth}/index/beaglequeue.cpp \
${depth}/index/csguess.cpp \
${depth}/index/fsindexer.cpp \
${depth}/index/indexer.cpp \
${depth}/index/mimetype.cpp \

View File

@ -185,8 +185,6 @@ index/
index/Makefile
index/beaglequeue.cpp
index/beaglequeue.h
index/csguess.cpp
index/csguess.h
index/fsindexer.cpp
index/fsindexer.h
index/indexer.cpp

View File

@ -52,6 +52,7 @@ application/vnd.sun.xml.writer.template = libreoffice %f
application/vnd.wordperfect = libreoffice %f
application/x-chm = kchmviewer %f
application/x-dia-diagram = dia %f
application/x-fsdirectory = dolphin %f
application/x-gnuinfo = xterm -e "info -f %f"
application/x-gnumeric = gnumeric %f