got rid of unused csguess module
This commit is contained in:
parent
1e96ee6df6
commit
9f402d33cb
@ -28,14 +28,6 @@ subtreelist : $(SUBTREELIST_OBJS)
|
||||
subtreelist.o : subtreelist.cpp
|
||||
$(CXX) $(ALL_CXXFLAGS) -DTEST_SUBTREELIST -c subtreelist.cpp
|
||||
|
||||
CSGUESS_OBJS= trcsguess.o $(BIGLIB)
|
||||
csguess : $(CSGUESS_OBJS)
|
||||
$(CXX) $(ALL_CXXFLAGS) -o csguess $(CSGUESS_OBJS) \
|
||||
$(LIBICONV)
|
||||
trcsguess.o : csguess.cpp
|
||||
$(CXX) $(ALL_CXXFLAGS) -DTEST_CSGUESS -c -o trcsguess.o \
|
||||
csguess.cpp
|
||||
|
||||
MIMETYPE_OBJS= trmimetype.o $(BIGLIB)
|
||||
mimetype : $(MIMETYPE_OBJS)
|
||||
$(CXX) $(ALL_CXXFLAGS) -o mimetype $(MIMETYPE_OBJS) \
|
||||
|
||||
@ -1,215 +0,0 @@
|
||||
/* Copyright (C) 2004 J.F.Dockes
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc.,
|
||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#ifndef TEST_CSGUESS
|
||||
|
||||
// This code was converted from estraier / qdbm / myconf.c:
|
||||
|
||||
/**************************************************************************
|
||||
* Copyright (C) 2000-2004 Mikio Hirabayashi
|
||||
*
|
||||
* This file is part of QDBM, Quick Database Manager.
|
||||
*
|
||||
* QDBM is free software; you can redistribute it and/or modify it under the
|
||||
* terms of the GNU Lesser General Public License as published by the Free
|
||||
* Software Foundation; either version 2.1 of the License or any later
|
||||
* version. QDBM is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
* License for more details. You should have received a copy of the GNU
|
||||
* Lesser General Public License along with QDBM; if not, write to the Free
|
||||
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
* 02111-1307 USA.
|
||||
* *********************************************************/
|
||||
|
||||
#include <errno.h>
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
|
||||
#ifndef NO_NAMESPACES
|
||||
using std::string;
|
||||
#endif /* NO_NAMESPACES */
|
||||
|
||||
#include <iconv.h>
|
||||
|
||||
#include "csguess.h"
|
||||
#include "autoconfig.h"
|
||||
#ifdef RCL_ICONV_INBUF_CONST
|
||||
#define ICV_P2_TYPE const char**
|
||||
#else
|
||||
#define ICV_P2_TYPE char**
|
||||
#endif
|
||||
|
||||
// Tuning constants for charset guessing.
// The values from estraier were 32768, 256, 0.001
|
||||
// Maximum number of input bytes examined by csguess(); the transcoding
// scratch buffer in transcodeErrCnt() is sized as twice this value.
const int ICONVCHECKSIZ = 32768;
|
||||
// transcodeErrCnt() stops counting once this many conversion errors
// have been seen.
const int ICONVMISSMAX = 256;
|
||||
// Largest error/size ratio for which csguess() still keeps an encoding
// as a fallback hypothesis.
const double ICONVALLWRAT = 0.001;
|
||||
|
||||
// Try to transcode and count errors (for charset guessing)
|
||||
static int transcodeErrCnt(const char *ptr, int size,
|
||||
const char *icode, const char *ocode)
|
||||
{
|
||||
iconv_t ic;
|
||||
char obuf[2*ICONVCHECKSIZ], *wp, *rp;
|
||||
size_t isiz, osiz;
|
||||
int miss;
|
||||
isiz = size;
|
||||
if((ic = iconv_open(ocode, icode)) == (iconv_t)-1)
|
||||
return size;
|
||||
miss = 0;
|
||||
rp = (char *)ptr;
|
||||
while(isiz > 0){
|
||||
osiz = 2*ICONVCHECKSIZ;
|
||||
wp = obuf;
|
||||
if(iconv(ic, (ICV_P2_TYPE)&rp, &isiz, &wp, &osiz) == (size_t)-1){
|
||||
if(errno == EILSEQ || errno == EINVAL){
|
||||
rp++;
|
||||
isiz--;
|
||||
miss++;
|
||||
if(miss >= ICONVMISSMAX)
|
||||
break;
|
||||
} else {
|
||||
miss = size;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if(iconv_close(ic) == -1)
|
||||
return size;
|
||||
return miss;
|
||||
}
|
||||
|
||||
// Try to guess character encoding. This could be optimized quite a
|
||||
// lot by avoiding the multiple passes on the document, to be done
|
||||
// after usefulness is demonstrated...
|
||||
string csguess(const string &in, const string &dflt)
|
||||
{
|
||||
const char *hypo;
|
||||
int i, miss;
|
||||
const char *text = in.c_str();
|
||||
bool cr = false;
|
||||
|
||||
int size = in.length();
|
||||
if (size > ICONVCHECKSIZ)
|
||||
size = ICONVCHECKSIZ;
|
||||
|
||||
// UTF-16 with normal prefix ?
|
||||
if (size >= 2 && (!memcmp(text, "\xfe\xff", 2) ||
|
||||
!memcmp(text, "\xff\xfe", 2)))
|
||||
return "UTF-16";
|
||||
|
||||
// If we find a zero at an appropriate position, guess it's UTF-16
|
||||
// anyway. This is a quite expensive test for other texts as we'll
|
||||
// have to scan the whole thing.
|
||||
for (i = 0; i < size - 1; i += 2) {
|
||||
if (text[i] == 0 && text[i + 1] != 0)
|
||||
return "UTF-16BE";
|
||||
if (text[i + 1] == 0 && text[i] != 0)
|
||||
return "UTF-16LE";
|
||||
}
|
||||
|
||||
// Look for iso-2022 (rfc1468) specific escape sequences. As
|
||||
// iso-2022 begins in ascii, and typically soon escapes, these
|
||||
// succeed fast for a japanese text, but are quite expensive for
|
||||
// any other
|
||||
for (i = 0; i < size - 3; i++) {
|
||||
if (text[i] == 0x1b) {
|
||||
i++;
|
||||
if (text[i] == '(' && strchr("BJHI", text[i + 1]))
|
||||
return "ISO-2022-JP";
|
||||
if (text[i] == '$' && strchr("@B(", text[i + 1]))
|
||||
return "ISO-2022-JP";
|
||||
}
|
||||
}
|
||||
|
||||
// Try conversions from ascii and utf-8. These are unlikely to succeed
|
||||
// by mistake.
|
||||
if (transcodeErrCnt(text, size, "US-ASCII", "UTF-16BE") < 1)
|
||||
return "US-ASCII";
|
||||
|
||||
if (transcodeErrCnt(text, size, "UTF-8", "UTF-16BE") < 1)
|
||||
return "UTF-8";
|
||||
|
||||
hypo = 0;
|
||||
for (i = 0; i < size; i++) {
|
||||
if (text[i] == 0xd) {
|
||||
cr = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (cr) {
|
||||
if ((miss = transcodeErrCnt(text, size, "Shift_JIS", "EUC-JP")) < 1)
|
||||
return "Shift_JIS";
|
||||
if (!hypo && miss / (double)size <= ICONVALLWRAT)
|
||||
hypo = "Shift_JIS";
|
||||
if ((miss = transcodeErrCnt(text, size, "EUC-JP", "UTF-16BE")) < 1)
|
||||
return "EUC-JP";
|
||||
if (!hypo && miss / (double)size <= ICONVALLWRAT)
|
||||
hypo = "EUC-JP";
|
||||
} else {
|
||||
if ((miss = transcodeErrCnt(text, size, "EUC-JP", "UTF-16BE")) < 1)
|
||||
return "EUC-JP";
|
||||
if (!hypo && miss / (double)size <= ICONVALLWRAT)
|
||||
hypo = "EUC-JP";
|
||||
if ((miss = transcodeErrCnt(text, size, "Shift_JIS", "EUC-JP")) < 1)
|
||||
return "Shift_JIS";
|
||||
if (!hypo && miss / (double)size <= ICONVALLWRAT)
|
||||
hypo = "Shift_JIS";
|
||||
}
|
||||
if ((miss = transcodeErrCnt(text, size, "UTF-8", "UTF-16BE")) < 1)
|
||||
return "UTF-8";
|
||||
if (!hypo && miss / (double)size <= ICONVALLWRAT)
|
||||
hypo = "UTF-8";
|
||||
if ((miss = transcodeErrCnt(text, size, "CP932", "UTF-16BE")) < 1)
|
||||
return "CP932";
|
||||
if (!hypo && miss / (double)size <= ICONVALLWRAT)
|
||||
hypo = "CP932";
|
||||
|
||||
return hypo ? hypo : dflt;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#include <errno.h>
|
||||
|
||||
#include <cstdlib>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
|
||||
using namespace std;
|
||||
|
||||
#include "readfile.h"
|
||||
#include "csguess.h"
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
if (argc != 2) {
|
||||
cerr << "Usage: trcsguess <filename> <default>" << endl;
|
||||
exit(1);
|
||||
}
|
||||
const string filename = argv[1];
|
||||
const string dflt = argv[2];
|
||||
string text;
|
||||
if (!file_to_string(filename, text)) {
|
||||
cerr << "Couldnt read file, errno " << errno << endl;
|
||||
exit(1);
|
||||
}
|
||||
cout << csguess(text, dflt) << endl;
|
||||
exit(0);
|
||||
}
|
||||
#endif
|
||||
@ -1,28 +0,0 @@
|
||||
/* Copyright (C) 2004 J.F.Dockes
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc.,
|
||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
#ifndef _CSGUESS_H_INCLUDED_
|
||||
#define _CSGUESS_H_INCLUDED_
|
||||
|
||||
#include <string>
|
||||
|
||||
|
||||
// Try to guess the character set. This might guess unicode encodings, and
|
||||
// some asian charsets, but has no chance, for example, of discriminating
|
||||
// between the different iso8859-xx charsets.
|
||||
extern std::string csguess(const std::string &in, const std::string &dflt);
|
||||
|
||||
#endif /* _CSGUESS_H_INCLUDED_ */
|
||||
@ -39,7 +39,6 @@
|
||||
#include "readfile.h"
|
||||
#include "indexer.h"
|
||||
#include "fsindexer.h"
|
||||
#include "csguess.h"
|
||||
#include "transcode.h"
|
||||
#include "debuglog.h"
|
||||
#include "internfile.h"
|
||||
|
||||
@ -18,7 +18,6 @@
|
||||
#include "cstr.h"
|
||||
#include "mimehandler.h"
|
||||
#include "debuglog.h"
|
||||
#include "csguess.h"
|
||||
#include "readfile.h"
|
||||
#include "transcode.h"
|
||||
#include "mimeparse.h"
|
||||
|
||||
@ -29,7 +29,6 @@ using namespace std;
|
||||
|
||||
#include "cstr.h"
|
||||
#include "mh_text.h"
|
||||
#include "csguess.h"
|
||||
#include "debuglog.h"
|
||||
#include "readfile.h"
|
||||
#include "md5.h"
|
||||
|
||||
@ -6,8 +6,8 @@ LIBS = librcl.a
|
||||
|
||||
all: $(LIBS)
|
||||
|
||||
OBJS = rclaspell.o beaglequeuecache.o cstr.o rclconfig.o rclinit.o textsplit.o unacpp.o beaglequeue.o csguess.o fsindexer.o indexer.o mimetype.o subtreelist.o htmlparse.o myhtmlparse.o mimehandler.o internfile.o mh_exec.o mh_execm.o mh_html.o mh_mail.o mh_mbox.o mh_text.o txtdcode.o docseq.o docseqdb.o docseqhist.o filtseq.o dynconf.o plaintorich.o recollq.o reslistpager.o sortseq.o wasastringtoquery.o wasatorcl.o rcldb.o rcldoc.o rclquery.o searchdata.o stemdb.o stoplist.o base64.o circache.o closefrom.o conftree.o copyfile.o debuglog.o ecrontab.o execmd.o fstreewalk.o idfile.o fileudi.o md5.o mimeparse.o netcon.o pathut.o pxattr.o rclionice.o readfile.o smallut.o transcode.o wipedir.o x11mon.o mime-getpart.o mime-parsefull.o mime-parseonlyheader.o mime-printbody.o mime-printdoc.o mime-printheader.o mime.o convert.o iodevice.o iofactory.o
|
||||
DEPS = rclaspell.dep.stamp beaglequeuecache.dep.stamp cstr.dep.stamp rclconfig.dep.stamp rclinit.dep.stamp textsplit.dep.stamp unacpp.dep.stamp beaglequeue.dep.stamp csguess.dep.stamp fsindexer.dep.stamp indexer.dep.stamp mimetype.dep.stamp subtreelist.dep.stamp htmlparse.dep.stamp myhtmlparse.dep.stamp mimehandler.dep.stamp internfile.dep.stamp mh_exec.dep.stamp mh_execm.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp mh_mbox.dep.stamp mh_text.dep.stamp txtdcode.dep.stamp docseq.dep.stamp docseqdb.dep.stamp docseqhist.dep.stamp filtseq.dep.stamp dynconf.dep.stamp plaintorich.dep.stamp recollq.dep.stamp reslistpager.dep.stamp sortseq.dep.stamp wasastringtoquery.dep.stamp wasatorcl.dep.stamp rcldb.dep.stamp rcldoc.dep.stamp rclquery.dep.stamp searchdata.dep.stamp stemdb.dep.stamp stoplist.dep.stamp base64.dep.stamp circache.dep.stamp closefrom.dep.stamp conftree.dep.stamp copyfile.dep.stamp debuglog.dep.stamp ecrontab.dep.stamp execmd.dep.stamp fstreewalk.dep.stamp idfile.dep.stamp fileudi.dep.stamp md5.dep.stamp mimeparse.dep.stamp netcon.dep.stamp pathut.dep.stamp pxattr.dep.stamp rclionice.dep.stamp readfile.dep.stamp smallut.dep.stamp transcode.dep.stamp wipedir.dep.stamp x11mon.dep.stamp mime-getpart.dep.stamp mime-parsefull.dep.stamp mime-parseonlyheader.dep.stamp mime-printbody.dep.stamp mime-printdoc.dep.stamp mime-printheader.dep.stamp mime.dep.stamp convert.dep.stamp iodevice.dep.stamp iofactory.dep.stamp
|
||||
OBJS = rclaspell.o beaglequeuecache.o cstr.o rclconfig.o rclinit.o textsplit.o unacpp.o beaglequeue.o fsindexer.o indexer.o mimetype.o subtreelist.o htmlparse.o myhtmlparse.o mimehandler.o internfile.o mh_exec.o mh_execm.o mh_html.o mh_mail.o mh_mbox.o mh_text.o txtdcode.o docseq.o docseqdb.o docseqhist.o filtseq.o dynconf.o plaintorich.o recollq.o reslistpager.o sortseq.o wasastringtoquery.o wasatorcl.o rcldb.o rcldoc.o rclquery.o searchdata.o stemdb.o stoplist.o base64.o circache.o closefrom.o conftree.o copyfile.o debuglog.o ecrontab.o execmd.o fstreewalk.o idfile.o fileudi.o md5.o mimeparse.o netcon.o pathut.o pxattr.o rclionice.o readfile.o smallut.o transcode.o wipedir.o x11mon.o mime-getpart.o mime-parsefull.o mime-parseonlyheader.o mime-printbody.o mime-printdoc.o mime-printheader.o mime.o convert.o iodevice.o iofactory.o
|
||||
DEPS = rclaspell.dep.stamp beaglequeuecache.dep.stamp cstr.dep.stamp rclconfig.dep.stamp rclinit.dep.stamp textsplit.dep.stamp unacpp.dep.stamp beaglequeue.dep.stamp fsindexer.dep.stamp indexer.dep.stamp mimetype.dep.stamp subtreelist.dep.stamp htmlparse.dep.stamp myhtmlparse.dep.stamp mimehandler.dep.stamp internfile.dep.stamp mh_exec.dep.stamp mh_execm.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp mh_mbox.dep.stamp mh_text.dep.stamp txtdcode.dep.stamp docseq.dep.stamp docseqdb.dep.stamp docseqhist.dep.stamp filtseq.dep.stamp dynconf.dep.stamp plaintorich.dep.stamp recollq.dep.stamp reslistpager.dep.stamp sortseq.dep.stamp wasastringtoquery.dep.stamp wasatorcl.dep.stamp rcldb.dep.stamp rcldoc.dep.stamp rclquery.dep.stamp searchdata.dep.stamp stemdb.dep.stamp stoplist.dep.stamp base64.dep.stamp circache.dep.stamp closefrom.dep.stamp conftree.dep.stamp copyfile.dep.stamp debuglog.dep.stamp ecrontab.dep.stamp execmd.dep.stamp fstreewalk.dep.stamp idfile.dep.stamp fileudi.dep.stamp md5.dep.stamp mimeparse.dep.stamp netcon.dep.stamp pathut.dep.stamp pxattr.dep.stamp rclionice.dep.stamp readfile.dep.stamp smallut.dep.stamp transcode.dep.stamp wipedir.dep.stamp x11mon.dep.stamp mime-getpart.dep.stamp mime-parsefull.dep.stamp mime-parseonlyheader.dep.stamp mime-printbody.dep.stamp mime-printdoc.dep.stamp mime-printheader.dep.stamp mime.dep.stamp convert.dep.stamp iodevice.dep.stamp iofactory.dep.stamp
|
||||
|
||||
librcl.a : $(DEPS) $(OBJS) unac.o
|
||||
ar ru librcl.a $(OBJS) unac.o
|
||||
@ -31,8 +31,6 @@ unacpp.o : ../common/unacpp.cpp $(depth)/mk/localdefs
|
||||
$(CXX) $(ALL_CXXFLAGS) -c ../common/unacpp.cpp
|
||||
beaglequeue.o : ../index/beaglequeue.cpp $(depth)/mk/localdefs
|
||||
$(CXX) $(ALL_CXXFLAGS) -c ../index/beaglequeue.cpp
|
||||
csguess.o : ../index/csguess.cpp $(depth)/mk/localdefs
|
||||
$(CXX) $(ALL_CXXFLAGS) -c ../index/csguess.cpp
|
||||
fsindexer.o : ../index/fsindexer.cpp $(depth)/mk/localdefs
|
||||
$(CXX) $(ALL_CXXFLAGS) -c ../index/fsindexer.cpp
|
||||
indexer.o : ../index/indexer.cpp $(depth)/mk/localdefs
|
||||
@ -191,9 +189,6 @@ unacpp.dep.stamp : ../common/unacpp.cpp $(depth)/mk/localdefs
|
||||
beaglequeue.dep.stamp : ../index/beaglequeue.cpp $(depth)/mk/localdefs
|
||||
$(CXX) -M $(ALL_CXXFLAGS) ../index/beaglequeue.cpp > beaglequeue.dep
|
||||
touch beaglequeue.dep.stamp
|
||||
csguess.dep.stamp : ../index/csguess.cpp $(depth)/mk/localdefs
|
||||
$(CXX) -M $(ALL_CXXFLAGS) ../index/csguess.cpp > csguess.dep
|
||||
touch csguess.dep.stamp
|
||||
fsindexer.dep.stamp : ../index/fsindexer.cpp $(depth)/mk/localdefs
|
||||
$(CXX) -M $(ALL_CXXFLAGS) ../index/fsindexer.cpp > fsindexer.dep
|
||||
touch fsindexer.dep.stamp
|
||||
@ -364,7 +359,6 @@ include rclinit.dep
|
||||
include textsplit.dep
|
||||
include unacpp.dep
|
||||
include beaglequeue.dep
|
||||
include csguess.dep
|
||||
include fsindexer.dep
|
||||
include indexer.dep
|
||||
include mimetype.dep
|
||||
|
||||
@ -12,7 +12,6 @@ ${depth}/common/rclinit.cpp \
|
||||
${depth}/common/textsplit.cpp \
|
||||
${depth}/common/unacpp.cpp \
|
||||
${depth}/index/beaglequeue.cpp \
|
||||
${depth}/index/csguess.cpp \
|
||||
${depth}/index/fsindexer.cpp \
|
||||
${depth}/index/indexer.cpp \
|
||||
${depth}/index/mimetype.cpp \
|
||||
|
||||
@ -185,8 +185,6 @@ index/
|
||||
index/Makefile
|
||||
index/beaglequeue.cpp
|
||||
index/beaglequeue.h
|
||||
index/csguess.cpp
|
||||
index/csguess.h
|
||||
index/fsindexer.cpp
|
||||
index/fsindexer.h
|
||||
index/indexer.cpp
|
||||
|
||||
@ -52,6 +52,7 @@ application/vnd.sun.xml.writer.template = libreoffice %f
|
||||
application/vnd.wordperfect = libreoffice %f
|
||||
|
||||
application/x-chm = kchmviewer %f
|
||||
application/x-dia-diagram = dia %f
|
||||
application/x-fsdirectory = dolphin %f
|
||||
application/x-gnuinfo = xterm -e "info -f %f"
|
||||
application/x-gnumeric = gnumeric %f
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user