diff --git a/src/index/Makefile b/src/index/Makefile index 7c24c36b..04c983d9 100644 --- a/src/index/Makefile +++ b/src/index/Makefile @@ -28,14 +28,6 @@ subtreelist : $(SUBTREELIST_OBJS) subtreelist.o : subtreelist.cpp $(CXX) $(ALL_CXXFLAGS) -DTEST_SUBTREELIST -c subtreelist.cpp -CSGUESS_OBJS= trcsguess.o $(BIGLIB) -csguess : $(CSGUESS_OBJS) - $(CXX) $(ALL_CXXFLAGS) -o csguess $(CSGUESS_OBJS) \ - $(LIBICONV) -trcsguess.o : csguess.cpp - $(CXX) $(ALL_CXXFLAGS) -DTEST_CSGUESS -c -o trcsguess.o \ - csguess.cpp - MIMETYPE_OBJS= trmimetype.o $(BIGLIB) mimetype : $(MIMETYPE_OBJS) $(CXX) $(ALL_CXXFLAGS) -o mimetype $(MIMETYPE_OBJS) \ diff --git a/src/index/csguess.cpp b/src/index/csguess.cpp deleted file mode 100644 index 28da1ed1..00000000 --- a/src/index/csguess.cpp +++ /dev/null @@ -1,215 +0,0 @@ -/* Copyright (C) 2004 J.F.Dockes - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - */ - -#ifndef TEST_CSGUESS - -// This code was converted from estraier / qdbm / myconf.c: - -/************************************************************************** - * Copyright (C) 2000-2004 Mikio Hirabayashi - * - * This file is part of QDBM, Quick Database Manager. - * - * QDBM is free software; you can redistribute it and/or modify it under the - * terms of the GNU Lesser General Public License as published by the Free - * Software Foundation; either version 2.1 of the License or any later - * version. QDBM is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public - * License for more details. You should have received a copy of the GNU - * Lesser General Public License along with QDBM; if not, write to the Free - * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - * 02111-1307 USA. - * *********************************************************/ - -#include -#include -#include - -#ifndef NO_NAMESPACES -using std::string; -#endif /* NO_NAMESPACES */ - -#include - -#include "csguess.h" -#include "autoconfig.h" -#ifdef RCL_ICONV_INBUF_CONST -#define ICV_P2_TYPE const char** -#else -#define ICV_P2_TYPE char** -#endif - -// The values from estraier were 32768, 256, 0.001 -const int ICONVCHECKSIZ = 32768; -const int ICONVMISSMAX = 256; -const double ICONVALLWRAT = 0.001; - -// Try to transcode and count errors (for charset guessing) -static int transcodeErrCnt(const char *ptr, int size, - const char *icode, const char *ocode) -{ - iconv_t ic; - char obuf[2*ICONVCHECKSIZ], *wp, *rp; - size_t isiz, osiz; - int miss; - isiz = size; - if((ic = iconv_open(ocode, icode)) == (iconv_t)-1) - return size; - miss = 0; - rp = (char *)ptr; - while(isiz > 0){ - osiz = 2*ICONVCHECKSIZ; - wp = obuf; - if(iconv(ic, (ICV_P2_TYPE)&rp, &isiz, &wp, &osiz) == (size_t)-1){ - if(errno == EILSEQ || errno == EINVAL){ - rp++; - isiz--; - miss++; - if(miss >= ICONVMISSMAX) - break; - } else { - miss = size; - break; - } - } - } - if(iconv_close(ic) == -1) - return size; - return miss; -} - -// Try to guess character encoding. This could be optimized quite a -// lot by avoiding the multiple passes on the document, to be done -// after usefulness is demonstrated... -string csguess(const string &in, const string &dflt) -{ - const char *hypo; - int i, miss; - const char *text = in.c_str(); - bool cr = false; - - int size = in.length(); - if (size > ICONVCHECKSIZ) - size = ICONVCHECKSIZ; - - // UTF-16 with normal prefix ? - if (size >= 2 && (!memcmp(text, "\xfe\xff", 2) || - !memcmp(text, "\xff\xfe", 2))) - return "UTF-16"; - - // If we find a zero at an appropriate position, guess it's UTF-16 - // anyway. This is a quite expensive test for other texts as we'll - // have to scan the whole thing. - for (i = 0; i < size - 1; i += 2) { - if (text[i] == 0 && text[i + 1] != 0) - return "UTF-16BE"; - if (text[i + 1] == 0 && text[i] != 0) - return "UTF-16LE"; - } - - // Look for iso-2022 (rfc1468) specific escape sequences. As - // iso-2022 begins in ascii, and typically soon escapes, these - // succeed fast for a japanese text, but are quite expensive for - // any other - for (i = 0; i < size - 3; i++) { - if (text[i] == 0x1b) { - i++; - if (text[i] == '(' && strchr("BJHI", text[i + 1])) - return "ISO-2022-JP"; - if (text[i] == '$' && strchr("@B(", text[i + 1])) - return "ISO-2022-JP"; - } - } - - // Try conversions from ascii and utf-8. These are unlikely to succeed - // by mistake. - if (transcodeErrCnt(text, size, "US-ASCII", "UTF-16BE") < 1) - return "US-ASCII"; - - if (transcodeErrCnt(text, size, "UTF-8", "UTF-16BE") < 1) - return "UTF-8"; - - hypo = 0; - for (i = 0; i < size; i++) { - if (text[i] == 0xd) { - cr = true; - break; - } - } - - if (cr) { - if ((miss = transcodeErrCnt(text, size, "Shift_JIS", "EUC-JP")) < 1) - return "Shift_JIS"; - if (!hypo && miss / (double)size <= ICONVALLWRAT) - hypo = "Shift_JIS"; - if ((miss = transcodeErrCnt(text, size, "EUC-JP", "UTF-16BE")) < 1) - return "EUC-JP"; - if (!hypo && miss / (double)size <= ICONVALLWRAT) - hypo = "EUC-JP"; - } else { - if ((miss = transcodeErrCnt(text, size, "EUC-JP", "UTF-16BE")) < 1) - return "EUC-JP"; - if (!hypo && miss / (double)size <= ICONVALLWRAT) - hypo = "EUC-JP"; - if ((miss = transcodeErrCnt(text, size, "Shift_JIS", "EUC-JP")) < 1) - return "Shift_JIS"; - if (!hypo && miss / (double)size <= ICONVALLWRAT) - hypo = "Shift_JIS"; - } - if ((miss = transcodeErrCnt(text, size, "UTF-8", "UTF-16BE")) < 1) - return "UTF-8"; - if (!hypo && miss / (double)size <= ICONVALLWRAT) - hypo = "UTF-8"; - if ((miss = transcodeErrCnt(text, size, "CP932", "UTF-16BE")) < 1) - return "CP932"; - if (!hypo && miss / (double)size <= ICONVALLWRAT) - hypo = "CP932"; - - return hypo ? hypo : dflt; -} - -#else - -#include - -#include -#include -#include - -using namespace std; - -#include "readfile.h" -#include "csguess.h" - -int main(int argc, char **argv) -{ - if (argc != 2) { - cerr << "Usage: trcsguess " << endl; - exit(1); - } - const string filename = argv[1]; - const string dflt = argv[2]; - string text; - if (!file_to_string(filename, text)) { - cerr << "Couldnt read file, errno " << errno << endl; - exit(1); - } - cout << csguess(text, dflt) << endl; - exit(0); -} -#endif diff --git a/src/index/csguess.h b/src/index/csguess.h deleted file mode 100644 index 34176b98..00000000 --- a/src/index/csguess.h +++ /dev/null @@ -1,28 +0,0 @@ -/* Copyright (C) 2004 J.F.Dockes - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - */ -#ifndef _CSGUESS_H_INCLUDED_ -#define _CSGUESS_H_INCLUDED_ - -#include - - -// Try to guess the character set. This might guess unicode encodings, and -// some asian charsets, but has no chance, for example, of discriminating -// betweeen the different iso8859-xx charsets. -extern std::string csguess(const std::string &in, const std::string &dflt); - -#endif /* _CSGUESS_H_INCLUDED_ */ diff --git a/src/index/fsindexer.cpp b/src/index/fsindexer.cpp index 9cd4715f..3c4e7519 100644 --- a/src/index/fsindexer.cpp +++ b/src/index/fsindexer.cpp @@ -39,7 +39,6 @@ #include "readfile.h" #include "indexer.h" #include "fsindexer.h" -#include "csguess.h" #include "transcode.h" #include "debuglog.h" #include "internfile.h" diff --git a/src/internfile/mh_html.cpp b/src/internfile/mh_html.cpp index e8e07d05..31615397 100644 --- a/src/internfile/mh_html.cpp +++ b/src/internfile/mh_html.cpp @@ -18,7 +18,6 @@ #include "cstr.h" #include "mimehandler.h" #include "debuglog.h" -#include "csguess.h" #include "readfile.h" #include "transcode.h" #include "mimeparse.h" diff --git a/src/internfile/mh_text.cpp b/src/internfile/mh_text.cpp index a9b7a203..03a2461b 100644 --- a/src/internfile/mh_text.cpp +++ b/src/internfile/mh_text.cpp @@ -29,7 +29,6 @@ using namespace std; #include "cstr.h" #include "mh_text.h" -#include "csguess.h" #include "debuglog.h" #include "readfile.h" #include "md5.h" diff --git a/src/lib/Makefile b/src/lib/Makefile index efb07466..dda50cc4 100644 --- a/src/lib/Makefile +++ b/src/lib/Makefile @@ -6,8 +6,8 @@ LIBS = librcl.a all: $(LIBS) -OBJS = rclaspell.o beaglequeuecache.o cstr.o rclconfig.o rclinit.o textsplit.o unacpp.o beaglequeue.o csguess.o fsindexer.o indexer.o mimetype.o subtreelist.o htmlparse.o myhtmlparse.o mimehandler.o internfile.o mh_exec.o mh_execm.o mh_html.o mh_mail.o mh_mbox.o mh_text.o txtdcode.o docseq.o docseqdb.o docseqhist.o filtseq.o dynconf.o plaintorich.o recollq.o reslistpager.o sortseq.o wasastringtoquery.o wasatorcl.o rcldb.o rcldoc.o rclquery.o searchdata.o stemdb.o stoplist.o base64.o circache.o closefrom.o conftree.o copyfile.o debuglog.o ecrontab.o execmd.o fstreewalk.o idfile.o fileudi.o md5.o mimeparse.o netcon.o pathut.o pxattr.o rclionice.o readfile.o smallut.o transcode.o wipedir.o x11mon.o mime-getpart.o mime-parsefull.o mime-parseonlyheader.o mime-printbody.o mime-printdoc.o mime-printheader.o mime.o convert.o iodevice.o iofactory.o -DEPS = rclaspell.dep.stamp beaglequeuecache.dep.stamp cstr.dep.stamp rclconfig.dep.stamp rclinit.dep.stamp textsplit.dep.stamp unacpp.dep.stamp beaglequeue.dep.stamp csguess.dep.stamp fsindexer.dep.stamp indexer.dep.stamp mimetype.dep.stamp subtreelist.dep.stamp htmlparse.dep.stamp myhtmlparse.dep.stamp mimehandler.dep.stamp internfile.dep.stamp mh_exec.dep.stamp mh_execm.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp mh_mbox.dep.stamp mh_text.dep.stamp txtdcode.dep.stamp docseq.dep.stamp docseqdb.dep.stamp docseqhist.dep.stamp filtseq.dep.stamp dynconf.dep.stamp plaintorich.dep.stamp recollq.dep.stamp reslistpager.dep.stamp sortseq.dep.stamp wasastringtoquery.dep.stamp wasatorcl.dep.stamp rcldb.dep.stamp rcldoc.dep.stamp rclquery.dep.stamp searchdata.dep.stamp stemdb.dep.stamp stoplist.dep.stamp base64.dep.stamp circache.dep.stamp closefrom.dep.stamp conftree.dep.stamp copyfile.dep.stamp debuglog.dep.stamp ecrontab.dep.stamp execmd.dep.stamp fstreewalk.dep.stamp idfile.dep.stamp fileudi.dep.stamp md5.dep.stamp mimeparse.dep.stamp netcon.dep.stamp pathut.dep.stamp pxattr.dep.stamp rclionice.dep.stamp readfile.dep.stamp smallut.dep.stamp transcode.dep.stamp wipedir.dep.stamp x11mon.dep.stamp mime-getpart.dep.stamp mime-parsefull.dep.stamp mime-parseonlyheader.dep.stamp mime-printbody.dep.stamp mime-printdoc.dep.stamp mime-printheader.dep.stamp mime.dep.stamp convert.dep.stamp iodevice.dep.stamp iofactory.dep.stamp +OBJS = rclaspell.o beaglequeuecache.o cstr.o rclconfig.o rclinit.o textsplit.o unacpp.o beaglequeue.o fsindexer.o indexer.o mimetype.o subtreelist.o htmlparse.o myhtmlparse.o mimehandler.o internfile.o mh_exec.o mh_execm.o mh_html.o mh_mail.o mh_mbox.o mh_text.o txtdcode.o docseq.o docseqdb.o docseqhist.o filtseq.o dynconf.o plaintorich.o recollq.o reslistpager.o sortseq.o wasastringtoquery.o wasatorcl.o rcldb.o rcldoc.o rclquery.o searchdata.o stemdb.o stoplist.o base64.o circache.o closefrom.o conftree.o copyfile.o debuglog.o ecrontab.o execmd.o fstreewalk.o idfile.o fileudi.o md5.o mimeparse.o netcon.o pathut.o pxattr.o rclionice.o readfile.o smallut.o transcode.o wipedir.o x11mon.o mime-getpart.o mime-parsefull.o mime-parseonlyheader.o mime-printbody.o mime-printdoc.o mime-printheader.o mime.o convert.o iodevice.o iofactory.o +DEPS = rclaspell.dep.stamp beaglequeuecache.dep.stamp cstr.dep.stamp rclconfig.dep.stamp rclinit.dep.stamp textsplit.dep.stamp unacpp.dep.stamp beaglequeue.dep.stamp fsindexer.dep.stamp indexer.dep.stamp mimetype.dep.stamp subtreelist.dep.stamp htmlparse.dep.stamp myhtmlparse.dep.stamp mimehandler.dep.stamp internfile.dep.stamp mh_exec.dep.stamp mh_execm.dep.stamp mh_html.dep.stamp mh_mail.dep.stamp mh_mbox.dep.stamp mh_text.dep.stamp txtdcode.dep.stamp docseq.dep.stamp docseqdb.dep.stamp docseqhist.dep.stamp filtseq.dep.stamp dynconf.dep.stamp plaintorich.dep.stamp recollq.dep.stamp reslistpager.dep.stamp sortseq.dep.stamp wasastringtoquery.dep.stamp wasatorcl.dep.stamp rcldb.dep.stamp rcldoc.dep.stamp rclquery.dep.stamp searchdata.dep.stamp stemdb.dep.stamp stoplist.dep.stamp base64.dep.stamp circache.dep.stamp closefrom.dep.stamp conftree.dep.stamp copyfile.dep.stamp debuglog.dep.stamp ecrontab.dep.stamp execmd.dep.stamp fstreewalk.dep.stamp idfile.dep.stamp fileudi.dep.stamp md5.dep.stamp mimeparse.dep.stamp netcon.dep.stamp pathut.dep.stamp pxattr.dep.stamp rclionice.dep.stamp readfile.dep.stamp smallut.dep.stamp transcode.dep.stamp wipedir.dep.stamp x11mon.dep.stamp mime-getpart.dep.stamp mime-parsefull.dep.stamp mime-parseonlyheader.dep.stamp mime-printbody.dep.stamp mime-printdoc.dep.stamp mime-printheader.dep.stamp mime.dep.stamp convert.dep.stamp iodevice.dep.stamp iofactory.dep.stamp librcl.a : $(DEPS) $(OBJS) unac.o ar ru librcl.a $(OBJS) unac.o @@ -31,8 +31,6 @@ unacpp.o : ../common/unacpp.cpp $(depth)/mk/localdefs $(CXX) $(ALL_CXXFLAGS) -c ../common/unacpp.cpp beaglequeue.o : ../index/beaglequeue.cpp $(depth)/mk/localdefs $(CXX) $(ALL_CXXFLAGS) -c ../index/beaglequeue.cpp -csguess.o : ../index/csguess.cpp $(depth)/mk/localdefs - $(CXX) $(ALL_CXXFLAGS) -c ../index/csguess.cpp fsindexer.o : ../index/fsindexer.cpp $(depth)/mk/localdefs $(CXX) $(ALL_CXXFLAGS) -c ../index/fsindexer.cpp indexer.o : ../index/indexer.cpp $(depth)/mk/localdefs @@ -191,9 +189,6 @@ unacpp.dep.stamp : ../common/unacpp.cpp $(depth)/mk/localdefs beaglequeue.dep.stamp : ../index/beaglequeue.cpp $(depth)/mk/localdefs $(CXX) -M $(ALL_CXXFLAGS) ../index/beaglequeue.cpp > beaglequeue.dep touch beaglequeue.dep.stamp -csguess.dep.stamp : ../index/csguess.cpp $(depth)/mk/localdefs - $(CXX) -M $(ALL_CXXFLAGS) ../index/csguess.cpp > csguess.dep - touch csguess.dep.stamp fsindexer.dep.stamp : ../index/fsindexer.cpp $(depth)/mk/localdefs $(CXX) -M $(ALL_CXXFLAGS) ../index/fsindexer.cpp > fsindexer.dep touch fsindexer.dep.stamp @@ -364,7 +359,6 @@ include rclinit.dep include textsplit.dep include unacpp.dep include beaglequeue.dep -include csguess.dep include fsindexer.dep include indexer.dep include mimetype.dep diff --git a/src/lib/mkMake b/src/lib/mkMake index 510a435d..7574c059 100755 --- a/src/lib/mkMake +++ b/src/lib/mkMake @@ -12,7 +12,6 @@ ${depth}/common/rclinit.cpp \ ${depth}/common/textsplit.cpp \ ${depth}/common/unacpp.cpp \ ${depth}/index/beaglequeue.cpp \ -${depth}/index/csguess.cpp \ ${depth}/index/fsindexer.cpp \ ${depth}/index/indexer.cpp \ ${depth}/index/mimetype.cpp \ diff --git a/src/mk/manifest.txt b/src/mk/manifest.txt index 03452b52..335ef5e5 100644 --- a/src/mk/manifest.txt +++ b/src/mk/manifest.txt @@ -185,8 +185,6 @@ index/ index/Makefile index/beaglequeue.cpp index/beaglequeue.h -index/csguess.cpp -index/csguess.h index/fsindexer.cpp index/fsindexer.h index/indexer.cpp diff --git a/src/sampleconf/mimeview b/src/sampleconf/mimeview index 14bf4983..d421e06b 100644 --- a/src/sampleconf/mimeview +++ b/src/sampleconf/mimeview @@ -52,6 +52,7 @@ application/vnd.sun.xml.writer.template = libreoffice %f application/vnd.wordperfect = libreoffice %f application/x-chm = kchmviewer %f +application/x-dia-diagram = dia %f application/x-fsdirectory = dolphin %f application/x-gnuinfo = xterm -e "info -f %f" application/x-gnumeric = gnumeric %f