diff --git a/src/VERSION b/src/VERSION index 2eb3c4fe..5a2a5806 100644 --- a/src/VERSION +++ b/src/VERSION @@ -1 +1 @@ -0.5 +0.6 diff --git a/src/filters/rcldoc b/src/filters/rcldoc new file mode 100755 index 00000000..aaae036b --- /dev/null +++ b/src/filters/rcldoc @@ -0,0 +1,74 @@ +#!/bin/sh +# @(#$Id: rcldoc,v 1.1 2005-02-09 12:07:30 dockes Exp $ (C) 2004 J.F.Dockes +# Parts taken from Estraier: +#================================================================ +# Estraier: a personal full-text search system +# Copyright (C) 2003-2004 Mikio Hirabayashi +#================================================================ +#================================================================ +# rcldoc +# Extract text from an msword file by executing either antiword +# (or wvware maybe if we need it one day) +# +# The default is to use antiword, the code would need modifications to +# work with wvWare +# +#================================================================ + + +# set variables +LANG=C ; export LANG +LC_ALL=C ; export LC_ALL +progname="rcldoc" +decoder="antiword -i -1 -m UTF-8" +# Not ready to use this for now (it outputs html, so the code below has to +# be simplified.) +#decoder="wvWare -1 -c UTF-8" + +# show help message +if test $# -ne 1 -o "$1" = "--help" +then + printf 'Convert a word file to unformatted HTML text.\n' + printf 'Usage: %s [infile]\n' "$progname" + exit 1 +fi + +infile="$1" + +# check the input file existence +if test ! -f "$infile" +then + printf '%s: %s: no such file\n' "$progname" "$infile" + exit 1 +fi + +# output the result +$decoder "$infile" | +awk ' +BEGIN { + printf("\n") + printf("\n") + printf("\n

"); + esc = 1 +} +{ + if ($0 ~ /-$/) { + sub(/-$/, "", $0) + printf("%s", $0); + } else if($0 == "\f") { + printf("

\n
\n

") + } else { + if(esc > 0) { + gsub(/&/, "\\&", $0) + gsub(//, "\\>", $0) + } + print $0 + } +} +END { + printf("

\n"); +}' | iconv -f UTF-8 -t UTF-8 -c -s + +# exit normally +exit 0 diff --git a/src/filters/rclsoff b/src/filters/rclsoff new file mode 100755 index 00000000..6d8c3499 --- /dev/null +++ b/src/filters/rclsoff @@ -0,0 +1,125 @@ +#!/bin/sh +# @(#$Id: rclsoff,v 1.1 2005-02-09 12:07:30 dockes Exp $ (C) 2004 J.F.Dockes +# Parts taken from Estraier: +#================================================================ +# Estraier: a personal full-text search system +# Copyright (C) 2003-2004 Mikio Hirabayashi +#================================================================ +#================================================================ +# rclsoff +# Extract text from an openoffice/soffice file +# +#================================================================ + + +# set variables +LANG=C ; export LANG +LC_ALL=C ; export LC_ALL +progname="rclsoff" + + +# show help message +if test $# -ne 1 -o "$1" = "--help" +then + printf 'Convert an openoffice file to unformatted HTML text.\n' + printf 'Usage: %s [infile]\n' "$progname" + exit 1 +fi + +infile="$1" + +# check the input file existence +if test ! -f "$infile" +then + printf '%s: %s: no such file\n' "$progname" "$infile" + exit 1 +fi + +# We need a temporary directory +if test z"$RECOLL_TMPDIR" != z; then + ttdir=$RECOLL_TMPDIR +elif test z"$TMPDIR" != z ; then + ttdir=$TMPDIR +else + ttdir=/tmp +fi +tmpdir=$ttdir/rclsoff_tmp$$ +mkdir $tmpdir || exit 1 +mkdir $tmpdir/rclsofftmp || exit 1 + +cleanup() +{ + # Note that we're using a constant part (rclsofftmp), that hopefully + # guarantees that we can't do big mistakes here. + rm -rf $tmpdir/rclsofftmp + rmdir $tmpdir +} + +trap cleanup EXIT SIGHUP SIGQUIT SIGINT SIGTERM + +# Unzip the input file and change to the unzipped directory +unzip -q -d $tmpdir/rclsofftmp $infile +cd $tmpdir/rclsofftmp + +# Note: there can be newlines inside the description field, we don't want +# them... +descsedprog='//,/<\/dc:description>/{ +s!.*!! +s!.*!! +p +} +' +description=`sed -n -e "$descsedprog" < meta.xml | tr '\n' ' '` + +subject=`sed -e "s/\"/'/" -e 's/.*\([^<]*\).*/\1/p;d' < meta.xml` + +title=`sed -e "s/\"/'/" -e 's/.*\([^<]*\).*/\1/p;d' < meta.xml` + +keywords=`sed -e "s/\"/'/" -e 's/.*\([^<]*\).*/\1/p;d' \ + < meta.xml` + +# Note: next expr inserts a newline at each end of paragraph (for preview) +content="`sed -e 's!!\\ +!g' -e 's/<[^>]*>/ /g' < content.xml`" + +#echo description "$description" +#echo subject "$subject" +#echo title "$title" +#echo keywords "$keywords" +#echo content "$content" + +# output the result +echo '' +echo '' "$title" '' +echo '' +echo '' +echo '' +echo '

' + +echo "$content" | sed -e "s/'/'/g" -e 's/"/"/g' |\ +awk ' +BEGIN { + esc = 1 +} +{ + if ($0 ~ /-$/) { + sub(/-$/, "", $0) + printf("%s", $0); + } else if($0 == "\f") { + printf("

\n
\n

") + } else { + if(esc > 0) { + gsub(/&/, "\\&", $0) + gsub(//, "\\>", $0) + } + printf("%s
", $0) + } +} +END { + printf("

\n"); +}' | iconv -f UTF-8 -t UTF-8 -c -s + +cd / +# exit normally +exit 0 diff --git a/src/index/indexer.cpp b/src/index/indexer.cpp index d728c7a7..a256cbd5 100644 --- a/src/index/indexer.cpp +++ b/src/index/indexer.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: indexer.cpp,v 1.4 2005-02-04 14:21:17 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: indexer.cpp,v 1.5 2005-02-09 12:07:30 dockes Exp $ (C) 2004 J.F.Dockes"; #endif #include #include @@ -22,6 +22,8 @@ static char rcsid[] = "@(#$Id: indexer.cpp,v 1.4 2005-02-04 14:21:17 dockes Exp #include "transcode.h" #include "debuglog.h" #include "internfile.h" +#include "smallut.h" +#include "wipedir.h" using namespace std; @@ -38,11 +40,21 @@ class DbIndexer { string dbdir; list *topdirs; Rcl::Db db; + string tmpdir; public: DbIndexer(RclConfig *cnf, const string &dbd, list *top) : config(cnf), dbdir(dbd), topdirs(top) { } + ~DbIndexer() { + if (tmpdir.length()) { + wipedir(tmpdir); + if (rmdir(tmpdir.c_str()) < 0) { + LOGERR(("DbIndexer::~DbIndexer: cant clear temp dir %s\n", + tmpdir.c_str())); + } + } + } friend FsTreeWalker::Status indexfile(void *, const std::string &, const struct stat *, FsTreeWalker::CbFlag); @@ -52,6 +64,12 @@ class DbIndexer { bool DbIndexer::index() { + string tdir; + + if (!maketmpdir(tmpdir)) { + LOGERR(("DbIndexer: cant create temp directory\n")); + return false; + } if (!db.open(dbdir, Rcl::Db::DbUpd)) { LOGERR(("DbIndexer::index: error opening database in %s\n", dbdir.c_str())); @@ -106,7 +124,7 @@ indexfile(void *cdata, const std::string &fn, const struct stat *stp, } Rcl::Doc doc; - if (!internfile(fn, me->config, doc)) + if (!internfile(fn, me->config, doc, me->tmpdir)) return FsTreeWalker::FtwOk; // Set up common fields: diff --git a/src/index/mimetype.cpp b/src/index/mimetype.cpp index bf72b90c..ab0c0d83 100644 --- a/src/index/mimetype.cpp +++ b/src/index/mimetype.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.3 2004-12-15 15:00:37 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.4 2005-02-09 12:07:30 dockes Exp $ (C) 2004 J.F.Dockes"; #endif #include @@ -9,15 +9,34 @@ using std::string; #include "mimetype.h" -string mimetype(const string &filename, ConfTree *mtypes) +string mimetype(const string &fn, ConfTree *mtypes) { if (mtypes == 0) return ""; - // If filename has a suffix and we find it in the map, we're done - string::size_type dot = filename.find_last_of("."); + static list stoplist; + if (stoplist.empty()) { + string stp; + if (mtypes->get(string("recoll_noindex"), stp, "")) { + ConfTree::stringToStrings(stp, stoplist); + } + } + + if (!stoplist.empty()) { + for (list::const_iterator it = stoplist.begin(); + it != stoplist.end(); it++) { + if (it->length() > fn.length()) + continue; + if (!fn.compare(fn.length() - it->length(), string::npos, + *it)) + return ""; + } + } + + // If the file name has a suffix and we find it in the map, we're done + string::size_type dot = fn.find_last_of("."); if (dot != string::npos) { - string suff = filename.substr(dot); + string suff = fn.substr(dot); for (unsigned int i = 0; i < suff.length(); i++) suff[i] = tolower(suff[i]); @@ -25,7 +44,8 @@ string mimetype(const string &filename, ConfTree *mtypes) if (mtypes->get(suff, mtype, "")) return mtype; } - // Look at file data + + // Look at file data ? One day maybe return ""; } diff --git a/src/internfile/internfile.cpp b/src/internfile/internfile.cpp index ec578ca7..9e087cc9 100644 --- a/src/internfile/internfile.cpp +++ b/src/internfile/internfile.cpp @@ -1,5 +1,5 @@ #ifndef lint -static char rcsid[] = "@(#$Id: internfile.cpp,v 1.1 2005-02-04 14:21:17 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: internfile.cpp,v 1.2 2005-02-09 12:07:29 dockes Exp $ (C) 2004 J.F.Dockes"; #endif #include #include @@ -16,38 +16,17 @@ using namespace std; #include "mimehandler.h" #include "execmd.h" #include "pathut.h" +#include "wipedir.h" static bool uncompressfile(RclConfig *conf, const string& ifn, - const list& cmdv, string& tdir, + const list& cmdv, const string& tdir, string& tfile) { - const char *tmpdir = getenv("RECOLL_TMPDIR"); - if (!tmpdir) - tmpdir = getenv("TMPDIR"); - if (!tmpdir) - tmpdir = "/tmp"; - tdir = tmpdir; - path_cat(tdir, "rcltmpXXXXXX"); - { - char *cp = strdup(tdir.c_str()); - if (!cp) { - LOGERR(("uncompressfile: out of memory (for file name !)\n")); - return false; - } - if (!mktemp(cp)) { - free(cp); - LOGERR(("uncompressfile: mktemp failed\n")); - return false; - } - tdir = cp; - free(cp); - } - - if (mkdir(tdir.c_str(), 0700) < 0) { - LOGERR(("uncompressfile: mkdir %s failed\n", tdir.c_str())); + // Make sure tmp dir is empty. we guarantee this to filters + if (wipedir(tdir) != 0) { + LOGERR(("uncompressfile: can't clear temp dir %s\n", tdir.c_str())); return false; } - string cmd = find_filter(conf, cmdv.front()); // Substitute file name and temp dir in command elements @@ -92,32 +71,26 @@ static bool uncompressfile(RclConfig *conf, const string& ifn, static void tmpcleanup(const string& tdir, const string& tfile) { - if (tdir.empty()) + if (tdir.empty() || tfile.empty()) return; - if (!tfile.empty()) { - if (unlink(tfile.c_str()) < 0) { - LOGERR(("tmpcleanup: unlink(%s) errno %d\n", tfile.c_str(), - errno)); - return; - } - } - if (rmdir(tdir.c_str()) < 0) { - LOGERR(("tmpcleanup: rmdir(%s) errno %d\n", tdir.c_str(), errno)); + if (unlink(tfile.c_str()) < 0) { + LOGERR(("tmpcleanup: unlink(%s) errno %d\n", tfile.c_str(), + errno)); return; } } -bool internfile(const std::string &ifn, RclConfig *config, Rcl::Doc& doc) +bool internfile(const std::string &ifn, RclConfig *config, Rcl::Doc& doc, + const string& tdir) { string fn = ifn; - string tdir; string tfile; MimeHandler *handler = 0; bool ret = false; string mime = mimetype(fn, config->getMimeMap()); if (mime.empty()) { - // No mime type ?? pass on. + // No mime type: not listed in our map. LOGDEB(("internfile: (no mime) [%s]\n", fn.c_str())); return false; } diff --git a/src/internfile/internfile.h b/src/internfile/internfile.h index 10ce9d36..acf1aa6c 100644 --- a/src/internfile/internfile.h +++ b/src/internfile/internfile.h @@ -1,6 +1,6 @@ #ifndef _INTERNFILE_H_INCLUDED_ #define _INTERNFILE_H_INCLUDED_ -/* @(#$Id: internfile.h,v 1.1 2005-02-04 14:21:17 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: internfile.h,v 1.2 2005-02-09 12:07:29 dockes Exp $ (C) 2004 J.F.Dockes */ #include @@ -9,6 +9,6 @@ /// Turn external file into internal representation, according to mime type etc extern bool internfile(const std::string &fn, RclConfig *config, - Rcl::Doc& doc); + Rcl::Doc& doc, const string& tdir); #endif /* _INTERNFILE_H_INCLUDED_ */ diff --git a/src/internfile/myhtmlparse.cpp b/src/internfile/myhtmlparse.cpp index 6565a945..f959566f 100644 --- a/src/internfile/myhtmlparse.cpp +++ b/src/internfile/myhtmlparse.cpp @@ -27,10 +27,15 @@ #include "mimeparse.h" +// The original version for this compresses whitespace and suppresses newlines +// I can see no good reason to do this, and it actually helps preview to keep +// whitespace, especially if the html comes from a filter that generated it +// from text (ie: inside '
 tags)
 void
 MyHtmlParser::process_text(const string &text)
 {
     if (!in_script_tag && !in_style_tag) {
+#if 0
 	string::size_type b = 0;
 	while ((b = text.find_first_not_of(WHITESPACE, b)) != string::npos) {
 	    if (pending_space || b != 0)
@@ -45,6 +50,11 @@ MyHtmlParser::process_text(const string &text)
 	    dump += text.substr(b, e - b);
 	    b = e + 1;
 	}
+#else
+	if (pending_space)
+	    dump += ' ';
+	dump += text;
+#endif
     }
 }
 
diff --git a/src/lib/Makefile b/src/lib/Makefile
index 08aff159..3c1673f4 100644
--- a/src/lib/Makefile
+++ b/src/lib/Makefile
@@ -7,14 +7,14 @@ LIBS = librcl.a
 all: $(LIBS)
 
 OBJS = conftree.o csguess.o debuglog.o \
-     execmd.o \
+     execmd.o wipedir.o \
      fstreewalk.o html.o htmlparse.o indexer.o internfile.o \
      mimehandler.o mimeparse.o mimetype.o myhtmlparse.o pathut.o \
      rclconfig.o rcldb.o readfile.o smallut.o \
      textsplit.o transcode.o \
      unacpp.o unac.o
 SRCS = ../utils/conftree.cpp ../index/csguess.cpp ../utils/debuglog.cpp \
-     ../utils/execmd.cpp \
+     ../utils/execmd.cpp ../utils/wipedir.cpp \
      ../utils/fstreewalk.cpp ../common/html.cpp ../common/htmlparse.cpp \
      ../index/indexer.cpp ../common/internfile.cpp \
      ../common/mimehandler.cpp ../utils/mimeparse.cpp ../index/mimetype.cpp \
@@ -39,6 +39,8 @@ debuglog.o : ../utils/debuglog.cpp
 	$(CXX) $(CXXFLAGS) -c $<
 execmd.o : ../utils/execmd.cpp 
 	$(CXX) $(CXXFLAGS) -c $<
+wipedir.o : ../utils/wipedir.cpp 
+	$(CXX) $(CXXFLAGS) -c $<
 fstreewalk.o : ../utils/fstreewalk.cpp 
 	$(CXX) $(CXXFLAGS) -c $<
 html.o : ../common/html.cpp 
diff --git a/src/qtgui/main.cpp b/src/qtgui/main.cpp
index 7c03cb1f..db8ca82d 100644
--- a/src/qtgui/main.cpp
+++ b/src/qtgui/main.cpp
@@ -10,10 +10,13 @@
 #include "rclconfig.h"
 #include "pathut.h"
 #include "recoll.h"
+#include "smallut.h"
+#include "wipedir.h"
 
 RclConfig *rclconfig;
 Rcl::Db *rcldb;
 int recollNeedsExit;
+string tmpdir;
 
 
 void recollCleanup()
@@ -23,10 +26,15 @@ void recollCleanup()
     rcldb = 0;
     delete rclconfig;
     rclconfig = 0;
+    if (tmpdir.length()) {
+	wipedir(tmpdir);
+	rmdir(tmpdir.c_str());
+	tmpdir.erase();
+    }
 }
 
 
-static void sigcleanup(int sig)
+static void sigcleanup(int)
 {
     fprintf(stderr, "sigcleanup\n");
     // Cant call exit from here, because the atexit cleanup does some
@@ -71,6 +79,13 @@ int main( int argc, char ** argv )
 			      QString("No db directory in configuration"));
 	exit(1);
     }
+
+    if (!maketmpdir(tmpdir)) {
+	QMessageBox::critical(0, "Recoll",
+			      QString("Cannot create temporary directory"));
+	exit(1);
+    }
+	
     dbdir = path_tildexpand(dbdir);
 
     rcldb = new Rcl::Db;
diff --git a/src/qtgui/recoll.h b/src/qtgui/recoll.h
index d7c11b42..f607c79b 100644
--- a/src/qtgui/recoll.h
+++ b/src/qtgui/recoll.h
@@ -1,6 +1,6 @@
 #ifndef _RECOLL_H_INCLUDED_
 #define _RECOLL_H_INCLUDED_
-/* @(#$Id: recoll.h,v 1.1 2005-02-01 17:20:05 dockes Exp $  (C) 2004 J.F.Dockes */
+/* @(#$Id: recoll.h,v 1.2 2005-02-09 12:07:30 dockes Exp $  (C) 2004 J.F.Dockes */
 
 #include "rclconfig.h"
 #include "rcldb.h"
@@ -11,6 +11,7 @@ extern void recollCleanup();
 // Misc declarations in need of sharing between the UI files
 extern RclConfig *rclconfig;
 extern Rcl::Db *rcldb;
+extern string tmpdir;
 
 extern int recollNeedsExit;
 
diff --git a/src/qtgui/recollmain.ui.h b/src/qtgui/recollmain.ui.h
index 13d937f4..02b08413 100644
--- a/src/qtgui/recollmain.ui.h
+++ b/src/qtgui/recollmain.ui.h
@@ -95,33 +95,25 @@ static string plaintorich(const string &in, const list& terms,
     myTextSplitCB cb(terms);
     TextSplit splitter(&cb, true);
     splitter.text_to_words(in);
-    string out1;
-    if (cb.tboffs.empty()) {
-	out1 = in;
-    } else { 
-	list >::iterator it = cb.tboffs.begin();
-	for (unsigned int i = 0; i < in.length() ; i++) {
-	    if (it != cb.tboffs.end()) {
-		if (i == (unsigned int)it->first) {
-		    out1 += "";
-		} else if (i == (unsigned int)it->second) {
-		    if (it != cb.tboffs.end())
-			it++;
-		    out1 += "";
-		}
-	    }
-	    out1 += in[i];
-	}
-    }
     string out = "

"; - for (string::const_iterator it = out1.begin();it != out1.end(); it++) { - if (*it == '\n') { - out += "
"; - // out += '\n'; + list >::iterator it = cb.tboffs.begin(); + for (unsigned int i = 0; i < in.length(); i++) { + if (it != cb.tboffs.end()) { + if (i == (unsigned int)it->first) { + out += ""; + } else if (i == (unsigned int)it->second) { + if (it != cb.tboffs.end()) + it++; + out += ""; + } + } + if (in[i] == '\n') { + out += "
\n"; } else { - out += *it; + out += in[i]; } } + termoffsets = cb.tboffs; return out; } @@ -208,7 +200,7 @@ void RecollMain::reslistTE_clicked(int par, int car) // for preview: string fn = urltolocalpath(doc.url); Rcl::Doc fdoc; - if (!internfile(fn, rclconfig, fdoc)) { + if (!internfile(fn, rclconfig, fdoc, tmpdir)) { QMessageBox::warning(0, "Recoll", QString("Can't turn doc into internal rep ") + doc.mimetype.c_str()); diff --git a/src/sampleconf/mimeconf b/src/sampleconf/mimeconf index f39887d1..0280da51 100644 --- a/src/sampleconf/mimeconf +++ b/src/sampleconf/mimeconf @@ -1,4 +1,4 @@ -# @(#$Id: mimeconf,v 1.2 2005-02-04 09:30:44 dockes Exp $ (C) 2004 J.F.Dockes +# @(#$Id: mimeconf,v 1.3 2005-02-09 12:07:30 dockes Exp $ (C) 2004 J.F.Dockes # Recoll : associations of mime types to processing filters. # There are different sections for decompression, 'interning' for indexing @@ -29,6 +29,18 @@ text/plain = internal text/html = internal application/pdf = exec rclpdf application/postscript = exec rclps +application/msword = exec rcldoc + +application/vnd.sun.xml.calc = exec rclsoff +application/vnd.sun.xml.calc.template = exec rclsoff +application/vnd.sun.xml.draw = exec rclsoff +application/vnd.sun.xml.draw.template = exec rclsoff +application/vnd.sun.xml.impress = exec rclsoff +application/vnd.sun.xml.impress.template = exec rclsoff +application/vnd.sun.xml.math = exec rclsoff +application/vnd.sun.xml.writer = exec rclsoff +application/vnd.sun.xml.writer.global = exec rclsoff +application/vnd.sun.xml.writer.template = exec rclsoff ## # External viewers, launched when you double-click a result entry @@ -37,3 +49,15 @@ text/plain = xemacs %f text/html = firefox -a firefox -remote "openFile(%u)" application/pdf = xpdf %f application/postscript = gv %f +application/msword = openoffice-1.1.3-swriter + +application/vnd.sun.xml.calc = openoffice-1.1.3 %f +application/vnd.sun.xml.calc.template = openoffice-1.1.3 %f +application/vnd.sun.xml.draw = openoffice-1.1.3 %f +application/vnd.sun.xml.draw.template = openoffice-1.1.3 %f +application/vnd.sun.xml.impress = openoffice-1.1.3 %f +application/vnd.sun.xml.impress.template = openoffice-1.1.3 %f +application/vnd.sun.xml.math = openoffice-1.1.3 %f +application/vnd.sun.xml.writer = openoffice-1.1.3 %f +application/vnd.sun.xml.writer.global = openoffice-1.1.3 %f +application/vnd.sun.xml.writer.template = openoffice-1.1.3 %f diff --git a/src/sampleconf/mimemap b/src/sampleconf/mimemap index 56d87963..23c6a1ab 100644 --- a/src/sampleconf/mimemap +++ b/src/sampleconf/mimemap @@ -1,4 +1,4 @@ -# @(#$Id: mimemap,v 1.2 2005-02-04 09:30:44 dockes Exp $ (C) 2004 J.F.Dockes +# @(#$Id: mimemap,v 1.3 2005-02-09 12:07:30 dockes Exp $ (C) 2004 J.F.Dockes # Recoll: associations of file name extensions to mime types .txt = text/plain @@ -20,5 +20,25 @@ .Z = application/x-gzip .bz2 = application/x-bzip2 +.doc = application/msword + +.sxc = application/vnd.sun.xml.calc +.stc = application/vnd.sun.xml.calc.template +.sxd = application/vnd.sun.xml.draw +.std = application/vnd.sun.xml.draw.template +.sxi = application/vnd.sun.xml.impress +.sti = application/vnd.sun.xml.impress.template +.sxm = application/vnd.sun.xml.math +.sxw = application/vnd.sun.xml.writer +.sxg = application/vnd.sun.xml.writer.global +.stw = application/vnd.sun.xml.writer.template + +.wpd = application/vnd.wordperfect +.rtf = text/rtf + + +# A list of stuff that we don't want to touch at all +recoll_noindex = .tar.gz .tgz .tar.bz2 .tbz + [FILE] # This section for future non suffix-based extension (ie detect mail folders) diff --git a/src/utils/Makefile b/src/utils/Makefile index d62f6c9a..f35fa2ca 100644 --- a/src/utils/Makefile +++ b/src/utils/Makefile @@ -2,7 +2,7 @@ include ../mk/sysconf BIGLIB = ../lib/librcl.a -PROGS = smallut trfstreewalk trpathut transcode trmimeparse trexecmd +PROGS = wipedir smallut trfstreewalk trpathut transcode trmimeparse trexecmd all: $(PROGS) FSTREEWALK_OBJS= trfstreewalk.o fstreewalk.o pathut.o @@ -44,5 +44,12 @@ smallut : $(SMALLUT_OBJS) trsmallut.o : ../utils/smallut.cpp $(CXX) $(CXXFLAGS) -DTEST_SMALLUT -c -o trsmallut.o \ smallut.cpp + +WIPEDIR_OBJS= trwipedir.o $(BIGLIB) +wipedir : $(WIPEDIR_OBJS) + $(CXX) $(CXXFLAGS) -o wipedir $(WIPEDIR_OBJS) $(LIBICONV) +trwipedir.o : ../utils/wipedir.cpp + $(CXX) $(CXXFLAGS) -DTEST_WIPEDIR -c -o trwipedir.o \ + wipedir.cpp clean: rm -f *.o $(PROGS) diff --git a/src/utils/smallut.cpp b/src/utils/smallut.cpp index cfad866b..ca404ff9 100644 --- a/src/utils/smallut.cpp +++ b/src/utils/smallut.cpp @@ -1,14 +1,53 @@ #ifndef lint -static char rcsid[] = "@(#$Id: smallut.cpp,v 1.2 2005-02-04 09:39:44 dockes Exp $ (C) 2004 J.F.Dockes"; +static char rcsid[] = "@(#$Id: smallut.cpp,v 1.3 2005-02-09 12:07:30 dockes Exp $ (C) 2004 J.F.Dockes"; #endif #ifndef TEST_SMALLUT #include #include +#include +#include +#include #include "smallut.h" +#include "debuglog.h" +#include "pathut.h" #define MIN(A,B) ((A)<(B)?(A):(B)) +bool maketmpdir(string& tdir) +{ + const char *tmpdir = getenv("RECOLL_TMPDIR"); + if (!tmpdir) + tmpdir = getenv("TMPDIR"); + if (!tmpdir) + tmpdir = "/tmp"; + tdir = tmpdir; + path_cat(tdir, "rcltmpXXXXXX"); + { + char *cp = strdup(tdir.c_str()); + if (!cp) { + LOGERR(("maketmpdir: out of memory (for file name !)\n")); + tdir.erase(); + return false; + } + if (!mktemp(cp)) { + free(cp); + LOGERR(("maketmpdir: mktemp failed\n")); + tdir.erase(); + return false; + } + tdir = cp; + free(cp); + } + + if (mkdir(tdir.c_str(), 0700) < 0) { + LOGERR(("maketmpdir: mkdir %s failed\n", tdir.c_str())); + tdir.erase(); + return false; + } + return true; +} + int stringicmp(const string & s1, const string& s2) { string::const_iterator it1 = s1.begin(); diff --git a/src/utils/smallut.h b/src/utils/smallut.h index 9129f37c..db500713 100644 --- a/src/utils/smallut.h +++ b/src/utils/smallut.h @@ -1,6 +1,6 @@ #ifndef _SMALLUT_H_INCLUDED_ #define _SMALLUT_H_INCLUDED_ -/* @(#$Id: smallut.h,v 1.2 2005-02-04 09:39:44 dockes Exp $ (C) 2004 J.F.Dockes */ +/* @(#$Id: smallut.h,v 1.3 2005-02-09 12:07:30 dockes Exp $ (C) 2004 J.F.Dockes */ #include using std::string; @@ -9,4 +9,6 @@ extern int stringicmp(const string& s1, const string& s2); extern int stringlowercmp(const string& alreadylower, const string& s2); extern int stringuppercmp(const string& alreadyupper, const string& s2); +extern bool maketmpdir(string& tdir); + #endif /* _SMALLUT_H_INCLUDED_ */ diff --git a/src/utils/wipedir.cpp b/src/utils/wipedir.cpp new file mode 100644 index 00000000..e9f46bd9 --- /dev/null +++ b/src/utils/wipedir.cpp @@ -0,0 +1,97 @@ +#ifndef lint +static char rcsid[] = "@(#$Id: wipedir.cpp,v 1.1 2005-02-09 12:07:30 dockes Exp $ (C) 2004 J.F.Dockes"; +#endif + +#ifndef TEST_WIPEDIR +#include +#include +#include + +#include +using namespace std; + +#include "debuglog.h" +#include "pathut.h" +#include "wipedir.h" + +int wipedir(const string& dir) +{ + struct stat st; + int statret; + int ret = -1; + + statret = stat(dir.c_str(), &st); + if (statret == -1) { + LOGERR(("wipedir: cant stat %s, errno %d\n", dir.c_str(), errno)); + return -1; + } + if (!S_ISDIR(st.st_mode)) { + LOGERR(("wipedir: %s not a directory\n", dir.c_str())); + return -1; + } + + if (access(dir.c_str(), R_OK|W_OK|X_OK) < 0) { + LOGERR(("wipedir: no write access to %s\n", dir.c_str())); + return -1; + } + + DIR *d = opendir(dir.c_str()); + if (d == 0) { + LOGERR(("wipedir: cant opendir %s, errno %d\n", dir.c_str(), errno)); + return -1; + } + int remaining = 0; + struct dirent *ent; + while ((ent = readdir(d)) != 0) { + if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, "..")) + continue; + + string fn = dir; + path_cat(fn, ent->d_name); + + struct stat st; + int statret = stat(fn.c_str(), &st); + if (statret == -1) { + LOGERR(("wipedir: cant stat %s, errno %d\n", fn.c_str(), errno)); + goto out; + } + if (S_ISDIR(st.st_mode)) { + remaining++; + } else { + if (unlink(fn.c_str()) < 0) { + LOGERR(("wipedir: cant unlink %s, errno %d\n", + fn.c_str(), errno)); + goto out; + } + } + } + + ret = remaining; + out: + if (d) + closedir(d); + return ret; +} + + +#else // FILEUT_TEST + +#include + +#include "wipedir.h" + +using namespace std; + +int main(int argc, const char **argv) +{ + if (argc != 2) { + fprintf(stderr, "Usage: wipedir

\n"); + exit(1); + } + string dir = argv[1]; + int cnt = wipedir(dir); + printf("wipedir returned %d\n", cnt); + exit(0); +} + +#endif diff --git a/src/utils/wipedir.h b/src/utils/wipedir.h new file mode 100644 index 00000000..1434baf5 --- /dev/null +++ b/src/utils/wipedir.h @@ -0,0 +1,13 @@ +#ifndef _FILEUT_H_INCLUDED_ +#define _FILEUT_H_INCLUDED_ +/* @(#$Id: wipedir.h,v 1.1 2005-02-09 12:07:30 dockes Exp $ (C) 2004 J.F.Dockes */ + +#include + +/** + * Remove all files inside directory (not recursive). + * @return 0 if ok, count of remaining entries (ie: subdirs), or -1 for error + */ +int wipedir(const std::string& dirname); + +#endif /* _FILEUT_H_INCLUDED_ */