added support for openoffice and word + optimized decomp temp dir usage

This commit is contained in:
dockes 2005-02-09 12:07:30 +00:00
parent 8f376bbe37
commit 152d47306e
19 changed files with 516 additions and 84 deletions

View File

@ -1 +1 @@
0.5
0.6

74
src/filters/rcldoc Executable file
View File

@ -0,0 +1,74 @@
#!/bin/sh
# @(#$Id: rcldoc,v 1.1 2005-02-09 12:07:30 dockes Exp $ (C) 2004 J.F.Dockes
# Parts taken from Estraier:
#================================================================
# Estraier: a personal full-text search system
# Copyright (C) 2003-2004 Mikio Hirabayashi
#================================================================
#================================================================
# rcldoc
# Extract text from an msword file by executing either antiword
# (or wvware maybe if we need it one day)
#
# The default is to use antiword, the code would need modifications to
# work with wvWare
#
#================================================================
# set variables
# Force the C locale for this script and the commands it runs.
LANG=C ; export LANG
LC_ALL=C ; export LC_ALL
progname="rcldoc"
# Command used to extract the text; the input file name is appended to it.
decoder="antiword -i -1 -m UTF-8"
# Not ready to use this for now (it outputs html, so the code below has to
# be simplified.)
#decoder="wvWare -1 -c UTF-8"

# Show the help message when the argument count is wrong or --help is given.
# Diagnostics go to stderr: stdout only ever carries the generated HTML.
if test $# -ne 1 || test "$1" = "--help"
then
    printf 'Convert a word file to unformatted HTML text.\n' >&2
    printf 'Usage: %s infile\n' "$progname" >&2
    exit 1
fi
infile="$1"

# Check that the input file exists and is a regular file
if test ! -f "$infile"
then
    printf '%s: %s: no such file\n' "$progname" "$infile" >&2
    exit 1
fi
# Output the result: run the decoder and wrap its text output in a minimal
# HTML document. The final iconv pass drops invalid UTF-8 sequences (-c).
$decoder "$infile" |
awk '
BEGIN {
    printf("<html><head><title></title>\n")
    printf("<meta http-equiv=\"Content-Type\" content=\"text/html;charset=UTF-8\">\n")
    printf("</head>\n<body><p>");
    esc = 1
}
{
    # A line holding only a form feed becomes a horizontal rule
    if ($0 == "\f") {
        printf("</p>\n<hr>\n<p>")
        next
    }
    # Escape HTML special characters on every text line, including the
    # hyphenated ones handled below (they used to be emitted unescaped).
    if (esc > 0) {
        gsub(/&/, "\\&amp;", $0)
        gsub(/</, "\\&lt;", $0)
        gsub(/>/, "\\&gt;", $0)
    }
    if ($0 ~ /-$/) {
        # Trailing hyphen: drop it and emit no newline, so that the word
        # is joined with the start of the next line.
        sub(/-$/, "", $0)
        printf("%s", $0);
    } else {
        print $0
    }
}
END {
    printf("</p></body></html>\n");
}' | iconv -f UTF-8 -t UTF-8 -c -s
# exit normally
exit 0

125
src/filters/rclsoff Executable file
View File

@ -0,0 +1,125 @@
#!/bin/sh
# @(#$Id: rclsoff,v 1.1 2005-02-09 12:07:30 dockes Exp $ (C) 2004 J.F.Dockes
# Parts taken from Estraier:
#================================================================
# Estraier: a personal full-text search system
# Copyright (C) 2003-2004 Mikio Hirabayashi
#================================================================
#================================================================
# rclsoff
# Extract text from an openoffice/soffice file
#
#================================================================
# set variables
# Force the C locale for this script and the commands it runs.
LANG=C ; export LANG
LC_ALL=C ; export LC_ALL
progname="rclsoff"

# Show the help message when the argument count is wrong or --help is given.
# Diagnostics go to stderr: stdout only ever carries the generated HTML.
if test $# -ne 1 || test "$1" = "--help"
then
    printf 'Convert an openoffice file to unformatted HTML text.\n' >&2
    printf 'Usage: %s infile\n' "$progname" >&2
    exit 1
fi
infile="$1"

# Check that the input file exists and is a regular file
if test ! -f "$infile"
then
    printf '%s: %s: no such file\n' "$progname" "$infile" >&2
    exit 1
fi
# We need a temporary directory. Its parent is $RECOLL_TMPDIR if set,
# else $TMPDIR, else /tmp.
if test z"$RECOLL_TMPDIR" != z; then
    ttdir=$RECOLL_TMPDIR
elif test z"$TMPDIR" != z ; then
    ttdir=$TMPDIR
else
    ttdir=/tmp
fi
tmpdir=$ttdir/rclsoff_tmp$$
# mkdir fails if the name already exists, so we never reuse a directory
# that somebody else created. Keep the extracted data private (mode 700).
mkdir -m 700 "$tmpdir" || exit 1

cleanup()
{
    # Do not stay inside the tree that we are about to remove
    cd /
    # Note that we're using a constant part (rclsofftmp), that hopefully
    # guarantees that we can't do big mistakes here.
    rm -rf "$tmpdir/rclsofftmp"
    rmdir "$tmpdir"
}
# Clean up whenever the script exits. Signals are turned into a plain exit so
# that the script stops instead of continuing after the cleanup, and the EXIT
# trap does the actual work. Signal names carry no SIG prefix: that is the
# only form every /bin/sh accepts.
trap cleanup EXIT
trap 'exit 1' HUP QUIT INT TERM

# Created after the traps are set, so that a failure here still removes
# the top directory.
mkdir "$tmpdir/rclsofftmp" || exit 1
# Unzip the input file and change to the unzipped directory. The file name
# is quoted so that names containing blanks work. The exit status of unzip is
# not tested because it is also non-zero for mere warnings: we check for the
# file that we need instead.
unzip -q -d "$tmpdir/rclsofftmp" "$infile"
cd "$tmpdir/rclsofftmp" || exit 1
if test ! -f content.xml
then
    printf '%s: %s: no content.xml found\n' "$progname" "$infile" >&2
    exit 1
fi

# Note: there can be newlines inside the description field, we don't want
# them...
descsedprog='/<dc:description>/,/<\/dc:description>/{
s!.*<dc:description>!!
s!</dc:description>.*!!
p
}
'
# The metadata values end up inside double-quoted HTML attributes, so every
# double quote in them is turned into a single quote. For the description,
# tr does this while also replacing the newlines with spaces.
description=`sed -n -e "$descsedprog" < meta.xml | tr '\n"' " '"`
subject=`sed -e "s/\"/'/g" -e 's/.*<dc:subject>\([^<]*\).*/\1/p;d' < meta.xml`
title=`sed -e "s/\"/'/g" -e 's/.*<dc:title>\([^<]*\).*/\1/p;d' < meta.xml`
keywords=`sed -e "s/\"/'/g" -e 's/.*<meta:keyword>\([^<]*\).*/\1/p;d' \
    < meta.xml`

# Note: next expr inserts a newline at each end of paragraph (for preview),
# then replaces every remaining tag with a space. The second line of the
# command must start in column one: it is part of the sed replacement text.
content="`sed -e 's!</text:p>!\\
!g' -e 's/<[^>]*>/ /g' < content.xml`"
#echo description "$description"
#echo subject "$subject"
#echo title "$title"
#echo keywords "$keywords"
#echo content "$content"

# Output the result. printf is used instead of echo for everything that comes
# from the document, because some echo implementations interpret backslashes
# or leading dashes in their arguments.
printf '%s\n' '<html><head>'
printf '<title> %s </title>\n' "$title"
printf '%s\n' '<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">'
printf '<meta name="description" content=" %s ">\n' "$description $subject"
printf '<meta name="keywords" content=" %s ">\n' "$keywords"
printf '%s\n' '</head><body><p>'
printf '%s\n' "$content" | sed -e "s/&apos;/'/g" -e 's/&quot;/"/g' |\
awk '
BEGIN {
    esc = 1
}
{
    # A line holding only a form feed becomes a horizontal rule
    if ($0 == "\f") {
        printf("</p>\n<hr>\n<p>")
        next
    }
    # The text comes from an XML file, so ampersands are already written
    # as entities: escaping them again would turn &amp; into &amp;amp;.
    # Only stray angle brackets left over by the tag stripping are escaped.
    if (esc > 0) {
        gsub(/</, "\\&lt;", $0)
        gsub(/>/, "\\&gt;", $0)
    }
    if ($0 ~ /-$/) {
        # Trailing hyphen: drop it and emit no line break, so that the
        # word is joined with the start of the next line.
        sub(/-$/, "", $0)
        printf("%s", $0);
    } else {
        printf("%s<br>", $0)
    }
}
END {
    printf("</p></body></html>\n");
}' | iconv -f UTF-8 -t UTF-8 -c -s
# Leave the temporary directory before it gets removed
cd /
# exit normally
exit 0

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.4 2005-02-04 14:21:17 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.5 2005-02-09 12:07:30 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
#include <stdio.h>
#include <sys/stat.h>
@ -22,6 +22,8 @@ static char rcsid[] = "@(#$Id: indexer.cpp,v 1.4 2005-02-04 14:21:17 dockes Exp
#include "transcode.h"
#include "debuglog.h"
#include "internfile.h"
#include "smallut.h"
#include "wipedir.h"
using namespace std;
@ -38,11 +40,21 @@ class DbIndexer {
string dbdir;
list<string> *topdirs;
Rcl::Db db;
string tmpdir;
public:
DbIndexer(RclConfig *cnf, const string &dbd, list<string> *top)
: config(cnf), dbdir(dbd), topdirs(top)
{ }
~DbIndexer() {
    // An empty name means that no temporary directory is recorded:
    // nothing to remove.
    if (tmpdir.empty())
        return;
    // Remove the files inside the directory, then the directory itself.
    wipedir(tmpdir);
    const int rmres = rmdir(tmpdir.c_str());
    if (rmres < 0) {
        LOGERR(("DbIndexer::~DbIndexer: cant clear temp dir %s\n",
                tmpdir.c_str()));
    }
}
friend FsTreeWalker::Status
indexfile(void *, const std::string &, const struct stat *,
FsTreeWalker::CbFlag);
@ -52,6 +64,12 @@ class DbIndexer {
bool DbIndexer::index()
{
string tdir;
if (!maketmpdir(tmpdir)) {
LOGERR(("DbIndexer: cant create temp directory\n"));
return false;
}
if (!db.open(dbdir, Rcl::Db::DbUpd)) {
LOGERR(("DbIndexer::index: error opening database in %s\n",
dbdir.c_str()));
@ -106,7 +124,7 @@ indexfile(void *cdata, const std::string &fn, const struct stat *stp,
}
Rcl::Doc doc;
if (!internfile(fn, me->config, doc))
if (!internfile(fn, me->config, doc, me->tmpdir))
return FsTreeWalker::FtwOk;
// Set up common fields:

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.3 2004-12-15 15:00:37 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.4 2005-02-09 12:07:30 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
#include <ctype.h>
@ -9,15 +9,34 @@ using std::string;
#include "mimetype.h"
string mimetype(const string &filename, ConfTree *mtypes)
string mimetype(const string &fn, ConfTree *mtypes)
{
if (mtypes == 0)
return "";
// If filename has a suffix and we find it in the map, we're done
string::size_type dot = filename.find_last_of(".");
static list<string> stoplist;
if (stoplist.empty()) {
string stp;
if (mtypes->get(string("recoll_noindex"), stp, "")) {
ConfTree::stringToStrings(stp, stoplist);
}
}
if (!stoplist.empty()) {
for (list<string>::const_iterator it = stoplist.begin();
it != stoplist.end(); it++) {
if (it->length() > fn.length())
continue;
if (!fn.compare(fn.length() - it->length(), string::npos,
*it))
return "";
}
}
// If the file name has a suffix and we find it in the map, we're done
string::size_type dot = fn.find_last_of(".");
if (dot != string::npos) {
string suff = filename.substr(dot);
string suff = fn.substr(dot);
for (unsigned int i = 0; i < suff.length(); i++)
suff[i] = tolower(suff[i]);
@ -25,7 +44,8 @@ string mimetype(const string &filename, ConfTree *mtypes)
if (mtypes->get(suff, mtype, ""))
return mtype;
}
// Look at file data
// Look at file data ? One day maybe
return "";
}

View File

@ -1,5 +1,5 @@
#ifndef lint
static char rcsid[] = "@(#$Id: internfile.cpp,v 1.1 2005-02-04 14:21:17 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: internfile.cpp,v 1.2 2005-02-09 12:07:29 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
#include <unistd.h>
#include <sys/types.h>
@ -16,38 +16,17 @@ using namespace std;
#include "mimehandler.h"
#include "execmd.h"
#include "pathut.h"
#include "wipedir.h"
static bool uncompressfile(RclConfig *conf, const string& ifn,
const list<string>& cmdv, string& tdir,
const list<string>& cmdv, const string& tdir,
string& tfile)
{
const char *tmpdir = getenv("RECOLL_TMPDIR");
if (!tmpdir)
tmpdir = getenv("TMPDIR");
if (!tmpdir)
tmpdir = "/tmp";
tdir = tmpdir;
path_cat(tdir, "rcltmpXXXXXX");
{
char *cp = strdup(tdir.c_str());
if (!cp) {
LOGERR(("uncompressfile: out of memory (for file name !)\n"));
return false;
}
if (!mktemp(cp)) {
free(cp);
LOGERR(("uncompressfile: mktemp failed\n"));
return false;
}
tdir = cp;
free(cp);
}
if (mkdir(tdir.c_str(), 0700) < 0) {
LOGERR(("uncompressfile: mkdir %s failed\n", tdir.c_str()));
// Make sure tmp dir is empty. we guarantee this to filters
if (wipedir(tdir) != 0) {
LOGERR(("uncompressfile: can't clear temp dir %s\n", tdir.c_str()));
return false;
}
string cmd = find_filter(conf, cmdv.front());
// Substitute file name and temp dir in command elements
@ -92,32 +71,26 @@ static bool uncompressfile(RclConfig *conf, const string& ifn,
static void tmpcleanup(const string& tdir, const string& tfile)
{
if (tdir.empty())
if (tdir.empty() || tfile.empty())
return;
if (!tfile.empty()) {
if (unlink(tfile.c_str()) < 0) {
LOGERR(("tmpcleanup: unlink(%s) errno %d\n", tfile.c_str(),
errno));
return;
}
}
if (rmdir(tdir.c_str()) < 0) {
LOGERR(("tmpcleanup: rmdir(%s) errno %d\n", tdir.c_str(), errno));
if (unlink(tfile.c_str()) < 0) {
LOGERR(("tmpcleanup: unlink(%s) errno %d\n", tfile.c_str(),
errno));
return;
}
}
bool internfile(const std::string &ifn, RclConfig *config, Rcl::Doc& doc)
bool internfile(const std::string &ifn, RclConfig *config, Rcl::Doc& doc,
const string& tdir)
{
string fn = ifn;
string tdir;
string tfile;
MimeHandler *handler = 0;
bool ret = false;
string mime = mimetype(fn, config->getMimeMap());
if (mime.empty()) {
// No mime type ?? pass on.
// No mime type: not listed in our map.
LOGDEB(("internfile: (no mime) [%s]\n", fn.c_str()));
return false;
}

View File

@ -1,6 +1,6 @@
#ifndef _INTERNFILE_H_INCLUDED_
#define _INTERNFILE_H_INCLUDED_
/* @(#$Id: internfile.h,v 1.1 2005-02-04 14:21:17 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: internfile.h,v 1.2 2005-02-09 12:07:29 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string>
@ -9,6 +9,6 @@
/// Turn external file into internal representation, according to mime type etc
extern bool internfile(const std::string &fn, RclConfig *config,
Rcl::Doc& doc);
Rcl::Doc& doc, const string& tdir);
#endif /* _INTERNFILE_H_INCLUDED_ */

View File

@ -27,10 +27,15 @@
#include "mimeparse.h"
// The original version of this function compressed whitespace and suppressed
// newlines. There is no good reason to do so, and keeping the whitespace
// actually helps the preview, especially when the html comes from a filter
// that generated it from text (i.e. inside <pre> tags).
void
MyHtmlParser::process_text(const string &text)
{
if (!in_script_tag && !in_style_tag) {
#if 0
string::size_type b = 0;
while ((b = text.find_first_not_of(WHITESPACE, b)) != string::npos) {
if (pending_space || b != 0)
@ -45,6 +50,11 @@ MyHtmlParser::process_text(const string &text)
dump += text.substr(b, e - b);
b = e + 1;
}
#else
if (pending_space)
dump += ' ';
dump += text;
#endif
}
}

View File

@ -7,14 +7,14 @@ LIBS = librcl.a
all: $(LIBS)
OBJS = conftree.o csguess.o debuglog.o \
execmd.o \
execmd.o wipedir.o \
fstreewalk.o html.o htmlparse.o indexer.o internfile.o \
mimehandler.o mimeparse.o mimetype.o myhtmlparse.o pathut.o \
rclconfig.o rcldb.o readfile.o smallut.o \
textsplit.o transcode.o \
unacpp.o unac.o
SRCS = ../utils/conftree.cpp ../index/csguess.cpp ../utils/debuglog.cpp \
../utils/execmd.cpp \
../utils/execmd.cpp ../utils/wipedir.cpp \
../utils/fstreewalk.cpp ../common/html.cpp ../common/htmlparse.cpp \
../index/indexer.cpp ../common/internfile.cpp \
../common/mimehandler.cpp ../utils/mimeparse.cpp ../index/mimetype.cpp \
@ -39,6 +39,8 @@ debuglog.o : ../utils/debuglog.cpp
$(CXX) $(CXXFLAGS) -c $<
execmd.o : ../utils/execmd.cpp
$(CXX) $(CXXFLAGS) -c $<
wipedir.o : ../utils/wipedir.cpp
$(CXX) $(CXXFLAGS) -c $<
fstreewalk.o : ../utils/fstreewalk.cpp
$(CXX) $(CXXFLAGS) -c $<
html.o : ../common/html.cpp

View File

@ -10,10 +10,13 @@
#include "rclconfig.h"
#include "pathut.h"
#include "recoll.h"
#include "smallut.h"
#include "wipedir.h"
RclConfig *rclconfig;
Rcl::Db *rcldb;
int recollNeedsExit;
string tmpdir;
void recollCleanup()
@ -23,10 +26,15 @@ void recollCleanup()
rcldb = 0;
delete rclconfig;
rclconfig = 0;
if (tmpdir.length()) {
wipedir(tmpdir);
rmdir(tmpdir.c_str());
tmpdir.erase();
}
}
static void sigcleanup(int sig)
static void sigcleanup(int)
{
fprintf(stderr, "sigcleanup\n");
// Cant call exit from here, because the atexit cleanup does some
@ -71,6 +79,13 @@ int main( int argc, char ** argv )
QString("No db directory in configuration"));
exit(1);
}
if (!maketmpdir(tmpdir)) {
QMessageBox::critical(0, "Recoll",
QString("Cannot create temporary directory"));
exit(1);
}
dbdir = path_tildexpand(dbdir);
rcldb = new Rcl::Db;

View File

@ -1,6 +1,6 @@
#ifndef _RECOLL_H_INCLUDED_
#define _RECOLL_H_INCLUDED_
/* @(#$Id: recoll.h,v 1.1 2005-02-01 17:20:05 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: recoll.h,v 1.2 2005-02-09 12:07:30 dockes Exp $ (C) 2004 J.F.Dockes */
#include "rclconfig.h"
#include "rcldb.h"
@ -11,6 +11,7 @@ extern void recollCleanup();
// Misc declarations in need of sharing between the UI files
extern RclConfig *rclconfig;
extern Rcl::Db *rcldb;
extern string tmpdir;
extern int recollNeedsExit;

View File

@ -95,33 +95,25 @@ static string plaintorich(const string &in, const list<string>& terms,
myTextSplitCB cb(terms);
TextSplit splitter(&cb, true);
splitter.text_to_words(in);
string out1;
if (cb.tboffs.empty()) {
out1 = in;
} else {
list<pair<int, int> >::iterator it = cb.tboffs.begin();
for (unsigned int i = 0; i < in.length() ; i++) {
if (it != cb.tboffs.end()) {
if (i == (unsigned int)it->first) {
out1 += "<termtag>";
} else if (i == (unsigned int)it->second) {
if (it != cb.tboffs.end())
it++;
out1 += "</termtag>";
}
}
out1 += in[i];
}
}
string out = "<qt><head><title></title></head><body><p>";
for (string::const_iterator it = out1.begin();it != out1.end(); it++) {
if (*it == '\n') {
out += "<br>";
// out += '\n';
list<pair<int, int> >::iterator it = cb.tboffs.begin();
for (unsigned int i = 0; i < in.length(); i++) {
if (it != cb.tboffs.end()) {
if (i == (unsigned int)it->first) {
out += "<termtag>";
} else if (i == (unsigned int)it->second) {
if (it != cb.tboffs.end())
it++;
out += "</termtag>";
}
}
if (in[i] == '\n') {
out += "<br>\n";
} else {
out += *it;
out += in[i];
}
}
termoffsets = cb.tboffs;
return out;
}
@ -208,7 +200,7 @@ void RecollMain::reslistTE_clicked(int par, int car)
// for preview:
string fn = urltolocalpath(doc.url);
Rcl::Doc fdoc;
if (!internfile(fn, rclconfig, fdoc)) {
if (!internfile(fn, rclconfig, fdoc, tmpdir)) {
QMessageBox::warning(0, "Recoll",
QString("Can't turn doc into internal rep ") +
doc.mimetype.c_str());

View File

@ -1,4 +1,4 @@
# @(#$Id: mimeconf,v 1.2 2005-02-04 09:30:44 dockes Exp $ (C) 2004 J.F.Dockes
# @(#$Id: mimeconf,v 1.3 2005-02-09 12:07:30 dockes Exp $ (C) 2004 J.F.Dockes
# Recoll : associations of mime types to processing filters.
# There are different sections for decompression, 'interning' for indexing
@ -29,6 +29,18 @@ text/plain = internal
text/html = internal
application/pdf = exec rclpdf
application/postscript = exec rclps
application/msword = exec rcldoc
application/vnd.sun.xml.calc = exec rclsoff
application/vnd.sun.xml.calc.template = exec rclsoff
application/vnd.sun.xml.draw = exec rclsoff
application/vnd.sun.xml.draw.template = exec rclsoff
application/vnd.sun.xml.impress = exec rclsoff
application/vnd.sun.xml.impress.template = exec rclsoff
application/vnd.sun.xml.math = exec rclsoff
application/vnd.sun.xml.writer = exec rclsoff
application/vnd.sun.xml.writer.global = exec rclsoff
application/vnd.sun.xml.writer.template = exec rclsoff
##
# External viewers, launched when you double-click a result entry
@ -37,3 +49,15 @@ text/plain = xemacs %f
text/html = firefox -a firefox -remote "openFile(%u)"
application/pdf = xpdf %f
application/postscript = gv %f
application/msword = openoffice-1.1.3-swriter %f
application/vnd.sun.xml.calc = openoffice-1.1.3 %f
application/vnd.sun.xml.calc.template = openoffice-1.1.3 %f
application/vnd.sun.xml.draw = openoffice-1.1.3 %f
application/vnd.sun.xml.draw.template = openoffice-1.1.3 %f
application/vnd.sun.xml.impress = openoffice-1.1.3 %f
application/vnd.sun.xml.impress.template = openoffice-1.1.3 %f
application/vnd.sun.xml.math = openoffice-1.1.3 %f
application/vnd.sun.xml.writer = openoffice-1.1.3 %f
application/vnd.sun.xml.writer.global = openoffice-1.1.3 %f
application/vnd.sun.xml.writer.template = openoffice-1.1.3 %f

View File

@ -1,4 +1,4 @@
# @(#$Id: mimemap,v 1.2 2005-02-04 09:30:44 dockes Exp $ (C) 2004 J.F.Dockes
# @(#$Id: mimemap,v 1.3 2005-02-09 12:07:30 dockes Exp $ (C) 2004 J.F.Dockes
# Recoll: associations of file name extensions to mime types
.txt = text/plain
@ -20,5 +20,25 @@
.Z = application/x-gzip
.bz2 = application/x-bzip2
.doc = application/msword
.sxc = application/vnd.sun.xml.calc
.stc = application/vnd.sun.xml.calc.template
.sxd = application/vnd.sun.xml.draw
.std = application/vnd.sun.xml.draw.template
.sxi = application/vnd.sun.xml.impress
.sti = application/vnd.sun.xml.impress.template
.sxm = application/vnd.sun.xml.math
.sxw = application/vnd.sun.xml.writer
.sxg = application/vnd.sun.xml.writer.global
.stw = application/vnd.sun.xml.writer.template
.wpd = application/vnd.wordperfect
.rtf = text/rtf
# A list of stuff that we don't want to touch at all
recoll_noindex = .tar.gz .tgz .tar.bz2 .tbz
[FILE]
# This section for future non suffix-based extension (ie detect mail folders)

View File

@ -2,7 +2,7 @@ include ../mk/sysconf
BIGLIB = ../lib/librcl.a
PROGS = smallut trfstreewalk trpathut transcode trmimeparse trexecmd
PROGS = wipedir smallut trfstreewalk trpathut transcode trmimeparse trexecmd
all: $(PROGS)
FSTREEWALK_OBJS= trfstreewalk.o fstreewalk.o pathut.o
@ -44,5 +44,12 @@ smallut : $(SMALLUT_OBJS)
trsmallut.o : ../utils/smallut.cpp
$(CXX) $(CXXFLAGS) -DTEST_SMALLUT -c -o trsmallut.o \
smallut.cpp
WIPEDIR_OBJS= trwipedir.o $(BIGLIB)
wipedir : $(WIPEDIR_OBJS)
$(CXX) $(CXXFLAGS) -o wipedir $(WIPEDIR_OBJS) $(LIBICONV)
trwipedir.o : ../utils/wipedir.cpp
$(CXX) $(CXXFLAGS) -DTEST_WIPEDIR -c -o trwipedir.o \
wipedir.cpp
clean:
rm -f *.o $(PROGS)

View File

@ -1,14 +1,53 @@
#ifndef lint
static char rcsid[] = "@(#$Id: smallut.cpp,v 1.2 2005-02-04 09:39:44 dockes Exp $ (C) 2004 J.F.Dockes";
static char rcsid[] = "@(#$Id: smallut.cpp,v 1.3 2005-02-09 12:07:30 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
#ifndef TEST_SMALLUT
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <string>
#include "smallut.h"
#include "debuglog.h"
#include "pathut.h"
#define MIN(A,B) ((A)<(B)?(A):(B))
/**
 * Create a new, uniquely named temporary directory.
 *
 * The parent directory is the value of the RECOLL_TMPDIR environment
 * variable, else TMPDIR, else /tmp. An empty value counts as unset.
 * The directory is created by mkdtemp(), which picks the name and creates
 * the directory (mode 0700) in a single step: unlike mktemp() followed by
 * mkdir(), nobody else can grab the name in between.
 *
 * @param tdir output: name of the created directory. Left empty on failure.
 * @return true if the directory was created, false otherwise (the reason
 *         is logged).
 */
bool maketmpdir(string& tdir)
{
    const char *tmpdir = getenv("RECOLL_TMPDIR");
    if (!tmpdir || !*tmpdir)
        tmpdir = getenv("TMPDIR");
    if (!tmpdir || !*tmpdir)
        tmpdir = "/tmp";
    tdir = tmpdir;
    path_cat(tdir, "rcltmpXXXXXX");

    // mkdtemp() overwrites the XXXXXX part in place: it needs a writable copy
    char *cp = strdup(tdir.c_str());
    if (!cp) {
        LOGERR(("maketmpdir: out of memory (for file name !)\n"));
        tdir.erase();
        return false;
    }
    if (!mkdtemp(cp)) {
        LOGERR(("maketmpdir: mkdtemp %s failed\n", tdir.c_str()));
        free(cp);
        tdir.erase();
        return false;
    }
    tdir = cp;
    free(cp);
    return true;
}
int stringicmp(const string & s1, const string& s2)
{
string::const_iterator it1 = s1.begin();

View File

@ -1,6 +1,6 @@
#ifndef _SMALLUT_H_INCLUDED_
#define _SMALLUT_H_INCLUDED_
/* @(#$Id: smallut.h,v 1.2 2005-02-04 09:39:44 dockes Exp $ (C) 2004 J.F.Dockes */
/* @(#$Id: smallut.h,v 1.3 2005-02-09 12:07:30 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string>
using std::string;
@ -9,4 +9,6 @@ extern int stringicmp(const string& s1, const string& s2);
extern int stringlowercmp(const string& alreadylower, const string& s2);
extern int stringuppercmp(const string& alreadyupper, const string& s2);
extern bool maketmpdir(string& tdir);
#endif /* _SMALLUT_H_INCLUDED_ */

97
src/utils/wipedir.cpp Normal file
View File

@ -0,0 +1,97 @@
#ifndef lint
static char rcsid[] = "@(#$Id: wipedir.cpp,v 1.1 2005-02-09 12:07:30 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
#ifndef TEST_WIPEDIR
#include <dirent.h>
#include <sys/stat.h>
#include <errno.h>
#include <string>
using namespace std;
#include "debuglog.h"
#include "pathut.h"
#include "wipedir.h"
/**
 * Remove everything except subdirectories from a directory (not recursive).
 *
 * Entries are examined with lstat(), so that a symbolic link is removed
 * whatever it points to: a dangling link does not make the operation fail,
 * and a link to a directory does not remain behind.
 *
 * @param dir the directory to empty. It must exist and be readable,
 *        writable and searchable.
 * @return the count of subdirectories left in place (0 means that the
 *         directory is now empty), or -1 on error. After an error, part of
 *         the entries may already have been removed.
 */
int wipedir(const string& dir)
{
    struct stat st;
    if (stat(dir.c_str(), &st) == -1) {
        LOGERR(("wipedir: cant stat %s, errno %d\n", dir.c_str(), errno));
        return -1;
    }
    if (!S_ISDIR(st.st_mode)) {
        LOGERR(("wipedir: %s not a directory\n", dir.c_str()));
        return -1;
    }
    if (access(dir.c_str(), R_OK|W_OK|X_OK) < 0) {
        LOGERR(("wipedir: no write access to %s\n", dir.c_str()));
        return -1;
    }

    DIR *d = opendir(dir.c_str());
    if (d == 0) {
        LOGERR(("wipedir: cant opendir %s, errno %d\n", dir.c_str(), errno));
        return -1;
    }

    int remaining = 0;
    bool ok = true;
    struct dirent *ent;
    // Stop at the first failure: the caller is told that the directory
    // could not be emptied.
    while (ok && (ent = readdir(d)) != 0) {
        if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, ".."))
            continue;

        string fn = dir;
        path_cat(fn, ent->d_name);

        struct stat entst;
        if (lstat(fn.c_str(), &entst) == -1) {
            LOGERR(("wipedir: cant stat %s, errno %d\n", fn.c_str(), errno));
            ok = false;
        } else if (S_ISDIR(entst.st_mode)) {
            // Subdirectories are left alone and reported through the count
            remaining++;
        } else if (unlink(fn.c_str()) < 0) {
            LOGERR(("wipedir: cant unlink %s, errno %d\n",
                    fn.c_str(), errno));
            ok = false;
        }
    }
    closedir(d);
    return ok ? remaining : -1;
}
#else // FILEUT_TEST
#include <sys/stat.h>
#include "wipedir.h"
using namespace std;
int main(int argc, const char **argv)
{
if (argc != 2) {
fprintf(stderr, "Usage: wipedir <dir>\n");
exit(1);
}
string dir = argv[1];
int cnt = wipedir(dir);
printf("wipedir returned %d\n", cnt);
exit(0);
}
#endif

13
src/utils/wipedir.h Normal file
View File

@ -0,0 +1,13 @@
#ifndef _WIPEDIR_H_INCLUDED_
#define _WIPEDIR_H_INCLUDED_
/* @(#$Id: wipedir.h,v 1.1 2005-02-09 12:07:30 dockes Exp $  (C) 2004 J.F.Dockes */
#include <string>
/**
 * Remove all files inside directory (not recursive): subdirectories are
 * left in place.
 * @param dirname the directory to empty.
 * @return -1 for error, else the count of remaining entries (ie: subdirs):
 *         0 means that the directory is now empty.
 */
int wipedir(const std::string& dirname);
#endif /* _WIPEDIR_H_INCLUDED_ */