added support for openoffice and word + optimized decomp temp dir usage
This commit is contained in:
parent
8f376bbe37
commit
152d47306e
@ -1 +1 @@
|
||||
0.5
|
||||
0.6
|
||||
|
||||
74
src/filters/rcldoc
Executable file
74
src/filters/rcldoc
Executable file
@ -0,0 +1,74 @@
|
||||
#!/bin/sh
# @(#$Id: rcldoc,v 1.1 2005-02-09 12:07:30 dockes Exp $ (C) 2004 J.F.Dockes
# Parts taken from Estraier:
#================================================================
# Estraier: a personal full-text search system
# Copyright (C) 2003-2004 Mikio Hirabayashi
#================================================================
#================================================================
# rcldoc
# Extract text from an msword file by executing antiword
# (or maybe wvWare if we need it one day) and wrap it as minimal HTML.
#
# The default is to use antiword, the code would need modifications to
# work with wvWare
#
# Usage: rcldoc infile
# The HTML document is written to standard output. Exit status is 1 for
# a usage error or a missing input file, 0 otherwise.
#================================================================


# set variables
LANG=C ; export LANG
LC_ALL=C ; export LC_ALL
progname="rcldoc"
decoder="antiword -i -1 -m UTF-8"
# Not ready to use this for now (it outputs html, so the code below would
# have to be simplified).
#decoder="wvWare -1 -c UTF-8"

# show help message
if test $# -ne 1 || test "$1" = "--help"
then
  printf 'Convert a word file to unformatted HTML text.\n'
  printf 'Usage: %s [infile]\n' "$progname"
  exit 1
fi

infile="$1"

# check the input file existence
if test ! -f "$infile"
then
  printf '%s: %s: no such file\n' "$progname" "$infile"
  exit 1
fi

# output the result
# Note: $decoder is left unquoted on purpose, it holds the command name
# and its options and must be split into words.
$decoder "$infile" |
awk '
BEGIN {
  printf("<html><head><title></title>\n")
  printf("<meta http-equiv=\"Content-Type\" content=\"text/html;charset=UTF-8\">\n")
  printf("</head>\n<body><p>");
  esc = 1
}
{
  # A line holding only a form feed is turned into a visible separator
  if ($0 == "\f") {
    printf("</p>\n<hr>\n<p>")
    next
  }
  # Escape the HTML metacharacters on every text line. The ampersand has
  # to go first so that the entities produced below are left alone.
  # In a gsub() replacement a bare & stands for the matched text, hence
  # the backslash.
  if (esc > 0) {
    gsub(/&/, "\\&amp;", $0)
    gsub(/</, "\\&lt;", $0)
    gsub(/>/, "\\&gt;", $0)
  }
  if ($0 ~ /-$/) {
    # Trailing hyphen: drop it and glue the next line to this one
    sub(/-$/, "", $0)
    printf("%s", $0);
  } else {
    print $0
  }
}
END {
  printf("</p></body></html>\n");
}' | iconv -f UTF-8 -t UTF-8 -c -s

# exit normally
exit 0
|
||||
125
src/filters/rclsoff
Executable file
125
src/filters/rclsoff
Executable file
@ -0,0 +1,125 @@
|
||||
#!/bin/sh
# @(#$Id: rclsoff,v 1.1 2005-02-09 12:07:30 dockes Exp $ (C) 2004 J.F.Dockes
# Parts taken from Estraier:
#================================================================
# Estraier: a personal full-text search system
# Copyright (C) 2003-2004 Mikio Hirabayashi
#================================================================
#================================================================
# rclsoff
# Extract text from an openoffice/soffice file
#
# Usage: rclsoff infile
# The input file is unzipped into a private temporary directory (under
# $RECOLL_TMPDIR, else $TMPDIR, else /tmp), then meta.xml and content.xml
# are turned into a minimal HTML document written to standard output.
#================================================================


# set variables
LANG=C ; export LANG
LC_ALL=C ; export LC_ALL
progname="rclsoff"


# show help message
if test $# -ne 1 || test "$1" = "--help"
then
  printf 'Convert an openoffice file to unformatted HTML text.\n'
  printf 'Usage: %s [infile]\n' "$progname"
  exit 1
fi

infile="$1"

# check the input file existence
if test ! -f "$infile"
then
  printf '%s: %s: no such file\n' "$progname" "$infile"
  exit 1
fi

# We need a temporary directory
if test z"$RECOLL_TMPDIR" != z; then
  ttdir=$RECOLL_TMPDIR
elif test z"$TMPDIR" != z ; then
  ttdir=$TMPDIR
else
  ttdir=/tmp
fi
tmpdir=$ttdir/rclsoff_tmp$$
# mkdir fails if the name already exists, so we never reuse (or later
# remove) a directory that we did not create ourselves. Mode 700 keeps the
# extracted document private.
mkdir -m 700 "$tmpdir" || exit 1
mkdir "$tmpdir/rclsofftmp" || { rmdir "$tmpdir"; exit 1; }

cleanup()
{
    # Do not stay inside the directory that we are about to remove
    cd /
    # Note that we're using a constant part (rclsofftmp), that hopefully
    # guarantees that we can't do big mistakes here.
    rm -rf "$tmpdir/rclsofftmp"
    rmdir "$tmpdir"
}

# Signal names are given without the SIG prefix: this is the only form
# that POSIX guarantees. The signal handler just exits, which in turn runs
# the EXIT handler, so cleanup is performed exactly once.
trap cleanup EXIT
trap 'exit 1' HUP QUIT INT TERM

# Unzip the input file and change to the unzipped directory
unzip -q -d "$tmpdir/rclsofftmp" "$infile"
# If we cannot get there, stop: the commands below would otherwise read
# meta.xml and content.xml from the caller's current directory.
cd "$tmpdir/rclsofftmp" || exit 1

# Note: there can be newlines inside the description field, we don't want
# them...
descsedprog='/<dc:description>/,/<\/dc:description>/{
s!.*<dc:description>!!
s!</dc:description>.*!!
p
}
'
description=`sed -n -e "$descsedprog" < meta.xml | tr '\n' ' '`

# Double quotes are turned into single ones (all of them, not only the
# first on each line) because the values end up inside quoted attributes.
subject=`sed -e "s/\"/'/g" -e 's/.*<dc:subject>\([^<]*\).*/\1/p;d' < meta.xml`

title=`sed -e "s/\"/'/g" -e 's/.*<dc:title>\([^<]*\).*/\1/p;d' < meta.xml`

keywords=`sed -e "s/\"/'/g" -e 's/.*<meta:keyword>\([^<]*\).*/\1/p;d' \
    < meta.xml`

# Note: next expr inserts a newline at each end of paragraph (for preview)
content="`sed -e 's!</text:p>!\\
!g' -e 's/<[^>]*>/ /g' < content.xml`"

#echo description "$description"
#echo subject "$subject"
#echo title "$title"
#echo keywords "$keywords"
#echo content "$content"

# output the result
# printf is used instead of echo for the variable parts: some echo
# implementations interpret backslash sequences found in the data.
echo '<html><head>'
printf '<title> %s </title>\n' "$title"
echo '<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">'
printf '<meta name="description" content=" %s ">\n' "$description $subject"
printf '<meta name="keywords" content=" %s ">\n' "$keywords"
echo '</head><body><p>'

# The text coming from content.xml still holds XML character entities.
# Decode them first (the ampersand last, so that nothing gets decoded
# twice), the awk script below then escapes what HTML needs.
printf '%s\n' "$content" |
sed -e "s/&apos;/'/g" -e 's/&quot;/"/g' \
    -e 's/&lt;/</g' -e 's/&gt;/>/g' -e 's/&amp;/\&/g' |
awk '
BEGIN {
  esc = 1
}
{
  # A line holding only a form feed is turned into a visible separator
  if ($0 == "\f") {
    printf("</p>\n<hr>\n<p>")
    next
  }
  # Escape the HTML metacharacters on every text line, ampersand first.
  # In a gsub() replacement a bare & stands for the matched text, hence
  # the backslash.
  if (esc > 0) {
    gsub(/&/, "\\&amp;", $0)
    gsub(/</, "\\&lt;", $0)
    gsub(/>/, "\\&gt;", $0)
  }
  if ($0 ~ /-$/) {
    # Trailing hyphen: drop it and glue the next line to this one
    sub(/-$/, "", $0)
    printf("%s", $0);
  } else {
    printf("%s<br>", $0)
  }
}
END {
  printf("</p></body></html>\n");
}' | iconv -f UTF-8 -t UTF-8 -c -s

cd /
# exit normally
exit 0
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.4 2005-02-04 14:21:17 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.5 2005-02-09 12:07:30 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <sys/stat.h>
|
||||
@ -22,6 +22,8 @@ static char rcsid[] = "@(#$Id: indexer.cpp,v 1.4 2005-02-04 14:21:17 dockes Exp
|
||||
#include "transcode.h"
|
||||
#include "debuglog.h"
|
||||
#include "internfile.h"
|
||||
#include "smallut.h"
|
||||
#include "wipedir.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
@ -38,11 +40,21 @@ class DbIndexer {
|
||||
string dbdir;
|
||||
list<string> *topdirs;
|
||||
Rcl::Db db;
|
||||
string tmpdir;
|
||||
public:
|
||||
DbIndexer(RclConfig *cnf, const string &dbd, list<string> *top)
|
||||
: config(cnf), dbdir(dbd), topdirs(top)
|
||||
{ }
|
||||
|
||||
/// Remove the temporary directory named by tmpdir, if one is recorded:
/// its files are deleted with wipedir(), then the directory itself is
/// removed. A failure of rmdir() is only logged.
~DbIndexer() {
    if (tmpdir.empty())
        return;

    wipedir(tmpdir);
    const int rc = rmdir(tmpdir.c_str());
    if (rc < 0) {
        LOGERR(("DbIndexer::~DbIndexer: cant clear temp dir %s\n",
                tmpdir.c_str()));
    }
}
|
||||
friend FsTreeWalker::Status
|
||||
indexfile(void *, const std::string &, const struct stat *,
|
||||
FsTreeWalker::CbFlag);
|
||||
@ -52,6 +64,12 @@ class DbIndexer {
|
||||
|
||||
bool DbIndexer::index()
|
||||
{
|
||||
string tdir;
|
||||
|
||||
if (!maketmpdir(tmpdir)) {
|
||||
LOGERR(("DbIndexer: cant create temp directory\n"));
|
||||
return false;
|
||||
}
|
||||
if (!db.open(dbdir, Rcl::Db::DbUpd)) {
|
||||
LOGERR(("DbIndexer::index: error opening database in %s\n",
|
||||
dbdir.c_str()));
|
||||
@ -106,7 +124,7 @@ indexfile(void *cdata, const std::string &fn, const struct stat *stp,
|
||||
}
|
||||
|
||||
Rcl::Doc doc;
|
||||
if (!internfile(fn, me->config, doc))
|
||||
if (!internfile(fn, me->config, doc, me->tmpdir))
|
||||
return FsTreeWalker::FtwOk;
|
||||
|
||||
// Set up common fields:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.3 2004-12-15 15:00:37 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: mimetype.cpp,v 1.4 2005-02-09 12:07:30 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
#include <ctype.h>
|
||||
@ -9,15 +9,34 @@ using std::string;
|
||||
|
||||
#include "mimetype.h"
|
||||
|
||||
string mimetype(const string &filename, ConfTree *mtypes)
|
||||
string mimetype(const string &fn, ConfTree *mtypes)
|
||||
{
|
||||
if (mtypes == 0)
|
||||
return "";
|
||||
|
||||
// If filename has a suffix and we find it in the map, we're done
|
||||
string::size_type dot = filename.find_last_of(".");
|
||||
static list<string> stoplist;
|
||||
if (stoplist.empty()) {
|
||||
string stp;
|
||||
if (mtypes->get(string("recoll_noindex"), stp, "")) {
|
||||
ConfTree::stringToStrings(stp, stoplist);
|
||||
}
|
||||
}
|
||||
|
||||
if (!stoplist.empty()) {
|
||||
for (list<string>::const_iterator it = stoplist.begin();
|
||||
it != stoplist.end(); it++) {
|
||||
if (it->length() > fn.length())
|
||||
continue;
|
||||
if (!fn.compare(fn.length() - it->length(), string::npos,
|
||||
*it))
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
// If the file name has a suffix and we find it in the map, we're done
|
||||
string::size_type dot = fn.find_last_of(".");
|
||||
if (dot != string::npos) {
|
||||
string suff = filename.substr(dot);
|
||||
string suff = fn.substr(dot);
|
||||
for (unsigned int i = 0; i < suff.length(); i++)
|
||||
suff[i] = tolower(suff[i]);
|
||||
|
||||
@ -25,7 +44,8 @@ string mimetype(const string &filename, ConfTree *mtypes)
|
||||
if (mtypes->get(suff, mtype, ""))
|
||||
return mtype;
|
||||
}
|
||||
// Look at file data
|
||||
|
||||
// Look at file data ? One day maybe
|
||||
return "";
|
||||
}
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: internfile.cpp,v 1.1 2005-02-04 14:21:17 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: internfile.cpp,v 1.2 2005-02-09 12:07:29 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
@ -16,38 +16,17 @@ using namespace std;
|
||||
#include "mimehandler.h"
|
||||
#include "execmd.h"
|
||||
#include "pathut.h"
|
||||
#include "wipedir.h"
|
||||
|
||||
static bool uncompressfile(RclConfig *conf, const string& ifn,
|
||||
const list<string>& cmdv, string& tdir,
|
||||
const list<string>& cmdv, const string& tdir,
|
||||
string& tfile)
|
||||
{
|
||||
const char *tmpdir = getenv("RECOLL_TMPDIR");
|
||||
if (!tmpdir)
|
||||
tmpdir = getenv("TMPDIR");
|
||||
if (!tmpdir)
|
||||
tmpdir = "/tmp";
|
||||
tdir = tmpdir;
|
||||
path_cat(tdir, "rcltmpXXXXXX");
|
||||
{
|
||||
char *cp = strdup(tdir.c_str());
|
||||
if (!cp) {
|
||||
LOGERR(("uncompressfile: out of memory (for file name !)\n"));
|
||||
return false;
|
||||
}
|
||||
if (!mktemp(cp)) {
|
||||
free(cp);
|
||||
LOGERR(("uncompressfile: mktemp failed\n"));
|
||||
return false;
|
||||
}
|
||||
tdir = cp;
|
||||
free(cp);
|
||||
}
|
||||
|
||||
if (mkdir(tdir.c_str(), 0700) < 0) {
|
||||
LOGERR(("uncompressfile: mkdir %s failed\n", tdir.c_str()));
|
||||
// Make sure the temp dir is empty: we guarantee this to the filters
|
||||
if (wipedir(tdir) != 0) {
|
||||
LOGERR(("uncompressfile: can't clear temp dir %s\n", tdir.c_str()));
|
||||
return false;
|
||||
}
|
||||
|
||||
string cmd = find_filter(conf, cmdv.front());
|
||||
|
||||
// Substitute file name and temp dir in command elements
|
||||
@ -92,32 +71,26 @@ static bool uncompressfile(RclConfig *conf, const string& ifn,
|
||||
|
||||
static void tmpcleanup(const string& tdir, const string& tfile)
|
||||
{
|
||||
if (tdir.empty())
|
||||
if (tdir.empty() || tfile.empty())
|
||||
return;
|
||||
if (!tfile.empty()) {
|
||||
if (unlink(tfile.c_str()) < 0) {
|
||||
LOGERR(("tmpcleanup: unlink(%s) errno %d\n", tfile.c_str(),
|
||||
errno));
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (rmdir(tdir.c_str()) < 0) {
|
||||
LOGERR(("tmpcleanup: rmdir(%s) errno %d\n", tdir.c_str(), errno));
|
||||
if (unlink(tfile.c_str()) < 0) {
|
||||
LOGERR(("tmpcleanup: unlink(%s) errno %d\n", tfile.c_str(),
|
||||
errno));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
bool internfile(const std::string &ifn, RclConfig *config, Rcl::Doc& doc)
|
||||
bool internfile(const std::string &ifn, RclConfig *config, Rcl::Doc& doc,
|
||||
const string& tdir)
|
||||
{
|
||||
string fn = ifn;
|
||||
string tdir;
|
||||
string tfile;
|
||||
MimeHandler *handler = 0;
|
||||
bool ret = false;
|
||||
|
||||
string mime = mimetype(fn, config->getMimeMap());
|
||||
if (mime.empty()) {
|
||||
// No mime type ?? pass on.
|
||||
// No mime type: not listed in our map.
|
||||
LOGDEB(("internfile: (no mime) [%s]\n", fn.c_str()));
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
#ifndef _INTERNFILE_H_INCLUDED_
|
||||
#define _INTERNFILE_H_INCLUDED_
|
||||
/* @(#$Id: internfile.h,v 1.1 2005-02-04 14:21:17 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: internfile.h,v 1.2 2005-02-09 12:07:29 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include <string>
|
||||
|
||||
@ -9,6 +9,6 @@
|
||||
|
||||
/// Turn external file into internal representation, according to mime type etc
|
||||
extern bool internfile(const std::string &fn, RclConfig *config,
|
||||
Rcl::Doc& doc);
|
||||
Rcl::Doc& doc, const string& tdir);
|
||||
|
||||
#endif /* _INTERNFILE_H_INCLUDED_ */
|
||||
|
||||
@ -27,10 +27,15 @@
|
||||
|
||||
#include "mimeparse.h"
|
||||
|
||||
// The original version for this compresses whitespace and suppresses newlines
|
||||
// I can see no good reason to do this, and it actually helps preview to keep
|
||||
// whitespace, especially if the html comes from a filter that generated it
|
||||
// from text (i.e. inside <pre> tags)
|
||||
void
|
||||
MyHtmlParser::process_text(const string &text)
|
||||
{
|
||||
if (!in_script_tag && !in_style_tag) {
|
||||
#if 0
|
||||
string::size_type b = 0;
|
||||
while ((b = text.find_first_not_of(WHITESPACE, b)) != string::npos) {
|
||||
if (pending_space || b != 0)
|
||||
@ -45,6 +50,11 @@ MyHtmlParser::process_text(const string &text)
|
||||
dump += text.substr(b, e - b);
|
||||
b = e + 1;
|
||||
}
|
||||
#else
|
||||
if (pending_space)
|
||||
dump += ' ';
|
||||
dump += text;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -7,14 +7,14 @@ LIBS = librcl.a
|
||||
all: $(LIBS)
|
||||
|
||||
OBJS = conftree.o csguess.o debuglog.o \
|
||||
execmd.o \
|
||||
execmd.o wipedir.o \
|
||||
fstreewalk.o html.o htmlparse.o indexer.o internfile.o \
|
||||
mimehandler.o mimeparse.o mimetype.o myhtmlparse.o pathut.o \
|
||||
rclconfig.o rcldb.o readfile.o smallut.o \
|
||||
textsplit.o transcode.o \
|
||||
unacpp.o unac.o
|
||||
SRCS = ../utils/conftree.cpp ../index/csguess.cpp ../utils/debuglog.cpp \
|
||||
../utils/execmd.cpp \
|
||||
../utils/execmd.cpp ../utils/wipedir.cpp \
|
||||
../utils/fstreewalk.cpp ../common/html.cpp ../common/htmlparse.cpp \
|
||||
../index/indexer.cpp ../common/internfile.cpp \
|
||||
../common/mimehandler.cpp ../utils/mimeparse.cpp ../index/mimetype.cpp \
|
||||
@ -39,6 +39,8 @@ debuglog.o : ../utils/debuglog.cpp
|
||||
$(CXX) $(CXXFLAGS) -c $<
|
||||
execmd.o : ../utils/execmd.cpp
|
||||
$(CXX) $(CXXFLAGS) -c $<
|
||||
wipedir.o : ../utils/wipedir.cpp
|
||||
$(CXX) $(CXXFLAGS) -c $<
|
||||
fstreewalk.o : ../utils/fstreewalk.cpp
|
||||
$(CXX) $(CXXFLAGS) -c $<
|
||||
html.o : ../common/html.cpp
|
||||
|
||||
@ -10,10 +10,13 @@
|
||||
#include "rclconfig.h"
|
||||
#include "pathut.h"
|
||||
#include "recoll.h"
|
||||
#include "smallut.h"
|
||||
#include "wipedir.h"
|
||||
|
||||
RclConfig *rclconfig;
|
||||
Rcl::Db *rcldb;
|
||||
int recollNeedsExit;
|
||||
string tmpdir;
|
||||
|
||||
|
||||
void recollCleanup()
|
||||
@ -23,10 +26,15 @@ void recollCleanup()
|
||||
rcldb = 0;
|
||||
delete rclconfig;
|
||||
rclconfig = 0;
|
||||
if (tmpdir.length()) {
|
||||
wipedir(tmpdir);
|
||||
rmdir(tmpdir.c_str());
|
||||
tmpdir.erase();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void sigcleanup(int sig)
|
||||
static void sigcleanup(int)
|
||||
{
|
||||
fprintf(stderr, "sigcleanup\n");
|
||||
// Cant call exit from here, because the atexit cleanup does some
|
||||
@ -71,6 +79,13 @@ int main( int argc, char ** argv )
|
||||
QString("No db directory in configuration"));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (!maketmpdir(tmpdir)) {
|
||||
QMessageBox::critical(0, "Recoll",
|
||||
QString("Cannot create temporary directory"));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
dbdir = path_tildexpand(dbdir);
|
||||
|
||||
rcldb = new Rcl::Db;
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
#ifndef _RECOLL_H_INCLUDED_
|
||||
#define _RECOLL_H_INCLUDED_
|
||||
/* @(#$Id: recoll.h,v 1.1 2005-02-01 17:20:05 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: recoll.h,v 1.2 2005-02-09 12:07:30 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include "rclconfig.h"
|
||||
#include "rcldb.h"
|
||||
@ -11,6 +11,7 @@ extern void recollCleanup();
|
||||
// Misc declarations in need of sharing between the UI files
|
||||
extern RclConfig *rclconfig;
|
||||
extern Rcl::Db *rcldb;
|
||||
extern string tmpdir;
|
||||
|
||||
extern int recollNeedsExit;
|
||||
|
||||
|
||||
@ -95,33 +95,25 @@ static string plaintorich(const string &in, const list<string>& terms,
|
||||
myTextSplitCB cb(terms);
|
||||
TextSplit splitter(&cb, true);
|
||||
splitter.text_to_words(in);
|
||||
string out1;
|
||||
if (cb.tboffs.empty()) {
|
||||
out1 = in;
|
||||
} else {
|
||||
list<pair<int, int> >::iterator it = cb.tboffs.begin();
|
||||
for (unsigned int i = 0; i < in.length() ; i++) {
|
||||
if (it != cb.tboffs.end()) {
|
||||
if (i == (unsigned int)it->first) {
|
||||
out1 += "<termtag>";
|
||||
} else if (i == (unsigned int)it->second) {
|
||||
if (it != cb.tboffs.end())
|
||||
it++;
|
||||
out1 += "</termtag>";
|
||||
}
|
||||
}
|
||||
out1 += in[i];
|
||||
}
|
||||
}
|
||||
string out = "<qt><head><title></title></head><body><p>";
|
||||
for (string::const_iterator it = out1.begin();it != out1.end(); it++) {
|
||||
if (*it == '\n') {
|
||||
out += "<br>";
|
||||
// out += '\n';
|
||||
list<pair<int, int> >::iterator it = cb.tboffs.begin();
|
||||
for (unsigned int i = 0; i < in.length(); i++) {
|
||||
if (it != cb.tboffs.end()) {
|
||||
if (i == (unsigned int)it->first) {
|
||||
out += "<termtag>";
|
||||
} else if (i == (unsigned int)it->second) {
|
||||
if (it != cb.tboffs.end())
|
||||
it++;
|
||||
out += "</termtag>";
|
||||
}
|
||||
}
|
||||
if (in[i] == '\n') {
|
||||
out += "<br>\n";
|
||||
} else {
|
||||
out += *it;
|
||||
out += in[i];
|
||||
}
|
||||
}
|
||||
|
||||
termoffsets = cb.tboffs;
|
||||
return out;
|
||||
}
|
||||
@ -208,7 +200,7 @@ void RecollMain::reslistTE_clicked(int par, int car)
|
||||
// for preview:
|
||||
string fn = urltolocalpath(doc.url);
|
||||
Rcl::Doc fdoc;
|
||||
if (!internfile(fn, rclconfig, fdoc)) {
|
||||
if (!internfile(fn, rclconfig, fdoc, tmpdir)) {
|
||||
QMessageBox::warning(0, "Recoll",
|
||||
QString("Can't turn doc into internal rep ") +
|
||||
doc.mimetype.c_str());
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
# @(#$Id: mimeconf,v 1.2 2005-02-04 09:30:44 dockes Exp $ (C) 2004 J.F.Dockes
|
||||
# @(#$Id: mimeconf,v 1.3 2005-02-09 12:07:30 dockes Exp $ (C) 2004 J.F.Dockes
|
||||
|
||||
# Recoll : associations of mime types to processing filters.
|
||||
# There are different sections for decompression, 'interning' for indexing
|
||||
@ -29,6 +29,18 @@ text/plain = internal
|
||||
text/html = internal
|
||||
application/pdf = exec rclpdf
|
||||
application/postscript = exec rclps
|
||||
application/msword = exec rcldoc
|
||||
|
||||
application/vnd.sun.xml.calc = exec rclsoff
|
||||
application/vnd.sun.xml.calc.template = exec rclsoff
|
||||
application/vnd.sun.xml.draw = exec rclsoff
|
||||
application/vnd.sun.xml.draw.template = exec rclsoff
|
||||
application/vnd.sun.xml.impress = exec rclsoff
|
||||
application/vnd.sun.xml.impress.template = exec rclsoff
|
||||
application/vnd.sun.xml.math = exec rclsoff
|
||||
application/vnd.sun.xml.writer = exec rclsoff
|
||||
application/vnd.sun.xml.writer.global = exec rclsoff
|
||||
application/vnd.sun.xml.writer.template = exec rclsoff
|
||||
|
||||
##
|
||||
# External viewers, launched when you double-click a result entry
|
||||
@ -37,3 +49,15 @@ text/plain = xemacs %f
|
||||
text/html = firefox -a firefox -remote "openFile(%u)"
|
||||
application/pdf = xpdf %f
|
||||
application/postscript = gv %f
|
||||
application/msword = openoffice-1.1.3-swriter %f
|
||||
|
||||
application/vnd.sun.xml.calc = openoffice-1.1.3 %f
|
||||
application/vnd.sun.xml.calc.template = openoffice-1.1.3 %f
|
||||
application/vnd.sun.xml.draw = openoffice-1.1.3 %f
|
||||
application/vnd.sun.xml.draw.template = openoffice-1.1.3 %f
|
||||
application/vnd.sun.xml.impress = openoffice-1.1.3 %f
|
||||
application/vnd.sun.xml.impress.template = openoffice-1.1.3 %f
|
||||
application/vnd.sun.xml.math = openoffice-1.1.3 %f
|
||||
application/vnd.sun.xml.writer = openoffice-1.1.3 %f
|
||||
application/vnd.sun.xml.writer.global = openoffice-1.1.3 %f
|
||||
application/vnd.sun.xml.writer.template = openoffice-1.1.3 %f
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
# @(#$Id: mimemap,v 1.2 2005-02-04 09:30:44 dockes Exp $ (C) 2004 J.F.Dockes
|
||||
# @(#$Id: mimemap,v 1.3 2005-02-09 12:07:30 dockes Exp $ (C) 2004 J.F.Dockes
|
||||
|
||||
# Recoll: associations of file name extensions to mime types
|
||||
.txt = text/plain
|
||||
@ -20,5 +20,25 @@
|
||||
.Z = application/x-gzip
|
||||
.bz2 = application/x-bzip2
|
||||
|
||||
.doc = application/msword
|
||||
|
||||
.sxc = application/vnd.sun.xml.calc
|
||||
.stc = application/vnd.sun.xml.calc.template
|
||||
.sxd = application/vnd.sun.xml.draw
|
||||
.std = application/vnd.sun.xml.draw.template
|
||||
.sxi = application/vnd.sun.xml.impress
|
||||
.sti = application/vnd.sun.xml.impress.template
|
||||
.sxm = application/vnd.sun.xml.math
|
||||
.sxw = application/vnd.sun.xml.writer
|
||||
.sxg = application/vnd.sun.xml.writer.global
|
||||
.stw = application/vnd.sun.xml.writer.template
|
||||
|
||||
.wpd = application/vnd.wordperfect
|
||||
.rtf = text/rtf
|
||||
|
||||
|
||||
# A list of stuff that we don't want to touch at all
|
||||
recoll_noindex = .tar.gz .tgz .tar.bz2 .tbz
|
||||
|
||||
[FILE]
|
||||
# This section is reserved for future non-suffix-based detection (e.g. detecting mail folders)
|
||||
|
||||
@ -2,7 +2,7 @@ include ../mk/sysconf
|
||||
|
||||
BIGLIB = ../lib/librcl.a
|
||||
|
||||
PROGS = smallut trfstreewalk trpathut transcode trmimeparse trexecmd
|
||||
PROGS = wipedir smallut trfstreewalk trpathut transcode trmimeparse trexecmd
|
||||
all: $(PROGS)
|
||||
|
||||
FSTREEWALK_OBJS= trfstreewalk.o fstreewalk.o pathut.o
|
||||
@ -44,5 +44,12 @@ smallut : $(SMALLUT_OBJS)
|
||||
trsmallut.o : ../utils/smallut.cpp
|
||||
$(CXX) $(CXXFLAGS) -DTEST_SMALLUT -c -o trsmallut.o \
|
||||
smallut.cpp
|
||||
|
||||
WIPEDIR_OBJS= trwipedir.o $(BIGLIB)
|
||||
wipedir : $(WIPEDIR_OBJS)
|
||||
$(CXX) $(CXXFLAGS) -o wipedir $(WIPEDIR_OBJS) $(LIBICONV)
|
||||
trwipedir.o : ../utils/wipedir.cpp
|
||||
$(CXX) $(CXXFLAGS) -DTEST_WIPEDIR -c -o trwipedir.o \
|
||||
wipedir.cpp
|
||||
clean:
|
||||
rm -f *.o $(PROGS)
|
||||
|
||||
@ -1,14 +1,53 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: smallut.cpp,v 1.2 2005-02-04 09:39:44 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
static char rcsid[] = "@(#$Id: smallut.cpp,v 1.3 2005-02-09 12:07:30 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
#ifndef TEST_SMALLUT
|
||||
#include <string>
|
||||
#include <ctype.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "smallut.h"
|
||||
#include "debuglog.h"
|
||||
#include "pathut.h"
|
||||
|
||||
#define MIN(A,B) ((A)<(B)?(A):(B))
|
||||
|
||||
/**
 * Create a private (mode 0700) temporary directory.
 *
 * The parent directory is taken from the RECOLL_TMPDIR environment
 * variable, else from TMPDIR, else it is /tmp. A variable which is set
 * but empty is treated as unset.
 *
 * @param tdir output: path of the created directory. Erased on failure.
 * @return true if the directory was created, false otherwise.
 */
bool maketmpdir(string& tdir)
{
    const char *tmpdir = getenv("RECOLL_TMPDIR");
    if (!tmpdir || !*tmpdir)
        tmpdir = getenv("TMPDIR");
    if (!tmpdir || !*tmpdir)
        tmpdir = "/tmp";
    tdir = tmpdir;
    path_cat(tdir, "rcltmpXXXXXX");

    // mkdtemp() replaces the XXXXXX part in place, so it needs a writable
    // copy of the template.
    char *cp = strdup(tdir.c_str());
    if (!cp) {
        LOGERR(("maketmpdir: out of memory (for file name !)\n"));
        tdir.erase();
        return false;
    }

    // mkdtemp() chooses the name and creates the directory (mode 0700) in
    // a single step. Using mktemp() followed by mkdir() would leave a
    // window during which somebody else could create the same name.
    if (!mkdtemp(cp)) {
        LOGERR(("maketmpdir: mkdtemp %s failed\n", tdir.c_str()));
        free(cp);
        tdir.erase();
        return false;
    }

    tdir = cp;
    free(cp);
    return true;
}
|
||||
|
||||
int stringicmp(const string & s1, const string& s2)
|
||||
{
|
||||
string::const_iterator it1 = s1.begin();
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
#ifndef _SMALLUT_H_INCLUDED_
|
||||
#define _SMALLUT_H_INCLUDED_
|
||||
/* @(#$Id: smallut.h,v 1.2 2005-02-04 09:39:44 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
/* @(#$Id: smallut.h,v 1.3 2005-02-09 12:07:30 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
#include <string>
|
||||
|
||||
using std::string;
|
||||
@ -9,4 +9,6 @@ extern int stringicmp(const string& s1, const string& s2);
|
||||
extern int stringlowercmp(const string& alreadylower, const string& s2);
|
||||
extern int stringuppercmp(const string& alreadyupper, const string& s2);
|
||||
|
||||
extern bool maketmpdir(string& tdir);
|
||||
|
||||
#endif /* _SMALLUT_H_INCLUDED_ */
|
||||
|
||||
97
src/utils/wipedir.cpp
Normal file
97
src/utils/wipedir.cpp
Normal file
@ -0,0 +1,97 @@
|
||||
#ifndef lint
|
||||
static char rcsid[] = "@(#$Id: wipedir.cpp,v 1.1 2005-02-09 12:07:30 dockes Exp $ (C) 2004 J.F.Dockes";
|
||||
#endif
|
||||
|
||||
#ifndef TEST_WIPEDIR
|
||||
#include <dirent.h>
|
||||
#include <sys/stat.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include <string>
|
||||
using namespace std;
|
||||
|
||||
#include "debuglog.h"
|
||||
#include "pathut.h"
|
||||
#include "wipedir.h"
|
||||
|
||||
int wipedir(const string& dir)
|
||||
{
|
||||
struct stat st;
|
||||
int statret;
|
||||
int ret = -1;
|
||||
|
||||
statret = stat(dir.c_str(), &st);
|
||||
if (statret == -1) {
|
||||
LOGERR(("wipedir: cant stat %s, errno %d\n", dir.c_str(), errno));
|
||||
return -1;
|
||||
}
|
||||
if (!S_ISDIR(st.st_mode)) {
|
||||
LOGERR(("wipedir: %s not a directory\n", dir.c_str()));
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (access(dir.c_str(), R_OK|W_OK|X_OK) < 0) {
|
||||
LOGERR(("wipedir: no write access to %s\n", dir.c_str()));
|
||||
return -1;
|
||||
}
|
||||
|
||||
DIR *d = opendir(dir.c_str());
|
||||
if (d == 0) {
|
||||
LOGERR(("wipedir: cant opendir %s, errno %d\n", dir.c_str(), errno));
|
||||
return -1;
|
||||
}
|
||||
int remaining = 0;
|
||||
struct dirent *ent;
|
||||
while ((ent = readdir(d)) != 0) {
|
||||
if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, ".."))
|
||||
continue;
|
||||
|
||||
string fn = dir;
|
||||
path_cat(fn, ent->d_name);
|
||||
|
||||
struct stat st;
|
||||
int statret = stat(fn.c_str(), &st);
|
||||
if (statret == -1) {
|
||||
LOGERR(("wipedir: cant stat %s, errno %d\n", fn.c_str(), errno));
|
||||
goto out;
|
||||
}
|
||||
if (S_ISDIR(st.st_mode)) {
|
||||
remaining++;
|
||||
} else {
|
||||
if (unlink(fn.c_str()) < 0) {
|
||||
LOGERR(("wipedir: cant unlink %s, errno %d\n",
|
||||
fn.c_str(), errno));
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ret = remaining;
|
||||
out:
|
||||
if (d)
|
||||
closedir(d);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
#else // FILEUT_TEST
|
||||
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "wipedir.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
int main(int argc, const char **argv)
|
||||
{
|
||||
if (argc != 2) {
|
||||
fprintf(stderr, "Usage: wipedir <dir>\n");
|
||||
exit(1);
|
||||
}
|
||||
string dir = argv[1];
|
||||
int cnt = wipedir(dir);
|
||||
printf("wipedir returned %d\n", cnt);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
#endif
|
||||
13
src/utils/wipedir.h
Normal file
13
src/utils/wipedir.h
Normal file
@ -0,0 +1,13 @@
|
||||
#ifndef _FILEUT_H_INCLUDED_
|
||||
#define _FILEUT_H_INCLUDED_
|
||||
/* @(#$Id: wipedir.h,v 1.1 2005-02-09 12:07:30 dockes Exp $ (C) 2004 J.F.Dockes */
|
||||
|
||||
#include <string>
|
||||
|
||||
/**
|
||||
* Remove all files inside directory (not recursive).
|
||||
* @return count of entries left in place (ie: subdirs, which are not removed), so 0 if the directory is now empty, or -1 for error
|
||||
*/
|
||||
int wipedir(const std::string& dirname);
|
||||
|
||||
#endif /* _FILEUT_H_INCLUDED_ */
|
||||
Loading…
x
Reference in New Issue
Block a user