internal xslt working for single-sheet (abw). Still leaking memory?
This commit is contained in:
parent
2bd4b5ef4f
commit
00c0c5168b
@ -2,6 +2,8 @@
|
||||
CXXFLAGS ?= @CXXFLAGS@
|
||||
LIBXAPIAN=@LIBXAPIAN@
|
||||
XAPIANCXXFLAGS=@XAPIANCXXFLAGS@
|
||||
XSLT_CFLAGS=@XSLT_CFLAGS@
|
||||
XSLT_LINKADD=@XSLT_LINKADD@
|
||||
LIBICONV=@LIBICONV@
|
||||
INCICONV=@INCICONV@
|
||||
LIBFAM = @LIBFAM@
|
||||
@ -29,8 +31,10 @@ AM_CPPFLAGS = -Wall -Wno-unused -std=c++11 \
|
||||
$(COMMONCPPFLAGS) \
|
||||
$(INCICONV) \
|
||||
$(XAPIANCXXFLAGS) \
|
||||
$(XSLT_CFLAGS) \
|
||||
$(X_CFLAGS) \
|
||||
-DRECOLL_DATADIR=\"${pkgdatadir}\" \
|
||||
-DREADFILE_ENABLE_ZLIB -DREADFILE_ENABLE_MINIZ \
|
||||
-D_GNU_SOURCE \
|
||||
$(DEFS)
|
||||
|
||||
@ -121,6 +125,8 @@ internfile/mh_symlink.h \
|
||||
internfile/mh_text.cpp \
|
||||
internfile/mh_text.h \
|
||||
internfile/mh_unknown.h \
|
||||
internfile/mh_xslt.cpp \
|
||||
internfile/mh_xslt.h \
|
||||
internfile/mimehandler.cpp \
|
||||
internfile/mimehandler.h \
|
||||
internfile/myhtmlparse.cpp \
|
||||
@ -224,6 +230,8 @@ utils/md5ut.cpp \
|
||||
utils/md5ut.h \
|
||||
utils/mimeparse.cpp \
|
||||
utils/mimeparse.h \
|
||||
utils/miniz.cpp \
|
||||
utils/miniz.h \
|
||||
utils/netcon.cpp \
|
||||
utils/netcon.h \
|
||||
utils/pathut.cpp \
|
||||
@ -262,7 +270,7 @@ AM_YFLAGS = -d
|
||||
librecoll_la_LDFLAGS = -release $(VERSION) \
|
||||
-Wl,--no-undefined -Wl,--warn-unresolved-symbols
|
||||
|
||||
librecoll_la_LIBADD = $(LIBXAPIAN) $(LIBICONV) $(LIBTHREADS)
|
||||
librecoll_la_LIBADD = $(XSLT_LINKADD) $(LIBXAPIAN) $(LIBICONV) $(LIBTHREADS)
|
||||
|
||||
# There is probably a better way to do this. The KIO needs to be linked
|
||||
# with librecoll, but librecoll is installed into a non-standard place
|
||||
@ -640,6 +648,7 @@ sampleconf/mimeview
|
||||
filterdir = $(pkgdatadir)/filters
|
||||
filter_DATA = \
|
||||
desktop/hotrecoll.py \
|
||||
filters/abiword.xsl \
|
||||
filters/rcl7z \
|
||||
filters/rclabw.py \
|
||||
filters/rclaptosidman \
|
||||
|
||||
@ -321,6 +321,21 @@ XAPIANCXXFLAGS=`$XAPIAN_CONFIG --cxxflags`
|
||||
#echo LIBXAPIANSTATICEXTRA: $LIBXAPIANSTATICEXTRA
|
||||
#echo XAPIANCXXFLAGS: $XAPIANCXXFLAGS
|
||||
|
||||
|
||||
# Locate the xslt-config helper. A value preset in the environment
# takes precedence over the PATH lookup.
XSLT_CONFIG=${XSLT_CONFIG:-no}
if test "$XSLT_CONFIG" = "no"; then
    AC_PATH_PROG(XSLT_CONFIG0, [xslt-config], no)
    XSLT_CONFIG=$XSLT_CONFIG0
fi
if test "$XSLT_CONFIG" = "no" ; then
    AC_MSG_ERROR([Cannot find xslt-config command in $PATH. Is
libxslt installed ?])
fi

# Query the helper which was actually selected above, not whatever
# xslt-config happens to come first in the PATH.
XSLT_CFLAGS=`$XSLT_CONFIG --cflags`
XSLT_LINKADD=`$XSLT_CONFIG --libs`
|
||||
|
||||
AC_ARG_ENABLE(xadump,
|
||||
AC_HELP_STRING([--enable-xadump],
|
||||
[Enable building the xadump low level Xapian access program.]),
|
||||
@ -527,6 +542,8 @@ AC_SUBST(QMAKE_DISABLE_ZEITGEIST)
|
||||
AC_SUBST(LIBQZEITGEIST)
|
||||
AC_SUBST(RCLVERSION)
|
||||
AC_SUBST(RCLLIBVERSION)
|
||||
AC_SUBST(XSLT_CFLAGS)
|
||||
AC_SUBST(XSLT_LINKADD)
|
||||
|
||||
# All object files depend on localdefs which has the cc flags. Avoid
|
||||
# changing it unless necessary
|
||||
|
||||
88
src/filters/abiword.xsl
Normal file
88
src/filters/abiword.xsl
Normal file
@ -0,0 +1,88 @@
|
||||
<?xml version="1.0"?>
<!-- Converts an AbiWord document to a minimal HTML page: selected metadata
     entries go to the head, each paragraph becomes a p element. -->
<xsl:stylesheet version="1.0"
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  xmlns:ab="http://www.abisource.com/awml.dtd"
  exclude-result-prefixes="ab"
  >

<xsl:output method="html" encoding="UTF-8"/>

<xsl:template match="/">
<html>
  <head>
    <xsl:apply-templates select="ab:abiword/ab:metadata"/>
  </head>
  <body>
    <!-- Older abiword format with no namespaces -->
    <xsl:apply-templates select="abiword/section/p"/>
    <!-- Newer namespaced format -->
    <xsl:apply-templates select="ab:abiword/ab:section/ab:p"/>
  </body>
</html>
</xsl:template>

<!-- One output paragraph per input paragraph (either format), holding the
     paragraph string value and followed by a newline. -->
<xsl:template match="p | ab:p">
  <p><xsl:value-of select="."/></p><xsl:text>&#10;</xsl:text>
</xsl:template>

<!-- Emits a meta element named $name, the content of which is the string
     value of the current node, followed by a newline. -->
<xsl:template name="meta-tag">
  <xsl:param name="name"/>
  <meta name="{$name}" content="{.}"/><xsl:text>&#10;</xsl:text>
</xsl:template>

<!-- Metadata entries: only the keys tested below produce output. -->
<xsl:template match="ab:metadata">
  <xsl:for-each select="ab:m">
    <xsl:choose>
      <xsl:when test="@key = 'dc.creator'">
        <xsl:call-template name="meta-tag">
          <xsl:with-param name="name" select="'author'"/>
        </xsl:call-template>
      </xsl:when>
      <xsl:when test="@key = 'abiword.keywords'">
        <xsl:call-template name="meta-tag">
          <xsl:with-param name="name" select="'keywords'"/>
        </xsl:call-template>
      </xsl:when>
      <xsl:when test="@key = 'dc.subject'">
        <xsl:call-template name="meta-tag">
          <xsl:with-param name="name" select="'keywords'"/>
        </xsl:call-template>
      </xsl:when>
      <xsl:when test="@key = 'dc.description'">
        <xsl:call-template name="meta-tag">
          <xsl:with-param name="name" select="'abstract'"/>
        </xsl:call-template>
      </xsl:when>
      <xsl:when test="@key = 'dc.title'">
        <title><xsl:value-of select="."/></title><xsl:text>&#10;</xsl:text>
      </xsl:when>
    </xsl:choose>
  </xsl:for-each>
</xsl:template>

</xsl:stylesheet>
|
||||
@ -38,9 +38,7 @@ public:
|
||||
MimeHandlerMail(RclConfig *cnf, const std::string &id);
|
||||
virtual ~MimeHandlerMail();
|
||||
virtual bool is_data_input_ok(DataInput input) const {
|
||||
if (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING)
|
||||
return true;
|
||||
return false;
|
||||
return (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING);
|
||||
}
|
||||
virtual bool next_document();
|
||||
virtual bool skip_to_document(const std::string& ipath);
|
||||
|
||||
232
src/internfile/mh_xslt.cpp
Normal file
232
src/internfile/mh_xslt.cpp
Normal file
@ -0,0 +1,232 @@
|
||||
/* Copyright (C) 2005 J.F.Dockes
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc.,
|
||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
#include "autoconfig.h"
|
||||
|
||||
#include <libxml/parser.h>
|
||||
#include <libxml/tree.h>
|
||||
#include <libxslt/transform.h>
|
||||
#include <libxslt/xsltInternals.h>
|
||||
#include <libxslt/xsltutils.h>
|
||||
|
||||
#include "cstr.h"
|
||||
#include "mh_xslt.h"
|
||||
#include "log.h"
|
||||
#include "smallut.h"
|
||||
#include "md5ut.h"
|
||||
#include "rclconfig.h"
|
||||
#include "readfile.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
|
||||
class FileScanXML : public FileScanDo {
|
||||
public:
|
||||
FileScanXML(const string& fn) : m_fn(fn) {}
|
||||
virtual ~FileScanXML() {
|
||||
if (ctxt) {
|
||||
xmlFreeParserCtxt(ctxt);
|
||||
}
|
||||
}
|
||||
|
||||
xmlDocPtr getDoc() {
|
||||
int ret;
|
||||
if ((ret = xmlParseChunk(ctxt, nullptr, 0, 1))) {
|
||||
xmlError *error = xmlGetLastError();
|
||||
LOGERR("FileScanXML: final xmlParseChunk failed with error " <<
|
||||
ret << " error: " <<
|
||||
(error ? error->message :
|
||||
" null return from xmlGetLastError()") << "\n");
|
||||
return nullptr;
|
||||
}
|
||||
return ctxt->myDoc;
|
||||
}
|
||||
|
||||
virtual bool init(int64_t size, string *) {
|
||||
LOGDEB1("FileScanXML: init: size " << size << endl);
|
||||
ctxt = xmlCreatePushParserCtxt(NULL, NULL, NULL, 0, m_fn.c_str());
|
||||
if (ctxt == nullptr) {
|
||||
LOGERR("FileScanXML: xmlCreatePushParserCtxt failed\n");
|
||||
return false;
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
virtual bool data(const char *buf, int cnt, string*) {
|
||||
if (0) {
|
||||
string dt(buf, cnt);
|
||||
LOGDEB1("FileScanXML: data: cnt " << cnt << " data " << dt << endl);
|
||||
} else {
|
||||
LOGDEB1("FileScanXML: data: cnt " << cnt << endl);
|
||||
}
|
||||
int ret;
|
||||
if ((ret = xmlParseChunk(ctxt, buf, cnt, 0))) {
|
||||
xmlError *error = xmlGetLastError();
|
||||
LOGERR("FileScanXML: xmlParseChunk failed with error " <<
|
||||
ret << " for [" << buf << "] error " <<
|
||||
(error ? error->message :
|
||||
" null return from xmlGetLastError()") << "\n");
|
||||
return false;
|
||||
} else {
|
||||
LOGDEB1("xmlParseChunk ok (sent " << cnt << " bytes)\n");
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
xmlParserCtxtPtr ctxt{nullptr};
|
||||
string m_fn;
|
||||
};
|
||||
|
||||
// Private state of a MimeHandlerXslt: the compiled style sheets, which it
// owns, and the output of the last transformation.
class MimeHandlerXslt::Internal {
public:
    // Set by the handler constructor once a style sheet was loaded.
    bool ok{false};
    // Single style sheet (2-parameter configuration).
    xsltStylesheet *metaOrAllSS{nullptr};
    // Second style sheet: never set by the code in this file at this point.
    xsltStylesheet *dataSS{nullptr};
    // Text produced by the last successful transformation.
    string result;

    ~Internal() {
        xsltStylesheet *sheets[] = {metaOrAllSS, dataSS};
        for (xsltStylesheet *sheet : sheets) {
            if (sheet != nullptr) {
                xsltFreeStylesheet(sheet);
            }
        }
    }
};
|
||||
|
||||
// Release the private implementation object, and the style sheets it holds.
MimeHandlerXslt::~MimeHandlerXslt()
{
    Internal *impl = m;
    m = nullptr;
    delete impl;
}
|
||||
|
||||
// Build a handler from the parameters found in the configuration.
// params can be "xslt stylesheetall" or
// "xslt metamember stylesheetmeta datamember stylesheetdata".
// Only the first form is implemented here. The style sheet name is
// relative to the "filters" subdirectory of the data directory. On any
// failure the object is left with m->ok unset, and the set_document and
// next_document methods then return false.
MimeHandlerXslt::MimeHandlerXslt(RclConfig *cnf, const std::string& id,
                                 const std::vector<std::string>& params)
    : RecollFilter(cnf, id), m(new Internal)
{
    LOGDEB("MimeHandlerXslt: params: " << stringsToString(params) << endl);
    string filtersdir = path_cat(cnf->getDatadir(), "filters");

    xmlSubstituteEntitiesDefault(0);
    xmlLoadExtDtdDefaultValue = 0;

    if (params.size() == 4) {
        // Used to be silently ignored: tell why the handler is unusable.
        LOGERR("MimeHandlerXslt: meta/data style sheet pairs are not "
               "implemented: " << stringsToString(params) << endl);
        return;
    }
    if (params.size() != 2) {
        LOGERR("MimeHandlerXslt: constructor with wrong param vector: " <<
               stringsToString(params) << endl);
        return;
    }

    string ssfn = path_cat(filtersdir, params[1]);
    FileScanXML XMLstyle(ssfn);
    string reason;
    if (!file_scan(ssfn, &XMLstyle, &reason)) {
        LOGERR("MimeHandlerXslt: file_scan failed for style sheet " <<
               ssfn << " : " << reason << endl);
        return;
    }
    xmlDoc *stl = XMLstyle.getDoc();
    if (stl == nullptr) {
        LOGERR("MimeHandlerXslt: getDoc failed for style sheet " <<
               ssfn << endl);
        return;
    }
    m->metaOrAllSS = xsltParseStylesheetDoc(stl);
    if (m->metaOrAllSS == nullptr) {
        // The style sheet only takes ownership of the document on
        // success: on failure it is still ours to free.
        LOGERR("MimeHandlerXslt: xsltParseStylesheetDoc failed for style "
               "sheet " << ssfn << endl);
        xmlFreeDoc(stl);
        return;
    }
    m->ok = true;
}
|
||||
|
||||
bool MimeHandlerXslt::set_document_file_impl(const std::string& mt,
|
||||
const std::string &file_path)
|
||||
{
|
||||
LOGDEB0("MimeHandlerXslt::set_document_file_: fn: " << file_path << endl);
|
||||
if (!m || !m->ok) {
|
||||
return false;
|
||||
}
|
||||
if (nullptr == m->dataSS) {
|
||||
if (nullptr == m->metaOrAllSS) {
|
||||
LOGERR("MimeHandlerXslt::set_document_file_impl: both ss empty??\n");
|
||||
return false;
|
||||
}
|
||||
FileScanXML XMLdoc(file_path);
|
||||
string md5, reason;
|
||||
if (!file_scan(file_path, &XMLdoc, 0, -1, &reason,
|
||||
m_forPreview ? nullptr : &md5)) {
|
||||
LOGERR("MimeHandlerXslt::set_document_file_impl: file_scan failed "
|
||||
"for " << file_path << " : " << reason << endl);
|
||||
return false;
|
||||
}
|
||||
if (!m_forPreview) {
|
||||
m_metaData[cstr_dj_keymd5] = md5;
|
||||
}
|
||||
xmlDocPtr doc = XMLdoc.getDoc();
|
||||
if (nullptr == doc) {
|
||||
LOGERR("MimeHandlerXslt::set_doc_file_impl: no parsed doc\n");
|
||||
return false;
|
||||
}
|
||||
xmlDocPtr transformed = xsltApplyStylesheet(m->metaOrAllSS, doc, NULL);
|
||||
if (nullptr == transformed) {
|
||||
LOGERR("MimeHandlerXslt::set_doc_file_: xslt transform failed\n");
|
||||
xmlFreeDoc(doc);
|
||||
return false;
|
||||
}
|
||||
xmlChar *outstr;
|
||||
int outlen;
|
||||
xsltSaveResultToString(&outstr, &outlen, transformed, m->metaOrAllSS);
|
||||
m->result = string((const char*)outstr, outlen);
|
||||
xmlFree(outstr);
|
||||
xmlFreeDoc(transformed);
|
||||
xmlFreeDoc(doc);
|
||||
} else {
|
||||
LOGERR("Not ready for multipart yet\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
m_havedoc = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool MimeHandlerXslt::set_document_string_impl(const string& mt,
|
||||
const string& msgtxt)
|
||||
{
|
||||
if (!m || !m->ok) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool MimeHandlerXslt::next_document()
|
||||
{
|
||||
if (!m || !m->ok) {
|
||||
return false;
|
||||
}
|
||||
if (m_havedoc == false)
|
||||
return false;
|
||||
m_havedoc = false;
|
||||
m_metaData[cstr_dj_keymt] = cstr_texthtml;
|
||||
m_metaData[cstr_dj_keycontent].swap(m->result);
|
||||
LOGDEB1("MimeHandlerXslt::next_document: result: [" <<
|
||||
m_metaData[cstr_dj_keycontent] << "]\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
void MimeHandlerXslt::clear_impl()
|
||||
{
|
||||
m_havedoc = false;
|
||||
m->result.clear();
|
||||
}
|
||||
49
src/internfile/mh_xslt.h
Normal file
49
src/internfile/mh_xslt.h
Normal file
@ -0,0 +1,49 @@
|
||||
/* Copyright (C) 2018 J.F.Dockes
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc.,
|
||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
#ifndef _MH_XSLT_H_INCLUDED_
|
||||
#define _MH_XSLT_H_INCLUDED_
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "mimehandler.h"
|
||||
|
||||
class MimeHandlerXslt : public RecollFilter {
|
||||
public:
|
||||
MimeHandlerXslt(RclConfig *cnf, const std::string& id,
|
||||
const std::vector<std::string>& params);
|
||||
virtual ~MimeHandlerXslt();
|
||||
|
||||
virtual bool next_document() override;
|
||||
virtual void clear_impl() override;
|
||||
|
||||
virtual bool is_data_input_ok(DataInput input) const override {
|
||||
return (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING);
|
||||
}
|
||||
|
||||
protected:
|
||||
virtual bool set_document_file_impl(const std::string& mt,
|
||||
const std::string& file_path);
|
||||
virtual bool set_document_string_impl(const std::string& mt,
|
||||
const std::string& data);
|
||||
|
||||
class Internal;
|
||||
private:
|
||||
Internal *m{nullptr};
|
||||
};
|
||||
|
||||
|
||||
#endif /* _MH_XSLT_H_INCLUDED_ */
|
||||
@ -41,6 +41,7 @@ using namespace std;
|
||||
#include "mh_symlink.h"
|
||||
#include "mh_unknown.h"
|
||||
#include "mh_null.h"
|
||||
#include "mh_xslt.h"
|
||||
|
||||
// Performance help: we use a pool of already known and created
|
||||
// handlers. There can be several instances for a given mime type
|
||||
@ -137,11 +138,17 @@ void clearMimeHandlerCache()
|
||||
|
||||
/** For mime types set as "internal" in mimeconf:
|
||||
* create appropriate handler object. */
|
||||
static RecollFilter *mhFactory(RclConfig *config, const string &mime,
|
||||
static RecollFilter *mhFactory(RclConfig *config, const string &mimeOrParams,
|
||||
bool nobuild, string& id)
|
||||
{
|
||||
LOGDEB2("mhFactory(" << mime << ")\n");
|
||||
string lmime(mime);
|
||||
LOGDEB1("mhFactory(" << mimeOrParams << ")\n");
|
||||
vector<string> lparams;
|
||||
stringToStrings(mimeOrParams, lparams);
|
||||
if (lparams.empty()) {
|
||||
// ??
|
||||
return nullptr;
|
||||
}
|
||||
string lmime(lparams[0]);
|
||||
stringtolower(lmime);
|
||||
if (cstr_textplain == lmime) {
|
||||
LOGDEB2("mhFactory(" << mime << "): returning MimeHandlerText\n");
|
||||
@ -160,11 +167,11 @@ static RecollFilter *mhFactory(RclConfig *config, const string &mime,
|
||||
MD5String("MimeHandlerMail", id);
|
||||
return nobuild ? 0 : new MimeHandlerMail(config, id);
|
||||
} else if ("inode/symlink" == lmime) {
|
||||
LOGDEB2("mhFactory(" << mime << "): ret MimeHandlerSymlink\n");
|
||||
LOGDEB2("mhFactory(" << mime << "): returning MimeHandlerSymlink\n");
|
||||
MD5String("MimeHandlerSymlink", id);
|
||||
return nobuild ? 0 : new MimeHandlerSymlink(config, id);
|
||||
} else if ("application/x-zerosize" == lmime) {
|
||||
LOGDEB("mhFactory(" << mime << "): ret MimeHandlerNull\n");
|
||||
LOGDEB("mhFactory(" << lmime << "): returning MimeHandlerNull\n");
|
||||
MD5String("MimeHandlerNull", id);
|
||||
return nobuild ? 0 : new MimeHandlerNull(config, id);
|
||||
} else if (lmime.find("text/") == 0) {
|
||||
@ -175,7 +182,11 @@ static RecollFilter *mhFactory(RclConfig *config, const string &mime,
|
||||
// exec) but still opening with a specific editor.
|
||||
LOGDEB2("mhFactory(" << mime << "): returning MimeHandlerText(x)\n");
|
||||
MD5String("MimeHandlerText", id);
|
||||
return nobuild ? 0 : new MimeHandlerText(config, id);
|
||||
return nobuild ? 0 : new MimeHandlerText(config, id);
|
||||
} else if ("xsltproc" == lmime) {
|
||||
// XML Types processed with one or several xslt style sheets.
|
||||
MD5String(mimeOrParams, id);
|
||||
return nobuild ? 0 : new MimeHandlerXslt(config, id, lparams);
|
||||
} else {
|
||||
// We should not get there. It means that "internal" was set
|
||||
// as a handler in mimeconf for a mime type we actually can't
|
||||
@ -262,7 +273,7 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs,
|
||||
|
||||
/* Get handler/filter object for given mime type: */
|
||||
RecollFilter *getMimeHandler(const string &mtype, RclConfig *cfg,
|
||||
bool filtertypes)
|
||||
bool filtertypes)
|
||||
{
|
||||
LOGDEB("getMimeHandler: mtype [" << mtype << "] filtertypes " <<
|
||||
filtertypes << "\n");
|
||||
@ -291,7 +302,7 @@ RecollFilter *getMimeHandler(const string &mtype, RclConfig *cfg,
|
||||
}
|
||||
bool internal = !stringlowercmp("internal", handlertype);
|
||||
if (internal) {
|
||||
// For internal types let the factory compute the id
|
||||
// For internal types let the factory compute the cache id
|
||||
mhFactory(cfg, cmdstr.empty() ? mtype : cmdstr, true, id);
|
||||
} else {
|
||||
// exec/execm: use the md5 of the def line
|
||||
@ -304,16 +315,15 @@ RecollFilter *getMimeHandler(const string &mtype, RclConfig *cfg,
|
||||
goto out;
|
||||
|
||||
LOGDEB2("getMimeHandler: " << mtype << " not in cache\n");
|
||||
|
||||
// Not in cache.
|
||||
if (internal) {
|
||||
// If there is a parameter after "internal" it's the mime
|
||||
// type to use. This is so that we can have bogus mime
|
||||
// types like text/x-purple-html-log (for ie: specific
|
||||
// icon) and still use the html filter on them. This is
|
||||
// partly redundant with the localfields/rclaptg, but
|
||||
// better and the latter will probably go away at some
|
||||
// point in the future.
|
||||
// type to use, or the further qualifier (e.g. style sheet
|
||||
// name for xslt types). This is so that we can have bogus
|
||||
// mime types like text/x-purple-html-log (for ie:
|
||||
// specific icon) and still use the html filter on
|
||||
// them. This is partly redundant with the
|
||||
// localfields/rclaptg, but better? (and the latter will
|
||||
// probably go away at some point in the future?).
|
||||
LOGDEB2("handlertype internal, cmdstr [" << cmdstr << "]\n");
|
||||
h = mhFactory(cfg, cmdstr.empty() ? mtype : cmdstr, false, id);
|
||||
goto out;
|
||||
@ -336,14 +346,10 @@ RecollFilter *getMimeHandler(const string &mtype, RclConfig *cfg,
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// We get here if there was no specific error, but there is no
|
||||
// identified mime type, or no handler associated.
|
||||
|
||||
// Finally, unhandled files are either ignored or their name and
|
||||
// generic metadata is indexed, depending on configuration
|
||||
{
|
||||
} else {
|
||||
// No identified mime type, or no handler associated.
|
||||
// Unhandled files are either ignored or their name and
|
||||
// generic metadata is indexed, depending on configuration
|
||||
bool indexunknown = false;
|
||||
cfg->getConfParam("indexallfilenames", &indexunknown);
|
||||
if (indexunknown) {
|
||||
|
||||
@ -110,7 +110,7 @@ application/vnd.sun.xml.writer = execm rclsoff.py
|
||||
application/vnd.sun.xml.writer.global = execm rclsoff.py
|
||||
application/vnd.sun.xml.writer.template = execm rclsoff.py
|
||||
application/vnd.wordperfect = exec wpd2html;mimetype=text/html
|
||||
application/x-abiword = execm rclabw.py
|
||||
application/x-abiword = internal xsltproc abiword.xsl
|
||||
application/x-awk = internal text/plain
|
||||
application/x-chm = execm rclchm
|
||||
application/x-dia-diagram = execm rcldia;mimetype=text/plain
|
||||
|
||||
@ -20,15 +20,17 @@
|
||||
#include <string.h>
|
||||
|
||||
#include "md5ut.h"
|
||||
|
||||
#include "readfile.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
class FileScanMd5 : public FileScanDo {
|
||||
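// This class must not be named FileScanMd5 like the different class in
// readfile.cpp: with identical names, the vtables get mixed up and mh_xslt
// crashes while calling a virtual function (seen with gcc 6.3 and 7.3).
// Presumably this is a One Definition Rule violation: two different classes
// with the same name and external linkage in one program.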
|
||||
class FileScanMd5loc : public FileScanDo {
|
||||
public:
|
||||
FileScanMd5(string& d) : digest(d) {}
|
||||
virtual bool init(size_t size, string *)
|
||||
FileScanMd5loc(string& d) : digest(d) {}
|
||||
virtual bool init(int64_t, string *)
|
||||
{
|
||||
MD5Init(&ctx);
|
||||
return true;
|
||||
@ -44,7 +46,7 @@ public:
|
||||
|
||||
bool MD5File(const string& filename, string &digest, string *reason)
|
||||
{
|
||||
FileScanMd5 md5er(digest);
|
||||
FileScanMd5loc md5er(digest);
|
||||
if (!file_scan(filename, &md5er, reason))
|
||||
return false;
|
||||
// We happen to know that digest and md5er.digest are the same object
|
||||
|
||||
7564
src/utils/miniz.cpp
Normal file
7564
src/utils/miniz.cpp
Normal file
File diff suppressed because it is too large
Load Diff
1321
src/utils/miniz.h
Normal file
1321
src/utils/miniz.h
Normal file
File diff suppressed because it is too large
Load Diff
@ -14,7 +14,6 @@
|
||||
* Free Software Foundation, Inc.,
|
||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
#ifndef TEST_READFILE
|
||||
#ifdef BUILDING_RECOLL
|
||||
#include "autoconfig.h"
|
||||
#else
|
||||
@ -37,20 +36,33 @@
|
||||
|
||||
#include "readfile.h"
|
||||
#include "smallut.h"
|
||||
#include "md5.h"
|
||||
|
||||
using std::string;
|
||||
#ifdef MDU_INCLUDE_LOG
|
||||
#include MDU_INCLUDE_LOG
|
||||
#else
|
||||
#include "log.h"
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
|
||||
///////////////
|
||||
// Implementation of basic interface: read whole file to memory buffer
|
||||
class FileToString : public FileScanDo {
|
||||
public:
|
||||
FileToString(string& data) : m_data(data) {}
|
||||
string& m_data;
|
||||
bool init(size_t size, string *reason) {
|
||||
|
||||
// Note: the fstat() + reserve() (in init()) calls divide cpu
|
||||
// usage almost by 2 on both linux i586 and macosx (compared to
|
||||
// just append()) Also tried a version with mmap, but it's
|
||||
// actually slower on the mac and not faster on linux.
|
||||
virtual bool init(int64_t size, string *reason) {
|
||||
if (size > 0) {
|
||||
m_data.reserve(size);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
bool data(const char *buf, int cnt, string *reason) {
|
||||
virtual bool data(const char *buf, int cnt, string *reason) {
|
||||
try {
|
||||
m_data.append(buf, cnt);
|
||||
} catch (...) {
|
||||
@ -59,248 +71,449 @@ public:
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
string& m_data;
|
||||
};
|
||||
|
||||
bool file_to_string(const string& fn, string& data, int64_t offs, size_t cnt,
|
||||
string *reason)
|
||||
{
|
||||
FileToString accum(data);
|
||||
return file_scan(fn, &accum, offs, cnt, reason, nullptr);
|
||||
}
|
||||
|
||||
bool file_to_string(const string& fn, string& data, string *reason)
|
||||
{
|
||||
return file_to_string(fn, data, 0, size_t(-1), reason);
|
||||
}
|
||||
bool file_to_string(const string& fn, string& data, int64_t offs, size_t cnt,
|
||||
string *reason)
|
||||
{
|
||||
FileToString accum(data);
|
||||
return file_scan(fn, &accum, offs, cnt, reason);
|
||||
}
|
||||
|
||||
bool file_scan(const string& fn, FileScanDo* doer, string *reason)
|
||||
{
|
||||
return file_scan(fn, doer, 0, size_t(-1), reason);
|
||||
}
|
||||
|
||||
const int RDBUFSZ = 8192;
|
||||
// Note: the fstat() + reserve() (in init()) calls divide cpu usage almost by 2
|
||||
// on both linux i586 and macosx (compared to just append())
|
||||
// Also tried a version with mmap, but it's actually slower on the mac and not
|
||||
// faster on linux.
|
||||
bool file_scan(const string& fn, FileScanDo* doer, int64_t startoffs,
|
||||
size_t cnttoread, string *reason)
|
||||
{
|
||||
if (startoffs < 0) {
|
||||
*reason += " file_scan: negative startoffs not allowed";
|
||||
return false;
|
||||
/////////////
|
||||
// Callback/filtering interface
|
||||
|
||||
// Abstract class base for both source (origin) and filter
|
||||
// (midstream). Both have a downstream
|
||||
class FileScanUpstream {
|
||||
public:
|
||||
virtual void setDownstream(FileScanDo *down) {
|
||||
m_down = down;
|
||||
}
|
||||
virtual FileScanDo *out() {
|
||||
return m_down;
|
||||
}
|
||||
protected:
|
||||
FileScanDo *m_down{nullptr};
|
||||
};
|
||||
|
||||
// Source element.
|
||||
class FileScanSource : public FileScanUpstream {
|
||||
public:
|
||||
FileScanSource(FileScanDo *down) {
|
||||
setDownstream(down);
|
||||
}
|
||||
virtual bool scan() = 0;
|
||||
};
|
||||
|
||||
// Inside element of a transformation pipe. The idea is that elements
|
||||
// which don't recognize the data get themselves out of the pipe
|
||||
// (pop()). Typically, only one of the decompression modules
|
||||
// (e.g. gzip/bzip2/xz...) would remain. For now there is only gzip,
|
||||
// it pops itself if the data does not have the right magic number
|
||||
class FileScanFilter : public FileScanDo, public FileScanUpstream {
|
||||
public:
|
||||
virtual void insertAtSink(FileScanDo *sink, FileScanUpstream *upstream) {
|
||||
setDownstream(sink);
|
||||
if (m_down) {
|
||||
m_down->setUpstream(this);
|
||||
}
|
||||
setUpstream(upstream);
|
||||
if (m_up) {
|
||||
m_up->setDownstream(this);
|
||||
}
|
||||
}
|
||||
|
||||
bool ret = false;
|
||||
bool noclosing = true;
|
||||
int fd = 0;
|
||||
struct stat st;
|
||||
// Initialize st_size: if fn.empty() , the fstat() call won't happen.
|
||||
st.st_size = 0;
|
||||
// Remove myself from the pipe.
|
||||
virtual void pop() {
|
||||
if (m_down) {
|
||||
m_down->setUpstream(m_up);
|
||||
}
|
||||
if (m_up) {
|
||||
m_up->setDownstream(m_down);
|
||||
}
|
||||
}
|
||||
|
||||
// If we have a file name, open it, else use stdin.
|
||||
if (!fn.empty()) {
|
||||
fd = open(fn.c_str(), O_RDONLY | O_BINARY);
|
||||
if (fd < 0 || fstat(fd, &st) < 0) {
|
||||
catstrerror(reason, "open/stat", errno);
|
||||
virtual void setUpstream(FileScanUpstream *up) override {
|
||||
m_up = up;
|
||||
}
|
||||
|
||||
private:
|
||||
FileScanUpstream *m_up{nullptr};
|
||||
};
|
||||
|
||||
|
||||
#if defined(READFILE_ENABLE_ZLIB)
|
||||
#include <zlib.h>
|
||||
|
||||
class GzFilter : public FileScanFilter {
|
||||
public:
|
||||
virtual ~GzFilter() {
|
||||
if (m_initdone) {
|
||||
inflateEnd(&m_stream);
|
||||
}
|
||||
}
|
||||
|
||||
virtual bool init(int64_t size, string *reason) override {
|
||||
LOGDEB1("GzFilter::init\n");
|
||||
if (out()) {
|
||||
return out()->init(size, reason);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool data(const char *buf, int cnt, string *reason) override {
|
||||
LOGDEB1("GzFilter::data: cnt " << cnt << endl);
|
||||
|
||||
int error;
|
||||
m_stream.next_in = (Bytef*)buf;
|
||||
m_stream.avail_in = cnt;
|
||||
|
||||
if (m_initdone == false) {
|
||||
m_initdone = true;
|
||||
// We do not support a first read cnt < 2. We probably should.
|
||||
if (cnt < 2) {
|
||||
if (reason)
|
||||
*reason += "GzFilter: first data count < 2";
|
||||
return false;
|
||||
}
|
||||
const unsigned char *ubuf = (const unsigned char *)buf;
|
||||
if (ubuf[0] != 0x1f || ubuf[1] != 0x8b) {
|
||||
LOGDEB1("GzFilter::data: not gzip. out() is " << out() << "\n");
|
||||
pop();
|
||||
if (out()) {
|
||||
return out()->data(buf, cnt, reason);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
m_stream.opaque = nullptr;
|
||||
m_stream.zalloc = alloc_func;
|
||||
m_stream.zfree = free_func;
|
||||
m_stream.next_out = (Bytef*)m_obuf;
|
||||
m_stream.avail_out = m_obs;
|
||||
if ((error = inflateInit2(&m_stream, 15+32)) != Z_OK) {
|
||||
LOGERR("inflateInit2 error: " << error << endl);
|
||||
if (reason) {
|
||||
*reason += " Zlib inflateinit failed";
|
||||
if (m_stream.msg && *m_stream.msg) {
|
||||
*reason += string(": ") + m_stream.msg;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
while (m_stream.avail_in != 0) {
|
||||
m_stream.next_out = (Bytef*)m_obuf;
|
||||
m_stream.avail_out = m_obs;
|
||||
if ((error = inflate(&m_stream, Z_SYNC_FLUSH)) < Z_OK) {
|
||||
LOGERR("inflate error: " << error << endl);
|
||||
if (reason) {
|
||||
*reason += " Zlib inflate failed";
|
||||
if (m_stream.msg && *m_stream.msg) {
|
||||
*reason += string(": ") + m_stream.msg;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
if (out() &&
|
||||
!out()->data(m_obuf, m_obs - m_stream.avail_out, reason)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static voidpf alloc_func(voidpf opaque, uInt items, uInt size) {
|
||||
return malloc(items * size);
|
||||
}
|
||||
static void free_func(voidpf opaque, voidpf address) {
|
||||
free(address);
|
||||
}
|
||||
|
||||
bool m_initdone{false};
|
||||
z_stream m_stream;
|
||||
char m_obuf[10000];
|
||||
const int m_obs{10000};
|
||||
};
|
||||
#endif // GZ
|
||||
|
||||
class FileScanMd5 : public FileScanFilter {
|
||||
public:
|
||||
FileScanMd5(string& d) : digest(d) {}
|
||||
virtual bool init(int64_t size, string *reason) override {
|
||||
LOGDEB1("FileScanMd5: init\n");
|
||||
MD5Init(&ctx);
|
||||
if (out()) {
|
||||
return out()->init(size, reason);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
virtual bool data(const char *buf, int cnt, string *reason) override {
|
||||
LOGDEB1("FileScanMd5: data. cnt " << cnt << endl);
|
||||
MD5Update(&ctx, (const unsigned char*)buf, cnt);
|
||||
if (out() && !out()->data(buf, cnt, reason)) {
|
||||
return false;
|
||||
}
|
||||
noclosing = false;
|
||||
return true;
|
||||
}
|
||||
bool finish() {
|
||||
LOGDEB1("FileScanMd5: finish\n");
|
||||
MD5Final(digest, &ctx);
|
||||
return true;
|
||||
}
|
||||
string &digest;
|
||||
MD5_CTX ctx;
|
||||
};
|
||||
|
||||
|
||||
// Source taking data from a regular file
|
||||
class FileScanSourceFile : public FileScanSource {
|
||||
public:
|
||||
FileScanSourceFile(FileScanDo *next, const string& fn, int64_t startoffs,
|
||||
int64_t cnttoread, string *reason)
|
||||
: FileScanSource(next), m_fn(fn), m_startoffs(startoffs),
|
||||
m_cnttoread(cnttoread), m_reason(reason) { }
|
||||
|
||||
virtual bool scan() {
|
||||
LOGDEB1("FileScanSourceFile: reading " << m_fn << " offs " <<
|
||||
m_startoffs<< " cnt " << m_cnttoread << " out " << out() << endl);
|
||||
const int RDBUFSZ = 8192;
|
||||
bool ret = false;
|
||||
bool noclosing = true;
|
||||
int fd = 0;
|
||||
struct stat st;
|
||||
// Initialize st_size: if fn.empty() , the fstat() call won't happen.
|
||||
st.st_size = 0;
|
||||
|
||||
// If we have a file name, open it, else use stdin.
|
||||
if (!m_fn.empty()) {
|
||||
fd = open(m_fn.c_str(), O_RDONLY | O_BINARY);
|
||||
if (fd < 0 || fstat(fd, &st) < 0) {
|
||||
catstrerror(m_reason, "open/stat", errno);
|
||||
return false;
|
||||
}
|
||||
noclosing = false;
|
||||
}
|
||||
|
||||
#if defined O_NOATIME && O_NOATIME != 0
|
||||
if (fcntl(fd, F_SETFL, O_NOATIME) < 0) {
|
||||
// perror("fcntl");
|
||||
if (fcntl(fd, F_SETFL, O_NOATIME) < 0) {
|
||||
// perror("fcntl");
|
||||
}
|
||||
#endif
|
||||
if (out()) {
|
||||
if (m_cnttoread != -1 && m_cnttoread) {
|
||||
out()->init(m_cnttoread + 1, m_reason);
|
||||
} else if (st.st_size > 0) {
|
||||
out()->init(st.st_size + 1, m_reason);
|
||||
} else {
|
||||
out()->init(0, m_reason);
|
||||
}
|
||||
}
|
||||
|
||||
int64_t curoffs = 0;
|
||||
if (m_startoffs > 0 && !m_fn.empty()) {
|
||||
if (lseek(fd, m_startoffs, SEEK_SET) != m_startoffs) {
|
||||
catstrerror(m_reason, "lseek", errno);
|
||||
return false;
|
||||
}
|
||||
curoffs = m_startoffs;
|
||||
}
|
||||
|
||||
char buf[RDBUFSZ];
|
||||
int64_t totread = 0;
|
||||
for (;;) {
|
||||
size_t toread = RDBUFSZ;
|
||||
if (m_startoffs > 0 && curoffs < m_startoffs) {
|
||||
toread = size_t(MIN(RDBUFSZ, m_startoffs - curoffs));
|
||||
}
|
||||
|
||||
if (m_cnttoread != -1) {
|
||||
toread = MIN(toread, (uint64_t)(m_cnttoread - totread));
|
||||
}
|
||||
ssize_t n = static_cast<ssize_t>(read(fd, buf, toread));
|
||||
if (n < 0) {
|
||||
catstrerror(m_reason, "read", errno);
|
||||
goto out;
|
||||
}
|
||||
if (n == 0) {
|
||||
break;
|
||||
}
|
||||
curoffs += n;
|
||||
if (curoffs - n < m_startoffs) {
|
||||
continue;
|
||||
}
|
||||
if (!out()->data(buf, n, m_reason)) {
|
||||
goto out;
|
||||
}
|
||||
totread += n;
|
||||
if (m_cnttoread > 0 && totread >= m_cnttoread) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
ret = true;
|
||||
out:
|
||||
if (fd >= 0 && !noclosing) {
|
||||
close(fd);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
protected:
|
||||
string m_fn;
|
||||
int64_t m_startoffs;
|
||||
int64_t m_cnttoread;
|
||||
string *m_reason;
|
||||
};
|
||||
|
||||
|
||||
#if defined(READFILE_ENABLE_MINIZ)
|
||||
#include "miniz.h"
|
||||
|
||||
// Source taking data from a ZIP archive member
|
||||
class FileScanSourceZip : public FileScanSource {
|
||||
public:
|
||||
FileScanSourceZip(FileScanDo *next, const string& fn, const string& member,
|
||||
string *reason)
|
||||
: FileScanSource(next), m_fn(fn), m_member(member),
|
||||
m_reason(reason) { }
|
||||
|
||||
virtual bool scan() {
|
||||
bool ret = false;
|
||||
mz_zip_archive zip;
|
||||
mz_zip_zero_struct(&zip);
|
||||
void *opaque = this;
|
||||
|
||||
if (!mz_zip_reader_init_file(&zip, m_fn.c_str(), 0)) {
|
||||
if (m_reason) {
|
||||
*m_reason += "mz_zip_reader_init_file() failed: ";
|
||||
*m_reason += string(mz_zip_get_error_string(zip.m_last_error));
|
||||
}
|
||||
return false;
|
||||
}
|
||||
mz_uint32 file_index;
|
||||
if (mz_zip_reader_locate_file_v2(&zip, m_member.c_str(), NULL, 0,
|
||||
&file_index) < 0) {
|
||||
if (m_reason) {
|
||||
*m_reason += "mz_zip_reader_locate_file() failed: ";
|
||||
*m_reason += string(mz_zip_get_error_string(zip.m_last_error));
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
|
||||
mz_zip_archive_file_stat zstat;
|
||||
if (!mz_zip_reader_file_stat(&zip, file_index, &zstat)) {
|
||||
if (m_reason) {
|
||||
*m_reason += "mz_zip_reader_file_stat() failed: ";
|
||||
*m_reason += string(mz_zip_get_error_string(zip.m_last_error));
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
if (out()) {
|
||||
if (!out()->init(zstat.m_uncomp_size, m_reason)) {
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
if (!mz_zip_reader_extract_to_callback(
|
||||
&zip, file_index, write_cb, opaque, 0)) {
|
||||
if (m_reason) {
|
||||
*m_reason += "mz_zip_reader_extract_to_callback() failed: ";
|
||||
*m_reason += string(mz_zip_get_error_string(zip.m_last_error));
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = true;
|
||||
out:
|
||||
mz_zip_reader_end(&zip);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static size_t write_cb(void *pOpaque, mz_uint64 file_ofs,
|
||||
const void *pBuf, size_t n) {
|
||||
const char *cp = (const char*)pBuf;
|
||||
LOGDEB1("write_cb: ofs " << file_ofs << " cnt " << n << " data: " <<
|
||||
string(cp, n) << endl);
|
||||
FileScanSourceZip *ths = (FileScanSourceZip *)pOpaque;
|
||||
if (ths->out()) {
|
||||
if (!ths->out()->data(cp, n, ths->m_reason)) {
|
||||
return (size_t)-1;
|
||||
}
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
protected:
|
||||
string m_fn;
|
||||
string m_member;
|
||||
string *m_reason;
|
||||
};
|
||||
|
||||
bool file_scan(const std::string& filename, const std::string& membername,
|
||||
FileScanDo* doer, std::string *reason)
|
||||
{
|
||||
if (membername.empty()) {
|
||||
return file_scan(filename, doer, 0, -1, reason, nullptr);
|
||||
} else {
|
||||
FileScanSourceZip source(doer, filename, membername, reason);
|
||||
return source.scan();
|
||||
}
|
||||
}
|
||||
|
||||
#endif // READFILE_ENABLE_MINIZ
|
||||
|
||||
bool file_scan(const string& fn, FileScanDo* doer, int64_t startoffs,
|
||||
int64_t cnttoread, string *reason, string *md5p)
|
||||
{
|
||||
LOGDEB("file_scan: doer " << doer << endl);
|
||||
#if defined(READFILE_ENABLE_ZLIB)
|
||||
bool nodecomp = startoffs != 0;
|
||||
#endif
|
||||
if (startoffs < 0) {
|
||||
startoffs = 0;
|
||||
}
|
||||
|
||||
FileScanSourceFile source(doer, fn, startoffs, cnttoread, reason);
|
||||
FileScanUpstream *up = &source;
|
||||
|
||||
// We compute the MD5 on the uncompressed data, so insert this
|
||||
// right at the source.
|
||||
string digest;
|
||||
FileScanMd5 md5filter(digest);
|
||||
if (md5p) {
|
||||
md5filter.insertAtSink(doer, up);
|
||||
up = &md5filter;
|
||||
}
|
||||
|
||||
#if defined(READFILE_ENABLE_ZLIB)
|
||||
GzFilter gzfilter;
|
||||
if (!nodecomp) {
|
||||
gzfilter.insertAtSink(doer, up);
|
||||
up = &gzfilter;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (cnttoread != (size_t) - 1 && cnttoread) {
|
||||
doer->init(cnttoread + 1, reason);
|
||||
} else if (st.st_size > 0) {
|
||||
doer->init(size_t(st.st_size + 1), reason);
|
||||
} else {
|
||||
doer->init(0, reason);
|
||||
}
|
||||
bool ret = source.scan();
|
||||
|
||||
int64_t curoffs = 0;
|
||||
if (startoffs > 0 && !fn.empty()) {
|
||||
if (lseek(fd, startoffs, SEEK_SET) != startoffs) {
|
||||
catstrerror(reason, "lseek", errno);
|
||||
return false;
|
||||
}
|
||||
curoffs = startoffs;
|
||||
}
|
||||
|
||||
char buf[RDBUFSZ];
|
||||
size_t totread = 0;
|
||||
for (;;) {
|
||||
size_t toread = RDBUFSZ;
|
||||
if (startoffs > 0 && curoffs < startoffs) {
|
||||
toread = size_t(MIN(RDBUFSZ, startoffs - curoffs));
|
||||
}
|
||||
|
||||
if (cnttoread != size_t(-1)) {
|
||||
toread = MIN(toread, cnttoread - totread);
|
||||
}
|
||||
ssize_t n = static_cast<ssize_t>(read(fd, buf, toread));
|
||||
if (n < 0) {
|
||||
catstrerror(reason, "read", errno);
|
||||
goto out;
|
||||
}
|
||||
if (n == 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
curoffs += n;
|
||||
if (curoffs - n < startoffs) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!doer->data(buf, n, reason)) {
|
||||
goto out;
|
||||
}
|
||||
totread += n;
|
||||
if (cnttoread > 0 && totread >= cnttoread) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
ret = true;
|
||||
out:
|
||||
if (fd >= 0 && !noclosing) {
|
||||
close(fd);
|
||||
if (md5p) {
|
||||
md5filter.finish();
|
||||
MD5HexPrint(digest, *md5p);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
#else // Test
|
||||
#include "autoconfig.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <sys/types.h>
|
||||
#include "safesysstat.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
using namespace std;
|
||||
|
||||
#include "readfile.h"
|
||||
#include "fstreewalk.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
class myCB : public FsTreeWalkerCB {
|
||||
public:
|
||||
FsTreeWalker::Status processone(const string& path,
|
||||
const struct stat *st,
|
||||
FsTreeWalker::CbFlag flg) {
|
||||
if (flg == FsTreeWalker::FtwDirEnter) {
|
||||
//cout << "[Entering " << path << "]" << endl;
|
||||
} else if (flg == FsTreeWalker::FtwDirReturn) {
|
||||
//cout << "[Returning to " << path << "]" << endl;
|
||||
} else if (flg == FsTreeWalker::FtwRegular) {
|
||||
//cout << path << endl;
|
||||
string s, reason;
|
||||
if (!file_to_string(path, s, &reason)) {
|
||||
cerr << "Failed: " << reason << " : " << path << endl;
|
||||
} else {
|
||||
//cout <<
|
||||
//"================================================" << endl;
|
||||
cout << path << endl;
|
||||
// cout << s;
|
||||
}
|
||||
reason.clear();
|
||||
}
|
||||
return FsTreeWalker::FtwOk;
|
||||
}
|
||||
};
|
||||
|
||||
static int op_flags;
|
||||
#define OPT_MOINS 0x1
|
||||
#define OPT_c 0x2
|
||||
#define OPT_o 0x4
|
||||
|
||||
static const char *thisprog;
|
||||
static char usage [] =
|
||||
"trreadfile [-o offs] [-c cnt] topdirorfile\n\n"
|
||||
;
|
||||
static void
|
||||
Usage(void)
|
||||
bool file_scan(const string& fn, FileScanDo* doer, string *reason)
|
||||
{
|
||||
fprintf(stderr, "%s: usage:\n%s", thisprog, usage);
|
||||
exit(1);
|
||||
return file_scan(fn, doer, 0, -1, reason, nullptr);
|
||||
}
|
||||
|
||||
int main(int argc, const char **argv)
|
||||
{
|
||||
int64_t offs = 0;
|
||||
size_t cnt = size_t(-1);
|
||||
thisprog = argv[0];
|
||||
argc--;
|
||||
argv++;
|
||||
|
||||
while (argc > 0 && **argv == '-') {
|
||||
(*argv)++;
|
||||
if (!(**argv))
|
||||
/* Cas du "adb - core" */
|
||||
{
|
||||
Usage();
|
||||
}
|
||||
while (**argv)
|
||||
switch (*(*argv)++) {
|
||||
case 'c':
|
||||
op_flags |= OPT_c;
|
||||
if (argc < 2) {
|
||||
Usage();
|
||||
}
|
||||
cnt = atoll(*(++argv));
|
||||
argc--;
|
||||
goto b1;
|
||||
case 'o':
|
||||
op_flags |= OPT_o;
|
||||
if (argc < 2) {
|
||||
Usage();
|
||||
}
|
||||
offs = strtoull(*(++argv), 0, 0);
|
||||
argc--;
|
||||
goto b1;
|
||||
default:
|
||||
Usage();
|
||||
break;
|
||||
}
|
||||
b1:
|
||||
argc--;
|
||||
argv++;
|
||||
}
|
||||
|
||||
if (argc != 1) {
|
||||
Usage();
|
||||
}
|
||||
string top = *argv++;
|
||||
argc--;
|
||||
cerr << "filename " << top << " offs " << offs << " cnt " << cnt << endl;
|
||||
|
||||
struct stat st;
|
||||
if (!top.empty() && stat(top.c_str(), &st) < 0) {
|
||||
perror("stat");
|
||||
exit(1);
|
||||
}
|
||||
if (!top.empty() && S_ISDIR(st.st_mode)) {
|
||||
FsTreeWalker walker;
|
||||
myCB cb;
|
||||
walker.walk(top, cb);
|
||||
if (walker.getErrCnt() > 0) {
|
||||
cout << walker.getReason();
|
||||
}
|
||||
} else {
|
||||
string s, reason;
|
||||
if (!file_to_string(top, s, offs, cnt, &reason)) {
|
||||
cerr << reason << endl;
|
||||
exit(1);
|
||||
} else {
|
||||
cout << s;
|
||||
}
|
||||
}
|
||||
exit(0);
|
||||
}
|
||||
#endif //TEST_READFILE
|
||||
|
||||
@ -21,30 +21,72 @@
|
||||
|
||||
#include <string>
|
||||
|
||||
/**
|
||||
* Read file in chunks, calling an accumulator for each chunk. Can be used
|
||||
* for reading in a file, computing an md5...
|
||||
*/
|
||||
class FileScanUpstream;

/** Data sink for the file reader. */
class FileScanDo {
public:
    virtual ~FileScanDo() {}

    /* Initialize and allocate.
     * @param size if set, lower bound of data size.
     * @param reason[output] set to error message in case of error.
     * @return false for error (file_scan will return), true if ok.
     */
    virtual bool init(int64_t size, std::string *reason) = 0;

    /* Process chunk of data
     * @param buf the data buffer.
     * @param cnt byte count.
     * @param reason[output] set to error message in case of error.
     * @return false for error (file_scan will return), true if ok.
     */
    virtual bool data(const char *buf, int cnt, std::string *reason) = 0;

    /* Receive a pointer to an upstream object. The default implementation
     * ignores it. */
    virtual void setUpstream(FileScanUpstream*) {}
};
|
||||
bool file_scan(const std::string& filename, FileScanDo* doer, std::string *reason = 0);
|
||||
/* Same but only process count cnt from offset offs. Set cnt to size_t(-1)
|
||||
* for no limit */
|
||||
bool file_scan(const std::string& fn, FileScanDo* doer, int64_t offs, size_t cnt,
|
||||
std::string *reason = 0);
|
||||
|
||||
/** Open and read file, calling the FileScanDo data() method for each chunk.
|
||||
*
|
||||
* @param filename File name. Use empty value for stdin
|
||||
|
||||
* @param doer the data processor. The init() method will be called
|
||||
* initially with a lower bound of the data size (may be used to
|
||||
* reserve a buffer), or with a 0 size if nothing is known about the
|
||||
* size. The data() method will be called for every chunk of data
|
||||
* read.
|
||||
* @param offs Start offset. If not zero, will disable decompression
|
||||
* (set to -1 to start at 0 with no decompression).
|
||||
* @param cnt Max bytes in output. Set cnt to -1 for no limit.
|
||||
* @param[output] md5p If not null, points to a string to store the hex ascii
|
||||
* md5 of the uncompressed data.
|
||||
* @param[output] reason If not null, points to a string for storing an
|
||||
* error message if the return value is false.
|
||||
* @return true if the operation ended normally, else false.
|
||||
*/
|
||||
bool file_scan(const std::string& fn, FileScanDo* doer, int64_t startoffs,
|
||||
int64_t cnttoread, std::string *reason, std::string *md5p);
|
||||
|
||||
/** Same as above, with no offset/cnt/md5 */
|
||||
bool file_scan(const std::string& filename, FileScanDo* doer,
|
||||
std::string *reason);
|
||||
|
||||
|
||||
#if defined(READFILE_ENABLE_MINIZ)
|
||||
/* Process a zip archive member */
|
||||
bool file_scan(const std::string& filename, const std::string& membername,
|
||||
FileScanDo* doer, std::string *reason);
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Read file into string.
|
||||
* @return true for ok, false else
|
||||
*/
|
||||
bool file_to_string(const std::string& filename, std::string& data, std::string *reason = 0);
|
||||
bool file_to_string(const std::string& filename, std::string& data,
|
||||
std::string *reason = 0);
|
||||
|
||||
/** Read file chunk into string. Set cnt to size_t(-1) for whole file */
|
||||
/** Read file chunk into string. Set cnt to -1 for going to
|
||||
* eof, offs to -1 for going from the start without decompression */
|
||||
bool file_to_string(const std::string& filename, std::string& data,
|
||||
int64_t offs, size_t cnt, std::string *reason = 0);
|
||||
|
||||
|
||||
#endif /* _READFILE_H_INCLUDED_ */
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user