merged internal-xsl branch

This commit is contained in:
Jean-Francois Dockes 2019-01-30 08:47:50 +01:00
commit c0d87a3995
60 changed files with 11095 additions and 703 deletions

View File

@ -2,6 +2,8 @@
CXXFLAGS ?= @CXXFLAGS@ CXXFLAGS ?= @CXXFLAGS@
LIBXAPIAN=@LIBXAPIAN@ LIBXAPIAN=@LIBXAPIAN@
XAPIANCXXFLAGS=@XAPIANCXXFLAGS@ XAPIANCXXFLAGS=@XAPIANCXXFLAGS@
XSLT_CFLAGS=@XSLT_CFLAGS@
XSLT_LINKADD=@XSLT_LINKADD@
LIBICONV=@LIBICONV@ LIBICONV=@LIBICONV@
INCICONV=@INCICONV@ INCICONV=@INCICONV@
LIBFAM = @LIBFAM@ LIBFAM = @LIBFAM@
@ -29,8 +31,10 @@ AM_CPPFLAGS = -Wall -Wno-unused -std=c++11 \
$(COMMONCPPFLAGS) \ $(COMMONCPPFLAGS) \
$(INCICONV) \ $(INCICONV) \
$(XAPIANCXXFLAGS) \ $(XAPIANCXXFLAGS) \
$(XSLT_CFLAGS) \
$(X_CFLAGS) \ $(X_CFLAGS) \
-DRECOLL_DATADIR=\"${pkgdatadir}\" \ -DRECOLL_DATADIR=\"${pkgdatadir}\" \
-DREADFILE_ENABLE_ZLIB -DREADFILE_ENABLE_MINIZ -DREADFILE_ENABLE_MD5 \
-D_GNU_SOURCE \ -D_GNU_SOURCE \
$(DEFS) $(DEFS)
@ -121,6 +125,8 @@ internfile/mh_symlink.h \
internfile/mh_text.cpp \ internfile/mh_text.cpp \
internfile/mh_text.h \ internfile/mh_text.h \
internfile/mh_unknown.h \ internfile/mh_unknown.h \
internfile/mh_xslt.cpp \
internfile/mh_xslt.h \
internfile/mimehandler.cpp \ internfile/mimehandler.cpp \
internfile/mimehandler.h \ internfile/mimehandler.h \
internfile/myhtmlparse.cpp \ internfile/myhtmlparse.cpp \
@ -224,6 +230,8 @@ utils/md5ut.cpp \
utils/md5ut.h \ utils/md5ut.h \
utils/mimeparse.cpp \ utils/mimeparse.cpp \
utils/mimeparse.h \ utils/mimeparse.h \
utils/miniz.cpp \
utils/miniz.h \
utils/netcon.cpp \ utils/netcon.cpp \
utils/netcon.h \ utils/netcon.h \
utils/pathut.cpp \ utils/pathut.cpp \
@ -262,7 +270,7 @@ AM_YFLAGS = -d
librecoll_la_LDFLAGS = -release $(VERSION) \ librecoll_la_LDFLAGS = -release $(VERSION) \
-Wl,--no-undefined -Wl,--warn-unresolved-symbols -Wl,--no-undefined -Wl,--warn-unresolved-symbols
librecoll_la_LIBADD = $(LIBXAPIAN) $(LIBICONV) $(LIBTHREADS) librecoll_la_LIBADD = $(XSLT_LINKADD) $(LIBXAPIAN) $(LIBICONV) $(LIBTHREADS)
# There is probably a better way to do this. The KIO needs to be linked # There is probably a better way to do this. The KIO needs to be linked
# with librecoll, but librecoll is installed into a non-standard place # with librecoll, but librecoll is installed into a non-standard place
@ -640,6 +648,18 @@ sampleconf/mimeview
filterdir = $(pkgdatadir)/filters filterdir = $(pkgdatadir)/filters
filter_DATA = \ filter_DATA = \
desktop/hotrecoll.py \ desktop/hotrecoll.py \
filters/abiword.xsl \
filters/fb2.xsl \
filters/gnumeric.xsl \
filters/msodump.zip \
filters/okular-note.xsl \
filters/opendoc-body.xsl \
filters/opendoc-flat.xsl \
filters/opendoc-meta.xsl \
filters/openxml-xls-body.xsl \
filters/openxml-word-body.xsl \
filters/openxml-meta.xsl \
filters/ppt-dump.py \
filters/rcl7z \ filters/rcl7z \
filters/rclabw.py \ filters/rclabw.py \
filters/rclaptosidman \ filters/rclaptosidman \
@ -671,19 +691,19 @@ filters/rcllatinstops.zip \
filters/rcllyx \ filters/rcllyx \
filters/rclman \ filters/rclman \
filters/rclmidi.py \ filters/rclmidi.py \
filters/rclpdf.py \
filters/rclps \
filters/rclokulnote.py \ filters/rclokulnote.py \
filters/rclopxml.py \ filters/rclopxml.py \
filters/rclpdf.py \
filters/rclppt.py \ filters/rclppt.py \
filters/rclps \
filters/rclpurple \ filters/rclpurple \
filters/rclpython \ filters/rclpython \
filters/rclrar \ filters/rclrar \
filters/rclrtf.py \ filters/rclrtf.py \
filters/rclscribus \ filters/rclscribus \
filters/rclshowinfo \ filters/rclshowinfo \
filters/rclsoff.py \
filters/rclsoff-flat.py \ filters/rclsoff-flat.py \
filters/rclsoff.py \
filters/rclsvg.py \ filters/rclsvg.py \
filters/rcltar \ filters/rcltar \
filters/rcltex \ filters/rcltex \
@ -697,11 +717,11 @@ filters/rclxmp.py \
filters/rclxslt.py \ filters/rclxslt.py \
filters/rclzip \ filters/rclzip \
filters/recoll-we-move-files.py \ filters/recoll-we-move-files.py \
filters/ppt-dump.py \ filters/recollepub.zip \
filters/svg.xsl \
filters/xls-dump.py \ filters/xls-dump.py \
filters/xlsxmltocsv.py \ filters/xlsxmltocsv.py \
filters/msodump.zip \ filters/xml.xsl \
filters/recollepub.zip \
python/recoll/recoll/conftree.py \ python/recoll/recoll/conftree.py \
python/recoll/recoll/rclconfig.py python/recoll/recoll/rclconfig.py

View File

@ -1 +1 @@
1.25.0pre0 1.25.1

View File

@ -112,7 +112,10 @@ overriden in the c++ code by ifdefs _WIN32 anyway */
#define PACKAGE_NAME "Recoll" #define PACKAGE_NAME "Recoll"
/* Define to the full name and version of this package. */ /* Define to the full name and version of this package. */
#define PACKAGE_STRING "Recoll 1.24.1" #define PACKAGE_STRING "Recoll 1.25.1"
/* Define to the version of this package. */
#define PACKAGE_VERSION "1.25.1"
/* Define to the one symbol short name of this package. */ /* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "recoll" #define PACKAGE_TARNAME "recoll"
@ -120,9 +123,6 @@ overriden in the c++ code by ifdefs _WIN32 anyway */
/* Define to the home page for this package. */ /* Define to the home page for this package. */
#define PACKAGE_URL "" #define PACKAGE_URL ""
/* Define to the version of this package. */
#define PACKAGE_VERSION "1.24.1"
/* putenv parameter is const */ /* putenv parameter is const */
/* #undef PUTENV_ARG_CONST */ /* #undef PUTENV_ARG_CONST */

View File

@ -394,6 +394,7 @@ bool RclConfig::updateMainConfig()
setKeyDir(cstr_null); setKeyDir(cstr_null);
// Texsplit customization
bool bvalue = false; bool bvalue = false;
if (getConfParam("nocjk", &bvalue) && bvalue == true) { if (getConfParam("nocjk", &bvalue) && bvalue == true) {
TextSplit::cjkProcessing(false); TextSplit::cjkProcessing(false);
@ -405,16 +406,18 @@ bool RclConfig::updateMainConfig()
TextSplit::cjkProcessing(true); TextSplit::cjkProcessing(true);
} }
} }
bvalue = false; bvalue = false;
if (getConfParam("nonumbers", &bvalue) && bvalue == true) { if (getConfParam("nonumbers", &bvalue) && bvalue == true) {
TextSplit::noNumbers(); TextSplit::noNumbers();
} }
bvalue = false; bvalue = false;
if (getConfParam("dehyphenate", &bvalue)) { if (getConfParam("dehyphenate", &bvalue)) {
TextSplit::deHyphenate(bvalue); TextSplit::deHyphenate(bvalue);
} }
bvalue = false;
if (getConfParam("backslashasletter", &bvalue)) {
TextSplit::backslashAsLetter(bvalue);
}
bvalue = true; bvalue = true;
if (getConfParam("skippedPathsFnmPathname", &bvalue) && bvalue == false) { if (getConfParam("skippedPathsFnmPathname", &bvalue) && bvalue == false) {
@ -1623,7 +1626,12 @@ string RclConfig::findFilter(const string &icmd) const
// Prepend $datadir/filters // Prepend $datadir/filters
temp = path_cat(m_datadir, "filters"); temp = path_cat(m_datadir, "filters");
PATH = temp + path_PATHsep() + PATH; PATH = temp + path_PATHsep() + PATH;
#ifdef _WIN32
// Windows only: use the bundled Python
temp = path_cat(m_datadir, "filters");
temp = path_cat(temp, "python");
PATH = temp + path_PATHsep() + PATH;
#endif
// Prepend possible configuration parameter? // Prepend possible configuration parameter?
if (getConfParam(string("filtersdir"), temp)) { if (getConfParam(string("filtersdir"), temp)) {
temp = path_tildexpand(temp); temp = path_tildexpand(temp);

View File

@ -137,6 +137,14 @@ public:
}; };
static const CharClassInit charClassInitInstance; static const CharClassInit charClassInitInstance;
// Set the character class used for the backslash: A_LLETTER when 'on'
// is true, SPACE when it is false.
void TextSplit::backslashAsLetter(bool on)
{
    charclasses[int('\\')] = on ? A_LLETTER : SPACE;
}
static inline int whatcc(unsigned int c) static inline int whatcc(unsigned int c)
{ {
if (c <= 127) { if (c <= 127) {

View File

@ -59,6 +59,11 @@ public:
o_deHyphenate = on; o_deHyphenate = on;
} }
// Process backslashes as letters? Default is off, but it may be
// useful for searching for tex commands. Config variable:
// backslashasletter
static void backslashAsLetter(bool on);
enum Flags { enum Flags {
// Default: will return spans and words (a_b, a, b) // Default: will return spans and words (a_b, a, b)
TXTS_NONE = 0, TXTS_NONE = 0,

View File

@ -7,6 +7,11 @@ using namespace std;
string compute_utf8fn(const RclConfig *config, const string& ifn, bool simple) string compute_utf8fn(const RclConfig *config, const string& ifn, bool simple)
{ {
#ifdef _WIN32
// On windows file names are read as UTF16 wchar_t and converted to UTF-8
// while scanning directories
return ifn;
#else
string charset = config->getDefCharset(true); string charset = config->getDefCharset(true);
string utf8fn; string utf8fn;
int ercnt; int ercnt;
@ -21,4 +26,5 @@ string compute_utf8fn(const RclConfig *config, const string& ifn, bool simple)
LOGDEB1("compute_utf8fn: transcoded from [" << lfn << "] to [" << LOGDEB1("compute_utf8fn: transcoded from [" << lfn << "] to [" <<
utf8fn << "] (" << charset << "->" << "UTF-8)\n"); utf8fn << "] (" << charset << "->" << "UTF-8)\n");
return utf8fn; return utf8fn;
#endif
} }

View File

@ -321,6 +321,21 @@ XAPIANCXXFLAGS=`$XAPIAN_CONFIG --cxxflags`
#echo LIBXAPIANSTATICEXTRA: $LIBXAPIANSTATICEXTRA #echo LIBXAPIANSTATICEXTRA: $LIBXAPIANSTATICEXTRA
#echo XAPIANCXXFLAGS: $XAPIANCXXFLAGS #echo XAPIANCXXFLAGS: $XAPIANCXXFLAGS
# Locate xslt-config. A value of XSLT_CONFIG set in the environment takes
# precedence; otherwise search the PATH.
XSLT_CONFIG=${XSLT_CONFIG:-no}
if test "$XSLT_CONFIG" = "no"; then
    AC_PATH_PROG(XSLT_CONFIG0, [xslt-config], no)
    XSLT_CONFIG=$XSLT_CONFIG0
fi
# AC_MSG_ERROR aborts configure, no explicit exit is needed after it.
if test "$XSLT_CONFIG" = "no" ; then
    AC_MSG_ERROR([Cannot find xslt-config command in $PATH. Is
libxslt installed ?])
fi
# Query the command which was actually selected above, not whatever
# xslt-config happens to come first in the PATH.
XSLT_CFLAGS=`$XSLT_CONFIG --cflags`
XSLT_LINKADD=`$XSLT_CONFIG --libs`
AC_ARG_ENABLE(xadump, AC_ARG_ENABLE(xadump,
AC_HELP_STRING([--enable-xadump], AC_HELP_STRING([--enable-xadump],
[Enable building the xadump low level Xapian access program.]), [Enable building the xadump low level Xapian access program.]),
@ -527,6 +542,8 @@ AC_SUBST(QMAKE_DISABLE_ZEITGEIST)
AC_SUBST(LIBQZEITGEIST) AC_SUBST(LIBQZEITGEIST)
AC_SUBST(RCLVERSION) AC_SUBST(RCLVERSION)
AC_SUBST(RCLLIBVERSION) AC_SUBST(RCLLIBVERSION)
AC_SUBST(XSLT_CFLAGS)
AC_SUBST(XSLT_LINKADD)
# All object files depend on localdefs which has the cc flags. Avoid # All object files depend on localdefs which has the cc flags. Avoid
# changing it unless necessary # changing it unless necessary

88
src/filters/abiword.xsl Normal file
View File

@ -0,0 +1,88 @@
<?xml version="1.0"?>
<!-- Converts an AbiWord document to a minimal HTML page: the metadata
     entries go to the head, each paragraph becomes a <p> in the body. -->
<xsl:stylesheet version="1.0"
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  xmlns:ab="http://www.abisource.com/awml.dtd"
  exclude-result-prefixes="ab">

  <xsl:output method="html" encoding="UTF-8"/>

  <xsl:template match="/">
    <html>
      <head>
        <xsl:apply-templates select="ab:abiword/ab:metadata"/>
      </head>
      <body>
        <!-- Older abiword format, elements are in no namespace -->
        <xsl:apply-templates select="abiword/section/p"/>
        <!-- Newer format, elements are in the abiword namespace -->
        <xsl:apply-templates select="ab:abiword/ab:section/ab:p"/>
      </body>
    </html>
  </xsl:template>

  <!-- One output paragraph per input paragraph, old or new format -->
  <xsl:template match="p | ab:p">
    <p><xsl:value-of select="."/></p>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

  <!-- Map the metadata entries we know about to title/meta elements.
       Entries with any other key are ignored. -->
  <xsl:template match="ab:metadata">
    <xsl:for-each select="ab:m">
      <xsl:choose>
        <xsl:when test="@key = 'dc.creator'">
          <meta name="author" content="{.}"/>
          <xsl:text>&#10;</xsl:text>
        </xsl:when>
        <xsl:when test="@key = 'abiword.keywords' or @key = 'dc.subject'">
          <meta name="keywords" content="{.}"/>
          <xsl:text>&#10;</xsl:text>
        </xsl:when>
        <xsl:when test="@key = 'dc.description'">
          <meta name="abstract" content="{.}"/>
          <xsl:text>&#10;</xsl:text>
        </xsl:when>
        <xsl:when test="@key = 'dc.title'">
          <title><xsl:value-of select="."/></title>
          <xsl:text>&#10;</xsl:text>
        </xsl:when>
      </xsl:choose>
    </xsl:for-each>
  </xsl:template>

</xsl:stylesheet>

56
src/filters/fb2.xsl Executable file
View File

@ -0,0 +1,56 @@
<?xml version="1.0"?>
<!-- Converts a FictionBook 2 document to a minimal HTML page: book title
     and author(s) in the head, the paragraphs which are direct children
     of the body sections in the body. -->
<xsl:stylesheet version="1.0"
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  xmlns:fb="http://www.gribuser.ru/xml/fictionbook/2.0"
  exclude-result-prefixes="fb">

  <xsl:output method="html" encoding="UTF-8"/>

  <xsl:template match="/fb:FictionBook">
    <html>
      <xsl:apply-templates select="fb:description"/>
      <xsl:apply-templates select="fb:body"/>
    </html>
  </xsl:template>

  <!-- Metadata: only the title-info part is used -->
  <xsl:template match="fb:description">
    <head>
      <xsl:apply-templates select="fb:title-info"/>
    </head>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

  <xsl:template match="fb:description/fb:title-info">
    <xsl:apply-templates select="fb:book-title"/>
    <xsl:apply-templates select="fb:author"/>
  </xsl:template>

  <xsl:template match="fb:description/fb:title-info/fb:book-title">
    <title><xsl:value-of select="."/></title>
  </xsl:template>

  <!-- The author name is output as "first middle last", the separating
       spaces are always present even if a part is missing -->
  <xsl:template match="fb:description/fb:title-info/fb:author">
    <meta name="author"
          content="{fb:first-name} {fb:middle-name} {fb:last-name}"/>
  </xsl:template>

  <!-- Text: one <p> for each paragraph directly inside a body section -->
  <xsl:template match="fb:body">
    <body>
      <xsl:apply-templates select="fb:section"/>
    </body>
  </xsl:template>

  <xsl:template match="fb:body/fb:section">
    <xsl:apply-templates select="fb:p"/>
  </xsl:template>

  <xsl:template match="fb:body/fb:section/fb:p">
    <p><xsl:value-of select="."/></p>
  </xsl:template>

</xsl:stylesheet>

79
src/filters/gnumeric.xsl Executable file
View File

@ -0,0 +1,79 @@
<?xml version="1.0"?>
<!-- Converts a Gnumeric spreadsheet to a minimal HTML page: document
     metadata in the head, cell values and cell comments in the body. -->
<xsl:stylesheet version="1.0"
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
  xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0"
  xmlns:ooo="http://openoffice.org/2004/office"
  xmlns:gnm="http://www.gnumeric.org/v10.dtd"
  exclude-result-prefixes="office xlink meta ooo dc gnm">
  <!-- gnm is listed in exclude-result-prefixes so that its namespace
       declaration is not copied to the output html element -->

  <xsl:output method="html" encoding="UTF-8"/>

  <xsl:template match="/">
    <html>
      <head>
        <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
        <xsl:apply-templates select="//office:document-meta/office:meta"/>
      </head>
      <body>
        <!-- No template matches gnm:Cells or gnm:Objects themselves: the
             built-in rules descend into them and reach the templates
             for gnm:Cell and gnm:CellComment below -->
        <xsl:apply-templates select="//gnm:Cells"/>
        <xsl:apply-templates select="//gnm:Objects"/>
      </body>
    </html>
  </xsl:template>

  <!-- Metadata fields which we know how to translate -->
  <xsl:template match="dc:date">
    <meta name="date" content="{.}"/>
  </xsl:template>

  <xsl:template match="dc:description">
    <meta name="abstract" content="{.}"/>
  </xsl:template>

  <xsl:template match="meta:keyword | dc:subject">
    <meta name="keywords" content="{.}"/>
  </xsl:template>

  <xsl:template match="dc:title">
    <title><xsl:value-of select="."/></title>
  </xsl:template>

  <xsl:template match="meta:initial-creator">
    <meta name="author" content="{.}"/>
  </xsl:template>

  <!-- Ignore all other metadata fields. The explicit low priority is
       needed: the default priority of this pattern is 0.5, which would
       tie with or beat the specific templates above, and as this rule
       comes last it would then be chosen for every child of
       office:meta, suppressing all metadata output. -->
  <xsl:template match="office:meta/*" priority="-1"/>

  <xsl:template match="gnm:Cell">
    <p><xsl:value-of select="."/></p>
  </xsl:template>

  <!-- The comment text is stored in the Text attribute -->
  <xsl:template match="gnm:CellComment">
    <blockquote><xsl:value-of select="@Text"/></blockquote>
  </xsl:template>

</xsl:stylesheet>

40
src/filters/okular-note.xsl Executable file
View File

@ -0,0 +1,40 @@
<?xml version="1.0"?>
<!-- Turns an XML document into a small HTML page. The title is built
from the url attribute of the documentInfo root element. Every text node
and every "contents" or "author" attribute found in the input becomes
one paragraph in the body. -->
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="html" encoding="UTF-8"/>
<!-- Remove whitespace-only text nodes from the input, else each of them
would produce an empty paragraph through the text() template -->
<xsl:strip-space elements="*" />
<xsl:template match="/">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<title>
Okular notes about: <xsl:value-of select="/documentInfo/@url" />
</title>
</head>
<body>
<xsl:apply-templates />
</body>
</html>
</xsl:template>
<!-- Walk the whole tree, attributes included (the built-in rules would
not visit the attributes) -->
<xsl:template match="node()">
<xsl:apply-templates select="@* | node() "/>
</xsl:template>
<!-- NOTE: this pattern and node() above both match text nodes and have
the same default priority. Processors which recover from this conflict
choose the template which occurs last, so this one must stay after the
node() one. -->
<xsl:template match="text()">
<p><xsl:value-of select="."/></p>
<xsl:text >
</xsl:text>
</xsl:template>
<!-- The only attributes whose values are output -->
<xsl:template match="@contents|@author">
<p><xsl:value-of select="." /></p>
<xsl:text >
</xsl:text>
</xsl:template>
<!-- All other attributes produce nothing -->
<xsl:template match="@*"/>
</xsl:stylesheet>

View File

@ -0,0 +1,32 @@
<?xml version="1.0"?>
<!-- Templates for the text elements of an OpenDocument body. There is no
template for the root node here: everything which is not matched below
is processed by the XSLT built-in rules. -->
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0"
exclude-result-prefixes="text"
>
<xsl:output method="html" encoding="UTF-8"/>
<!-- Paragraphs and headings both become an HTML paragraph followed by a
newline. Their content is processed recursively so that the inline
elements below are taken into account. -->
<xsl:template match="text:p">
<p><xsl:apply-templates/></p><xsl:text>
</xsl:text>
</xsl:template>
<xsl:template match="text:h">
<p><xsl:apply-templates/></p><xsl:text>
</xsl:text>
</xsl:template>
<!-- Inline spacing elements: output explicit white space so that the
words on both sides are not joined -->
<xsl:template match="text:s">
<xsl:text> </xsl:text>
</xsl:template>
<xsl:template match="text:line-break">
<br />
</xsl:template>
<xsl:template match="text:tab">
<xsl:text> </xsl:text>
</xsl:template>
</xsl:stylesheet>

109
src/filters/opendoc-flat.xsl Executable file
View File

@ -0,0 +1,109 @@
<?xml version="1.0"?>
<!-- Converts a flat (single file) OpenDocument document to a minimal
     HTML page: metadata in the head, paragraphs and headings in the
     body. -->
<xsl:stylesheet version="1.0"
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
  xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0"
  xmlns:ooo="http://openoffice.org/2004/office"
  xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0"
  exclude-result-prefixes="office xlink meta ooo dc text">

  <xsl:output method="html" encoding="UTF-8"/>

  <xsl:template match="/">
    <html>
      <head>
        <xsl:apply-templates select="/office:document/office:meta"/>
      </head>
      <body>
        <xsl:apply-templates select="/office:document/office:body"/>
      </body>
    </html>
  </xsl:template>

  <!-- Metadata: only the fields listed here are output -->
  <xsl:template match="/office:document/office:meta">
    <xsl:apply-templates select="dc:title"/>
    <xsl:apply-templates select="dc:description"/>
    <xsl:apply-templates select="dc:subject"/>
    <xsl:apply-templates select="meta:keyword"/>
    <xsl:apply-templates select="dc:creator"/>
  </xsl:template>

  <!-- Text: paragraphs and headings are selected together so that they
       come out in document order, instead of all the paragraphs followed
       by all the headings. The inline elements (text:s, text:line-break,
       text:tab) are not selected here: they are processed where they
       belong, while the content of each paragraph or heading is output. -->
  <xsl:template match="/office:document/office:body">
    <xsl:apply-templates select=".//text:p | .//text:h"/>
  </xsl:template>

  <xsl:template match="dc:title">
    <title><xsl:value-of select="."/></title>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

  <xsl:template match="dc:description">
    <meta name="abstract" content="{.}"/>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

  <xsl:template match="dc:subject">
    <meta name="keywords" content="{.}"/>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

  <xsl:template match="dc:creator">
    <meta name="author" content="{.}"/>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

  <xsl:template match="meta:keyword">
    <meta name="keywords" content="{.}"/>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

  <!-- Paragraphs and headings both become an HTML paragraph -->
  <xsl:template match="office:body//text:p">
    <p><xsl:apply-templates/></p>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

  <xsl:template match="office:body//text:h">
    <p><xsl:apply-templates/></p>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

  <!-- Inline spacing elements: output explicit white space so that the
       words on both sides are not joined -->
  <xsl:template match="office:body//text:s">
    <xsl:text> </xsl:text>
  </xsl:template>

  <xsl:template match="office:body//text:line-break">
    <br />
  </xsl:template>

  <xsl:template match="office:body//text:tab">
    <xsl:text> </xsl:text>
  </xsl:template>

</xsl:stylesheet>

View File

@ -0,0 +1,67 @@
<?xml version="1.0"?>
<!-- Extracts the metadata of an OpenDocument meta part as a sequence of
     HTML title/meta elements. No enclosing html or head element is
     produced here. -->
<xsl:stylesheet version="1.0"
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
  xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0"
  xmlns:ooo="http://openoffice.org/2004/office"
  exclude-result-prefixes="office xlink meta ooo dc">

  <xsl:output method="html" encoding="UTF-8"/>

  <!-- Only the fields listed here are output, in this order -->
  <xsl:template match="/office:document-meta">
    <xsl:apply-templates select="office:meta/dc:description"/>
    <xsl:apply-templates select="office:meta/dc:subject"/>
    <xsl:apply-templates select="office:meta/dc:title"/>
    <xsl:apply-templates select="office:meta/meta:keyword"/>
    <xsl:apply-templates select="office:meta/dc:creator"/>
  </xsl:template>

  <xsl:template match="dc:title">
    <title><xsl:value-of select="."/></title>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

  <xsl:template match="dc:description">
    <meta name="abstract" content="{.}"/>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

  <!-- Subject and keyword values are both output as keywords -->
  <xsl:template match="dc:subject | meta:keyword">
    <meta name="keywords" content="{.}"/>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

  <xsl:template match="dc:creator">
    <meta name="author" content="{.}"/>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

</xsl:stylesheet>

View File

@ -0,0 +1,49 @@
<?xml version="1.0"?>
<!-- Extracts the author and modification date from an OpenXML core
properties part as HTML meta elements. No enclosing html or head element
is produced here. -->
<xsl:stylesheet
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dcterms="http://purl.org/dc/terms/"
xmlns:dcmitype="http://purl.org/dc/dcmitype/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<!-- <xsl:output method="text"/> -->
<xsl:output omit-xml-declaration="yes"/>
<!-- Output the charset declaration, then process the individual
properties -->
<xsl:template match="cp:coreProperties">
<xsl:text>&#10;</xsl:text>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
<xsl:text>&#10;</xsl:text>
<xsl:apply-templates/>
</xsl:template>
<xsl:template match="dc:creator">
<meta>
<xsl:attribute name="name">
<!-- <xsl:value-of select="name()"/> would output every meta with
the same name as in the xml (if we went dc-native) -->
<xsl:text>author</xsl:text>
</xsl:attribute>
<xsl:attribute name="content">
<xsl:value-of select="."/>
</xsl:attribute>
</meta>
<xsl:text>&#10;</xsl:text>
</xsl:template>
<xsl:template match="dcterms:modified">
<meta>
<xsl:attribute name="name">
<xsl:text>date</xsl:text>
</xsl:attribute>
<xsl:attribute name="content">
<xsl:value-of select="."/>
</xsl:attribute>
</meta>
<xsl:text>&#10;</xsl:text>
</xsl:template>
<!-- Any other element produces nothing, and its content is not
visited -->
<xsl:template match="*">
</xsl:template>
</xsl:stylesheet>

View File

@ -0,0 +1,27 @@
<?xml version="1.0"?>
<!-- Extracts the text of a WordprocessingML document part: the output is
a div element containing one p element for each w:p paragraph. -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
xmlns:ve="http://schemas.openxmlformats.org/markup-compatibility/2006"
xmlns:o="urn:schemas-microsoft-com:office:office"
xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math"
xmlns:v="urn:schemas-microsoft-com:vml"
xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
xmlns:w10="urn:schemas-microsoft-com:office:word"
xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml">
<xsl:output omit-xml-declaration="yes"/>
<!-- The elements above the paragraphs are walked by the built-in rules -->
<xsl:template match="/">
<div>
<xsl:apply-templates/>
</div>
</xsl:template>
<!-- The paragraph value is the concatenation of all the text which it
contains. The paragraph content is not processed further. -->
<xsl:template match="w:p">
<p>
<xsl:value-of select="."/>
</p>
</xsl:template>
</xsl:stylesheet>

View File

@ -0,0 +1,19 @@
<?xml version="1.0"?>
<!-- Extracts the text of a SpreadsheetML document part: the output is a
div element containing one p element for each x:t text element. -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
xmlns:x="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
<xsl:output omit-xml-declaration="yes"/>
<!-- The elements above the x:t ones are walked by the built-in rules -->
<xsl:template match="/">
<div>
<xsl:apply-templates/>
</div>
</xsl:template>
<xsl:template match="x:t">
<p>
<xsl:value-of select="."/>
</p>
</xsl:template>
</xsl:stylesheet>

View File

@ -12,10 +12,9 @@ import fnmatch
import rclexecm import rclexecm
try: try:
import pylzma
from py7zlib import Archive7z from py7zlib import Archive7z
except: except:
print("RECFILTERROR HELPERNOTFOUND python:pylzma") print("RECFILTERROR HELPERNOTFOUND python:py7zlib")
sys.exit(1); sys.exit(1);
try: try:

View File

@ -15,12 +15,10 @@ if PY3:
from urllib.parse import unquote as urllib_unquote from urllib.parse import unquote as urllib_unquote
from urllib.parse import urlparse as urlparse_urlparse from urllib.parse import urlparse as urlparse_urlparse
from html.parser import HTMLParser from html.parser import HTMLParser
chmpackname = 'pychm3.egg'
else: else:
from urlparse import urlparse as urlparse_urlparse from urlparse import urlparse as urlparse_urlparse
from urllib import unquote as urllib_unquote from urllib import unquote as urllib_unquote
from HTMLParser import HTMLParser from HTMLParser import HTMLParser
chmpackname = 'pychm2.egg'
import subprocess import subprocess
@ -28,12 +26,12 @@ import rclconfig
import rclexecm import rclexecm
# pychm has no official port to Python3, hence no package in the # pychm has no official port to Python3, hence no package in the
# standard place. Recoll bundles a python3 port which we install out # standard place. Linux Recoll bundles a python3 port which is identical
# of the standard python places. Look for it: # to pychm, but named recollchm to avoid conflicts because it is installed
# sys.path[0] is for MSW, where we install the egg in the filters # as a normal python package (in /usr/lib/pythonxx/dist-packages,
# directory? TBD for now # not recoll/filters.). No such issues on Windows
try: try:
# First try the system version if any # First try the system (or recoll-local on Windows) version if any
from chm import chm,chmlib from chm import chm,chmlib
except: except:
try: try:

View File

@ -61,7 +61,7 @@ class Executor(RclBaseHandler):
return True, postproc.wrapData() return True, postproc.wrapData()
else: else:
try: try:
fullcmd = cmd + [filename] fullcmd = cmd + [rclexecm.subprocfile(filename)]
proc = subprocess.Popen(fullcmd, proc = subprocess.Popen(fullcmd,
stdout = subprocess.PIPE) stdout = subprocess.PIPE)
stdout = proc.stdout stdout = proc.stdout

View File

@ -29,13 +29,25 @@ import shutil
import getopt import getopt
import rclconfig import rclconfig
PY3 = sys.version > '3' PY3 = (sys.version > '3')
_mswindows = (sys.platform == "win32")
def makebytes(data): def makebytes(data):
if type(data) == type(u''): if type(data) == type(u''):
return data.encode("UTF-8") return data.encode("UTF-8")
return data return data
def subprocfile(fn):
    """Return a file name in a form usable as a subprocess argument.

    fn is the file name, normally received as UTF-8 encoded bytes. The
    return value is fn decoded to str on Windows with Python 3, and fn
    itself in all other cases.
    """
    # On Windows PY3 the list2cmdline() method in subprocess assumes that
    # all args are str, and we receive file names as UTF-8. So we need
    # to convert.
    # On Unix all list elements get converted to bytes in the C
    # _posixsubprocess module, nothing to do
    # Only decode actual bytes: a name which is already a str has no
    # decode() method in Python 3 and can be used as is.
    if PY3 and _mswindows and isinstance(fn, bytes):
        return fn.decode('UTF-8')
    return fn
my_config = rclconfig.RclConfig() my_config = rclconfig.RclConfig()
############################################ ############################################
@ -77,7 +89,10 @@ class RclExecM:
self.errfout = sys.stderr self.errfout = sys.stderr
def rclog(self, s, doexit = 0, exitvalue = 1): def rclog(self, s, doexit = 0, exitvalue = 1):
print("RCLMFILT: %s: %s" % (self.myname, s), file=self.errfout) # On windows, and I think that it changed quite recently (Qt change?)
# we get stdout as stderr. So don't write at all
if sys.platform != "win32":
print("RCLMFILT: %s: %s" % (self.myname, s), file=self.errfout)
if doexit: if doexit:
sys.exit(exitvalue) sys.exit(exitvalue)

View File

@ -140,7 +140,7 @@ sub readparam {
# JFD: replaced the "use" call with a runtime load with error checking, # JFD: replaced the "use" call with a runtime load with error checking,
# for compat with the missing filter detection code. # for compat with the missing filter detection code.
#use Image::ExifTool qw(:Public); use Image::ExifTool qw(:Public);
eval {require Image::ExifTool; Image::ExifTool->import(qw(:Public));}; eval {require Image::ExifTool; Image::ExifTool->import(qw(:Public));};
if ($@) { if ($@) {
print "RECFILTERROR HELPERNOTFOUND Perl::Image::ExifTool\n"; print "RECFILTERROR HELPERNOTFOUND Perl::Image::ExifTool\n";

View File

@ -479,7 +479,8 @@ class PDFExtractor:
print("RECFILTERROR HELPERNOTFOUND pdftotext") print("RECFILTERROR HELPERNOTFOUND pdftotext")
sys.exit(1); sys.exit(1);
self.filename = params["filename:"] self.filename = rclexecm.subprocfile(params["filename:"])
#self.em.rclog("openfile: [%s]" % self.filename) #self.em.rclog("openfile: [%s]" % self.filename)
self.currentindex = -1 self.currentindex = -1
self.attextractdone = False self.attextractdone = False

View File

@ -42,7 +42,8 @@ outdir = sys.argv[3]
try: try:
cmd = [sevenz, "e", "-bd", "-y", "-o" + outdir, infile] cmd = [sevenz, "e", "-bd", "-y", "-o" + outdir, infile]
subprocess.check_output(cmd, stderr = subprocess.PIPE) subprocess.check_output(cmd, stderr = subprocess.PIPE)
outputname = glob.glob(os.path.join(outdir, "*")) # Don't use os.path.join, we always want to use '/'
outputname = glob.glob(outdir + "/*")
# There should be only one file in there.. # There should be only one file in there..
print(outputname[0]) print(outputname[0])
except Exception as err: except Exception as err:

76
src/filters/svg.xsl Executable file
View File

@ -0,0 +1,76 @@
<?xml version="1.0"?>
<!-- Converts an SVG document to a minimal HTML page: title, description
     and Dublin Core metadata in the head, the content of the text
     elements in the body. -->
<xsl:stylesheet version="1.0"
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  xmlns:svg="http://www.w3.org/2000/svg"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
  exclude-result-prefixes="svg dc">
  <!-- Both prefixes are listed in exclude-result-prefixes, so that none
       of the namespace declarations is copied to the output html
       element -->

  <xsl:output method="html" encoding="UTF-8"/>

  <xsl:template match="/">
    <html>
      <head>
        <xsl:apply-templates select="svg:svg/svg:title"/>
        <xsl:apply-templates select="svg:svg/svg:desc"/>
        <!-- The Dublin Core elements are searched for at any depth
             inside the metadata element -->
        <xsl:apply-templates select="svg:svg/svg:metadata/descendant::dc:creator"/>
        <xsl:apply-templates select="svg:svg/svg:metadata/descendant::dc:subject"/>
        <xsl:apply-templates select="svg:svg/svg:metadata/descendant::dc:description"/>
      </head>
      <body>
        <xsl:apply-templates select="//svg:text"/>
      </body>
    </html>
  </xsl:template>

  <xsl:template match="svg:desc">
    <meta>
      <xsl:attribute name="name">keywords</xsl:attribute>
      <xsl:attribute name="content">
        <xsl:value-of select="."/>
      </xsl:attribute>
    </meta>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

  <xsl:template match="dc:creator">
    <meta>
      <xsl:attribute name="name">author</xsl:attribute>
      <xsl:attribute name="content">
        <xsl:value-of select="."/>
      </xsl:attribute>
    </meta>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

  <xsl:template match="dc:subject">
    <meta>
      <xsl:attribute name="name">keywords</xsl:attribute>
      <xsl:attribute name="content">
        <xsl:value-of select="."/>
      </xsl:attribute>
    </meta>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

  <xsl:template match="dc:description">
    <meta>
      <xsl:attribute name="name">description</xsl:attribute>
      <xsl:attribute name="content">
        <xsl:value-of select="."/>
      </xsl:attribute>
    </meta>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

  <xsl:template match="svg:title">
    <title><xsl:value-of select="."/></title>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

  <!-- One paragraph for each text element -->
  <xsl:template match="svg:text">
    <p><xsl:value-of select="."/></p>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

</xsl:stylesheet>

34
src/filters/xml.xsl Executable file
View File

@ -0,0 +1,34 @@
<?xml version="1.0"?>
<!--
  Generic XML to HTML conversion.
  The first element named "title" (in any namespace) supplies the HTML title,
  and every text node which is not made of white space only is output as a
  paragraph in the body.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">

  <xsl:output encoding="UTF-8" method="html"/>

  <xsl:template match="/">
    <html>
      <head>
        <!-- All "title" elements, whatever their namespace. -->
        <xsl:variable name="titles" select="//*[local-name() = 'title']"/>
        <xsl:if test="$titles">
          <title>
            <xsl:value-of select="$titles[1]"/>
          </title>
        </xsl:if>
      </head>
      <body>
        <xsl:apply-templates/>
      </body>
    </html>
  </xsl:template>

  <!-- Elements produce no output by themselves: just walk down the tree. -->
  <xsl:template match="*">
    <xsl:apply-templates/>
  </xsl:template>

  <!-- Text nodes: one paragraph each, skipping the white-space-only ones. -->
  <xsl:template match="text()">
    <xsl:if test="normalize-space(.) != ''">
      <p><xsl:value-of select="."/></p>
      <xsl:text>&#10;</xsl:text>
    </xsl:if>
  </xsl:template>

</xsl:stylesheet>

View File

@ -29,17 +29,15 @@
#include <iostream> #include <iostream>
#ifndef NO_NAMESPACES
using namespace std; using namespace std;
#endif /* NO_NAMESPACES */
bool MimeHandlerHtml::set_document_file_impl(const string& mt, const string &fn) bool MimeHandlerHtml::set_document_file_impl(const string& mt, const string &fn)
{ {
LOGDEB0("textHtmlToDoc: " << fn << "\n"); LOGDEB0("textHtmlToDoc: " << fn << "\n");
string otext; string otext;
if (!file_to_string(fn, otext)) { string reason;
LOGINFO("textHtmlToDoc: cant read: " << fn << "\n"); if (!file_to_string(fn, otext, &reason)) {
LOGERR("textHtmlToDoc: cant read: " << fn << ": " << reason << "\n");
return false; return false;
} }
m_filename = fn; m_filename = fn;

View File

@ -38,9 +38,7 @@ public:
MimeHandlerMail(RclConfig *cnf, const std::string &id); MimeHandlerMail(RclConfig *cnf, const std::string &id);
virtual ~MimeHandlerMail(); virtual ~MimeHandlerMail();
virtual bool is_data_input_ok(DataInput input) const { virtual bool is_data_input_ok(DataInput input) const {
if (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING) return (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING);
return true;
return false;
} }
virtual bool next_document(); virtual bool next_document();
virtual bool skip_to_document(const std::string& ipath); virtual bool skip_to_document(const std::string& ipath);

316
src/internfile/mh_xslt.cpp Normal file
View File

@ -0,0 +1,316 @@
/* Copyright (C) 2005 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include "autoconfig.h"
#include <libxml/parser.h>
#include <libxml/tree.h>
#include <libxslt/transform.h>
#include <libxslt/xsltInternals.h>
#include <libxslt/xsltutils.h>
#include "cstr.h"
#include "mh_xslt.h"
#include "log.h"
#include "smallut.h"
#include "md5ut.h"
#include "rclconfig.h"
#include "readfile.h"
using namespace std;
// Open question: do we need to call xmlInitParser() and LIBXML_TEST_VERSION?
// If we do, the calls would have to be made exactly once (they are not
// reentrant), so they would have to go into recollinit.
// Calling xmlCleanupParser() is probably not needed.
class FileScanXML : public FileScanDo {
public:
FileScanXML(const string& fn) : m_fn(fn) {}
virtual ~FileScanXML() {
if (ctxt) {
xmlFreeParserCtxt(ctxt);
}
}
xmlDocPtr getDoc() {
int ret;
if ((ret = xmlParseChunk(ctxt, nullptr, 0, 1))) {
xmlError *error = xmlGetLastError();
LOGERR("FileScanXML: final xmlParseChunk failed with error " <<
ret << " error: " <<
(error ? error->message :
" null return from xmlGetLastError()") << "\n");
return nullptr;
}
return ctxt->myDoc;
}
virtual bool init(int64_t size, string *) {
LOGDEB1("FileScanXML: init: size " << size << endl);
ctxt = xmlCreatePushParserCtxt(NULL, NULL, NULL, 0, m_fn.c_str());
if (ctxt == nullptr) {
LOGERR("FileScanXML: xmlCreatePushParserCtxt failed\n");
return false;
} else {
return true;
}
}
virtual bool data(const char *buf, int cnt, string*) {
if (0) {
string dt(buf, cnt);
LOGDEB1("FileScanXML: data: cnt " << cnt << " data " << dt << endl);
} else {
LOGDEB1("FileScanXML: data: cnt " << cnt << endl);
}
int ret;
if ((ret = xmlParseChunk(ctxt, buf, cnt, 0))) {
xmlError *error = xmlGetLastError();
LOGERR("FileScanXML: xmlParseChunk failed with error " <<
ret << " for [" << buf << "] error " <<
(error ? error->message :
" null return from xmlGetLastError()") << "\n");
return false;
} else {
LOGDEB1("xmlParseChunk ok (sent " << cnt << " bytes)\n");
return true;
}
}
private:
xmlParserCtxtPtr ctxt{nullptr};
string m_fn;
};
/**
 * Private implementation data and methods for MimeHandlerXslt.
 *
 * Holds the parsed style sheets, which are freed by the destructor, and the
 * result of the last transformation.
 */
class MimeHandlerXslt::Internal {
public:
    explicit Internal(MimeHandlerXslt *_p)
        : p(_p) {}

    // The object owns the style sheets: a copy would free them twice.
    Internal(const Internal&) = delete;
    Internal& operator=(const Internal&) = delete;

    ~Internal() {
        if (metaOrAllSS) {
            xsltFreeStylesheet(metaOrAllSS);
        }
        if (bodySS) {
            xsltFreeStylesheet(bodySS);
        }
    }

    /** Load and parse the style sheet named ssnm from the filters directory.
     *  @return the parsed style sheet, or nullptr on failure. */
    xsltStylesheet *prepare_stylesheet(const string& ssnm);

    /** Transform the document held either in file fn (if not empty), or in
     *  data, and store the output in result. */
    bool process_doc_or_string(bool forpv, const string& fn, const string& data);

    /** Parse the input (or its member, if the name is not empty), apply
     *  style sheet ssp, and store the serialized output in result. */
    bool apply_stylesheet(
        const string& fn, const string& member, const string& data,
        xsltStylesheet *ssp, string& result, string *md5p);

    // Back pointer to the handler object owning us.
    MimeHandlerXslt *p;
    // Set by the MimeHandlerXslt constructor if all needed style sheets could
    // be loaded.
    bool ok{false};
    // Input member name and style sheet for the metadata. The style sheet is
    // the only one in use when the handler is configured with a single one.
    string metamember;
    xsltStylesheet *metaOrAllSS{nullptr};
    // Input member name and style sheet for the body. Only set when the
    // handler is configured with two style sheets.
    string bodymember;
    xsltStylesheet *bodySS{nullptr};
    // Output of the last transformation.
    string result;
    // Directory where the style sheets are looked up.
    string filtersdir;
};
// Release the private implementation object allocated by the constructor.
MimeHandlerXslt::~MimeHandlerXslt()
{
    delete m;
}
/**
 * Build a handler from the configuration parameters.
 *
 * @param params either 2 values: "xslt stylesheetall", or 5 values:
 *   "xslt metamember metastylesheet bodymember bodystylesheet". The first
 *   value is not used here. The handler is only marked as usable if all the
 *   style sheets named in the parameters could be loaded.
 */
MimeHandlerXslt::MimeHandlerXslt(RclConfig *cnf, const std::string& id,
                                 const std::vector<std::string>& params)
    : RecollFilter(cnf, id), m(new Internal(this))
{
    LOGDEB("MimeHandlerXslt: params: " << stringsToString(params) << endl);

    // The style sheets live in the "filters" subdirectory of the data
    // directory.
    m->filtersdir = path_cat(cnf->getDatadir(), "filters");

    // libxml2 global parser settings.
    xmlSubstituteEntitiesDefault(0);
    xmlLoadExtDtdDefaultValue = 0;

    switch (params.size()) {
    case 2:
        // Single style sheet processing the whole document.
        m->metaOrAllSS = m->prepare_stylesheet(params[1]);
        m->ok = (nullptr != m->metaOrAllSS);
        break;
    case 5:
        // One member and style sheet for the metadata, another pair for the
        // body.
        m->metamember = params[1];
        m->metaOrAllSS = m->prepare_stylesheet(params[2]);
        m->bodymember = params[3];
        m->bodySS = m->prepare_stylesheet(params[4]);
        m->ok = (nullptr != m->metaOrAllSS && nullptr != m->bodySS);
        break;
    default:
        LOGERR("MimeHandlerXslt: constructor with wrong param vector: " <<
               stringsToString(params) << endl);
        break;
    }
}
/**
 * Load and parse a style sheet.
 *
 * @param ssnm style sheet file name, relative to the filters directory.
 * @return the parsed style sheet, to be freed with xsltFreeStylesheet(), or
 *   nullptr if the file could not be read or parsed.
 */
xsltStylesheet *MimeHandlerXslt::Internal::prepare_stylesheet(const string& ssnm)
{
    string ssfn = path_cat(filtersdir, ssnm);
    FileScanXML XMLstyle(ssfn);
    string reason;
    if (!file_scan(ssfn, &XMLstyle, &reason)) {
        LOGERR("MimeHandlerXslt: file_scan failed for style sheet " <<
               ssfn << " : " << reason << endl);
        return nullptr;
    }
    xmlDoc *stl = XMLstyle.getDoc();
    if (stl == nullptr) {
        LOGERR("MimeHandlerXslt: getDoc failed for style sheet " <<
               ssfn << endl);
        return nullptr;
    }
    // On success, the style sheet keeps a reference to the document and
    // frees it when it is itself freed. On failure the document stays ours
    // and must be freed here.
    xsltStylesheet *ss = xsltParseStylesheetDoc(stl);
    if (ss == nullptr) {
        LOGERR("MimeHandlerXslt: xsltParseStylesheetDoc failed for style "
               "sheet " << ssfn << endl);
        xmlFreeDoc(stl);
    }
    return ss;
}
/**
 * Parse an XML input and transform it with the given style sheet.
 *
 * @param fn input file name. If empty, the input is taken from data.
 * @param member if not empty, name of the member to be extracted from the
 *   input and processed, instead of the input itself.
 * @param data input data, used if fn is empty.
 * @param ssp style sheet to apply. It is also used for serializing the
 *   output, so that its own output settings are the ones in effect.
 * @param[out] result serialized output of the transformation.
 * @param md5p passed to the scanning function when no member is specified,
 *   not used otherwise.
 * @return false if reading, parsing, transforming or serializing failed.
 */
bool MimeHandlerXslt::Internal::apply_stylesheet(
    const string& fn, const string& member, const string& data,
    xsltStylesheet *ssp, string& result, string *md5p)
{
    FileScanXML XMLdoc(fn);
    string reason;
    bool res;
    if (!fn.empty()) {
        if (member.empty()) {
            res = file_scan(fn, &XMLdoc, 0, -1, &reason, md5p);
        } else {
            res = file_scan(fn, member, &XMLdoc, &reason);
        }
    } else {
        if (member.empty()) {
            res = string_scan(data.c_str(), data.size(), &XMLdoc, &reason, md5p);
        } else {
            res = string_scan(data.c_str(), data.size(), member, &XMLdoc,
                              &reason);
        }
    }
    if (!res) {
        LOGERR("MimeHandlerXslt::set_document_: file_scan failed for "<<
               fn << " " << member << " : " << reason << endl);
        return false;
    }

    // We own the document from here on.
    xmlDocPtr doc = XMLdoc.getDoc();
    if (nullptr == doc) {
        LOGERR("MimeHandlerXslt::set_document_: no parsed doc\n");
        return false;
    }
    xmlDocPtr transformed = xsltApplyStylesheet(ssp, doc, NULL);
    if (nullptr == transformed) {
        LOGERR("MimeHandlerXslt::set_document_: xslt transform failed\n");
        xmlFreeDoc(doc);
        return false;
    }

    // Serialize with the style sheet which was actually applied. The output
    // pointer may be left null (failure, or empty result): initialize it and
    // check before use.
    xmlChar *outstr = nullptr;
    int outlen = 0;
    bool saved =
        xsltSaveResultToString(&outstr, &outlen, transformed, ssp) >= 0;
    if (saved) {
        if (outstr != nullptr && outlen > 0) {
            result.assign(reinterpret_cast<const char*>(outstr), outlen);
        } else {
            result.clear();
        }
    } else {
        LOGERR("MimeHandlerXslt::set_document_: xslt result save failed\n");
    }
    if (outstr != nullptr) {
        xmlFree(outstr);
    }
    xmlFreeDoc(transformed);
    xmlFreeDoc(doc);
    return saved;
}
/**
 * Transform the input document and store the HTML output in result.
 *
 * With a single style sheet, it is applied to the whole input and, except
 * when working for preview, the MD5 value obtained while scanning the input
 * is stored in the handler metadata.
 * With two style sheets, the HTML document is assembled here from the output
 * of the metadata style sheet (head) and of the body style sheet (body).
 *
 * @param forpreview true if we are working for a document preview.
 * @param fn input file name. If empty, the input is taken from data.
 * @param data input data, used if fn is empty.
 * @return false if no style sheet is set or if a transformation failed.
 */
bool MimeHandlerXslt::Internal::process_doc_or_string(
    bool forpreview, const string& fn, const string& data)
{
    if (nullptr == metaOrAllSS && nullptr == bodySS) {
        LOGERR("MimeHandlerXslt::set_document_file_impl: both ss empty??\n");
        return false;
    }
    p->m_metaData[cstr_dj_keycharset] = cstr_utf8;

    if (nullptr == bodySS) {
        // Single style sheet, producing the whole output document.
        string md5;
        if (apply_stylesheet(fn, string(), data, metaOrAllSS, result,
                             forpreview ? nullptr : &md5)) {
            if (!forpreview) {
                p->m_metaData[cstr_dj_keymd5] = md5;
            }
            return true;
        }
        return false;
    } else {
        // Two style sheets: we supply the document skeleton. Note the space
        // at the end of the first literal: it separates the two attributes
        // of the meta element.
        result = "<html>\n<head>\n<meta http-equiv=\"Content-Type\" "
            "content=\"text/html; charset=UTF-8\">";
        string part;
        if (!apply_stylesheet(fn,metamember, data, metaOrAllSS, part, nullptr)) {
            return false;
        }
        result += part;
        result += "</head>\n<body>\n";
        if (!apply_stylesheet(fn, bodymember, data, bodySS, part, nullptr)) {
            return false;
        }
        result += part;
        result += "</body></html>";
    }
    return true;
}
/**
 * Process the document stored in file fn.
 * The output is kept until it is retrieved with next_document().
 * @return false if the handler is not usable or if the processing failed.
 */
bool MimeHandlerXslt::set_document_file_impl(const std::string& mt,
                                             const std::string &fn)
{
    LOGDEB0("MimeHandlerXslt::set_document_file_: fn: " << fn << endl);
    if (nullptr == m || !m->ok) {
        return false;
    }
    // Empty data string: the input is read from the file.
    if (!m->process_doc_or_string(m_forPreview, fn, string())) {
        return false;
    }
    m_havedoc = true;
    return true;
}
/**
 * Process the document held in memory in txt.
 * The output is kept until it is retrieved with next_document().
 * @return false if the handler is not usable or if the processing failed.
 */
bool MimeHandlerXslt::set_document_string_impl(const string& mt,
                                               const string& txt)
{
    LOGDEB0("MimeHandlerXslt::set_document_string_\n");
    if (nullptr == m || !m->ok) {
        return false;
    }
    // Empty file name: the input is taken from the string.
    if (!m->process_doc_or_string(m_forPreview, string(), txt)) {
        return false;
    }
    m_havedoc = true;
    return true;
}
/**
 * Deliver the single output document: the HTML text produced by the last
 * successful set_document_xxx() call is moved to the content entry of the
 * metadata, with the MIME type entry set to cstr_texthtml.
 * @return false if the handler is not usable or if there is no document
 *   pending.
 */
bool MimeHandlerXslt::next_document()
{
    if (nullptr == m || !m->ok || !m_havedoc) {
        return false;
    }
    // There is only one document per input: it can only be fetched once.
    m_havedoc = false;
    m_metaData[cstr_dj_keymt] = cstr_texthtml;
    // Swap instead of copying: the stored result is not needed any more.
    m_metaData[cstr_dj_keycontent].swap(m->result);
    LOGDEB1("MimeHandlerXslt::next_document: result: [" <<
            m_metaData[cstr_dj_keycontent] << "]\n");
    return true;
}
// Reset the per-document state: forget the stored transformation output and
// mark that no document is pending.
void MimeHandlerXslt::clear_impl()
{
    m->result.clear();
    m_havedoc = false;
}

49
src/internfile/mh_xslt.h Normal file
View File

@ -0,0 +1,49 @@
/* Copyright (C) 2018 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifndef _MH_XSLT_H_INCLUDED_
#define _MH_XSLT_H_INCLUDED_
#include <string>
#include "mimehandler.h"
/**
 * Handler which converts XML documents to HTML by applying XSLT style
 * sheets.
 *
 * The input can be supplied either as a file name or as a string. The
 * implementation details are hidden in the Internal class.
 */
class MimeHandlerXslt : public RecollFilter {
public:
    /**
     * @param params handler parameters: the names of the style sheet(s) to
     *   use and, when two style sheets are used, of the input members that
     *   they apply to.
     */
    MimeHandlerXslt(RclConfig *cnf, const std::string& id,
                    const std::vector<std::string>& params);
    virtual ~MimeHandlerXslt();

    // The object owns its Internal data, which is deleted by the destructor:
    // a copy would delete it twice.
    MimeHandlerXslt(const MimeHandlerXslt&) = delete;
    MimeHandlerXslt& operator=(const MimeHandlerXslt&) = delete;

    virtual bool next_document() override;
    virtual void clear_impl() override;

    /** Both file name and in-memory string inputs are accepted. */
    virtual bool is_data_input_ok(DataInput input) const override {
        return (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING);
    }

protected:
    virtual bool set_document_file_impl(const std::string& mt,
                                        const std::string& file_path);
    virtual bool set_document_string_impl(const std::string& mt,
                                          const std::string& data);

    class Internal;
private:
    // Private implementation data, allocated by the constructor.
    Internal *m{nullptr};
};
#endif /* _MH_XSLT_H_INCLUDED_ */

View File

@ -41,6 +41,7 @@ using namespace std;
#include "mh_symlink.h" #include "mh_symlink.h"
#include "mh_unknown.h" #include "mh_unknown.h"
#include "mh_null.h" #include "mh_null.h"
#include "mh_xslt.h"
// Performance help: we use a pool of already known and created // Performance help: we use a pool of already known and created
// handlers. There can be several instances for a given mime type // handlers. There can be several instances for a given mime type
@ -137,11 +138,17 @@ void clearMimeHandlerCache()
/** For mime types set as "internal" in mimeconf: /** For mime types set as "internal" in mimeconf:
* create appropriate handler object. */ * create appropriate handler object. */
static RecollFilter *mhFactory(RclConfig *config, const string &mime, static RecollFilter *mhFactory(RclConfig *config, const string &mimeOrParams,
bool nobuild, string& id) bool nobuild, string& id)
{ {
LOGDEB2("mhFactory(" << mime << ")\n"); LOGDEB1("mhFactory(" << mimeOrParams << ")\n");
string lmime(mime); vector<string> lparams;
stringToStrings(mimeOrParams, lparams);
if (lparams.empty()) {
// ??
return nullptr;
}
string lmime(lparams[0]);
stringtolower(lmime); stringtolower(lmime);
if (cstr_textplain == lmime) { if (cstr_textplain == lmime) {
LOGDEB2("mhFactory(" << mime << "): returning MimeHandlerText\n"); LOGDEB2("mhFactory(" << mime << "): returning MimeHandlerText\n");
@ -160,11 +167,11 @@ static RecollFilter *mhFactory(RclConfig *config, const string &mime,
MD5String("MimeHandlerMail", id); MD5String("MimeHandlerMail", id);
return nobuild ? 0 : new MimeHandlerMail(config, id); return nobuild ? 0 : new MimeHandlerMail(config, id);
} else if ("inode/symlink" == lmime) { } else if ("inode/symlink" == lmime) {
LOGDEB2("mhFactory(" << mime << "): ret MimeHandlerSymlink\n"); LOGDEB2("mhFactory(" << mime << "): returning MimeHandlerSymlink\n");
MD5String("MimeHandlerSymlink", id); MD5String("MimeHandlerSymlink", id);
return nobuild ? 0 : new MimeHandlerSymlink(config, id); return nobuild ? 0 : new MimeHandlerSymlink(config, id);
} else if ("application/x-zerosize" == lmime) { } else if ("application/x-zerosize" == lmime) {
LOGDEB("mhFactory(" << mime << "): ret MimeHandlerNull\n"); LOGDEB("mhFactory(" << lmime << "): returning MimeHandlerNull\n");
MD5String("MimeHandlerNull", id); MD5String("MimeHandlerNull", id);
return nobuild ? 0 : new MimeHandlerNull(config, id); return nobuild ? 0 : new MimeHandlerNull(config, id);
} else if (lmime.find("text/") == 0) { } else if (lmime.find("text/") == 0) {
@ -176,6 +183,10 @@ static RecollFilter *mhFactory(RclConfig *config, const string &mime,
LOGDEB2("mhFactory(" << mime << "): returning MimeHandlerText(x)\n"); LOGDEB2("mhFactory(" << mime << "): returning MimeHandlerText(x)\n");
MD5String("MimeHandlerText", id); MD5String("MimeHandlerText", id);
return nobuild ? 0 : new MimeHandlerText(config, id); return nobuild ? 0 : new MimeHandlerText(config, id);
} else if ("xsltproc" == lmime) {
// XML Types processed with one or several xslt style sheets.
MD5String(mimeOrParams, id);
return nobuild ? 0 : new MimeHandlerXslt(config, id, lparams);
} else { } else {
// We should not get there. It means that "internal" was set // We should not get there. It means that "internal" was set
// as a handler in mimeconf for a mime type we actually can't // as a handler in mimeconf for a mime type we actually can't
@ -262,7 +273,7 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs,
/* Get handler/filter object for given mime type: */ /* Get handler/filter object for given mime type: */
RecollFilter *getMimeHandler(const string &mtype, RclConfig *cfg, RecollFilter *getMimeHandler(const string &mtype, RclConfig *cfg,
bool filtertypes) bool filtertypes)
{ {
LOGDEB("getMimeHandler: mtype [" << mtype << "] filtertypes " << LOGDEB("getMimeHandler: mtype [" << mtype << "] filtertypes " <<
filtertypes << "\n"); filtertypes << "\n");
@ -291,7 +302,7 @@ RecollFilter *getMimeHandler(const string &mtype, RclConfig *cfg,
} }
bool internal = !stringlowercmp("internal", handlertype); bool internal = !stringlowercmp("internal", handlertype);
if (internal) { if (internal) {
// For internal types let the factory compute the id // For internal types let the factory compute the cache id
mhFactory(cfg, cmdstr.empty() ? mtype : cmdstr, true, id); mhFactory(cfg, cmdstr.empty() ? mtype : cmdstr, true, id);
} else { } else {
// exec/execm: use the md5 of the def line // exec/execm: use the md5 of the def line
@ -304,16 +315,15 @@ RecollFilter *getMimeHandler(const string &mtype, RclConfig *cfg,
goto out; goto out;
LOGDEB2("getMimeHandler: " << mtype << " not in cache\n"); LOGDEB2("getMimeHandler: " << mtype << " not in cache\n");
// Not in cache.
if (internal) { if (internal) {
// If there is a parameter after "internal" it's the mime // If there is a parameter after "internal" it's the mime
// type to use. This is so that we can have bogus mime // type to use, or the further qualifier (e.g. style sheet
// types like text/x-purple-html-log (for ie: specific // name for xslt types). This is so that we can have bogus
// icon) and still use the html filter on them. This is // mime types like text/x-purple-html-log (for ie:
// partly redundant with the localfields/rclaptg, but // specific icon) and still use the html filter on
// better and the latter will probably go away at some // them. This is partly redundant with the
// point in the future. // localfields/rclaptg, but better? (and the latter will
// probably go away at some point in the future?).
LOGDEB2("handlertype internal, cmdstr [" << cmdstr << "]\n"); LOGDEB2("handlertype internal, cmdstr [" << cmdstr << "]\n");
h = mhFactory(cfg, cmdstr.empty() ? mtype : cmdstr, false, id); h = mhFactory(cfg, cmdstr.empty() ? mtype : cmdstr, false, id);
goto out; goto out;
@ -336,14 +346,10 @@ RecollFilter *getMimeHandler(const string &mtype, RclConfig *cfg,
goto out; goto out;
} }
} }
} } else {
// No identified mime type, or no handler associated.
// We get here if there was no specific error, but there is no // Unhandled files are either ignored or their name and
// identified mime type, or no handler associated. // generic metadata is indexed, depending on configuration
// Finally, unhandled files are either ignored or their name and
// generic metadata is indexed, depending on configuration
{
bool indexunknown = false; bool indexunknown = false;
cfg->getConfParam("indexallfilenames", &indexunknown); cfg->getConfParam("indexallfilenames", &indexunknown);
if (indexunknown) { if (indexunknown) {

View File

@ -118,15 +118,15 @@ bool Uncomp::uncompressfile(const string& ifn,
ExecCmd ex; ExecCmd ex;
int status = ex.doexec(cmd, args, 0, &tfile); int status = ex.doexec(cmd, args, 0, &tfile);
if (status || tfile.empty()) { if (status || tfile.empty()) {
LOGERR("uncompressfile: doexec: failed for [" << ifn << "] status 0x" << LOGERR("uncompressfile: doexec: " << cmd << " " <<
status << "\n"); stringsToString(args) << " failed for [" <<
ifn << "] status 0x" << status << "\n");
if (!m_dir->wipe()) { if (!m_dir->wipe()) {
LOGERR("uncompressfile: wipedir failed\n"); LOGERR("uncompressfile: wipedir failed\n");
} }
return false; return false;
} }
if (tfile[tfile.length() - 1] == '\n') rtrimstring(tfile, "\n\r");
tfile.erase(tfile.length() - 1, 1);
m_tfile = tfile; m_tfile = tfile;
m_srcpath = ifn; m_srcpath = ifn;
return true; return true;

View File

@ -12,7 +12,7 @@ import conftree
class RclDynConf: class RclDynConf:
def __init__(self, fname): def __init__(self, fname):
self.data = ConfSimple(fname) self.data = conftree.ConfSimple(fname)
def getStringList(self, sk): def getStringList(self, sk):
nms = self.data.getNames(sk) nms = self.data.getNames(sk)
@ -95,6 +95,6 @@ class RclExtraDbs:
if __name__ == '__main__': if __name__ == '__main__':
config = RclConfig() config = RclConfig()
print(config.getConfParam("topdirs")) print("topdirs = %s" % config.getConfParam("topdirs"))
extradbs = RclExtraDbs(config) extradbs = RclExtraDbs(config)
print(extradbs.getActDbs()) print(extradbs.getActDbs())

View File

@ -341,7 +341,7 @@ void RclMain::startNativeViewer(Rcl::Doc doc, int pagenum, QString term)
// If using an actual file, check that it exists, and if it is // If using an actual file, check that it exists, and if it is
// compressed, we may need an uncompressed version // compressed, we may need an uncompressed version
if (!fn.empty() && theconfig->mimeViewerNeedsUncomp(doc.mimetype)) { if (!fn.empty() && theconfig->mimeViewerNeedsUncomp(doc.mimetype)) {
if (access(fn.c_str(), R_OK) != 0) { if (!path_readable(fn)) {
QMessageBox::warning(0, "Recoll", QMessageBox::warning(0, "Recoll",
tr("Can't access file: ") + u8s2qs(fn)); tr("Can't access file: ") + u8s2qs(fn));
return; return;
@ -445,9 +445,13 @@ void RclMain::execViewer(const map<string, string>& subs, bool enterHistory,
#endif #endif
QStatusBar *stb = statusBar(); QStatusBar *stb = statusBar();
if (stb) { if (stb) {
string fcharset = theconfig->getDefCharset(true);
string prcmd; string prcmd;
#ifdef _WIN32
prcmd = ncmd;
#else
string fcharset = theconfig->getDefCharset(true);
transcode(ncmd, prcmd, fcharset, "UTF-8"); transcode(ncmd, prcmd, fcharset, "UTF-8");
#endif
QString msg = tr("Executing: [") + QString msg = tr("Executing: [") +
QString::fromUtf8(prcmd.c_str()) + "]"; QString::fromUtf8(prcmd.c_str()) + "]";
stb->showMessage(msg, 10000); stb->showMessage(msg, 10000);

View File

@ -375,7 +375,9 @@ QVariant RecollModel::data(const QModelIndex& index, int role) const
string data = m_getters[index.column()](colname, doc); string data = m_getters[index.column()](colname, doc);
#ifndef _WIN32
// Special case url, because it may not be utf-8. URL-encode in this case. // Special case url, because it may not be utf-8. URL-encode in this case.
// Not on windows, where we always read the paths as Unicode.
if (!colname.compare("url")) { if (!colname.compare("url")) {
int ecnt; int ecnt;
string data1; string data1;
@ -383,6 +385,7 @@ QVariant RecollModel::data(const QModelIndex& index, int role) const
data = url_encode(data); data = url_encode(data);
} }
} }
#endif
list<string> lr; list<string> lr;
g_hiliter.plaintorich(data, lr, m_hdata); g_hiliter.plaintorich(data, lr, m_hdata);

View File

@ -257,9 +257,8 @@ void Db::Native::openWrite(const string& dir, Db::OpenMode mode)
Xapian::DB_CREATE_OR_OVERWRITE; Xapian::DB_CREATE_OR_OVERWRITE;
#ifdef _WIN32 #ifdef _WIN32
// Xapian is quite bad at erasing partial db which can // On Windows, Xapian is quite bad at erasing partial db which can
// occur because of open file deletion errors on // occur because of open file deletion errors.
// Windows.
if (mode == DbTrunc) { if (mode == DbTrunc) {
if (path_exists(path_cat(dir, "iamchert"))) { if (path_exists(path_cat(dir, "iamchert"))) {
wipedir(dir); wipedir(dir);
@ -268,9 +267,21 @@ void Db::Native::openWrite(const string& dir, Db::OpenMode mode)
} }
#endif #endif
if (::access(dir.c_str(), 0) == 0) { if (path_exists(dir)) {
// Existing index // Existing index.
xwdb = Xapian::WritableDatabase(dir, action); xwdb = Xapian::WritableDatabase(dir, action);
if (action == Xapian::DB_CREATE_OR_OVERWRITE ||
xwdb.get_doccount() == 0) {
// New or empty index. Set the "store text" option
// according to configuration. The metadata record will be
// written further down.
m_storetext = o_index_storedoctext;
LOGDEB("Db:: index " << (m_storetext?"stores":"does not store") <<
" document text\n");
} else {
// Existing non empty. Get the option from the index.
storesDocText(xwdb);
}
} else { } else {
// New index. If possible, and depending on config, use a stub // New index. If possible, and depending on config, use a stub
// to force using Chert. No sense in doing this if we are // to force using Chert. No sense in doing this if we are
@ -301,23 +312,22 @@ void Db::Native::openWrite(const string& dir, Db::OpenMode mode)
LOGINF("Rcl::Db::openWrite: new index will " << (m_storetext?"":"not ") LOGINF("Rcl::Db::openWrite: new index will " << (m_storetext?"":"not ")
<< "store document text\n"); << "store document text\n");
#else #else
// Old Xapian (chert only) or newer (no chert). Use the // Old Xapian (chert only) or much newer (no chert). Use the
// default index backend and let the user decide of the // default index backend and let the user decide of the
// abstract generation method. The configured default is to // abstract generation method. The configured default is to
// store the text. // store the text.
xwdb = Xapian::WritableDatabase(dir, action); xwdb = Xapian::WritableDatabase(dir, action);
m_storetext = o_index_storedoctext; m_storetext = o_index_storedoctext;
#endif #endif
// Set the storetext value inside the index descriptor (new
// with recoll 1.24, maybe we'll have other stuff to store in
// there in the future).
string desc = string("storetext=") + (m_storetext ? "1" : "0") + "\n";
xwdb.set_metadata(cstr_RCL_IDX_DESCRIPTOR_KEY, desc);
} }
// If the index is empty, write the data format version at once // If the index is empty, write the data format version,
// to avoid stupid error messages: // and the storetext option value inside the index descriptor (new
// with recoll 1.24, maybe we'll have other stuff to store in
// there in the future).
if (xwdb.get_doccount() == 0) { if (xwdb.get_doccount() == 0) {
string desc = string("storetext=") + (m_storetext ? "1" : "0") + "\n";
xwdb.set_metadata(cstr_RCL_IDX_DESCRIPTOR_KEY, desc);
xwdb.set_metadata(cstr_RCL_IDX_VERSION_KEY, cstr_RCL_IDX_VERSION); xwdb.set_metadata(cstr_RCL_IDX_VERSION_KEY, cstr_RCL_IDX_VERSION);
} }
@ -328,21 +338,26 @@ void Db::Native::openWrite(const string& dir, Db::OpenMode mode)
#endif #endif
} }
void Db::Native::openRead(const string& dir) void Db::Native::storesDocText(Xapian::Database& db)
{ {
m_iswritable = false; string desc = db.get_metadata(cstr_RCL_IDX_DESCRIPTOR_KEY);
xrdb = Xapian::Database(dir);
string desc = xrdb.get_metadata(cstr_RCL_IDX_DESCRIPTOR_KEY);
ConfSimple cf(desc, 1); ConfSimple cf(desc, 1);
string val; string val;
m_storetext = false; m_storetext = false;
if (cf.get("storetext", val) && stringToBool(val)) { if (cf.get("storetext", val) && stringToBool(val)) {
m_storetext = true; m_storetext = true;
} }
LOGDEB("Db::openRead: index " << (m_storetext?"stores":"does not store") << LOGDEB("Db:: index " << (m_storetext?"stores":"does not store") <<
" document text\n"); " document text\n");
} }
void Db::Native::openRead(const string& dir)
{
m_iswritable = false;
xrdb = Xapian::Database(dir);
storesDocText(xrdb);
}
/* See comment in class declaration: return all subdocuments of a /* See comment in class declaration: return all subdocuments of a
* document given by its unique id. */ * document given by its unique id. */
bool Db::Native::subDocs(const string &udi, int idxi, bool Db::Native::subDocs(const string &udi, int idxi,

View File

@ -106,6 +106,10 @@ class Db::Native {
void openWrite(const std::string& dir, Db::OpenMode mode); void openWrite(const std::string& dir, Db::OpenMode mode);
void openRead(const string& dir); void openRead(const string& dir);
// Determine if an existing index is of the full-text-storing kind
// by looking at the index metadata. Stores the result in m_storetext
void storesDocText(Xapian::Database&);
// Final steps of doc update, part which need to be single-threaded // Final steps of doc update, part which need to be single-threaded
bool addOrUpdateWrite(const string& udi, const string& uniterm, bool addOrUpdateWrite(const string& udi, const string& uniterm,
Xapian::Document *doc, size_t txtlen Xapian::Document *doc, size_t txtlen

View File

@ -79,38 +79,38 @@ application/postscript = exec rclps
application/sql = internal text/plain application/sql = internal text/plain
application/vnd.ms-excel = execm rclxls.py application/vnd.ms-excel = execm rclxls.py
application/vnd.ms-powerpoint = execm rclppt.py application/vnd.ms-powerpoint = execm rclppt.py
application/vnd.oasis.opendocument.text = execm rclsoff.py application/vnd.oasis.opendocument.text = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.text-template = execm rclsoff.py application/vnd.oasis.opendocument.text-template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.presentation = execm rclsoff.py application/vnd.oasis.opendocument.presentation = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.spreadsheet = execm rclsoff.py application/vnd.oasis.opendocument.spreadsheet = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.graphics = execm rclsoff.py application/vnd.oasis.opendocument.graphics = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.presentation-flat-xml = execm rclsoff-flat.py application/vnd.oasis.opendocument.presentation-flat-xml = internal xsltproc opendoc-flat.xsl
application/vnd.oasis.opendocument.text-flat-xml = execm rclsoff-flat.py application/vnd.oasis.opendocument.text-flat-xml = internal xsltproc opendoc-flat.xsl
application/vnd.oasis.opendocument.spreadsheet-flat-xml = execm rclsoff-flat.py application/vnd.oasis.opendocument.spreadsheet-flat-xml = internal xsltproc opendoc-flat.xsl
application/vnd.openxmlformats-officedocument.wordprocessingml.document = \ application/vnd.openxmlformats-officedocument.wordprocessingml.document = \
execm rclopxml.py internal xsltproc docProps/core.xml openxml-meta.xsl word/document.xml openxml-word-body.xsl
application/vnd.openxmlformats-officedocument.wordprocessingml.template = \ application/vnd.openxmlformats-officedocument.wordprocessingml.template = \
execm rclopxml.py internal xsltproc docProps/core.xml openxml-meta.xsl word/document.xml openxml-word-body.xsl
application/vnd.openxmlformats-officedocument.presentationml.template = \ application/vnd.openxmlformats-officedocument.presentationml.template = \
execm rclopxml.py execm rclopxml.py
application/vnd.openxmlformats-officedocument.presentationml.presentation = \ application/vnd.openxmlformats-officedocument.presentationml.presentation = \
execm rclopxml.py execm rclopxml.py
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = \ application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = \
execm rclopxml.py internal xsltproc docProps/core.xml openxml-meta.xsl xl/sharedStrings.xml openxml-xls-body.xsl
application/vnd.openxmlformats-officedocument.spreadsheetml.template =\ application/vnd.openxmlformats-officedocument.spreadsheetml.template =\
execm rclopxml.py internal xsltproc docProps/core.xml openxml-meta.xsl xl/sharedStrings.xml openxml-xls-body.xsl
application/vnd.sun.xml.calc = execm rclsoff.py application/vnd.sun.xml.calc = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.calc.template = execm rclsoff.py application/vnd.sun.xml.calc.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.draw = execm rclsoff.py application/vnd.sun.xml.draw = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.draw.template = execm rclsoff.py application/vnd.sun.xml.draw.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.impress = execm rclsoff.py application/vnd.sun.xml.impress = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.impress.template = execm rclsoff.py application/vnd.sun.xml.impress.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.math = execm rclsoff.py application/vnd.sun.xml.math = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.writer = execm rclsoff.py application/vnd.sun.xml.writer = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.writer.global = execm rclsoff.py application/vnd.sun.xml.writer.global = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.writer.template = execm rclsoff.py application/vnd.sun.xml.writer.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.wordperfect = exec wpd2html;mimetype=text/html application/vnd.wordperfect = exec wpd2html;mimetype=text/html
application/x-abiword = execm rclabw.py application/x-abiword = internal xsltproc abiword.xsl
application/x-awk = internal text/plain application/x-awk = internal text/plain
application/x-chm = execm rclchm application/x-chm = execm rclchm
application/x-dia-diagram = execm rcldia;mimetype=text/plain application/x-dia-diagram = execm rcldia;mimetype=text/plain
@ -118,12 +118,12 @@ application/x-dvi = exec rcldvi
application/x-flac = execm rclaudio application/x-flac = execm rclaudio
application/x-gnote = execm rclxml.py application/x-gnote = execm rclxml.py
application/x-gnuinfo = execm rclinfo application/x-gnuinfo = execm rclinfo
application/x-gnumeric = execm rclgnm.py application/x-gnumeric = internal xsltproc gnumeric.xsl
application/x-kword = exec rclkwd application/x-kword = exec rclkwd
application/x-lyx = exec rcllyx application/x-lyx = exec rcllyx
application/x-mimehtml = internal message/rfc822 application/x-mimehtml = internal message/rfc822
#application/x-mobipocket-ebook = execm rclmobi #application/x-mobipocket-ebook = execm rclmobi
application/x-okular-notes = execm rclokulnote.py application/x-okular-notes = internal xsltproc okular-note.xsl
application/x-perl = internal text/plain application/x-perl = internal text/plain
# Returned by xdg-mime for .php. Future-proofing # Returned by xdg-mime for .php. Future-proofing
application/x-php = internal text/plain application/x-php = internal text/plain
@ -149,7 +149,7 @@ image/jpeg = execm rclimg
image/png = execm rclimg image/png = execm rclimg
image/tiff = execm rclimg image/tiff = execm rclimg
image/vnd.djvu = execm rcldjvu.py image/vnd.djvu = execm rcldjvu.py
image/svg+xml = execm rclsvg.py image/svg+xml = internal xsltproc svg.xsl
image/x-xcf = execm rclimg image/x-xcf = execm rclimg
image/x-nikon-nef = execm rclimg image/x-nikon-nef = execm rclimg
inode/symlink = internal inode/symlink = internal
@ -168,7 +168,7 @@ text/css = internal text/plain
application/javascript = internal text/plain application/javascript = internal text/plain
text/x-bibtex = exec rclbibtex.sh ; mimetype = text/plain text/x-bibtex = exec rclbibtex.sh ; mimetype = text/plain
text/x-csv = internal text/plain text/x-csv = internal text/plain
text/x-fictionbook = execm rclfb2.py text/x-fictionbook = internal xsltproc fb2.xsl
text/x-gaim-log = exec rclgaim text/x-gaim-log = exec rclgaim
text/x-html-aptosid-man = exec rclaptosidman text/x-html-aptosid-man = exec rclaptosidman
text/x-lua = internal text/x-lua = internal
@ -190,7 +190,7 @@ text/x-tex = exec rcltex
# All parameter and tag names, attribute values etc, are indexed as # All parameter and tag names, attribute values etc, are indexed as
# text. rclxml.py tries to just index the text content. # text. rclxml.py tries to just index the text content.
#application/xml = execm rclxml.py #application/xml = execm rclxml.py
#text/xml = execm rclxml.py #text/xml = internal xsltproc xml.xsl
application/xml = internal text/plain application/xml = internal text/plain
text/xml = internal text/plain text/xml = internal text/plain

View File

@ -507,6 +507,13 @@ int ConfSimple::eraseKey(const string& sk)
return write(); return write();
} }
// Drop all the in-memory content (values and line ordering data), then
// call write() and hand its return value back to the caller.
int ConfSimple::clear()
{
    // The two containers are independent, the clearing order is irrelevant.
    m_order.clear();
    m_submaps.clear();
    const int status = write();
    return status;
}
// Walk the tree, calling user function at each node // Walk the tree, calling user function at each node
ConfSimple::WalkerCode ConfSimple::WalkerCode
ConfSimple::sortwalk(WalkerCode(*walker)(void *, const string&, const string&), ConfSimple::sortwalk(WalkerCode(*walker)(void *, const string&, const string&),
@ -692,6 +699,13 @@ bool ConfSimple::commentsAsXML(ostream& out)
} }
break; break;
} }
case ConfLine::CFL_SK:
out << "<subkey>" << it->m_data << "</subkey>" << endl;
break;
case ConfLine::CFL_VAR:
out << "<varsetting>" << it->m_data << " = " <<
it->m_value << "</varsetting>" << endl;
break;
default: default:
break; break;
} }

View File

@ -159,10 +159,7 @@ public:
void reparse(const std::string& in); void reparse(const std::string& in);
/** Clear all content */ /** Clear all content */
void clear() { int clear();
m_submaps.clear();
m_order.clear();
}
/** /**
* Get string value for named parameter, from specified subsection (looks * Get string value for named parameter, from specified subsection (looks

View File

@ -36,6 +36,7 @@
#include "log.h" #include "log.h"
#include "pathut.h" #include "pathut.h"
#include "fstreewalk.h" #include "fstreewalk.h"
#include "transcode.h"
using namespace std; using namespace std;
@ -310,6 +311,20 @@ FsTreeWalker::Status FsTreeWalker::walk(const string& _top,
return FtwOk; return FtwOk;
} }
#ifdef _WIN32
#define DIRENT _wdirent
#define DIRHDL _WDIR
#define OPENDIR _wopendir
#define CLOSEDIR _wclosedir
#define READDIR _wreaddir
#else
#define DIRENT dirent
#define DIRHDL DIR
#define OPENDIR opendir
#define CLOSEDIR closedir
#define READDIR readdir
#endif
// Note that the 'norecurse' flag is handled as part of the directory read. // Note that the 'norecurse' flag is handled as part of the directory read.
// This means that we always go into the top 'walk()' parameter if it is a // This means that we always go into the top 'walk()' parameter if it is a
// directory, even if norecurse is set. Bug or Feature ? // directory, even if norecurse is set. Bug or Feature ?
@ -341,24 +356,25 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top,
// This is a directory, read it and process entries: // This is a directory, read it and process entries:
#ifndef _WIN32
// Detect if directory already seen. This could just be several // Detect if directory already seen. This could just be several
// symlinks pointing to the same place (if FtwFollow is set), it // symlinks pointing to the same place (if FtwFollow is set), it
// could also be some other kind of cycle. In any case, there is // could also be some other kind of cycle. In any case, there is
// no point in entering again. // no point in entering again.
// For now, we'll ignore the "other kind of cycle" part and only monitor // For now, we'll ignore the "other kind of cycle" part and only monitor
// this is FtwFollow is set // this is FtwFollow is set
#ifndef _WIN32
if (data->options & FtwFollow) { if (data->options & FtwFollow) {
DirId dirid(stp->st_dev, stp->st_ino); DirId dirid(stp->st_dev, stp->st_ino);
if (data->donedirs.find(dirid) != data->donedirs.end()) { if (data->donedirs.find(dirid) != data->donedirs.end()) {
LOGINFO("Not processing [" << (top) << "] (already seen as other path)\n" ); LOGINFO("Not processing [" << top <<
"] (already seen as other path)\n");
return status; return status;
} }
data->donedirs.insert(dirid); data->donedirs.insert(dirid);
} }
#endif #endif
SYSPATH(top, systop);
DIR *d = opendir(top.c_str()); DIRHDL *d = OPENDIR(systop);
if (d == 0) { if (d == 0) {
data->logsyserr("opendir", top); data->logsyserr("opendir", top);
switch (errno) { switch (errno) {
@ -376,42 +392,38 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top,
} }
} }
struct dirent *ent; struct DIRENT *ent;
while ((ent = readdir(d)) != 0) { while ((ent = READDIR(d)) != 0) {
string fn; string fn;
struct stat st; struct stat st;
#ifdef _WIN32
string sdname;
if (!wchartoutf8(ent->d_name, sdname)) {
LOGERR("wchartoutf8 failed in " << top << endl);
continue;
}
const char *dname = sdname.c_str();
#else
const char *dname = ent->d_name;
#endif
// Maybe skip dotfiles // Maybe skip dotfiles
if ((data->options & FtwSkipDotFiles) && ent->d_name[0] == '.') if ((data->options & FtwSkipDotFiles) && dname[0] == '.')
continue; continue;
// Skip . and .. // Skip . and ..
if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, "..")) if (!strcmp(dname, ".") || !strcmp(dname, ".."))
continue; continue;
// Skipped file names match ? // Skipped file names match ?
if (!data->skippedNames.empty()) { if (!data->skippedNames.empty()) {
if (inSkippedNames(ent->d_name)) if (inSkippedNames(dname))
continue; continue;
} }
fn = path_cat(top, dname);
fn = path_cat(top, ent->d_name);
#ifdef _WIN32
// readdir gets the useful attrs, no inode indirection on windows,
// spare the path_fileprops() call, but make sure we mimick it.
memset(&st, 0, sizeof(st));
st.st_mtime = ent->d_mtime;
st.st_size = ent->d_size;
st.st_mode = ent->d_mode;
// ctime is really creation time on Windows. Just use mtime
// for all. We only use ctime on Unix to catch xattr changes
// anyway.
st.st_ctime = st.st_mtime;
#else
int statret = path_fileprops(fn.c_str(), &st, data->options&FtwFollow); int statret = path_fileprops(fn.c_str(), &st, data->options&FtwFollow);
if (statret == -1) { if (statret == -1) {
data->logsyserr("stat", fn); data->logsyserr("stat", fn);
continue; continue;
} }
#endif
if (!data->skippedPaths.empty()) { if (!data->skippedPaths.empty()) {
// We do not check the ancestors. This means that you can have // We do not check the ancestors. This means that you can have
@ -461,7 +473,7 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top,
out: out:
if (d) if (d)
closedir(d); CLOSEDIR(d);
return status; return status;
} }

View File

@ -20,15 +20,17 @@
#include <string.h> #include <string.h>
#include "md5ut.h" #include "md5ut.h"
#include "readfile.h" #include "readfile.h"
using namespace std; using namespace std;
class FileScanMd5 : public FileScanDo { // Quite incredibly if this class is named FileScanMd5 like the
// different one in readfile.cpp, the vtables get mixed up and mh_xslt
// crashes while calling a virtual function (gcc 6.3 and 7.3)
class FileScanMd5loc : public FileScanDo {
public: public:
FileScanMd5(string& d) : digest(d) {} FileScanMd5loc(string& d) : digest(d) {}
virtual bool init(size_t size, string *) virtual bool init(int64_t, string *)
{ {
MD5Init(&ctx); MD5Init(&ctx);
return true; return true;
@ -44,7 +46,7 @@ public:
bool MD5File(const string& filename, string &digest, string *reason) bool MD5File(const string& filename, string &digest, string *reason)
{ {
FileScanMd5 md5er(digest); FileScanMd5loc md5er(digest);
if (!file_scan(filename, &md5er, reason)) if (!file_scan(filename, &md5er, reason))
return false; return false;
// We happen to know that digest and md5er.digest are the same object // We happen to know that digest and md5er.digest are the same object

7573
src/utils/miniz.cpp Normal file

File diff suppressed because it is too large Load Diff

1325
src/utils/miniz.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -24,13 +24,19 @@
#include <stdio.h> #include <stdio.h>
#include <math.h> #include <math.h>
#include <errno.h> #include <errno.h>
#include <dirent.h>
#ifdef _WIN32 #ifdef _WIN32
#include "dirent.h"
#include "safefcntl.h" #include "safefcntl.h"
#include "safeunistd.h" #include "safeunistd.h"
#include "safewindows.h" #include "safewindows.h"
#include "safesysstat.h" #include "safesysstat.h"
#include "transcode.h"
#define STAT _wstat
#define LSTAT _wstat
#define STATBUF _stat
#define ACCESS _waccess
#else // Not windows -> #else // Not windows ->
#include <fcntl.h> #include <fcntl.h>
@ -39,10 +45,13 @@
#include <pwd.h> #include <pwd.h>
#include <sys/file.h> #include <sys/file.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <dirent.h>
#include <sys/statvfs.h> #include <sys/statvfs.h>
#include <sys/types.h> #include <sys/types.h>
#define STAT stat
#define LSTAT lstat
#define STATBUF stat
#define ACCESS access
#endif #endif
#include <cstdlib> #include <cstdlib>
@ -506,8 +515,9 @@ bool path_makepath(const string& ipath, int mode)
bool path_isdir(const string& path) bool path_isdir(const string& path)
{ {
struct stat st; struct STATBUF st;
if (lstat(path.c_str(), &st) < 0) { SYSPATH(path, syspath);
if (LSTAT(syspath, &st) < 0) {
return false; return false;
} }
if (S_ISDIR(st.st_mode)) { if (S_ISDIR(st.st_mode)) {
@ -518,8 +528,9 @@ bool path_isdir(const string& path)
long long path_filesize(const string& path) long long path_filesize(const string& path)
{ {
struct stat st; struct STATBUF st;
if (stat(path.c_str(), &st) < 0) { SYSPATH(path, syspath);
if (STAT(syspath, &st) < 0) {
return -1; return -1;
} }
return (long long)st.st_size; return (long long)st.st_size;
@ -531,8 +542,9 @@ int path_fileprops(const std::string path, struct stat *stp, bool follow)
return -1; return -1;
} }
memset(stp, 0, sizeof(struct stat)); memset(stp, 0, sizeof(struct stat));
struct stat mst; struct STATBUF mst;
int ret = follow ? stat(path.c_str(), &mst) : lstat(path.c_str(), &mst); SYSPATH(path, syspath);
int ret = follow ? STAT(syspath, &mst) : LSTAT(syspath, &mst);
if (ret != 0) { if (ret != 0) {
return ret; return ret;
} }
@ -551,7 +563,13 @@ int path_fileprops(const std::string path, struct stat *stp, bool follow)
bool path_exists(const string& path) bool path_exists(const string& path)
{ {
return access(path.c_str(), 0) == 0; SYSPATH(path, syspath);
return ACCESS(syspath, 0) == 0;
}
bool path_readable(const string& path)
{
SYSPATH(path, syspath);
return ACCESS(syspath, R_OK) == 0;
} }
// Allowed punctuation in the path part of an URI according to RFC2396 // Allowed punctuation in the path part of an URI according to RFC2396

View File

@ -87,10 +87,20 @@ extern int path_fileprops(const std::string path, struct stat *stp,
/// Returns true if last elt could be checked to exist. False may mean that /// Returns true if last elt could be checked to exist. False may mean that
/// the file/dir does not exist or that an error occurred. /// the file/dir does not exist or that an error occurred.
extern bool path_exists(const std::string& path); extern bool path_exists(const std::string& path);
/// Same but must be readable
extern bool path_readable(const std::string& path);
/// Return separator for PATH environment variable /// Return separator for PATH environment variable
extern std::string path_PATHsep(); extern std::string path_PATHsep();
#ifdef _WIN32
#define SYSPATH(PATH, SPATH) wchar_t PATH ## _buf[2048]; \
utf8towchar(PATH, PATH ## _buf, 2048); \
wchar_t *SPATH = PATH ## _buf;
#else
#define SYSPATH(PATH, SPATH) const char *SPATH = PATH.c_str()
#endif
/// Dump directory /// Dump directory
extern bool readdir(const std::string& dir, std::string& reason, extern bool readdir(const std::string& dir, std::string& reason,
std::set<std::string>& entries); std::set<std::string>& entries);

View File

@ -164,10 +164,15 @@ const string& path_pkgdatadir()
// into either utf-8 if transcoding succeeds, or url-encoded // into either utf-8 if transcoding succeeds, or url-encoded
bool printableUrl(const string& fcharset, const string& in, string& out) bool printableUrl(const string& fcharset, const string& in, string& out)
{ {
#ifdef _WIN32
// On windows our paths are always utf-8
out = in;
#else
int ecnt = 0; int ecnt = 0;
if (!transcode(in, out, fcharset, "UTF-8", &ecnt) || ecnt) { if (!transcode(in, out, fcharset, "UTF-8", &ecnt) || ecnt) {
out = url_encode(in, 7); out = url_encode(in, 7);
} }
#endif
return true; return true;
} }

View File

@ -14,43 +14,67 @@
* Free Software Foundation, Inc., * Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/ */
#ifndef TEST_READFILE
#ifdef BUILDING_RECOLL #ifdef BUILDING_RECOLL
#include "autoconfig.h" #include "autoconfig.h"
#else #else
#include "config.h" #include "config.h"
#endif #endif
#include "readfile.h"
#include <errno.h> #include <errno.h>
#include <sys/types.h> #include <sys/types.h>
#ifdef _WIN32 #ifdef _WIN32
#include "safefcntl.h" #include "safefcntl.h"
#include "safesysstat.h" #include "safesysstat.h"
#include "safeunistd.h" #include "safeunistd.h"
#include "transcode.h"
#define OPEN _wopen
#else #else
#define O_BINARY 0 #define O_BINARY 0
#include <fcntl.h> #include <fcntl.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <unistd.h> #include <unistd.h>
#define OPEN open
#endif #endif
#include <string> #include <string>
#include "readfile.h"
#include "smallut.h" #include "smallut.h"
#include "pathut.h"
using std::string; #ifdef READFILE_ENABLE_MD5
#include "md5.h"
#endif
#ifdef MDU_INCLUDE_LOG
#include MDU_INCLUDE_LOG
#else
#include "log.h"
#endif
using namespace std;
///////////////
// Implementation of basic interface: read whole file to memory buffer
class FileToString : public FileScanDo { class FileToString : public FileScanDo {
public: public:
FileToString(string& data) : m_data(data) {} FileToString(string& data) : m_data(data) {}
string& m_data;
bool init(size_t size, string *reason) { // Note: the fstat() + reserve() (in init()) calls divide cpu
// usage almost by 2 on both linux i586 and macosx (compared to
// just append()) Also tried a version with mmap, but it's
// actually slower on the mac and not faster on linux.
virtual bool init(int64_t size, string *reason) {
if (size > 0) { if (size > 0) {
m_data.reserve(size); m_data.reserve(size);
} }
return true; return true;
} }
bool data(const char *buf, int cnt, string *reason) { virtual bool data(const char *buf, int cnt, string *reason) {
try { try {
m_data.append(buf, cnt); m_data.append(buf, cnt);
} catch (...) { } catch (...) {
@ -59,248 +83,558 @@ public:
} }
return true; return true;
} }
string& m_data;
}; };
bool file_to_string(const string& fn, string& data, int64_t offs, size_t cnt,
string *reason)
{
FileToString accum(data);
return file_scan(fn, &accum, offs, cnt, reason
#ifdef READFILE_ENABLE_MD5
, nullptr
#endif
);
}
bool file_to_string(const string& fn, string& data, string *reason) bool file_to_string(const string& fn, string& data, string *reason)
{ {
return file_to_string(fn, data, 0, size_t(-1), reason); return file_to_string(fn, data, 0, size_t(-1), reason);
} }
bool file_to_string(const string& fn, string& data, int64_t offs, size_t cnt,
string *reason)
/////////////
// Callback/filtering interface
// Abstract class base for both source (origin) and filter
// (midstream). Both have a downstream
// Common base for every pipe element which has something after it:
// data sources and midstream filters. It only stores the (non-owned)
// pointer to the next element, which may be null.
class FileScanUpstream {
protected:
    // Next element in the pipe. Not owned.
    FileScanDo *m_down{nullptr};

public:
    // Current downstream element, null if none is set.
    virtual FileScanDo *out() {
        return m_down;
    }

    // Set or replace the downstream element.
    virtual void setDownstream(FileScanDo *down) {
        m_down = down;
    }
};
// Source element.
// Head of a pipe: produces the data and pushes it to the downstream
// element. Concrete sources implement scan().
class FileScanSource : public FileScanUpstream {
public:
    // @param down first element to receive the data (may be null).
    FileScanSource(FileScanDo *down) {
        // Same effect as setDownstream(down): this class does not
        // override it, so the base implementation would run anyway.
        m_down = down;
    }

    // Read the whole input, feeding it to out().
    // @return true for success, false if an error occurred.
    virtual bool scan() = 0;
};
// Inside element of a transformation pipe. The idea is that elements
// which don't recognize the data get themselves out of the pipe
// (pop()). Typically, only one of the decompression modules
// (e.g. gzip/bzip2/xz...) would remain. For now there is only gzip,
// it pops itself if the data does not have the right magic number
class FileScanFilter : public FileScanDo, public FileScanUpstream {
public:
virtual void insertAtSink(FileScanDo *sink, FileScanUpstream *upstream) {
setDownstream(sink);
if (m_down) {
m_down->setUpstream(this);
}
setUpstream(upstream);
if (m_up) {
m_up->setDownstream(this);
}
}
// Remove myself from the pipe.
virtual void pop() {
if (m_down) {
m_down->setUpstream(m_up);
}
if (m_up) {
m_up->setDownstream(m_down);
}
}
virtual void setUpstream(FileScanUpstream *up) override {
m_up = up;
}
private:
FileScanUpstream *m_up{nullptr};
};
#if defined(READFILE_ENABLE_ZLIB)
#include <zlib.h>
class GzFilter : public FileScanFilter {
public:
virtual ~GzFilter() {
if (m_initdone) {
inflateEnd(&m_stream);
}
}
virtual bool init(int64_t size, string *reason) override {
LOGDEB1("GzFilter::init\n");
if (out()) {
return out()->init(size, reason);
}
return true;
}
virtual bool data(const char *buf, int cnt, string *reason) override {
LOGDEB1("GzFilter::data: cnt " << cnt << endl);
int error;
m_stream.next_in = (Bytef*)buf;
m_stream.avail_in = cnt;
if (m_initdone == false) {
// We do not support a first read cnt < 2. This quite
// probably can't happen with a compressed file (size>2)
// except if we're reading a tty which is improbable. So
// assume this is a regular file.
const unsigned char *ubuf = (const unsigned char *)buf;
if ((cnt < 2) || ubuf[0] != 0x1f || ubuf[1] != 0x8b) {
LOGDEB1("GzFilter::data: not gzip. out() is " << out() << "\n");
pop();
if (out()) {
return out()->data(buf, cnt, reason);
} else {
return false;
}
}
m_stream.opaque = nullptr;
m_stream.zalloc = alloc_func;
m_stream.zfree = free_func;
m_stream.next_out = (Bytef*)m_obuf;
m_stream.avail_out = m_obs;
if ((error = inflateInit2(&m_stream, 15+32)) != Z_OK) {
LOGERR("inflateInit2 error: " << error << endl);
if (reason) {
*reason += " Zlib inflateinit failed";
if (m_stream.msg && *m_stream.msg) {
*reason += string(": ") + m_stream.msg;
}
}
return false;
}
m_initdone = true;
}
while (m_stream.avail_in != 0) {
m_stream.next_out = (Bytef*)m_obuf;
m_stream.avail_out = m_obs;
if ((error = inflate(&m_stream, Z_SYNC_FLUSH)) < Z_OK) {
LOGERR("inflate error: " << error << endl);
if (reason) {
*reason += " Zlib inflate failed";
if (m_stream.msg && *m_stream.msg) {
*reason += string(": ") + m_stream.msg;
}
}
return false;
}
if (out() &&
!out()->data(m_obuf, m_obs - m_stream.avail_out, reason)) {
return false;
}
}
return true;
}
static voidpf alloc_func(voidpf opaque, uInt items, uInt size) {
return malloc(items * size);
}
static void free_func(voidpf opaque, voidpf address) {
free(address);
}
bool m_initdone{false};
z_stream m_stream;
char m_obuf[10000];
const int m_obs{10000};
};
#endif // GZ
#ifdef READFILE_ENABLE_MD5
// Pass-through pipe element which computes the MD5 of the data going
// through it. The binary digest is stored in the caller's string when
// finish() is called.
class FileScanMd5 : public FileScanFilter {
public:
    // @param d string which will receive the digest. Held by reference.
    FileScanMd5(string& d) : digest(d) {}

    // Reset the MD5 state, then propagate to the next element if any.
    virtual bool init(int64_t size, string *reason) override {
        LOGDEB1("FileScanMd5: init\n");
        MD5Init(&ctx);
        if (!out()) {
            return true;
        }
        return out()->init(size, reason);
    }

    // Add the chunk to the hash, then pass it on unchanged.
    virtual bool data(const char *buf, int cnt, string *reason) override {
        LOGDEB1("FileScanMd5: data. cnt " << cnt << endl);
        MD5Update(&ctx, (const unsigned char*)buf, cnt);
        if (!out()) {
            return true;
        }
        return out()->data(buf, cnt, reason);
    }

    // Terminate the computation and store the result into 'digest'.
    bool finish() {
        LOGDEB1("FileScanMd5: finish\n");
        MD5Final(digest, &ctx);
        return true;
    }

    string &digest;
    MD5_CTX ctx;
};
#endif // MD5
// Source taking data from a regular file
// Source taking data from a regular file, or from stdin (file
// descriptor 0) if the file name is empty.
class FileScanSourceFile : public FileScanSource {
public:
    // @param next first element to receive the data.
    // @param fn file name. Empty: read from stdin.
    // @param startoffs offset of the first byte to send downstream.
    // @param cnttoread maximum byte count to send, -1 for no limit.
    // @param reason if not null, error messages are appended to it.
    FileScanSourceFile(FileScanDo *next, const string& fn, int64_t startoffs,
                       int64_t cnttoread, string *reason)
        : FileScanSource(next), m_fn(fn), m_startoffs(startoffs),
          m_cnttoread(cnttoread), m_reason(reason) { }

    // Open the input if needed, send the data downstream, and close
    // what we opened, whatever the outcome.
    // @return true if the data was read and accepted by the sink.
    virtual bool scan() {
        LOGDEB1("FileScanSourceFile: reading " << m_fn << " offs " <<
                m_startoffs<< " cnt " << m_cnttoread << " out " << out() << endl);
        bool noclosing = true;
        int fd = 0;
        struct stat st;
        // Initialize st_size: if fn.empty() , the fstat() call won't happen.
        st.st_size = 0;

        // If we have a file name, open it, else use stdin.
        if (!m_fn.empty()) {
            SYSPATH(m_fn, realpath);
            fd = OPEN(realpath, O_RDONLY | O_BINARY);
            if (fd < 0) {
                catstrerror(m_reason, "open/stat", errno);
                return false;
            }
            noclosing = false;
            if (fstat(fd, &st) < 0) {
                // Record the error before close() gets a chance to
                // change errno, and don't leak the descriptor.
                catstrerror(m_reason, "open/stat", errno);
                close(fd);
                return false;
            }
        }

#if defined O_NOATIME && O_NOATIME != 0
        // Best effort: failure is ignored.
        if (fcntl(fd, F_SETFL, O_NOATIME) < 0) {
            // perror("fcntl");
        }
#endif

        bool ret = readToSink(fd, st.st_size);

        // Single exit point for an opened file: all error paths in
        // readToSink() end up here, so that the descriptor is always
        // released. stdin is never closed.
        if (!noclosing) {
            close(fd);
        }
        return ret;
    }

protected:
    // Position the input and copy it to the downstream element.
    // @param fd open descriptor to read from. Not closed here.
    // @param fsize size from fstat(), 0 if unknown (stdin).
    // @return false for a seek/read error or if the sink fails.
    bool readToSink(int fd, int64_t fsize) {
        const int RDBUFSZ = 8192;

        // Give the sink a size hint. The return value is ignored, as
        // it always was.
        if (out()) {
            if (m_cnttoread != -1 && m_cnttoread) {
                out()->init(m_cnttoread + 1, m_reason);
            } else if (fsize > 0) {
                out()->init(fsize + 1, m_reason);
            } else {
                out()->init(0, m_reason);
            }
        }

        int64_t curoffs = 0;
        if (m_startoffs > 0 && !m_fn.empty()) {
            if (lseek(fd, m_startoffs, SEEK_SET) != m_startoffs) {
                catstrerror(m_reason, "lseek", errno);
                return false;
            }
            curoffs = m_startoffs;
        }

        char buf[RDBUFSZ];
        int64_t totread = 0;
        for (;;) {
            size_t toread = RDBUFSZ;
            // When reading stdin we can't seek: read and discard up to
            // the start offset, taking care not to read beyond it.
            if (m_startoffs > 0 && curoffs < m_startoffs) {
                toread = size_t(MIN(RDBUFSZ, m_startoffs - curoffs));
            }
            if (m_cnttoread != -1) {
                toread = MIN(toread, (uint64_t)(m_cnttoread - totread));
            }
            ssize_t n = static_cast<ssize_t>(read(fd, buf, toread));
            if (n < 0) {
                catstrerror(m_reason, "read", errno);
                return false;
            }
            if (n == 0) {
                break;
            }
            curoffs += n;
            if (curoffs - n < m_startoffs) {
                // Still in the part to be skipped.
                continue;
            }
            // Same null check as for init() above: there may be no sink.
            if (out() && !out()->data(buf, n, m_reason)) {
                return false;
            }
            totread += n;
            if (m_cnttoread > 0 && totread >= m_cnttoread) {
                break;
            }
        }
        return true;
    }

    string m_fn;
    int64_t m_startoffs;
    int64_t m_cnttoread;
    string *m_reason;
};
#if defined(READFILE_ENABLE_MINIZ)
#include "miniz.h"
// Source taking data from a ZIP archive member
// Source taking data from a ZIP archive member. The archive is either
// a file or a memory buffer, depending on the constructor used.
class FileScanSourceZip : public FileScanSource {
public:
    // Archive stored in a file.
    // @param next first element to receive the data.
    // @param fn archive file name.
    // @param member name of the archive member to extract.
    // @param reason if not null, error messages are appended to it.
    FileScanSourceZip(FileScanDo *next, const string& fn,
                      const string& member, string *reason)
        : FileScanSource(next), m_fn(fn), m_member(member),
          m_reason(reason) {}

    // Archive stored in memory. The buffer is not copied: it must stay
    // valid until scan() returns.
    FileScanSourceZip(const char *data, size_t cnt, FileScanDo *next,
                      const string& member, string *reason)
        : FileScanSource(next), m_data(data), m_cnt(cnt), m_member(member),
          m_reason(reason) {}

    // Locate the member and send its uncompressed data downstream.
    // @return true if the member was found and fully extracted.
    virtual bool scan() {
        bool ret = false;
        mz_zip_archive zip;
        mz_zip_zero_struct(&zip);
        void *opaque = this;

        bool ret1;
        if (m_fn.empty()) {
            ret1 = mz_zip_reader_init_mem(&zip, m_data, m_cnt, 0);
        } else {
            // NOTE(review): on Windows SYSPATH yields a wchar_t pointer,
            // check this against the mz_zip_reader_init_file() prototype.
            SYSPATH(m_fn, realpath);
            ret1 = mz_zip_reader_init_file(&zip, realpath, 0);
        }
        if (!ret1) {
            if (m_reason) {
                *m_reason += "mz_zip_reader_init_xx() failed: ";
                *m_reason +=
                    string(mz_zip_get_error_string(zip.m_last_error));
            }
            return false;
        }

        mz_uint32 file_index = 0;
        mz_zip_archive_file_stat zstat;
        // The _v2 call returns a boolean (false if the member is not
        // found), not an index or -1: testing for a negative value
        // would never detect the failure.
        if (!mz_zip_reader_locate_file_v2(&zip, m_member.c_str(), NULL, 0,
                                          &file_index)) {
            if (m_reason) {
                *m_reason += "mz_zip_reader_locate_file() failed: ";
                *m_reason += string(mz_zip_get_error_string(zip.m_last_error));
            }
            goto out;
        }

        if (!mz_zip_reader_file_stat(&zip, file_index, &zstat)) {
            if (m_reason) {
                *m_reason += "mz_zip_reader_file_stat() failed: ";
                *m_reason += string(mz_zip_get_error_string(zip.m_last_error));
            }
            goto out;
        }
        // The uncompressed size is known: let the sink allocate.
        if (out()) {
            if (!out()->init(zstat.m_uncomp_size, m_reason)) {
                goto out;
            }
        }

        if (!mz_zip_reader_extract_to_callback(
                &zip, file_index, write_cb, opaque, 0)) {
            if (m_reason) {
                *m_reason += "mz_zip_reader_extract_to_callback() failed: ";
                *m_reason += string(mz_zip_get_error_string(zip.m_last_error));
            }
            goto out;
        }

        ret = true;
    out:
        mz_zip_reader_end(&zip);
        return ret;
    }

    // Extraction callback: forward a piece of uncompressed data to the
    // downstream element.
    // @param pOpaque the FileScanSourceZip object.
    // @return n for success, (size_t)-1 if the sink rejected the data.
    static size_t write_cb(void *pOpaque, mz_uint64 file_ofs,
                           const void *pBuf, size_t n) {
        const char *cp = (const char*)pBuf;
        LOGDEB1("write_cb: ofs " << file_ofs << " cnt " << n << " data: " <<
                string(cp, n) << endl);
        FileScanSourceZip *ths = (FileScanSourceZip *)pOpaque;
        if (ths->out()) {
            if (!ths->out()->data(cp, n, ths->m_reason)) {
                return (size_t)-1;
            }
        }
        return n;
    }

protected:
    // In-memory archive. Only set by the memory buffer constructor,
    // hence the defaults.
    const char *m_data{nullptr};
    size_t m_cnt{0};
    // Archive file name, empty if the archive is in memory.
    string m_fn;
    string m_member;
    string *m_reason;
};
bool file_scan(const std::string& filename, const std::string& membername,
FileScanDo* doer, std::string *reason)
{ {
FileToString accum(data); if (membername.empty()) {
return file_scan(fn, &accum, offs, cnt, reason); return file_scan(filename, doer, 0, -1, reason
#ifdef READFILE_ENABLE_MD5
, nullptr
#endif
);
} else {
FileScanSourceZip source(doer, filename, membername, reason);
return source.scan();
}
}
bool string_scan(const char *data, size_t cnt, const std::string& membername,
FileScanDo* doer, std::string *reason)
{
if (membername.empty()) {
return string_scan(data, cnt, doer, reason
#ifdef READFILE_ENABLE_MD5
, nullptr
#endif
);
} else {
FileScanSourceZip source(data, cnt, doer, membername, reason);
return source.scan();
}
}
#endif // READFILE_ENABLE_ZIP
bool file_scan(const string& fn, FileScanDo* doer, int64_t startoffs,
int64_t cnttoread, string *reason
#ifdef READFILE_ENABLE_MD5
, string *md5p
#endif
)
{
LOGDEB1("file_scan: doer " << doer << endl);
#if defined(READFILE_ENABLE_ZLIB)
bool nodecomp = startoffs != 0;
#endif
if (startoffs < 0) {
startoffs = 0;
}
FileScanSourceFile source(doer, fn, startoffs, cnttoread, reason);
FileScanUpstream *up = &source;
up = up;
#if defined(READFILE_ENABLE_ZLIB)
GzFilter gzfilter;
if (!nodecomp) {
gzfilter.insertAtSink(doer, up);
up = &gzfilter;
}
#endif
#ifdef READFILE_ENABLE_MD5
// We compute the MD5 on the uncompressed data, so insert this
// right at the source (after the decompressor).
string digest;
FileScanMd5 md5filter(digest);
if (md5p) {
md5filter.insertAtSink(doer, up);
up = &md5filter;
}
#endif
bool ret = source.scan();
#ifdef READFILE_ENABLE_MD5
if (md5p) {
md5filter.finish();
MD5HexPrint(digest, *md5p);
}
#endif
return ret;
} }
bool file_scan(const string& fn, FileScanDo* doer, string *reason) bool file_scan(const string& fn, FileScanDo* doer, string *reason)
{ {
return file_scan(fn, doer, 0, size_t(-1), reason); return file_scan(fn, doer, 0, -1, reason
#ifdef READFILE_ENABLE_MD5
, nullptr
#endif
);
} }
const int RDBUFSZ = 8192;
// Note: the fstat() + reserve() (in init()) calls divide cpu usage almost by 2
// on both linux i586 and macosx (compared to just append())
// Also tried a version with mmap, but it's actually slower on the mac and not
// faster on linux.
bool file_scan(const string& fn, FileScanDo* doer, int64_t startoffs,
size_t cnttoread, string *reason)
{
if (startoffs < 0) {
*reason += " file_scan: negative startoffs not allowed";
return false;
}
bool ret = false; class FileScanSourceBuffer : public FileScanSource {
bool noclosing = true; public:
int fd = 0; FileScanSourceBuffer(FileScanDo *next, const char *data, size_t cnt,
struct stat st; string *reason)
// Initialize st_size: if fn.empty() , the fstat() call won't happen. : FileScanSource(next), m_data(data), m_cnt(cnt), m_reason(reason) {}
st.st_size = 0;
// If we have a file name, open it, else use stdin. virtual bool scan() {
if (!fn.empty()) { if (out()) {
fd = open(fn.c_str(), O_RDONLY | O_BINARY); if (!out()->init(m_cnt, m_reason)) {
if (fd < 0 || fstat(fd, &st) < 0) { return false;
catstrerror(reason, "open/stat", errno); }
return false; return out()->data(m_data, m_cnt, m_reason);
} else {
return true;
} }
noclosing = false;
} }
#if defined O_NOATIME && O_NOATIME != 0 protected:
if (fcntl(fd, F_SETFL, O_NOATIME) < 0) { const char *m_data{nullptr};
// perror("fcntl"); size_t m_cnt{0};
string *m_reason{nullptr};
};
bool string_scan(const char *data, size_t cnt, FileScanDo* doer,
std::string *reason
#ifdef READFILE_ENABLE_MD5
, std::string *md5p
#endif
)
{
FileScanSourceBuffer source(doer, data, cnt, reason);
FileScanUpstream *up = &source;
up = up;
#ifdef READFILE_ENABLE_MD5
string digest;
FileScanMd5 md5filter(digest);
if (md5p) {
md5filter.insertAtSink(doer, up);
up = &md5filter;
} }
#endif #endif
if (cnttoread != (size_t) - 1 && cnttoread) { bool ret = source.scan();
doer->init(cnttoread + 1, reason);
} else if (st.st_size > 0) { #ifdef READFILE_ENABLE_MD5
doer->init(size_t(st.st_size + 1), reason); if (md5p) {
} else { md5filter.finish();
doer->init(0, reason); MD5HexPrint(digest, *md5p);
}
int64_t curoffs = 0;
if (startoffs > 0 && !fn.empty()) {
if (lseek(fd, startoffs, SEEK_SET) != startoffs) {
catstrerror(reason, "lseek", errno);
return false;
}
curoffs = startoffs;
}
char buf[RDBUFSZ];
size_t totread = 0;
for (;;) {
size_t toread = RDBUFSZ;
if (startoffs > 0 && curoffs < startoffs) {
toread = size_t(MIN(RDBUFSZ, startoffs - curoffs));
}
if (cnttoread != size_t(-1)) {
toread = MIN(toread, cnttoread - totread);
}
ssize_t n = static_cast<ssize_t>(read(fd, buf, toread));
if (n < 0) {
catstrerror(reason, "read", errno);
goto out;
}
if (n == 0) {
break;
}
curoffs += n;
if (curoffs - n < startoffs) {
continue;
}
if (!doer->data(buf, n, reason)) {
goto out;
}
totread += n;
if (cnttoread > 0 && totread >= cnttoread) {
break;
}
}
ret = true;
out:
if (fd >= 0 && !noclosing) {
close(fd);
} }
#endif
return ret; return ret;
} }
#else // Test
#include "autoconfig.h"
#include <stdio.h>
#include <sys/types.h>
#include "safesysstat.h"
#include <stdlib.h>
#include <string>
#include <iostream>
using namespace std;
#include "readfile.h"
#include "fstreewalk.h"
using namespace std;
class myCB : public FsTreeWalkerCB {
public:
FsTreeWalker::Status processone(const string& path,
const struct stat *st,
FsTreeWalker::CbFlag flg) {
if (flg == FsTreeWalker::FtwDirEnter) {
//cout << "[Entering " << path << "]" << endl;
} else if (flg == FsTreeWalker::FtwDirReturn) {
//cout << "[Returning to " << path << "]" << endl;
} else if (flg == FsTreeWalker::FtwRegular) {
//cout << path << endl;
string s, reason;
if (!file_to_string(path, s, &reason)) {
cerr << "Failed: " << reason << " : " << path << endl;
} else {
//cout <<
//"================================================" << endl;
cout << path << endl;
// cout << s;
}
reason.clear();
}
return FsTreeWalker::FtwOk;
}
};
static int op_flags;
#define OPT_MOINS 0x1
#define OPT_c 0x2
#define OPT_o 0x4
static const char *thisprog;
static char usage [] =
"trreadfile [-o offs] [-c cnt] topdirorfile\n\n"
;
// Write the command synopsis to stderr, prefixed with the program name,
// then terminate the process with exit status 1. Does not return.
static void Usage(void)
{
    const char *progname = thisprog;
    const char *synopsis = usage;
    fprintf(stderr, "%s: usage:\n%s", progname, synopsis);
    exit(1);
}
// Test driver for the readfile module.
// Synopsis: trreadfile [-o offs] [-c cnt] topdirorfile
// If the argument is a directory, the tree is walked and every regular
// file is read through myCB (offs and cnt are not used in that case).
// Otherwise the part of the file selected by -o / -c is read with
// file_to_string() and copied to stdout.
// Exits with status 0 on success, 1 on usage, stat() or read error.
int main(int argc, const char **argv)
{
int64_t offs = 0;
// size_t(-1) is the "no limit" value for the byte count
size_t cnt = size_t(-1);
thisprog = argv[0];
argc--;
argv++;
// Option loop: one iteration per argument beginning with '-'
while (argc > 0 && **argv == '-') {
// Skip the leading '-'
(*argv)++;
if (!(**argv))
/* A lone "-" argument is not accepted (the "adb - core" case) */
{
Usage();
}
// Walk the option letters of the current argument
while (**argv)
switch (*(*argv)++) {
case 'c':
op_flags |= OPT_c;
// The value is the next argument: it must exist
if (argc < 2) {
Usage();
}
cnt = atoll(*(++argv));
argc--;
// The value was consumed, leave the letter loop
goto b1;
case 'o':
op_flags |= OPT_o;
// The value is the next argument: it must exist
if (argc < 2) {
Usage();
}
// Base 0: decimal, octal (0 prefix) or hexadecimal (0x prefix)
offs = strtoull(*(++argv), 0, 0);
argc--;
// The value was consumed, leave the letter loop
goto b1;
default:
Usage();
break;
}
b1:
// Move on to the next argument
argc--;
argv++;
}
// Exactly one non-option argument must remain: the file or directory name
if (argc != 1) {
Usage();
}
string top = *argv++;
argc--;
cerr << "filename " << top << " offs " << offs << " cnt " << cnt << endl;
struct stat st;
// An empty name is passed on to file_to_string() without a stat() call,
// so st is only consulted when top is not empty.
if (!top.empty() && stat(top.c_str(), &st) < 0) {
perror("stat");
exit(1);
}
if (!top.empty() && S_ISDIR(st.st_mode)) {
// Directory: read all the files below it, reporting through myCB
FsTreeWalker walker;
myCB cb;
walker.walk(top, cb);
if (walker.getErrCnt() > 0) {
cout << walker.getReason();
}
} else {
// Single file: read the requested range and dump it on stdout
string s, reason;
if (!file_to_string(top, s, offs, cnt, &reason)) {
cerr << reason << endl;
exit(1);
} else {
cout << s;
}
}
exit(0);
}
#endif //TEST_READFILE

View File

@ -21,30 +21,85 @@
#include <string> #include <string>
/** class FileScanUpstream;
* Read file in chunks, calling an accumulator for each chunk. Can be used
* for reading in a file, computing an md5... /** Data sink for the file reader. */
*/
class FileScanDo { class FileScanDo {
public: public:
virtual ~FileScanDo() {} virtual ~FileScanDo() {}
virtual bool init(size_t size, std::string *reason) = 0; /* Initialize and allocate.
virtual bool data(const char *buf, int cnt, std::string* reason) = 0; * @param size if set, lower bound of data size.
* @param reason[output] set to error message in case of error.
* @return false for error (file_scan will return), true if ok.
*/
virtual bool init(int64_t size, std::string *reason) = 0;
/* Process chunk of data
* @param buf the data buffer.
* @param cnt byte count.
* @param reason[output] set to error message in case of error.
* @return false for error (file_scan will return), true if ok.
*/
virtual bool data(const char *buf, int cnt, std::string *reason) = 0;
virtual void setUpstream(FileScanUpstream*) {}
}; };
bool file_scan(const std::string& filename, FileScanDo* doer, std::string *reason = 0);
/* Same but only process count cnt from offset offs. Set cnt to size_t(-1) /** Open and read file, calling the FileScanDo data() method for each chunk.
* for no limit */ *
bool file_scan(const std::string& fn, FileScanDo* doer, int64_t offs, size_t cnt, * @param filename File name. Use empty value for stdin
std::string *reason = 0);
* @param doer the data processor. The init() method will be called
* initially with a lower bound of the data size (may be used to
* reserve a buffer), or with a 0 size if nothing is known about the
* size. The data() method will be called for every chunk of data
* read.
* @param offs Start offset. If not zero, will disable decompression
* (set to -1 to start at 0 with no decompression).
* @param cnt Max bytes in output. Set cnt to -1 for no limit.
* @param[output] md5p If not null, points to a string to store the hex ascii
* md5 of the uncompressed data.
* @param[output] reason If not null, points to a string for storing an
* error message if the return value is false.
* @return true if the operation ended normally, else false.
*/
bool file_scan(const std::string& fn, FileScanDo* doer, int64_t startoffs,
int64_t cnttoread, std::string *reason
#ifdef READFILE_ENABLE_MD5
, std::string *md5p
#endif
);
/** Same as above, with no offset/cnt/md5 parameters */
bool file_scan(const std::string& filename, FileScanDo* doer,
std::string *reason);
/** Same as file_scan, from a memory buffer. No libz processing */
bool string_scan(const char *data, size_t cnt, FileScanDo* doer,
std::string *reason
#ifdef READFILE_ENABLE_MD5
, std::string *md5p
#endif
);
#if defined(READFILE_ENABLE_MINIZ)
/* Process a zip archive member */
bool file_scan(const std::string& filename, const std::string& membername,
FileScanDo* doer, std::string *reason);
bool string_scan(const char* data, size_t cnt, const std::string& membername,
FileScanDo* doer, std::string *reason);
#endif
/** /**
* Read file into string. * Read file into string.
* @return true for ok, false else * @return true for ok, false else
*/ */
bool file_to_string(const std::string& filename, std::string& data, std::string *reason = 0); bool file_to_string(const std::string& filename, std::string& data,
std::string *reason = 0);
/** Read file chunk into string. Set cnt to size_t(-1) for whole file */ /** Read file chunk into string. Set cnt to -1 for going to
* eof, offs to -1 for going from the start without decompression */
bool file_to_string(const std::string& filename, std::string& data, bool file_to_string(const std::string& filename, std::string& data,
int64_t offs, size_t cnt, std::string *reason = 0); int64_t offs, size_t cnt, std::string *reason = 0);
#endif /* _READFILE_H_INCLUDED_ */ #endif /* _READFILE_H_INCLUDED_ */

View File

@ -21,14 +21,16 @@
#include <string> #include <string>
#include <iostream> #include <iostream>
#include <mutex> #include <mutex>
using std::string;
#include <errno.h> #include <errno.h>
#include <iconv.h> #include <iconv.h>
#include <wchar.h>
#include "transcode.h" #include "transcode.h"
#include "log.h" #include "log.h"
using namespace std;
// We gain approximately 25% exec time for word at a time conversions by // We gain approximately 25% exec time for word at a time conversions by
// caching the iconv_open thing. // caching the iconv_open thing.
// //
@ -42,7 +44,7 @@ using std::string;
bool transcode(const string &in, string &out, const string &icode, bool transcode(const string &in, string &out, const string &icode,
const string &ocode, int *ecnt) const string &ocode, int *ecnt)
{ {
LOGDEB2("Transcode: " << (icode) << " -> " << (ocode) << "\n" ); LOGDEB2("Transcode: " << icode << " -> " << ocode << "\n");
#ifdef ICONV_CACHE_OPEN #ifdef ICONV_CACHE_OPEN
static iconv_t ic = (iconv_t)-1; static iconv_t ic = (iconv_t)-1;
static string cachedicode; static string cachedicode;
@ -100,8 +102,9 @@ bool transcode(const string &in, string &out, const string &icode,
" : " + strerror(errno); " : " + strerror(errno);
#endif #endif
if (errno == EILSEQ) { if (errno == EILSEQ) {
LOGDEB1("transcode:iconv: bad input seq.: shift, retry\n" ); LOGDEB1("transcode:iconv: bad input seq.: shift, retry\n");
LOGDEB1(" Input consumed " << (ip - in) << " output produced " << (out.length() + OBSIZ - osiz) << "\n" ); LOGDEB1(" Input consumed " << ip - in << " output produced " <<
out.length() + OBSIZ - osiz << "\n");
out.append(obuf, OBSIZ - osiz); out.append(obuf, OBSIZ - osiz);
out += "?"; out += "?";
mecnt++; mecnt++;
@ -144,14 +147,67 @@ error:
} }
if (mecnt) if (mecnt)
LOGDEB("transcode: [" << (icode) << "]->[" << (ocode) << "] " << (mecnt) << " errors\n" ); LOGDEB("transcode: [" << icode << "]->[" << ocode << "] " <<
mecnt << " errors\n");
if (ecnt) if (ecnt)
*ecnt = mecnt; *ecnt = mecnt;
return ret; return ret;
} }
/**
 * Convert a nul-terminated wide character string to UTF-8.
 *
 * @param in nul-terminated wchar_t input string.
 * @param[out] out receives the UTF-8 text. Any previous content is erased.
 * @return true if the whole input was converted, false if the iconv
 *   converter could not be opened or if iconv reported an error other
 *   than a full output buffer (out may then hold a partial result).
 */
bool wchartoutf8(const wchar_t *in, std::string& out)
{
    // The conversion descriptor is opened on first use and kept for the
    // following calls.
    static iconv_t ic = (iconv_t)-1;
    if (ic == (iconv_t)-1) {
        if((ic = iconv_open("UTF-8", "WCHAR_T")) == (iconv_t)-1) {
            LOGERR("wchartoutf8: iconv_open failed\n");
            return false;
        }
    }
    const int OBSIZ = 8192;
    char obuf[OBSIZ], *op;
    out.erase();
    // iconv counts input in bytes: scale the character count by the actual
    // wchar_t width instead of assuming 2-byte wide characters.
    size_t isiz = sizeof(wchar_t) * wcslen(in);
    out.reserve(isiz);
    const char *ip = (const char *)in;
    while (isiz > 0) {
        size_t osiz;
        op = obuf;
        osiz = OBSIZ;
        // E2BIG only means that obuf is full: flush it and go on converting
        if(iconv(ic, (ICONV_CONST char **)&ip, &isiz, &op, &osiz) == (size_t)-1
           && errno != E2BIG) {
            LOGERR("wchartoutf8: iconv error, errno: " << errno << endl);
            return false;
        }
        out.append(obuf, OBSIZ - osiz);
    }
    return true;
}
/**
 * Convert an UTF-8 string to a nul-terminated wide character string.
 *
 * @param in UTF-8 input.
 * @param[out] out caller-supplied output buffer.
 * @param obytescap capacity of the output buffer, in bytes. Room for one
 *   terminating wchar_t is reserved inside this capacity.
 * @return true if the whole input was converted and the output terminated.
 *   false if the buffer is null or cannot even hold the terminator, if the
 *   iconv converter could not be opened, or if iconv failed (which includes
 *   an output buffer too small for the converted text).
 */
bool utf8towchar(const std::string& in, wchar_t *out, size_t obytescap)
{
    // Reject buffers which cannot hold the terminator. This also keeps the
    // unsigned subtraction below from wrapping around.
    if (out == 0 || obytescap < sizeof(wchar_t)) {
        LOGERR("utf8towchar: output buffer too small\n");
        return false;
    }
    // The conversion descriptor is opened on first use and kept for the
    // following calls.
    static iconv_t ic = (iconv_t)-1;
    if (ic == (iconv_t)-1) {
        if((ic = iconv_open("WCHAR_T", "UTF-8")) == (iconv_t)-1) {
            LOGERR("utf8towchar: iconv_open failed\n");
            return false;
        }
    }
    size_t isiz = in.size();
    const char *ip = in.c_str();
    // Keep one wchar_t free for the terminator
    size_t osiz = obytescap - sizeof(wchar_t);
    char *op = (char *)out;
    if (iconv(ic, (ICONV_CONST char **)&ip, &isiz, &op, &osiz) == (size_t)-1) {
        LOGERR("utf8towchar: iconv error, errno: " << errno << endl);
        return false;
    }
    // Bytes written so far divided by the wchar_t width is the index of the
    // first free element: store a full-width terminator there.
    out[(op - (char *)out) / sizeof(wchar_t)] = 0;
    return true;
}
#else // -> TEST
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
@ -222,4 +278,3 @@ int main(int argc, char **argv)
exit(0); exit(0);
} }
#endif #endif

View File

@ -36,4 +36,9 @@ extern bool transcode(const std::string &in, std::string &out,
const std::string &ocode, const std::string &ocode,
int *ecnt = 0); int *ecnt = 0);
#ifdef _WIN32
extern bool wchartoutf8(const wchar_t *in, std::string& out);
extern bool utf8towchar(const std::string& in, wchar_t *out, size_t obytescap);
#endif
#endif /* _TRANSCODE_H_INCLUDED_ */ #endif /* _TRANSCODE_H_INCLUDED_ */

View File

@ -1,154 +0,0 @@
/*
Implementation of POSIX directory browsing functions and types for Win32.
Author: Kevlin Henney (kevlin@acm.org, kevlin@curbralan.com)
History: Created March 1997. Updated June 2003 and July 2012.
Rights: See end of file.
*/
#include <dirent.h>
#include <errno.h>
#include <io.h> /* _findfirst and _findnext set errno iff they return -1 */
#include <stdlib.h>
#include <string.h>
#ifdef __cplusplus
extern "C"
{
#endif
/* Integer type used to store the search handle returned by _findfirst() */
typedef ptrdiff_t handle_type; /* C99's intptr_t not sufficiently portable */
/* State of an open directory stream, allocated by opendir() and released
   by closedir(). */
struct DIR
{
handle_type handle; /* _findfirst() search handle; -1 for failed rewind */
struct _finddata_t info; /* data for the current entry, filled by _findfirst()/_findnext() */
struct dirent result; /* entry handed back by readdir(); d_name null iff first time */
char *name; /* search pattern (directory name plus wildcard), null-terminated char string */
};
DIR *opendir(const char *name)
{
DIR *dir = 0;
if(name && name[0])
{
size_t base_length = strlen(name);
const char *all = /* search pattern must end with suitable wildcard */
strchr("/\\", name[base_length - 1]) ? "*" : "/*";
if((dir = (DIR *) malloc(sizeof *dir)) != 0 &&
(dir->name = (char *) malloc(base_length + strlen(all) + 1)) != 0)
{
strcat(strcpy(dir->name, name), all);
if((dir->handle =
(handle_type) _findfirst(dir->name, &dir->info)) != -1)
{
dir->result.d_name = 0;
}
else /* rollback */
{
free(dir->name);
free(dir);
dir = 0;
}
}
else /* rollback */
{
free(dir);
dir = 0;
errno = ENOMEM;
}
}
else
{
errno = EINVAL;
}
return dir;
}
/*
    Close a directory stream and release its memory.
    Returns the _findclose() result, or -1 with errno set to EBADF when dir
    is null, when the stream has no valid search handle, or when
    _findclose() fails.
*/
int closedir(DIR *dir)
{
    int status;

    if(!dir)
    {
        errno = EBADF;
        return -1;
    }

    status = (dir->handle != -1) ? _findclose(dir->handle) : -1;
    free(dir->name);
    free(dir);

    if(status == -1) /* map all errors to EBADF */
    {
        errno = EBADF;
    }
    return status;
}
/*
    Return the next entry of a directory stream, or a null pointer when
    _findnext() reports no further entry. The returned structure lives
    inside the stream and is overwritten by the next call.
    A null dir or a stream without a valid search handle yields a null
    pointer with errno set to EBADF.
*/
struct dirent *readdir(DIR *dir)
{
    struct dirent *entry;

    if(!dir || dir->handle == -1)
    {
        errno = EBADF;
        return 0;
    }

    /* A null d_name means that the data fetched by _findfirst() was not
       returned yet: only advance the search on the following calls. */
    if(dir->result.d_name && _findnext(dir->handle, &dir->info) == -1)
    {
        return 0;
    }

    entry = &dir->result;
    entry->d_name = dir->info.name;
    entry->d_size = dir->info.size;
    entry->d_mtime = dir->info.time_write;
    entry->d_mode = (dir->info.attrib & _A_SUBDIR) ? S_IFDIR : S_IFREG;
    return entry;
}
/*
    Restart a directory stream from its first entry by closing the current
    search and starting a new one with the stored pattern.
    Sets errno to EBADF when dir is null or has no valid search handle.
*/
void rewinddir(DIR *dir)
{
    if(!dir || dir->handle == -1)
    {
        errno = EBADF;
        return;
    }

    _findclose(dir->handle);
    dir->handle = (handle_type) _findfirst(dir->name, &dir->info);
    /* null d_name: the next readdir() returns the first entry */
    dir->result.d_name = 0;
}
#ifdef __cplusplus
}
#endif
/*
Copyright Kevlin Henney, 1997, 2003, 2012. All rights reserved.
Permission to use, copy, modify, and distribute this software and its
documentation for any purpose is hereby granted without fee, provided
that this copyright and permissions notice appear in all copies and
derivatives.
This software is supplied "as is" without express or implied warranty.
But that said, if there are any problems please get in touch.
*/

View File

@ -1,57 +0,0 @@
#ifndef DIRENT_INCLUDED
#define DIRENT_INCLUDED
/*
Declaration of POSIX directory browsing functions and types for Win32.
Author: Kevlin Henney (kevlin@acm.org, kevlin@curbralan.com)
History: Created March 1997. Updated June 2003.
Rights: See end of file.
*/
#include <sys/stat.h>
#ifdef __cplusplus
extern "C"
{
#endif
typedef struct DIR DIR;
// Directory entry as returned by readdir().
struct dirent
{
char *d_name; // Entry name
// The native calls we use, findfirst/next, return the file attributes
// along with the name: no need for a separate stat() call in most cases.
// Note that ctime is actually the creation time. No use for posix.
time_t d_mtime; // Last write time
off_t d_size; // Size
int d_mode; // S_IFREG or S_IFDIR only
};
DIR *opendir(const char *);
int closedir(DIR *);
struct dirent *readdir(DIR *);
void rewinddir(DIR *);
/*
Copyright Kevlin Henney, 1997, 2003. All rights reserved.
Permission to use, copy, modify, and distribute this software and its
documentation for any purpose is hereby granted without fee, provided
that this copyright and permissions notice appear in all copies and
derivatives.
This software is supplied "as is" without express or implied warranty.
But that said, if there are any problems please get in touch.
*/
#ifdef __cplusplus
}
#endif
#endif

View File

@ -32,6 +32,7 @@
#include <psapi.h> #include <psapi.h>
#include "smallut.h" #include "smallut.h"
#include "pathut.h" #include "pathut.h"
#include "transcode.h"
using namespace std; using namespace std;
@ -752,7 +753,7 @@ int ExecCmd::startExec(const string &cmd, const vector<string>& args,
return false; return false;
} }
STARTUPINFO siStartInfo; STARTUPINFOW siStartInfo;
BOOL bSuccess = FALSE; BOOL bSuccess = FALSE;
// Set up members of the PROCESS_INFORMATION structure. // Set up members of the PROCESS_INFORMATION structure.
@ -760,8 +761,8 @@ int ExecCmd::startExec(const string &cmd, const vector<string>& args,
// Set up members of the STARTUPINFO structure. // Set up members of the STARTUPINFO structure.
// This structure specifies the STDIN and STDOUT handles for redirection. // This structure specifies the STDIN and STDOUT handles for redirection.
ZeroMemory(&siStartInfo, sizeof(STARTUPINFO)); ZeroMemory(&siStartInfo, sizeof(siStartInfo));
siStartInfo.cb = sizeof(STARTUPINFO); siStartInfo.cb = sizeof(siStartInfo);
if (m->m_flags & EXF_SHOWWINDOW) { if (m->m_flags & EXF_SHOWWINDOW) {
siStartInfo.dwFlags |= STARTF_USESTDHANDLES; siStartInfo.dwFlags |= STARTF_USESTDHANDLES;
if (m->m_flags & EXF_MAXIMIZED) { if (m->m_flags & EXF_MAXIMIZED) {
@ -782,12 +783,15 @@ int ExecCmd::startExec(const string &cmd, const vector<string>& args,
// Create the child process. // Create the child process.
// Need a writable buffer for the command line, for some reason. // Need a writable buffer for the command line, for some reason.
LOGDEB1("ExecCmd:startExec: cmdline [" << (cmdline) << "]\n" ); LOGDEB("ExecCmd:startExec: cmdline [" << cmdline << "]\n");
#if 0
LPSTR buf = (LPSTR)malloc(cmdline.size() + 1); LPSTR buf = (LPSTR)malloc(cmdline.size() + 1);
memcpy(buf, cmdline.c_str(), cmdline.size()); memcpy(buf, cmdline.c_str(), cmdline.size());
buf[cmdline.size()] = 0; buf[cmdline.size()] = 0;
bSuccess = CreateProcess(NULL, #endif
buf, // command line SYSPATH(cmdline, wcmdline);
bSuccess = CreateProcessW(NULL,
wcmdline, // command line
NULL, // process security attributes NULL, // process security attributes
NULL, // primary thread security attrs NULL, // primary thread security attrs
TRUE, // handles are inherited TRUE, // handles are inherited
@ -799,8 +803,9 @@ int ExecCmd::startExec(const string &cmd, const vector<string>& args,
if (!bSuccess) { if (!bSuccess) {
printError("ExecCmd::doexec: CreateProcess"); printError("ExecCmd::doexec: CreateProcess");
} }
free(envir); free(envir);
free(buf); // free(buf);
// Close child-side handles else we'll never see eofs // Close child-side handles else we'll never see eofs
if (!CloseHandle(hOutputWrite)) if (!CloseHandle(hOutputWrite))
printError("CloseHandle"); printError("CloseHandle");

View File

@ -47,54 +47,43 @@ application/x-lzma = uncompress python rcluncomp.py 7z %f %t
# each filter, see the examples below (ie: msword) # each filter, see the examples below (ie: msword)
[index] [index]
application/msword = execm python rcldoc.py application/msword = execm python rcldoc.py
application/pdf = execm python rclpdf.py
application/vnd.ms-excel = execm python rclxls.py application/vnd.ms-excel = execm python rclxls.py
application/vnd.ms-powerpoint = execm python rclppt.py application/vnd.ms-powerpoint = execm python rclppt.py
application/vnd.openxmlformats-officedocument.wordprocessingml.document = \
execm python rclopxml.py
application/vnd.openxmlformats-officedocument.wordprocessingml.template = \
execm python rclopxml.py
application/vnd.openxmlformats-officedocument.presentationml.template = \
execm python rclopxml.py
application/vnd.openxmlformats-officedocument.presentationml.presentation = \
execm python rclopxml.py
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = \
execm python rclopxml.py
application/vnd.openxmlformats-officedocument.spreadsheetml.template =\
execm python rclopxml.py
application/vnd.oasis.opendocument.text = execm python rclsoff.py application/pdf = execm python rclpdf.py
application/vnd.oasis.opendocument.text-template = execm python rclsoff.py
application/vnd.oasis.opendocument.presentation = execm python rclsoff.py application/vnd.oasis.opendocument.text = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.spreadsheet = execm python rclsoff.py application/vnd.oasis.opendocument.text-template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.graphics = execm python rclsoff.py application/vnd.oasis.opendocument.presentation = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.calc = execm python rclsoff.py application/vnd.oasis.opendocument.spreadsheet = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.calc.template = execm python rclsoff.py application/vnd.oasis.opendocument.graphics = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.draw = execm python rclsoff.py application/vnd.sun.xml.calc = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.draw.template = execm python rclsoff.py application/vnd.sun.xml.calc.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.impress = execm python rclsoff.py application/vnd.sun.xml.draw = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.impress.template = execm python rclsoff.py application/vnd.sun.xml.draw.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.math = execm python rclsoff.py application/vnd.sun.xml.impress = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.writer = execm python rclsoff.py application/vnd.sun.xml.impress.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.writer.global = execm python rclsoff.py application/vnd.sun.xml.math = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.writer.template = execm python rclsoff.py application/vnd.sun.xml.writer = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.writer.global = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.writer.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.wordperfect = exec wpd/wpd2html;mimetype=text/html application/vnd.wordperfect = exec wpd/wpd2html;mimetype=text/html
application/x-abiword = execm python rclabw.py application/x-abiword = internal xsltproc abiword.xsl
text/x-fictionbook = execm python rclfb2.py text/x-fictionbook = internal xsltproc fb2.xsl
application/vnd.openxmlformats-officedocument.wordprocessingml.document = \ application/vnd.openxmlformats-officedocument.wordprocessingml.document = \
execm python rclopxml.py internal xsltproc docProps/core.xml openxml-meta.xsl word/document.xml openxml-word-body.xsl
application/vnd.openxmlformats-officedocument.wordprocessingml.template = \ application/vnd.openxmlformats-officedocument.wordprocessingml.template = \
execm python rclopxml.py internal xsltproc docProps/core.xml openxml-meta.xsl word/document.xml openxml-word-body.xsl
application/vnd.openxmlformats-officedocument.presentationml.template = \ application/vnd.openxmlformats-officedocument.presentationml.template = \
execm python rclopxml.py execm python rclopxml.py
application/vnd.openxmlformats-officedocument.presentationml.presentation = \ application/vnd.openxmlformats-officedocument.presentationml.presentation = \
execm python rclopxml.py execm python rclopxml.py
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = \ application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = \
execm python rclopxml.py internal xsltproc docProps/core.xml openxml-meta.xsl xl/sharedStrings.xml openxml-xls-body.xsl
application/vnd.openxmlformats-officedocument.spreadsheetml.template =\ application/vnd.openxmlformats-officedocument.spreadsheetml.template =\
execm python rclopxml.py internal xsltproc docProps/core.xml openxml-meta.xsl xl/sharedStrings.xml openxml-xls-body.xsl
application/epub+zip = execm python rclepub application/epub+zip = execm python rclepub
# Returned by xdg-mime for .js. Future-proofing # Returned by xdg-mime for .js. Future-proofing
@ -121,17 +110,18 @@ application/x-shellscript = internal text/plain
#application/x-tar = execm python rcltar #application/x-tar = execm python rcltar
application/x-webarchive = execm python rclwar application/x-webarchive = execm python rclwar
application/x-7z-compressed = execm python rcl7z application/x-7z-compressed = execm python rcl7z
application/zip = execm python rclzip;charset=default
audio/mpeg = execm python rclaudio audio/mpeg = execm python rclaudio
audio/mp4 = execm python rclaudio audio/mp4 = execm python rclaudio
audio/aac = execm python rclaudio audio/aac = execm python rclaudio
audio/x-karaoke = execm python rclkar audio/x-karaoke = execm python rclkar
image/gif = execm python rclimg.py image/gif = execm rclimg.exe
image/jp2 = execm python rclimg.py image/jp2 = execm rclimg.exe
image/jpeg = execm python rclimg.py image/jpeg = execm rclimg.exe
image/png = execm python rclimg.py image/png = execm rclimg.exe
image/tiff = execm python rclimg.py image/tiff = execm rclimg.exe
image/svg+xml = execm python rclsvg.py image/svg+xml = internal xsltproc svg.xsl
#image/x-xcf = execm perl rclimg #image/x-xcf = execm rclimg.exe
inode/symlink = internal inode/symlink = internal
application/x-zerosize = internal application/x-zerosize = internal
inode/x-empty = internal application/x-zerosize inode/x-empty = internal application/x-zerosize

View File

@ -24,36 +24,39 @@ test -d $DESTDIR || mkdir $DESTDIR || fatal cant create $DESTDIR
# Recoll src tree # Recoll src tree
RCL=c:/recoll/src/ RCL=c:/recoll/src/
RCLW=$RCL/windows/
# Recoll dependencies
RCLDEPS=c:/recolldeps/
ReleaseBuild=y ReleaseBuild=y
UNRTF=c:/recolldeps/unrtf PYTHON=${RCLDEPS}py-python3
ANTIWORD=c:/recolldeps/antiword UNRTF=${RCLDEPS}unrtf
PYXSLT=C:/recolldeps/pyxslt ANTIWORD=${RCLDEPS}antiword
PYEXIV2=C:/recolldeps/pyexiv2 PYXSLT=${RCLDEPS}pyxslt
#LIBXAPIAN=c:/temp/xapian-core-1.2.21/.libs/libxapian-22.dll PYEXIV2=${RCLDEPS}pyexiv2
LIBXAPIAN=c:/recolldeps/xapian-core-1.4.5/.libs/libxapian-30.dll LIBXAPIAN=${RCLDEPS}xapian-core-1.4.5/.libs/libxapian-30.dll
MUTAGEN=C:/recolldeps/mutagen-1.32/ MUTAGEN=${RCLDEPS}mutagen-1.32/
EPUB=C:/recolldeps/epub-0.5.2 EPUB=${RCLDEPS}epub-0.5.2
FUTURE=C:/recolldeps/python2-future FUTURE=${RCLDEPS}python2-future
ZLIB=c:/recolldeps/zlib-1.2.8 ZLIB=${RCLDEPS}zlib-1.2.8
POPPLER=c:/recolldeps/poppler-0.36/ POPPLER=${RCLDEPS}poppler-0.36/
LIBWPD=c:/recolldeps/libwpd/libwpd-0.10.0/ LIBWPD=${RCLDEPS}libwpd/libwpd-0.10.0/
LIBREVENGE=c:/recolldeps/libwpd/librevenge-0.0.1.jfd/ LIBREVENGE=${RCLDEPS}libwpd/librevenge-0.0.1.jfd/
CHM=c:/recolldeps/pychm CHM=${RCLDEPS}pychm
MISC=${RCLDEPS}misc
# Where to find libgcc_s_dw2-1.dll for progs which need it copied
gccpath=`which gcc`
MINGWBIN=`dirname $gccpath`
# Where to copy the Qt Dlls from: # Where to copy the Qt Dlls from:
QTBIN=C:/Qt/Qt5.8.0/5.8/mingw53_32/bin QTBIN=C:/Qt/Qt5.8.0/5.8/mingw53_32/bin
QTGCCBIN=C:/qt/Qt5.8.0/Tools/mingw530_32/bin/
# Where to find libgcc_s_dw2-1.dll for progs which need it copied
MINGWBIN=$QTBIN
PATH=$MINGWBIN:$QTGCCBIN:$PATH
export PATH
# Qt arch # Qt arch
QTA=Desktop_Qt_5_8_0_MinGW_32bit QTA=Desktop_Qt_5_8_0_MinGW_32bit
RCLW=$RCL/windows/
if test X$ReleaseBuild = X'y'; then if test X$ReleaseBuild = X'y'; then
qtsdir=release qtsdir=release
else else
@ -73,9 +76,14 @@ RCLS=$RCLW/build-rclstartw-${QTA}-${qtsdir}/${qtsdir}/rclstartw.exe
################ ################
# Script: # Script:
FILTERS=$DESTDIR/Share/filters FILTERS=$DESTDIR/Share/filters
# Print the arguments as an error message on stderr, then abort the
# script with exit status 1.
fatal()
{
    # Quoted so that the message is not subject to word splitting or
    # file name expansion.
    echo "$*" >&2
    exit 1
}
# checkcopy. # checkcopy.
chkcp() chkcp()
{ {
@ -115,7 +123,12 @@ copyzlib()
{ {
chkcp $ZLIB/zlib1.dll $DESTDIR chkcp $ZLIB/zlib1.dll $DESTDIR
} }
# Copy the bundled Python tree from $PYTHON to the python subdirectory of
# the filters directory. Aborts the script if any step fails.
copypython()
{
    mkdir -p $DESTDIR/Share/filters/python || \
        fatal mkdir $DESTDIR/Share/filters/python failed
    cp -rp $PYTHON/* $DESTDIR/Share/filters/python || \
        fatal copy of $PYTHON to $DESTDIR/Share/filters/python failed
    # Explicit copy of the interpreter through chkcp
    chkcp $PYTHON/python.exe $DESTDIR/Share/filters/python/python.exe
}
copyrecoll() copyrecoll()
{ {
# bindir=$RCL/windows/$PLATFORM/$CONFIGURATION/ # bindir=$RCL/windows/$PLATFORM/$CONFIGURATION/
@ -127,6 +140,7 @@ copyrecoll()
chkcp $RCLIDX $DESTDIR chkcp $RCLIDX $DESTDIR
chkcp $RCLQ $DESTDIR chkcp $RCLQ $DESTDIR
chkcp $RCLS $DESTDIR chkcp $RCLS $DESTDIR
chkcp $MINGWBIN/libgcc_s_dw2-1.dll $DESTDIR
chkcp $RCL/COPYING $DESTDIR/COPYING.txt chkcp $RCL/COPYING $DESTDIR/COPYING.txt
chkcp $RCL/doc/user/usermanual.html $DESTDIR/Share/doc chkcp $RCL/doc/user/usermanual.html $DESTDIR/Share/doc
@ -144,6 +158,8 @@ copyrecoll()
chkcp $RCL/python/recoll/recoll/rclconfig.py $FILTERS chkcp $RCL/python/recoll/recoll/rclconfig.py $FILTERS
chkcp $RCL/python/recoll/recoll/conftree.py $FILTERS chkcp $RCL/python/recoll/recoll/conftree.py $FILTERS
chkcp $RCL/filters/* $FILTERS chkcp $RCL/filters/* $FILTERS
rm $FILTERS/rclimg $FILTERS/rclimg.py
chkcp $RCLDEPS/rclimg/rclimg.exe $FILTERS
chkcp $RCL/qtgui/mtpics/* $DESTDIR/Share/images chkcp $RCL/qtgui/mtpics/* $DESTDIR/Share/images
chkcp $RCL/qtgui/i18n/*.qm $DESTDIR/Share/translations chkcp $RCL/qtgui/i18n/*.qm $DESTDIR/Share/translations
} }
@ -169,9 +185,8 @@ copyunrtf()
chkcp $bindir/unrtf.exe $FILTERS chkcp $bindir/unrtf.exe $FILTERS
chkcp $UNRTF/outputs/*.conf $FILTERS/Share chkcp $UNRTF/outputs/*.conf $FILTERS/Share
chkcp $UNRTF/outputs/SYMBOL.charmap $FILTERS/Share chkcp $UNRTF/outputs/SYMBOL.charmap $FILTERS/Share
# libiconv2 is not present in qt, get it from mingw direct. is C, should # libiconv-2 originally comes from mingw
# be compatible chkcp $MISC/libiconv-2.dll $FILTERS
chkcp c:/MinGW/bin/libiconv-2.dll $FILTERS
} }
copymutagen() copymutagen()
@ -187,6 +202,10 @@ copyepub()
# chkcp to check that epub is where we think it is # chkcp to check that epub is where we think it is
chkcp $EPUB/build/lib/epub/opf.py $FILTERS/epub chkcp $EPUB/build/lib/epub/opf.py $FILTERS/epub
} }
# We used to copy the future module to the filters dir, but it is now
# part of the origin Python tree in recolldeps. (2 dirs:
# site-packages/builtins, site-packages/future)
copyfuture() copyfuture()
{ {
cp -rp $FUTURE/future $FILTERS/ cp -rp $FUTURE/future $FILTERS/
@ -246,6 +265,18 @@ for d in doc examples filters images translations; do
fatal mkdir $d failed fatal mkdir $d failed
done done
# First check that the config is ok
cmp -s $RCL/common/autoconfig.h $RCL/common/autoconfig-win.h || \
fatal autoconfig.h and autoconfig-win.h differ
VERSION=`cat $RCL/VERSION`
CFVERS=`grep PACKAGE_VERSION $RCL/common/autoconfig.h | \
cut -d ' ' -f 3 | sed -e 's/"//g'`
test "$VERSION" = "$CFVERS" ||
fatal Versions in VERSION and autoconfig.h differ
echo Packaging version $CFVERS
# copyrecoll must stay before copyqt so that windeployqt can do its thing # copyrecoll must stay before copyqt so that windeployqt can do its thing
copyrecoll copyrecoll
copyqt copyqt
@ -255,9 +286,10 @@ copypoppler
copyantiword copyantiword
copyunrtf copyunrtf
copyxslt copyxslt
copyfuture #copyfuture
copymutagen copymutagen
copyepub copyepub
copypyexiv2 #copypyexiv2
copywpd copywpd
copychm #copychm
copypython

View File

@ -14,6 +14,13 @@ DEFINES -= UNICODE
DEFINES -= _UNICODE DEFINES -= _UNICODE
DEFINES += _MBCS DEFINES += _MBCS
DEFINES += PSAPI_VERSION=1 DEFINES += PSAPI_VERSION=1
DEFINES += READFILE_ENABLE_MINIZ
DEFINES += READFILE_ENABLE_MD5
DEFINES += READFILE_ENABLE_ZLIB
# This is necessary to avoid an undefined impl__xmlFree.
# See comment in libxml/xmlexports.h
DEFINES += LIBXML_STATIC
SOURCES += \ SOURCES += \
../../aspell/rclaspell.cpp \ ../../aspell/rclaspell.cpp \
@ -50,6 +57,7 @@ SOURCES += \
../../internfile/mh_mail.cpp \ ../../internfile/mh_mail.cpp \
../../internfile/mh_mbox.cpp \ ../../internfile/mh_mbox.cpp \
../../internfile/mh_text.cpp \ ../../internfile/mh_text.cpp \
../../internfile/mh_xslt.cpp \
../../internfile/mimehandler.cpp \ ../../internfile/mimehandler.cpp \
../../internfile/myhtmlparse.cpp \ ../../internfile/myhtmlparse.cpp \
../../internfile/txtdcode.cpp \ ../../internfile/txtdcode.cpp \
@ -105,6 +113,7 @@ SOURCES += \
../../utils/md5.cpp \ ../../utils/md5.cpp \
../../utils/md5ut.cpp \ ../../utils/md5ut.cpp \
../../utils/mimeparse.cpp \ ../../utils/mimeparse.cpp \
../../utils/miniz.cpp \
../../utils/pathut.cpp \ ../../utils/pathut.cpp \
../../utils/pxattr.cpp \ ../../utils/pxattr.cpp \
../../utils/rclionice.cpp \ ../../utils/rclionice.cpp \
@ -114,8 +123,7 @@ SOURCES += \
../../utils/strmatcher.cpp \ ../../utils/strmatcher.cpp \
../../utils/transcode.cpp \ ../../utils/transcode.cpp \
../../utils/wipedir.cpp \ ../../utils/wipedir.cpp \
../../windows/strptime.cpp \ ../../windows/strptime.cpp
../../windows/dirent.c
INCLUDEPATH += ../../common ../../index ../../internfile ../../query \ INCLUDEPATH += ../../common ../../index ../../internfile ../../query \
../../unac ../../utils ../../aspell ../../rcldb ../../qtgui \ ../../unac ../../utils ../../aspell ../../rcldb ../../qtgui \
@ -129,10 +137,16 @@ windows {
contains(QMAKE_CC, cl){ contains(QMAKE_CC, cl){
# Visual Studio # Visual Studio
} }
LIBS += c:/temp/xapian-core-1.4.5/.libs/libxapian-30.dll \ LIBS += C:/recolldeps/libxslt/libxslt-1.1.29/win32/bin.mingw/libxslt.a \
c:/temp/zlib-1.2.8/zlib1.dll -liconv -lshlwapi -lpsapi -lkernel32 C:/recolldeps/libxml2/libxml2-2.9.4+dfsg1/win32/bin.mingw/libxml2.a \
c:/recolldeps/xapian-core-1.4.5/.libs/libxapian-30.dll \
c:/recolldeps/zlib-1.2.8/zlib1.dll \
-liconv -lshlwapi -lpsapi -lkernel32
INCLUDEPATH += ../../windows \ INCLUDEPATH += ../../windows \
C:/temp/xapian-core-1.4.5/include C:/recolldeps/xapian-core-1.4.5/include \
C:/recolldeps/libxslt/libxslt-1.1.29/ \
C:/recolldeps/libxml2/libxml2-2.9.4+dfsg1/include
} }
unix { unix {

View File

@ -19,6 +19,9 @@
#include <shellapi.h> #include <shellapi.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include "safewindows.h"
#include "pathut.h"
#include "transcode.h"
using namespace std; using namespace std;
@ -41,6 +44,10 @@ int op_flags;
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
int wargc;
wchar_t **wargv = CommandLineToArgvW(GetCommandLineW(), &wargc);
// Yes we could use wargv
thisprog = argv[0]; thisprog = argv[0];
argc--; argv++; argc--; argv++;
int imode = 0; int imode = 0;
@ -62,7 +69,9 @@ int main(int argc, char *argv[])
if (argc != 1) { if (argc != 1) {
Usage(); Usage();
} }
char *fn = strdup(argv[0]);
wchar_t *wfn = wargv[1];
// Do we need this ? // Do we need this ?
//https://msdn.microsoft.com/en-us/library/windows/desktop/bb762153%28v=vs.85%29.aspx //https://msdn.microsoft.com/en-us/library/windows/desktop/bb762153%28v=vs.85%29.aspx
//CoInitializeEx(NULL, COINIT_APARTMENTTHREADED | COINIT_DISABLE_OLE1DDE); //CoInitializeEx(NULL, COINIT_APARTMENTTHREADED | COINIT_DISABLE_OLE1DDE);
@ -73,9 +82,10 @@ int main(int argc, char *argv[])
default: wmode = SW_SHOWNORMAL; break; default: wmode = SW_SHOWNORMAL; break;
} }
int ret = (int)ShellExecute(NULL, "open", fn, NULL, NULL, wmode); int ret = (int)ShellExecuteW(NULL, L"open", wfn, NULL, NULL, wmode);
if (ret) { if (ret) {
fprintf(stderr, "ShellExecute returned %d\n", ret); fprintf(stderr, "ShellExecute returned %d\n", ret);
} }
LocalFree(wargv);
return ret; return ret;
} }

View File

@ -2,7 +2,7 @@
; SEE THE DOCUMENTATION FOR DETAILS ON CREATING INNO SETUP SCRIPT FILES! ; SEE THE DOCUMENTATION FOR DETAILS ON CREATING INNO SETUP SCRIPT FILES!
#define MyAppName "Recoll" #define MyAppName "Recoll"
#define MyAppVersion "1.24.1-20180517-96c6fd" #define MyAppVersion "1.25.0-20190125-540140bd"
#define MyAppPublisher "Recoll.org" #define MyAppPublisher "Recoll.org"
#define MyAppURL "http://www.recoll.org" #define MyAppURL "http://www.recoll.org"
#define MyAppExeName "recoll.exe" #define MyAppExeName "recoll.exe"