merged internal-xsl branch

This commit is contained in:
Jean-Francois Dockes 2019-01-30 08:47:50 +01:00
commit c0d87a3995
60 changed files with 11095 additions and 703 deletions

View File

@ -2,6 +2,8 @@
CXXFLAGS ?= @CXXFLAGS@
LIBXAPIAN=@LIBXAPIAN@
XAPIANCXXFLAGS=@XAPIANCXXFLAGS@
XSLT_CFLAGS=@XSLT_CFLAGS@
XSLT_LINKADD=@XSLT_LINKADD@
LIBICONV=@LIBICONV@
INCICONV=@INCICONV@
LIBFAM = @LIBFAM@
@ -29,8 +31,10 @@ AM_CPPFLAGS = -Wall -Wno-unused -std=c++11 \
$(COMMONCPPFLAGS) \
$(INCICONV) \
$(XAPIANCXXFLAGS) \
$(XSLT_CFLAGS) \
$(X_CFLAGS) \
-DRECOLL_DATADIR=\"${pkgdatadir}\" \
-DREADFILE_ENABLE_ZLIB -DREADFILE_ENABLE_MINIZ -DREADFILE_ENABLE_MD5 \
-D_GNU_SOURCE \
$(DEFS)
@ -121,6 +125,8 @@ internfile/mh_symlink.h \
internfile/mh_text.cpp \
internfile/mh_text.h \
internfile/mh_unknown.h \
internfile/mh_xslt.cpp \
internfile/mh_xslt.h \
internfile/mimehandler.cpp \
internfile/mimehandler.h \
internfile/myhtmlparse.cpp \
@ -224,6 +230,8 @@ utils/md5ut.cpp \
utils/md5ut.h \
utils/mimeparse.cpp \
utils/mimeparse.h \
utils/miniz.cpp \
utils/miniz.h \
utils/netcon.cpp \
utils/netcon.h \
utils/pathut.cpp \
@ -262,7 +270,7 @@ AM_YFLAGS = -d
librecoll_la_LDFLAGS = -release $(VERSION) \
-Wl,--no-undefined -Wl,--warn-unresolved-symbols
librecoll_la_LIBADD = $(LIBXAPIAN) $(LIBICONV) $(LIBTHREADS)
librecoll_la_LIBADD = $(XSLT_LINKADD) $(LIBXAPIAN) $(LIBICONV) $(LIBTHREADS)
# There is probably a better way to do this. The KIO needs to be linked
# with librecoll, but librecoll is installed into a non-standard place
@ -640,6 +648,18 @@ sampleconf/mimeview
filterdir = $(pkgdatadir)/filters
filter_DATA = \
desktop/hotrecoll.py \
filters/abiword.xsl \
filters/fb2.xsl \
filters/gnumeric.xsl \
filters/msodump.zip \
filters/okular-note.xsl \
filters/opendoc-body.xsl \
filters/opendoc-flat.xsl \
filters/opendoc-meta.xsl \
filters/openxml-xls-body.xsl \
filters/openxml-word-body.xsl \
filters/openxml-meta.xsl \
filters/ppt-dump.py \
filters/rcl7z \
filters/rclabw.py \
filters/rclaptosidman \
@ -671,19 +691,19 @@ filters/rcllatinstops.zip \
filters/rcllyx \
filters/rclman \
filters/rclmidi.py \
filters/rclpdf.py \
filters/rclps \
filters/rclokulnote.py \
filters/rclopxml.py \
filters/rclpdf.py \
filters/rclppt.py \
filters/rclps \
filters/rclpurple \
filters/rclpython \
filters/rclrar \
filters/rclrtf.py \
filters/rclscribus \
filters/rclshowinfo \
filters/rclsoff.py \
filters/rclsoff-flat.py \
filters/rclsoff.py \
filters/rclsvg.py \
filters/rcltar \
filters/rcltex \
@ -697,11 +717,11 @@ filters/rclxmp.py \
filters/rclxslt.py \
filters/rclzip \
filters/recoll-we-move-files.py \
filters/ppt-dump.py \
filters/recollepub.zip \
filters/svg.xsl \
filters/xls-dump.py \
filters/xlsxmltocsv.py \
filters/msodump.zip \
filters/recollepub.zip \
filters/xml.xsl \
python/recoll/recoll/conftree.py \
python/recoll/recoll/rclconfig.py

View File

@ -1 +1 @@
1.25.0pre0
1.25.1

View File

@ -112,7 +112,10 @@ overriden in the c++ code by ifdefs _WIN32 anyway */
#define PACKAGE_NAME "Recoll"
/* Define to the full name and version of this package. */
#define PACKAGE_STRING "Recoll 1.24.1"
#define PACKAGE_STRING "Recoll 1.25.1"
/* Define to the version of this package. */
#define PACKAGE_VERSION "1.25.1"
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "recoll"
@ -120,9 +123,6 @@ overriden in the c++ code by ifdefs _WIN32 anyway */
/* Define to the home page for this package. */
#define PACKAGE_URL ""
/* Define to the version of this package. */
#define PACKAGE_VERSION "1.24.1"
/* putenv parameter is const */
/* #undef PUTENV_ARG_CONST */

View File

@ -394,6 +394,7 @@ bool RclConfig::updateMainConfig()
setKeyDir(cstr_null);
// TextSplit customization
bool bvalue = false;
if (getConfParam("nocjk", &bvalue) && bvalue == true) {
TextSplit::cjkProcessing(false);
@ -405,16 +406,18 @@ bool RclConfig::updateMainConfig()
TextSplit::cjkProcessing(true);
}
}
bvalue = false;
if (getConfParam("nonumbers", &bvalue) && bvalue == true) {
TextSplit::noNumbers();
}
bvalue = false;
if (getConfParam("dehyphenate", &bvalue)) {
TextSplit::deHyphenate(bvalue);
}
bvalue = false;
if (getConfParam("backslashasletter", &bvalue)) {
TextSplit::backslashAsLetter(bvalue);
}
bvalue = true;
if (getConfParam("skippedPathsFnmPathname", &bvalue) && bvalue == false) {
@ -1623,7 +1626,12 @@ string RclConfig::findFilter(const string &icmd) const
// Prepend $datadir/filters
temp = path_cat(m_datadir, "filters");
PATH = temp + path_PATHsep() + PATH;
#ifdef _WIN32
// Windows only: use the bundled Python
temp = path_cat(m_datadir, "filters");
temp = path_cat(temp, "python");
PATH = temp + path_PATHsep() + PATH;
#endif
// Prepend possible configuration parameter?
if (getConfParam(string("filtersdir"), temp)) {
temp = path_tildexpand(temp);

View File

@ -137,6 +137,14 @@ public:
};
static const CharClassInit charClassInitInstance;
// Set the character class used for the backslash character: A_LLETTER when
// 'on' is true, SPACE otherwise. Driven by the "backslashasletter"
// configuration variable.
void TextSplit::backslashAsLetter(bool on)
{
    charclasses[int('\\')] = on ? A_LLETTER : SPACE;
}
static inline int whatcc(unsigned int c)
{
if (c <= 127) {

View File

@ -59,6 +59,11 @@ public:
o_deHyphenate = on;
}
// Process backslashes as letters? Default is off, but it may be
// useful for searching for tex commands. Config variable:
// backslashasletter
static void backslashAsLetter(bool on);
enum Flags {
// Default: will return spans and words (a_b, a, b)
TXTS_NONE = 0,

View File

@ -7,6 +7,11 @@ using namespace std;
string compute_utf8fn(const RclConfig *config, const string& ifn, bool simple)
{
#ifdef _WIN32
// On windows file names are read as UTF16 wchar_t and converted to UTF-8
// while scanning directories
return ifn;
#else
string charset = config->getDefCharset(true);
string utf8fn;
int ercnt;
@ -21,4 +26,5 @@ string compute_utf8fn(const RclConfig *config, const string& ifn, bool simple)
LOGDEB1("compute_utf8fn: transcoded from [" << lfn << "] to [" <<
utf8fn << "] (" << charset << "->" << "UTF-8)\n");
return utf8fn;
#endif
}

View File

@ -321,6 +321,21 @@ XAPIANCXXFLAGS=`$XAPIAN_CONFIG --cxxflags`
#echo LIBXAPIANSTATICEXTRA: $LIBXAPIANSTATICEXTRA
#echo XAPIANCXXFLAGS: $XAPIANCXXFLAGS
# Locate the libxslt configuration helper. The user may force a specific one
# by setting XSLT_CONFIG in the environment, else we look it up in the PATH.
XSLT_CONFIG=${XSLT_CONFIG:-no}
if test "$XSLT_CONFIG" = "no"; then
    AC_PATH_PROG(XSLT_CONFIG0, [xslt-config], no)
    XSLT_CONFIG=$XSLT_CONFIG0
fi
if test "$XSLT_CONFIG" = "no" ; then
    # AC_MSG_ERROR aborts configure by itself, no explicit exit is needed.
    AC_MSG_ERROR([Cannot find xslt-config command in $PATH. Is
libxslt installed ?])
fi
# Run the command which was actually selected above (possibly an absolute
# path or a user-supplied value), not whatever "xslt-config" resolves to.
XSLT_CFLAGS=`$XSLT_CONFIG --cflags`
XSLT_LINKADD=`$XSLT_CONFIG --libs`
AC_ARG_ENABLE(xadump,
AC_HELP_STRING([--enable-xadump],
[Enable building the xadump low level Xapian access program.]),
@ -527,6 +542,8 @@ AC_SUBST(QMAKE_DISABLE_ZEITGEIST)
AC_SUBST(LIBQZEITGEIST)
AC_SUBST(RCLVERSION)
AC_SUBST(RCLLIBVERSION)
AC_SUBST(XSLT_CFLAGS)
AC_SUBST(XSLT_LINKADD)
# All object files depend on localdefs which has the cc flags. Avoid
# changing it unless necessary

88
src/filters/abiword.xsl Normal file
View File

@ -0,0 +1,88 @@
<?xml version="1.0"?>
<!-- AbiWord document to HTML conversion.
     The head receives the metadata found in the namespaced ab:metadata
     element, the body receives one <p> per paragraph, for both the older
     format (no namespace) and the newer namespaced one. -->
<xsl:stylesheet version="1.0"
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  xmlns:ab="http://www.abisource.com/awml.dtd"
  exclude-result-prefixes="ab">

  <xsl:output method="html" encoding="UTF-8"/>

  <xsl:template match="/">
    <html>
      <head>
        <xsl:apply-templates select="ab:abiword/ab:metadata"/>
      </head>
      <body>
        <!-- Older abiword format: elements are in no namespace -->
        <xsl:apply-templates select="abiword/section/p"/>
        <!-- Newer namespaced format -->
        <xsl:apply-templates select="ab:abiword/ab:section/ab:p"/>
      </body>
    </html>
  </xsl:template>

  <!-- A paragraph, old or new format: its whole text value, then a newline -->
  <xsl:template match="p | ab:p">
    <p><xsl:value-of select="."/></p>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

  <!-- Metadata entries: the key attribute selects the HTML field to emit.
       Entries with any other key produce nothing. -->
  <xsl:template match="ab:metadata">
    <xsl:for-each select="ab:m">
      <xsl:choose>
        <xsl:when test="@key = 'dc.creator'">
          <meta name="author" content="{.}"/>
          <xsl:text>&#10;</xsl:text>
        </xsl:when>
        <xsl:when test="@key = 'abiword.keywords' or @key = 'dc.subject'">
          <meta name="keywords" content="{.}"/>
          <xsl:text>&#10;</xsl:text>
        </xsl:when>
        <xsl:when test="@key = 'dc.description'">
          <meta name="abstract" content="{.}"/>
          <xsl:text>&#10;</xsl:text>
        </xsl:when>
        <xsl:when test="@key = 'dc.title'">
          <title><xsl:value-of select="."/></title>
          <xsl:text>&#10;</xsl:text>
        </xsl:when>
      </xsl:choose>
    </xsl:for-each>
  </xsl:template>

</xsl:stylesheet>

56
src/filters/fb2.xsl Executable file
View File

@ -0,0 +1,56 @@
<?xml version="1.0"?>
<!-- FictionBook 2 to HTML conversion.
     The head is built from description/title-info (book title and authors),
     the body from the paragraphs of the book sections. -->
<xsl:stylesheet version="1.0"
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  xmlns:fb="http://www.gribuser.ru/xml/fictionbook/2.0"
  exclude-result-prefixes="fb">

  <xsl:output method="html" encoding="UTF-8"/>

  <xsl:template match="/fb:FictionBook">
    <html>
      <xsl:apply-templates select="fb:description"/>
      <xsl:apply-templates select="fb:body"/>
    </html>
  </xsl:template>

  <xsl:template match="fb:description">
    <head>
      <xsl:apply-templates select="fb:title-info"/>
    </head>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

  <!-- Only the book title and the author(s) are extracted as metadata -->
  <xsl:template match="fb:description/fb:title-info">
    <xsl:apply-templates select="fb:book-title"/>
    <xsl:apply-templates select="fb:author"/>
  </xsl:template>

  <xsl:template match="fb:description/fb:title-info/fb:book-title">
    <title><xsl:value-of select="."/></title>
  </xsl:template>

  <!-- Author: "first middle last", single-space separated. Missing parts
       yield empty strings, the separators are always emitted. -->
  <xsl:template match="fb:description/fb:title-info/fb:author">
    <meta name="author"
          content="{fb:first-name} {fb:middle-name} {fb:last-name}"/>
  </xsl:template>

  <xsl:template match="fb:body">
    <body>
      <xsl:apply-templates select="fb:section"/>
    </body>
  </xsl:template>

  <!-- Sections can be nested (parts containing chapters, etc.) and
       paragraphs also appear inside titles, cites and so on. Selecting only
       the direct fb:p children lost all that text, so process every
       descendant paragraph of each top-level section, in document order. -->
  <xsl:template match="fb:body/fb:section">
    <xsl:for-each select=".//fb:p">
      <p><xsl:value-of select="."/></p>
    </xsl:for-each>
  </xsl:template>

</xsl:stylesheet>

79
src/filters/gnumeric.xsl Executable file
View File

@ -0,0 +1,79 @@
<?xml version="1.0"?>
<!-- Gnumeric spreadsheet to HTML conversion.
     Metadata comes from the embedded OpenDocument office:meta element, the
     body from the cell values and from the cell comment objects. -->
<xsl:stylesheet version="1.0"
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
  xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0"
  xmlns:ooo="http://openoffice.org/2004/office"
  xmlns:gnm="http://www.gnumeric.org/v10.dtd"
  exclude-result-prefixes="office xlink meta ooo dc gnm">
  <!-- gnm is listed in exclude-result-prefixes like the other prefixes, so
       that no xmlns:gnm declaration is copied to the output elements. -->

  <xsl:output method="html" encoding="UTF-8"/>

  <xsl:template match="/">
    <html>
      <head>
        <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
        <xsl:apply-templates select="//office:document-meta/office:meta"/>
      </head>
      <body>
        <xsl:apply-templates select="//gnm:Cells"/>
        <xsl:apply-templates select="//gnm:Objects"/>
      </body>
    </html>
  </xsl:template>

  <!-- Metadata fields which we keep. These multi-step patterns have a
       default priority of 0.5. -->
  <xsl:template match="//dc:date">
    <meta name="date" content="{.}"/>
  </xsl:template>

  <xsl:template match="//dc:description">
    <meta name="abstract" content="{.}"/>
  </xsl:template>

  <xsl:template match="//meta:keyword">
    <meta name="keywords" content="{.}"/>
  </xsl:template>

  <xsl:template match="//dc:subject">
    <meta name="keywords" content="{.}"/>
  </xsl:template>

  <xsl:template match="//dc:title">
    <title><xsl:value-of select="."/></title>
  </xsl:template>

  <xsl:template match="//meta:initial-creator">
    <meta name="author" content="{.}"/>
  </xsl:template>

  <!-- Discard all other metadata elements. The pattern would also get a
       default priority of 0.5, which conflicts with the templates above for
       the fields we want (and, per the XSLT rules, the last template in the
       stylesheet could then be chosen). The explicit lower priority makes
       the specific templates win with any processor. -->
  <xsl:template match="office:meta/*" priority="0"/>

  <xsl:template match="gnm:Cell">
    <p><xsl:value-of select="."/></p>
  </xsl:template>

  <!-- The comment text is held in the Text attribute -->
  <xsl:template match="gnm:CellComment">
    <blockquote><xsl:value-of select="@Text"/></blockquote>
  </xsl:template>

</xsl:stylesheet>

40
src/filters/okular-note.xsl Executable file
View File

@ -0,0 +1,40 @@
<?xml version="1.0"?>
<!-- Okular annotations file to HTML conversion: the title is built from the
     url attribute of the documentInfo root element, the body from the text
     nodes and from the contents and author attributes found anywhere in the
     document. -->
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="html" encoding="UTF-8"/>
<!-- Drop whitespace-only text nodes from the input, so that they do not
     produce empty paragraphs through the text() template below -->
<xsl:strip-space elements="*" />
<xsl:template match="/">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<title>
Okular notes about: <xsl:value-of select="/documentInfo/@url" />
</title>
</head>
<body>
<xsl:apply-templates />
</body>
</html>
</xsl:template>
<!-- Generic traversal: unlike the built-in rule, also visit the attributes -->
<xsl:template match="node()">
<xsl:apply-templates select="@* | node() "/>
</xsl:template>
<!-- Text nodes also match node() above with the same default priority.
     NOTE(review): this template being the later one is what gets it
     selected, keep it after the node() template. -->
<xsl:template match="text()">
<p><xsl:value-of select="."/></p>
<xsl:text >
</xsl:text>
</xsl:template>
<!-- The attributes whose values we output, one paragraph each -->
<xsl:template match="@contents|@author">
<p><xsl:value-of select="." /></p>
<xsl:text >
</xsl:text>
</xsl:template>
<!-- All other attributes produce no output -->
<xsl:template match="@*"/>
</xsl:stylesheet>

View File

@ -0,0 +1,32 @@
<?xml version="1.0"?>
<!-- OpenDocument text content to HTML fragments. There is no template for
     the root here: only the text: elements below have explicit rules. -->
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0"
exclude-result-prefixes="text"
>
<xsl:output method="html" encoding="UTF-8"/>
<!-- Paragraphs and headings are both output as <p>, followed by a newline.
     Their content is processed recursively. -->
<xsl:template match="text:p">
<p><xsl:apply-templates/></p><xsl:text>
</xsl:text>
</xsl:template>
<xsl:template match="text:h">
<p><xsl:apply-templates/></p><xsl:text>
</xsl:text>
</xsl:template>
<!-- text:s is replaced by the literal content of the xsl:text below -->
<xsl:template match="text:s">
<xsl:text> </xsl:text>
</xsl:template>
<xsl:template match="text:line-break">
<br />
</xsl:template>
<!-- text:tab is replaced by the literal content of the xsl:text below -->
<xsl:template match="text:tab">
<xsl:text> </xsl:text>
</xsl:template>
</xsl:stylesheet>

109
src/filters/opendoc-flat.xsl Executable file
View File

@ -0,0 +1,109 @@
<?xml version="1.0"?>
<!-- Flat OpenDocument (single XML file) to HTML conversion: metadata from
     office:meta goes to the head, paragraphs and headings from office:body
     go to the body. -->
<xsl:stylesheet version="1.0"
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
  xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0"
  xmlns:ooo="http://openoffice.org/2004/office"
  xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0"
  exclude-result-prefixes="office xlink meta ooo dc text">

  <xsl:output method="html" encoding="UTF-8"/>

  <xsl:template match="/">
    <html>
      <head>
        <xsl:apply-templates select="/office:document/office:meta"/>
      </head>
      <body>
        <xsl:apply-templates select="/office:document/office:body"/>
      </body>
    </html>
  </xsl:template>

  <xsl:template match="/office:document/office:meta">
    <xsl:apply-templates select="dc:title"/>
    <xsl:apply-templates select="dc:description"/>
    <xsl:apply-templates select="dc:subject"/>
    <xsl:apply-templates select="meta:keyword"/>
    <xsl:apply-templates select="dc:creator"/>
  </xsl:template>

  <!-- Process paragraphs and headings together so that they come out in
       document order (separate selections put all headings after all
       paragraphs). Paragraphs or headings nested inside another one are
       skipped here because the enclosing one already outputs them while
       processing its own content. The text:s, text:line-break and text:tab
       elements are not selected at this level either: they are handled as
       part of the content of their paragraph, selecting them again only
       appended spurious spaces and line breaks to the body. -->
  <xsl:template match="/office:document/office:body">
    <xsl:apply-templates
        select="(.//text:p | .//text:h)[not(ancestor::text:p or ancestor::text:h)]"/>
  </xsl:template>

  <xsl:template match="dc:title">
    <title><xsl:value-of select="."/></title>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

  <xsl:template match="dc:description">
    <meta name="abstract" content="{.}"/>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

  <xsl:template match="dc:subject">
    <meta name="keywords" content="{.}"/>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

  <xsl:template match="dc:creator">
    <meta name="author" content="{.}"/>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

  <xsl:template match="meta:keyword">
    <meta name="keywords" content="{.}"/>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

  <!-- Paragraphs and headings are both output as <p> -->
  <xsl:template match="office:body//text:p">
    <p><xsl:apply-templates/></p>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

  <xsl:template match="office:body//text:h">
    <p><xsl:apply-templates/></p>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

  <xsl:template match="office:body//text:s">
    <xsl:text> </xsl:text>
  </xsl:template>

  <xsl:template match="office:body//text:line-break">
    <br />
  </xsl:template>

  <xsl:template match="office:body//text:tab">
    <xsl:text> </xsl:text>
  </xsl:template>

</xsl:stylesheet>

View File

@ -0,0 +1,67 @@
<?xml version="1.0"?>
<!-- OpenDocument metadata to HTML header fragments. Only the fields listed
     in the first template are output, in that order: no enclosing html or
     head element is generated here. -->
<xsl:stylesheet version="1.0"
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
  xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0"
  xmlns:ooo="http://openoffice.org/2004/office"
  exclude-result-prefixes="office xlink meta ooo dc">

  <xsl:output method="html" encoding="UTF-8"/>

  <xsl:template match="/office:document-meta">
    <xsl:apply-templates select="office:meta/dc:description"/>
    <xsl:apply-templates select="office:meta/dc:subject"/>
    <xsl:apply-templates select="office:meta/dc:title"/>
    <xsl:apply-templates select="office:meta/meta:keyword"/>
    <xsl:apply-templates select="office:meta/dc:creator"/>
  </xsl:template>

  <!-- Each field is followed by a newline in the output -->
  <xsl:template match="dc:title">
    <title><xsl:value-of select="."/></title>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

  <xsl:template match="dc:description">
    <meta name="abstract" content="{.}"/>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

  <!-- Both the subject and the keywords go to the keywords field -->
  <xsl:template match="dc:subject">
    <meta name="keywords" content="{.}"/>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

  <xsl:template match="dc:creator">
    <meta name="author" content="{.}"/>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

  <xsl:template match="meta:keyword">
    <meta name="keywords" content="{.}"/>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

</xsl:stylesheet>

View File

@ -0,0 +1,49 @@
<?xml version="1.0"?>
<!-- OpenXML core properties to HTML header fragments: outputs a
     Content-Type meta element, then author and date meta elements for
     dc:creator and dcterms:modified. All other elements are dropped. -->
<xsl:stylesheet
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
  xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
  xmlns:dcterms="http://purl.org/dc/terms/"
  xmlns:dcmitype="http://purl.org/dc/dcmitype/"
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">

  <!-- <xsl:output method="text"/> -->
  <xsl:output omit-xml-declaration="yes"/>

  <xsl:template match="cp:coreProperties">
    <xsl:text>&#10;</xsl:text>
    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
    <xsl:text>&#10;</xsl:text>
    <xsl:apply-templates/>
  </xsl:template>

  <!-- The name could be set from <xsl:value-of select="name()"/> instead, to
       output all the meta elements with the same names as in the XML (if we
       ever went dc-native). -->
  <xsl:template match="dc:creator">
    <meta name="author" content="{.}"/>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

  <xsl:template match="dcterms:modified">
    <meta name="date" content="{.}"/>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

  <!-- Any other element: no output, and no descent into its children -->
  <xsl:template match="*"/>

</xsl:stylesheet>

View File

@ -0,0 +1,27 @@
<?xml version="1.0"?>
<!-- OpenXML word processing document body to an HTML fragment: the whole
     output is wrapped in a single <div>, each w:p paragraph becomes a <p>
     holding the paragraph text value. -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
xmlns:ve="http://schemas.openxmlformats.org/markup-compatibility/2006"
xmlns:o="urn:schemas-microsoft-com:office:office"
xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math"
xmlns:v="urn:schemas-microsoft-com:vml"
xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
xmlns:w10="urn:schemas-microsoft-com:office:word"
xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml">
<!-- No output method is set: only the XML declaration is suppressed -->
<xsl:output omit-xml-declaration="yes"/>
<xsl:template match="/">
<div>
<xsl:apply-templates/>
</div>
</xsl:template>
<!-- value-of flattens the paragraph: all its descendant text, no markup -->
<xsl:template match="w:p">
<p>
<xsl:value-of select="."/>
</p>
</xsl:template>
</xsl:stylesheet>

View File

@ -0,0 +1,19 @@
<?xml version="1.0"?>
<!-- OpenXML spreadsheet text to an HTML fragment: the whole output is
     wrapped in a single <div>, each x:t element becomes a <p> holding its
     text value. -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
xmlns:x="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
<!-- No output method is set: only the XML declaration is suppressed -->
<xsl:output omit-xml-declaration="yes"/>
<xsl:template match="/">
<div>
<xsl:apply-templates/>
</div>
</xsl:template>
<xsl:template match="x:t">
<p>
<xsl:value-of select="."/>
</p>
</xsl:template>
</xsl:stylesheet>

View File

@ -12,10 +12,9 @@ import fnmatch
import rclexecm
try:
import pylzma
from py7zlib import Archive7z
except:
print("RECFILTERROR HELPERNOTFOUND python:pylzma")
print("RECFILTERROR HELPERNOTFOUND python:py7zlib")
sys.exit(1);
try:

View File

@ -15,12 +15,10 @@ if PY3:
from urllib.parse import unquote as urllib_unquote
from urllib.parse import urlparse as urlparse_urlparse
from html.parser import HTMLParser
chmpackname = 'pychm3.egg'
else:
from urlparse import urlparse as urlparse_urlparse
from urllib import unquote as urllib_unquote
from HTMLParser import HTMLParser
chmpackname = 'pychm2.egg'
import subprocess
@ -28,12 +26,12 @@ import rclconfig
import rclexecm
# pychm has no official port to Python3, hence no package in the
# standard place. Recoll bundles a python3 port which we install out
# of the standard python places. Look for it:
# sys.path[0] is for MSW, where we install the egg in the filters
# directory? TBD for now
# standard place. Linux Recoll bundles a python3 port which is identical
# to pychm, but named recollchm to avoid conflicts because it is installed
# as a normal python package (in /usr/lib/pythonxx/dist-packages,
# not recoll/filters.). No such issues on Windows
try:
# First try the system version if any
# First try the system (or recoll-local on Windows) version if any
from chm import chm,chmlib
except:
try:

View File

@ -61,7 +61,7 @@ class Executor(RclBaseHandler):
return True, postproc.wrapData()
else:
try:
fullcmd = cmd + [filename]
fullcmd = cmd + [rclexecm.subprocfile(filename)]
proc = subprocess.Popen(fullcmd,
stdout = subprocess.PIPE)
stdout = proc.stdout

View File

@ -29,13 +29,25 @@ import shutil
import getopt
import rclconfig
PY3 = sys.version > '3'
PY3 = (sys.version > '3')
_mswindows = (sys.platform == "win32")
def makebytes(data):
    """Return data as bytes.

    Unicode text is encoded as UTF-8. Any other value is returned
    unchanged.
    """
    is_text = type(data) == type(u'')
    return data.encode("UTF-8") if is_text else data
def subprocfile(fn):
    """Return a file name in a form suitable for a subprocess argument list.

    On Windows with Python 3, the list2cmdline() method in subprocess
    assumes that all args are str, while we receive file names as UTF-8
    bytes, so these are decoded. On Unix all list elements get converted to
    bytes in the C _posixsubprocess module: nothing to do.

    A value which is not a bytes object (e.g. a name which is already a str)
    is returned unchanged instead of failing on a missing decode() method.
    """
    if PY3 and _mswindows and isinstance(fn, bytes):
        return fn.decode('UTF-8')
    return fn
my_config = rclconfig.RclConfig()
############################################
@ -77,7 +89,10 @@ class RclExecM:
self.errfout = sys.stderr
def rclog(self, s, doexit = 0, exitvalue = 1):
print("RCLMFILT: %s: %s" % (self.myname, s), file=self.errfout)
# On windows, and I think that it changed quite recently (Qt change?)
# we get stdout as stderr. So don't write at all
if sys.platform != "win32":
print("RCLMFILT: %s: %s" % (self.myname, s), file=self.errfout)
if doexit:
sys.exit(exitvalue)

View File

@ -140,7 +140,7 @@ sub readparam {
# JFD: replaced the "use" call with a runtime load with error checking,
# for compat with the missing filter detection code.
#use Image::ExifTool qw(:Public);
use Image::ExifTool qw(:Public);
eval {require Image::ExifTool; Image::ExifTool->import(qw(:Public));};
if ($@) {
print "RECFILTERROR HELPERNOTFOUND Perl::Image::ExifTool\n";

View File

@ -479,7 +479,8 @@ class PDFExtractor:
print("RECFILTERROR HELPERNOTFOUND pdftotext")
sys.exit(1);
self.filename = params["filename:"]
self.filename = rclexecm.subprocfile(params["filename:"])
#self.em.rclog("openfile: [%s]" % self.filename)
self.currentindex = -1
self.attextractdone = False

View File

@ -42,7 +42,8 @@ outdir = sys.argv[3]
try:
cmd = [sevenz, "e", "-bd", "-y", "-o" + outdir, infile]
subprocess.check_output(cmd, stderr = subprocess.PIPE)
outputname = glob.glob(os.path.join(outdir, "*"))
# Don't use os.path.join, we always want to use '/'
outputname = glob.glob(outdir + "/*")
# There should be only one file in there..
print(outputname[0])
except Exception as err:

76
src/filters/svg.xsl Executable file
View File

@ -0,0 +1,76 @@
<?xml version="1.0"?>
<!-- SVG to HTML conversion: the title, description and Dublin Core metadata
     go to the head, the content of the svg:text elements goes to the
     body. -->
<xsl:stylesheet version="1.0"
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  xmlns:svg="http://www.w3.org/2000/svg"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
  exclude-result-prefixes="svg dc">
  <!-- dc is excluded like svg, so that no xmlns:dc declaration is copied to
       the output elements. -->

  <xsl:output method="html" encoding="UTF-8"/>

  <xsl:template match="/">
    <html>
      <head>
        <xsl:apply-templates select="svg:svg/svg:title"/>
        <xsl:apply-templates select="svg:svg/svg:desc"/>
        <xsl:apply-templates select="svg:svg/svg:metadata/descendant::dc:creator"/>
        <xsl:apply-templates select="svg:svg/svg:metadata/descendant::dc:subject"/>
        <xsl:apply-templates select="svg:svg/svg:metadata/descendant::dc:description"/>
      </head>
      <body>
        <xsl:apply-templates select="//svg:text"/>
      </body>
    </html>
  </xsl:template>

  <!-- The svg description is output as keywords -->
  <xsl:template match="svg:desc">
    <meta name="keywords" content="{.}"/>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

  <xsl:template match="dc:creator">
    <meta name="author" content="{.}"/>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

  <xsl:template match="dc:subject">
    <meta name="keywords" content="{.}"/>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

  <xsl:template match="dc:description">
    <meta name="description" content="{.}"/>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

  <xsl:template match="svg:title">
    <title><xsl:value-of select="."/></title>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

  <xsl:template match="svg:text">
    <p><xsl:value-of select="."/></p>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

</xsl:stylesheet>

34
src/filters/xml.xsl Executable file
View File

@ -0,0 +1,34 @@
<?xml version="1.0"?>
<!-- Generic XML to HTML conversion: every text node which is not just
     whitespace becomes a paragraph, and the first element with a local name
     of 'title', in any namespace, supplies the HTML title. -->
<xsl:stylesheet version="1.0"
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

  <xsl:output method="html" encoding="UTF-8"/>

  <xsl:template match="/">
    <!-- All the title elements, in document order -->
    <xsl:variable name="titles" select="//*[local-name() = 'title']"/>
    <html>
      <head>
        <xsl:if test="$titles">
          <title><xsl:value-of select="$titles[1]"/></title>
        </xsl:if>
      </head>
      <body>
        <xsl:apply-templates/>
      </body>
    </html>
  </xsl:template>

  <!-- Text nodes: output nothing for whitespace-only ones -->
  <xsl:template match="text()">
    <xsl:if test="normalize-space(.) != ''">
      <p><xsl:value-of select="."/></p>
      <xsl:text>&#10;</xsl:text>
    </xsl:if>
  </xsl:template>

  <!-- Elements: just walk down to the children -->
  <xsl:template match="*">
    <xsl:apply-templates/>
  </xsl:template>

</xsl:stylesheet>

View File

@ -29,17 +29,15 @@
#include <iostream>
#ifndef NO_NAMESPACES
using namespace std;
#endif /* NO_NAMESPACES */
bool MimeHandlerHtml::set_document_file_impl(const string& mt, const string &fn)
{
LOGDEB0("textHtmlToDoc: " << fn << "\n");
string otext;
if (!file_to_string(fn, otext)) {
LOGINFO("textHtmlToDoc: cant read: " << fn << "\n");
string reason;
if (!file_to_string(fn, otext, &reason)) {
LOGERR("textHtmlToDoc: cant read: " << fn << ": " << reason << "\n");
return false;
}
m_filename = fn;

View File

@ -38,9 +38,7 @@ public:
MimeHandlerMail(RclConfig *cnf, const std::string &id);
virtual ~MimeHandlerMail();
virtual bool is_data_input_ok(DataInput input) const {
if (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING)
return true;
return false;
return (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING);
}
virtual bool next_document();
virtual bool skip_to_document(const std::string& ipath);

316
src/internfile/mh_xslt.cpp Normal file
View File

@ -0,0 +1,316 @@
/* Copyright (C) 2005 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include "autoconfig.h"
#include <libxml/parser.h>
#include <libxml/tree.h>
#include <libxslt/transform.h>
#include <libxslt/xsltInternals.h>
#include <libxslt/xsltutils.h>
#include "cstr.h"
#include "mh_xslt.h"
#include "log.h"
#include "smallut.h"
#include "md5ut.h"
#include "rclconfig.h"
#include "readfile.h"
using namespace std;
// Do we need this? It would need to be called from recollinit
// Call once, not reentrant
// xmlInitParser();
// LIBXML_TEST_VERSION;
// Probably not: xmlCleanupParser();
class FileScanXML : public FileScanDo {
public:
FileScanXML(const string& fn) : m_fn(fn) {}
virtual ~FileScanXML() {
if (ctxt) {
xmlFreeParserCtxt(ctxt);
}
}
xmlDocPtr getDoc() {
int ret;
if ((ret = xmlParseChunk(ctxt, nullptr, 0, 1))) {
xmlError *error = xmlGetLastError();
LOGERR("FileScanXML: final xmlParseChunk failed with error " <<
ret << " error: " <<
(error ? error->message :
" null return from xmlGetLastError()") << "\n");
return nullptr;
}
return ctxt->myDoc;
}
virtual bool init(int64_t size, string *) {
LOGDEB1("FileScanXML: init: size " << size << endl);
ctxt = xmlCreatePushParserCtxt(NULL, NULL, NULL, 0, m_fn.c_str());
if (ctxt == nullptr) {
LOGERR("FileScanXML: xmlCreatePushParserCtxt failed\n");
return false;
} else {
return true;
}
}
virtual bool data(const char *buf, int cnt, string*) {
if (0) {
string dt(buf, cnt);
LOGDEB1("FileScanXML: data: cnt " << cnt << " data " << dt << endl);
} else {
LOGDEB1("FileScanXML: data: cnt " << cnt << endl);
}
int ret;
if ((ret = xmlParseChunk(ctxt, buf, cnt, 0))) {
xmlError *error = xmlGetLastError();
LOGERR("FileScanXML: xmlParseChunk failed with error " <<
ret << " for [" << buf << "] error " <<
(error ? error->message :
" null return from xmlGetLastError()") << "\n");
return false;
} else {
LOGDEB1("xmlParseChunk ok (sent " << cnt << " bytes)\n");
return true;
}
}
private:
xmlParserCtxtPtr ctxt{nullptr};
string m_fn;
};
// Private implementation data for MimeHandlerXslt. Holds the compiled
// stylesheets, which are owned by this object and freed by its destructor.
class MimeHandlerXslt::Internal {
public:
    Internal(MimeHandlerXslt *_p)
        : p(_p) {}
    ~Internal() {
        if (metaOrAllSS) {
            xsltFreeStylesheet(metaOrAllSS);
        }
        if (bodySS) {
            xsltFreeStylesheet(bodySS);
        }
    }
    // The stylesheets are owned raw pointers: a copy would free them a
    // second time when destroyed, so copying is disabled.
    Internal(const Internal&) = delete;
    Internal& operator=(const Internal&) = delete;

    // Load and compile the style sheet named ssnm from the filters directory
    xsltStylesheet *prepare_stylesheet(const string& ssnm);
    bool process_doc_or_string(bool forpv, const string& fn, const string& data);
    bool apply_stylesheet(
        const string& fn, const string& member, const string& data,
        xsltStylesheet *ssp, string& result, string *md5p);

    // Back pointer to the handler we belong to
    MimeHandlerXslt *p;
    // Set by the handler constructor when all needed stylesheets were loaded
    bool ok{false};
    // The handler parameters are either a single stylesheet, stored in
    // metaOrAllSS, or (metamember, meta stylesheet, bodymember, body
    // stylesheet).
    string metamember;
    xsltStylesheet *metaOrAllSS{nullptr};
    string bodymember;
    xsltStylesheet *bodySS{nullptr};
    string result;
    // Directory where the style sheet files are looked up
    string filtersdir;
};
// Destroy the private implementation object, whose destructor frees the
// compiled style sheets.
MimeHandlerXslt::~MimeHandlerXslt()
{
delete m;
}
// Build the handler from the configured parameter list, which is either
//   "xslt stylesheetall"
// or
//   "xslt metamember metastylesheet bodymember bodystylesheet"
// (the first element is not used here). The handler is only marked as
// usable (m->ok) if all the style sheets could be loaded.
MimeHandlerXslt::MimeHandlerXslt(RclConfig *cnf, const std::string& id,
                                 const std::vector<std::string>& params)
    : RecollFilter(cnf, id), m(new Internal(this))
{
    LOGDEB("MimeHandlerXslt: params: " << stringsToString(params) << endl);
    m->filtersdir = path_cat(cnf->getDatadir(), "filters");

    // libxml2 global parser defaults
    xmlSubstituteEntitiesDefault(0);
    xmlLoadExtDtdDefaultValue = 0;

    switch (params.size()) {
    case 2:
        // Single style sheet producing the whole output.
        m->metaOrAllSS = m->prepare_stylesheet(params[1]);
        m->ok = (m->metaOrAllSS != nullptr);
        break;
    case 5:
        // Separate members and style sheets for metadata and body.
        m->metamember = params[1];
        m->metaOrAllSS = m->prepare_stylesheet(params[2]);
        m->bodymember = params[3];
        m->bodySS = m->prepare_stylesheet(params[4]);
        m->ok = (m->metaOrAllSS != nullptr && m->bodySS != nullptr);
        break;
    default:
        LOGERR("MimeHandlerXslt: constructor with wrong param vector: " <<
               stringsToString(params) << endl);
        break;
    }
}
// Load the style sheet file named ssnm from the filters directory, parse
// it as XML and compile it. Returns the compiled style sheet (to be
// freed with xsltFreeStylesheet()), or nullptr on failure.
xsltStylesheet *MimeHandlerXslt::Internal::prepare_stylesheet(const string& ssnm)
{
    string ssfn = path_cat(filtersdir, ssnm);
    FileScanXML XMLstyle(ssfn);
    string reason;
    if (!file_scan(ssfn, &XMLstyle, &reason)) {
        LOGERR("MimeHandlerXslt: file_scan failed for style sheet " <<
               ssfn << " : " << reason << endl);
        return nullptr;
    }
    xmlDoc *stl = XMLstyle.getDoc();
    if (stl == nullptr) {
        LOGERR("MimeHandlerXslt: getDoc failed for style sheet " <<
               ssfn << endl);
        return nullptr;
    }
    xsltStylesheet *ss = xsltParseStylesheetDoc(stl);
    if (ss == nullptr) {
        // The style sheet only takes ownership of the document on
        // success: we have to free it ourselves here.
        LOGERR("MimeHandlerXslt: xsltParseStylesheetDoc failed for "
               "style sheet " << ssfn << endl);
        xmlFreeDoc(stl);
    }
    return ss;
}
// Parse an XML input and transform it with the given style sheet.
//
// @param fn input file name. If empty, the input is taken from 'data'.
// @param member if not empty, name of the member to extract from the
//    input and process, instead of the input itself.
// @param data input data, only used if fn is empty.
// @param ssp the style sheet to apply.
// @param result receives the serialized output of the transformation.
// @param md5p if not null, passed to the scanner, but only when
//    'member' is empty.
// @return false if reading, parsing, transforming or serializing failed.
bool MimeHandlerXslt::Internal::apply_stylesheet(
    const string& fn, const string& member, const string& data,
    xsltStylesheet *ssp, string& result, string *md5p)
{
    FileScanXML XMLdoc(fn);
    string reason;
    bool res;
    if (!fn.empty()) {
        if (member.empty()) {
            res = file_scan(fn, &XMLdoc, 0, -1, &reason, md5p);
        } else {
            res = file_scan(fn, member, &XMLdoc, &reason);
        }
    } else {
        if (member.empty()) {
            res = string_scan(data.c_str(), data.size(), &XMLdoc, &reason, md5p);
        } else {
            res = string_scan(data.c_str(), data.size(), member, &XMLdoc,
                              &reason);
        }
    }
    if (!res) {
        LOGERR("MimeHandlerXslt::set_document_: file_scan failed for "<<
               fn << " " << member << " : " << reason << endl);
        return false;
    }

    xmlDocPtr doc = XMLdoc.getDoc();
    if (nullptr == doc) {
        LOGERR("MimeHandlerXslt::set_document_: no parsed doc\n");
        return false;
    }
    xmlDocPtr transformed = xsltApplyStylesheet(ssp, doc, nullptr);
    if (nullptr == transformed) {
        LOGERR("MimeHandlerXslt::set_document_: xslt transform failed\n");
        xmlFreeDoc(doc);
        return false;
    }

    // Serialize with the style sheet which was actually applied, so
    // that its own xsl:output settings are used.
    xmlChar *outstr = nullptr;
    int outlen = 0;
    const bool saved =
        xsltSaveResultToString(&outstr, &outlen, transformed, ssp) >= 0;
    if (saved) {
        // An empty result document yields a null string pointer.
        if (outstr != nullptr && outlen > 0) {
            result.assign(reinterpret_cast<const char *>(outstr), outlen);
        } else {
            result.clear();
        }
    } else {
        LOGERR("MimeHandlerXslt::set_document_: xsltSaveResultToString "
               "failed\n");
    }
    if (outstr != nullptr) {
        xmlFree(outstr);
    }
    xmlFreeDoc(transformed);
    xmlFreeDoc(doc);
    return saved;
}
// Transform the input document and store the output in 'result'.
//
// With a single style sheet (bodySS not set), the style sheet output is
// used as is, and, except when processing for preview, the md5 value
// obtained from the scan is stored in the handler metadata.
// With two style sheets, the output is an HTML document: the metadata
// style sheet output goes into the <head> section and the body style
// sheet output goes into the <body> section.
//
// @param forpreview true if the document is processed for preview.
// @param fn input file name. If empty, the input is taken from 'data'.
// @param data input data, only used if fn is empty.
// @return false if no style sheet is set or a transformation failed.
bool MimeHandlerXslt::Internal::process_doc_or_string(
    bool forpreview, const string& fn, const string& data)
{
    if (nullptr == metaOrAllSS && nullptr == bodySS) {
        LOGERR("MimeHandlerXslt::set_document_file_impl: both ss empty??\n");
        return false;
    }
    p->m_metaData[cstr_dj_keycharset] = cstr_utf8;
    if (nullptr == bodySS) {
        string md5;
        if (apply_stylesheet(fn, string(), data, metaOrAllSS, result,
                             forpreview ? nullptr : &md5)) {
            if (!forpreview) {
                p->m_metaData[cstr_dj_keymd5] = md5;
            }
            return true;
        }
        return false;
    } else {
        // Note the space at the start of the second literal: the
        // attributes of the meta element must be separated.
        result = "<html>\n<head>\n<meta http-equiv=\"Content-Type\""
            " content=\"text/html; charset=UTF-8\">";
        string part;
        if (!apply_stylesheet(fn, metamember, data, metaOrAllSS, part, nullptr)) {
            return false;
        }
        result += part;
        result += "</head>\n<body>\n";
        if (!apply_stylesheet(fn, bodymember, data, bodySS, part, nullptr)) {
            return false;
        }
        result += part;
        result += "</body></html>";
    }
    return true;
}
// Set the input document from a file: run the transformation at once
// and, on success, flag that a document is available for next_document().
// Returns false if the handler is not usable or the processing failed.
bool MimeHandlerXslt::set_document_file_impl(const std::string& mt,
                                             const std::string &fn)
{
    LOGDEB0("MimeHandlerXslt::set_document_file_: fn: " << fn << endl);
    if (m && m->ok &&
        m->process_doc_or_string(m_forPreview, fn, string())) {
        m_havedoc = true;
        return true;
    }
    return false;
}
// Set the input document from in-memory data: run the transformation at
// once and, on success, flag that a document is available for
// next_document(). Returns false if the handler is not usable or the
// processing failed.
bool MimeHandlerXslt::set_document_string_impl(const string& mt,
                                               const string& txt)
{
    LOGDEB0("MimeHandlerXslt::set_document_string_\n");
    if (m && m->ok &&
        m->process_doc_or_string(m_forPreview, string(), txt)) {
        m_havedoc = true;
        return true;
    }
    return false;
}
// Deliver the (single) output document: the mime type is set to HTML
// and the transformation result is moved into the content metadata
// entry. Returns false if the handler is not usable or if there is no
// pending document.
bool MimeHandlerXslt::next_document()
{
    const bool usable = m && m->ok;
    if (!usable || !m_havedoc) {
        return false;
    }
    // Only one document per input
    m_havedoc = false;
    m_metaData[cstr_dj_keymt] = cstr_texthtml;
    // Swap instead of copying: m->result gets the previous content value.
    m_metaData[cstr_dj_keycontent].swap(m->result);
    LOGDEB1("MimeHandlerXslt::next_document: result: [" <<
            m_metaData[cstr_dj_keycontent] << "]\n");
    return true;
}
// Reset the per-document state: no pending document, no stored result.
void MimeHandlerXslt::clear_impl()
{
    m_havedoc = false;
    // Guard the pointer like the other entry points do.
    if (m) {
        m->result.clear();
    }
}

49
src/internfile/mh_xslt.h Normal file
View File

@ -0,0 +1,49 @@
/* Copyright (C) 2018 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifndef _MH_XSLT_H_INCLUDED_
#define _MH_XSLT_H_INCLUDED_
#include <string>
#include "mimehandler.h"
class MimeHandlerXslt : public RecollFilter {
public:
MimeHandlerXslt(RclConfig *cnf, const std::string& id,
const std::vector<std::string>& params);
virtual ~MimeHandlerXslt();
virtual bool next_document() override;
virtual void clear_impl() override;
virtual bool is_data_input_ok(DataInput input) const override {
return (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING);
}
protected:
virtual bool set_document_file_impl(const std::string& mt,
const std::string& file_path);
virtual bool set_document_string_impl(const std::string& mt,
const std::string& data);
class Internal;
private:
Internal *m{nullptr};
};
#endif /* _MH_XSLT_H_INCLUDED_ */

View File

@ -41,6 +41,7 @@ using namespace std;
#include "mh_symlink.h"
#include "mh_unknown.h"
#include "mh_null.h"
#include "mh_xslt.h"
// Performance help: we use a pool of already known and created
// handlers. There can be several instances for a given mime type
@ -137,11 +138,17 @@ void clearMimeHandlerCache()
/** For mime types set as "internal" in mimeconf:
* create appropriate handler object. */
static RecollFilter *mhFactory(RclConfig *config, const string &mime,
static RecollFilter *mhFactory(RclConfig *config, const string &mimeOrParams,
bool nobuild, string& id)
{
LOGDEB2("mhFactory(" << mime << ")\n");
string lmime(mime);
LOGDEB1("mhFactory(" << mimeOrParams << ")\n");
vector<string> lparams;
stringToStrings(mimeOrParams, lparams);
if (lparams.empty()) {
// ??
return nullptr;
}
string lmime(lparams[0]);
stringtolower(lmime);
if (cstr_textplain == lmime) {
LOGDEB2("mhFactory(" << mime << "): returning MimeHandlerText\n");
@ -160,11 +167,11 @@ static RecollFilter *mhFactory(RclConfig *config, const string &mime,
MD5String("MimeHandlerMail", id);
return nobuild ? 0 : new MimeHandlerMail(config, id);
} else if ("inode/symlink" == lmime) {
LOGDEB2("mhFactory(" << mime << "): ret MimeHandlerSymlink\n");
LOGDEB2("mhFactory(" << mime << "): returning MimeHandlerSymlink\n");
MD5String("MimeHandlerSymlink", id);
return nobuild ? 0 : new MimeHandlerSymlink(config, id);
} else if ("application/x-zerosize" == lmime) {
LOGDEB("mhFactory(" << mime << "): ret MimeHandlerNull\n");
LOGDEB("mhFactory(" << lmime << "): returning MimeHandlerNull\n");
MD5String("MimeHandlerNull", id);
return nobuild ? 0 : new MimeHandlerNull(config, id);
} else if (lmime.find("text/") == 0) {
@ -175,7 +182,11 @@ static RecollFilter *mhFactory(RclConfig *config, const string &mime,
// exec) but still opening with a specific editor.
LOGDEB2("mhFactory(" << mime << "): returning MimeHandlerText(x)\n");
MD5String("MimeHandlerText", id);
return nobuild ? 0 : new MimeHandlerText(config, id);
return nobuild ? 0 : new MimeHandlerText(config, id);
} else if ("xsltproc" == lmime) {
// XML Types processed with one or several xslt style sheets.
MD5String(mimeOrParams, id);
return nobuild ? 0 : new MimeHandlerXslt(config, id, lparams);
} else {
// We should not get there. It means that "internal" was set
// as a handler in mimeconf for a mime type we actually can't
@ -262,7 +273,7 @@ MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs,
/* Get handler/filter object for given mime type: */
RecollFilter *getMimeHandler(const string &mtype, RclConfig *cfg,
bool filtertypes)
bool filtertypes)
{
LOGDEB("getMimeHandler: mtype [" << mtype << "] filtertypes " <<
filtertypes << "\n");
@ -291,7 +302,7 @@ RecollFilter *getMimeHandler(const string &mtype, RclConfig *cfg,
}
bool internal = !stringlowercmp("internal", handlertype);
if (internal) {
// For internal types let the factory compute the id
// For internal types let the factory compute the cache id
mhFactory(cfg, cmdstr.empty() ? mtype : cmdstr, true, id);
} else {
// exec/execm: use the md5 of the def line
@ -304,16 +315,15 @@ RecollFilter *getMimeHandler(const string &mtype, RclConfig *cfg,
goto out;
LOGDEB2("getMimeHandler: " << mtype << " not in cache\n");
// Not in cache.
if (internal) {
// If there is a parameter after "internal" it's the mime
// type to use. This is so that we can have bogus mime
// types like text/x-purple-html-log (for ie: specific
// icon) and still use the html filter on them. This is
// partly redundant with the localfields/rclaptg, but
// better and the latter will probably go away at some
// point in the future.
// type to use, or the further qualifier (e.g. style sheet
// name for xslt types). This is so that we can have bogus
// mime types like text/x-purple-html-log (for ie:
// specific icon) and still use the html filter on
// them. This is partly redundant with the
// localfields/rclaptg, but better? (and the latter will
// probably go away at some point in the future?).
LOGDEB2("handlertype internal, cmdstr [" << cmdstr << "]\n");
h = mhFactory(cfg, cmdstr.empty() ? mtype : cmdstr, false, id);
goto out;
@ -336,14 +346,10 @@ RecollFilter *getMimeHandler(const string &mtype, RclConfig *cfg,
goto out;
}
}
}
// We get here if there was no specific error, but there is no
// identified mime type, or no handler associated.
// Finally, unhandled files are either ignored or their name and
// generic metadata is indexed, depending on configuration
{
} else {
// No identified mime type, or no handler associated.
// Unhandled files are either ignored or their name and
// generic metadata is indexed, depending on configuration
bool indexunknown = false;
cfg->getConfParam("indexallfilenames", &indexunknown);
if (indexunknown) {

View File

@ -118,15 +118,15 @@ bool Uncomp::uncompressfile(const string& ifn,
ExecCmd ex;
int status = ex.doexec(cmd, args, 0, &tfile);
if (status || tfile.empty()) {
LOGERR("uncompressfile: doexec: failed for [" << ifn << "] status 0x" <<
status << "\n");
LOGERR("uncompressfile: doexec: " << cmd << " " <<
stringsToString(args) << " failed for [" <<
ifn << "] status 0x" << status << "\n");
if (!m_dir->wipe()) {
LOGERR("uncompressfile: wipedir failed\n");
}
return false;
}
if (tfile[tfile.length() - 1] == '\n')
tfile.erase(tfile.length() - 1, 1);
rtrimstring(tfile, "\n\r");
m_tfile = tfile;
m_srcpath = ifn;
return true;

View File

@ -12,7 +12,7 @@ import conftree
class RclDynConf:
def __init__(self, fname):
self.data = ConfSimple(fname)
self.data = conftree.ConfSimple(fname)
def getStringList(self, sk):
nms = self.data.getNames(sk)
@ -95,6 +95,6 @@ class RclExtraDbs:
if __name__ == '__main__':
config = RclConfig()
print(config.getConfParam("topdirs"))
print("topdirs = %s" % config.getConfParam("topdirs"))
extradbs = RclExtraDbs(config)
print(extradbs.getActDbs())

View File

@ -341,7 +341,7 @@ void RclMain::startNativeViewer(Rcl::Doc doc, int pagenum, QString term)
// If using an actual file, check that it exists, and if it is
// compressed, we may need an uncompressed version
if (!fn.empty() && theconfig->mimeViewerNeedsUncomp(doc.mimetype)) {
if (access(fn.c_str(), R_OK) != 0) {
if (!path_readable(fn)) {
QMessageBox::warning(0, "Recoll",
tr("Can't access file: ") + u8s2qs(fn));
return;
@ -445,9 +445,13 @@ void RclMain::execViewer(const map<string, string>& subs, bool enterHistory,
#endif
QStatusBar *stb = statusBar();
if (stb) {
string fcharset = theconfig->getDefCharset(true);
string prcmd;
#ifdef _WIN32
prcmd = ncmd;
#else
string fcharset = theconfig->getDefCharset(true);
transcode(ncmd, prcmd, fcharset, "UTF-8");
#endif
QString msg = tr("Executing: [") +
QString::fromUtf8(prcmd.c_str()) + "]";
stb->showMessage(msg, 10000);

View File

@ -375,7 +375,9 @@ QVariant RecollModel::data(const QModelIndex& index, int role) const
string data = m_getters[index.column()](colname, doc);
#ifndef _WIN32
// Special case url, because it may not be utf-8. URL-encode in this case.
// Not on windows, where we always read the paths as Unicode.
if (!colname.compare("url")) {
int ecnt;
string data1;
@ -383,6 +385,7 @@ QVariant RecollModel::data(const QModelIndex& index, int role) const
data = url_encode(data);
}
}
#endif
list<string> lr;
g_hiliter.plaintorich(data, lr, m_hdata);

View File

@ -257,9 +257,8 @@ void Db::Native::openWrite(const string& dir, Db::OpenMode mode)
Xapian::DB_CREATE_OR_OVERWRITE;
#ifdef _WIN32
// Xapian is quite bad at erasing partial db which can
// occur because of open file deletion errors on
// Windows.
// On Windows, Xapian is quite bad at erasing partial db which can
// occur because of open file deletion errors.
if (mode == DbTrunc) {
if (path_exists(path_cat(dir, "iamchert"))) {
wipedir(dir);
@ -268,9 +267,21 @@ void Db::Native::openWrite(const string& dir, Db::OpenMode mode)
}
#endif
if (::access(dir.c_str(), 0) == 0) {
// Existing index
if (path_exists(dir)) {
// Existing index.
xwdb = Xapian::WritableDatabase(dir, action);
if (action == Xapian::DB_CREATE_OR_OVERWRITE ||
xwdb.get_doccount() == 0) {
// New or empty index. Set the "store text" option
// according to configuration. The metadata record will be
// written further down.
m_storetext = o_index_storedoctext;
LOGDEB("Db:: index " << (m_storetext?"stores":"does not store") <<
" document text\n");
} else {
// Existing non empty. Get the option from the index.
storesDocText(xwdb);
}
} else {
// New index. If possible, and depending on config, use a stub
// to force using Chert. No sense in doing this if we are
@ -301,23 +312,22 @@ void Db::Native::openWrite(const string& dir, Db::OpenMode mode)
LOGINF("Rcl::Db::openWrite: new index will " << (m_storetext?"":"not ")
<< "store document text\n");
#else
// Old Xapian (chert only) or newer (no chert). Use the
// Old Xapian (chert only) or much newer (no chert). Use the
// default index backend and let the user decide of the
// abstract generation method. The configured default is to
// store the text.
xwdb = Xapian::WritableDatabase(dir, action);
m_storetext = o_index_storedoctext;
#endif
// Set the storetext value inside the index descriptor (new
// with recoll 1.24, maybe we'll have other stuff to store in
// there in the future).
}
// If the index is empty, write the data format version,
// and the storetext option value inside the index descriptor (new
// with recoll 1.24, maybe we'll have other stuff to store in
// there in the future).
if (xwdb.get_doccount() == 0) {
string desc = string("storetext=") + (m_storetext ? "1" : "0") + "\n";
xwdb.set_metadata(cstr_RCL_IDX_DESCRIPTOR_KEY, desc);
}
// If the index is empty, write the data format version at once
// to avoid stupid error messages:
if (xwdb.get_doccount() == 0) {
xwdb.set_metadata(cstr_RCL_IDX_VERSION_KEY, cstr_RCL_IDX_VERSION);
}
@ -328,21 +338,26 @@ void Db::Native::openWrite(const string& dir, Db::OpenMode mode)
#endif
}
void Db::Native::openRead(const string& dir)
void Db::Native::storesDocText(Xapian::Database& db)
{
m_iswritable = false;
xrdb = Xapian::Database(dir);
string desc = xrdb.get_metadata(cstr_RCL_IDX_DESCRIPTOR_KEY);
string desc = db.get_metadata(cstr_RCL_IDX_DESCRIPTOR_KEY);
ConfSimple cf(desc, 1);
string val;
m_storetext = false;
if (cf.get("storetext", val) && stringToBool(val)) {
m_storetext = true;
}
LOGDEB("Db::openRead: index " << (m_storetext?"stores":"does not store") <<
LOGDEB("Db:: index " << (m_storetext?"stores":"does not store") <<
" document text\n");
}
// Open the index stored in dir through a Xapian::Database object, mark
// this object as not writable, then let storesDocText() set m_storetext
// from the index metadata.
void Db::Native::openRead(const string& dir)
{
m_iswritable = false;
xrdb = Xapian::Database(dir);
storesDocText(xrdb);
}
/* See comment in class declaration: return all subdocuments of a
* document given by its unique id. */
bool Db::Native::subDocs(const string &udi, int idxi,

View File

@ -105,6 +105,10 @@ class Db::Native {
void openWrite(const std::string& dir, Db::OpenMode mode);
void openRead(const string& dir);
// Determine if an existing index is of the full-text-storing kind
// by looking at the index metadata. Stores the result in m_storetext
void storesDocText(Xapian::Database&);
// Final steps of doc update, part which need to be single-threaded
bool addOrUpdateWrite(const string& udi, const string& uniterm,

View File

@ -79,38 +79,38 @@ application/postscript = exec rclps
application/sql = internal text/plain
application/vnd.ms-excel = execm rclxls.py
application/vnd.ms-powerpoint = execm rclppt.py
application/vnd.oasis.opendocument.text = execm rclsoff.py
application/vnd.oasis.opendocument.text-template = execm rclsoff.py
application/vnd.oasis.opendocument.presentation = execm rclsoff.py
application/vnd.oasis.opendocument.spreadsheet = execm rclsoff.py
application/vnd.oasis.opendocument.graphics = execm rclsoff.py
application/vnd.oasis.opendocument.presentation-flat-xml = execm rclsoff-flat.py
application/vnd.oasis.opendocument.text-flat-xml = execm rclsoff-flat.py
application/vnd.oasis.opendocument.spreadsheet-flat-xml = execm rclsoff-flat.py
application/vnd.oasis.opendocument.text = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.text-template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.presentation = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.spreadsheet = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.graphics = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.presentation-flat-xml = internal xsltproc opendoc-flat.xsl
application/vnd.oasis.opendocument.text-flat-xml = internal xsltproc opendoc-flat.xsl
application/vnd.oasis.opendocument.spreadsheet-flat-xml = internal xsltproc opendoc-flat.xsl
application/vnd.openxmlformats-officedocument.wordprocessingml.document = \
execm rclopxml.py
internal xsltproc docProps/core.xml openxml-meta.xsl word/document.xml openxml-word-body.xsl
application/vnd.openxmlformats-officedocument.wordprocessingml.template = \
execm rclopxml.py
internal xsltproc docProps/core.xml openxml-meta.xsl word/document.xml openxml-word-body.xsl
application/vnd.openxmlformats-officedocument.presentationml.template = \
execm rclopxml.py
application/vnd.openxmlformats-officedocument.presentationml.presentation = \
execm rclopxml.py
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = \
execm rclopxml.py
internal xsltproc docProps/core.xml openxml-meta.xsl xl/sharedStrings.xml openxml-xls-body.xsl
application/vnd.openxmlformats-officedocument.spreadsheetml.template =\
execm rclopxml.py
application/vnd.sun.xml.calc = execm rclsoff.py
application/vnd.sun.xml.calc.template = execm rclsoff.py
application/vnd.sun.xml.draw = execm rclsoff.py
application/vnd.sun.xml.draw.template = execm rclsoff.py
application/vnd.sun.xml.impress = execm rclsoff.py
application/vnd.sun.xml.impress.template = execm rclsoff.py
application/vnd.sun.xml.math = execm rclsoff.py
application/vnd.sun.xml.writer = execm rclsoff.py
application/vnd.sun.xml.writer.global = execm rclsoff.py
application/vnd.sun.xml.writer.template = execm rclsoff.py
internal xsltproc docProps/core.xml openxml-meta.xsl xl/sharedStrings.xml openxml-xls-body.xsl
application/vnd.sun.xml.calc = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.calc.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.draw = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.draw.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.impress = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.impress.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.math = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.writer = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.writer.global = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.writer.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.wordperfect = exec wpd2html;mimetype=text/html
application/x-abiword = execm rclabw.py
application/x-abiword = internal xsltproc abiword.xsl
application/x-awk = internal text/plain
application/x-chm = execm rclchm
application/x-dia-diagram = execm rcldia;mimetype=text/plain
@ -118,12 +118,12 @@ application/x-dvi = exec rcldvi
application/x-flac = execm rclaudio
application/x-gnote = execm rclxml.py
application/x-gnuinfo = execm rclinfo
application/x-gnumeric = execm rclgnm.py
application/x-gnumeric = internal xsltproc gnumeric.xsl
application/x-kword = exec rclkwd
application/x-lyx = exec rcllyx
application/x-mimehtml = internal message/rfc822
#application/x-mobipocket-ebook = execm rclmobi
application/x-okular-notes = execm rclokulnote.py
application/x-okular-notes = internal xsltproc okular-note.xsl
application/x-perl = internal text/plain
# Returned by xdg-mime for .php. Future-proofing
application/x-php = internal text/plain
@ -149,7 +149,7 @@ image/jpeg = execm rclimg
image/png = execm rclimg
image/tiff = execm rclimg
image/vnd.djvu = execm rcldjvu.py
image/svg+xml = execm rclsvg.py
image/svg+xml = internal xsltproc svg.xsl
image/x-xcf = execm rclimg
image/x-nikon-nef = execm rclimg
inode/symlink = internal
@ -168,7 +168,7 @@ text/css = internal text/plain
application/javascript = internal text/plain
text/x-bibtex = exec rclbibtex.sh ; mimetype = text/plain
text/x-csv = internal text/plain
text/x-fictionbook = execm rclfb2.py
text/x-fictionbook = internal xsltproc fb2.xsl
text/x-gaim-log = exec rclgaim
text/x-html-aptosid-man = exec rclaptosidman
text/x-lua = internal
@ -190,7 +190,7 @@ text/x-tex = exec rcltex
# All parameter and tag names, attribute values etc, are indexed as
# text. rclxml.py tries to just index the text content.
#application/xml = execm rclxml.py
#text/xml = execm rclxml.py
#text/xml = internal xsltproc xml.xsl
application/xml = internal text/plain
text/xml = internal text/plain

View File

@ -507,6 +507,13 @@ int ConfSimple::eraseKey(const string& sk)
return write();
}
// Clear all content (m_submaps and m_order), then call write().
// Returns the value returned by write().
int ConfSimple::clear()
{
m_submaps.clear();
m_order.clear();
return write();
}
// Walk the tree, calling user function at each node
ConfSimple::WalkerCode
ConfSimple::sortwalk(WalkerCode(*walker)(void *, const string&, const string&),
@ -692,6 +699,13 @@ bool ConfSimple::commentsAsXML(ostream& out)
}
break;
}
case ConfLine::CFL_SK:
out << "<subkey>" << it->m_data << "</subkey>" << endl;
break;
case ConfLine::CFL_VAR:
out << "<varsetting>" << it->m_data << " = " <<
it->m_value << "</varsetting>" << endl;
break;
default:
break;
}

View File

@ -159,10 +159,7 @@ public:
void reparse(const std::string& in);
/** Clear all content */
void clear() {
m_submaps.clear();
m_order.clear();
}
int clear();
/**
* Get string value for named parameter, from specified subsection (looks

View File

@ -36,6 +36,7 @@
#include "log.h"
#include "pathut.h"
#include "fstreewalk.h"
#include "transcode.h"
using namespace std;
@ -310,6 +311,20 @@ FsTreeWalker::Status FsTreeWalker::walk(const string& _top,
return FtwOk;
}
#ifdef _WIN32
#define DIRENT _wdirent
#define DIRHDL _WDIR
#define OPENDIR _wopendir
#define CLOSEDIR _wclosedir
#define READDIR _wreaddir
#else
#define DIRENT dirent
#define DIRHDL DIR
#define OPENDIR opendir
#define CLOSEDIR closedir
#define READDIR readdir
#endif
// Note that the 'norecurse' flag is handled as part of the directory read.
// This means that we always go into the top 'walk()' parameter if it is a
// directory, even if norecurse is set. Bug or Feature ?
@ -341,24 +356,25 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top,
// This is a directory, read it and process entries:
#ifndef _WIN32
// Detect if directory already seen. This could just be several
// symlinks pointing to the same place (if FtwFollow is set), it
// could also be some other kind of cycle. In any case, there is
// no point in entering again.
// For now, we'll ignore the "other kind of cycle" part and only monitor
// this if FtwFollow is set
#ifndef _WIN32
if (data->options & FtwFollow) {
DirId dirid(stp->st_dev, stp->st_ino);
if (data->donedirs.find(dirid) != data->donedirs.end()) {
LOGINFO("Not processing [" << (top) << "] (already seen as other path)\n" );
LOGINFO("Not processing [" << top <<
"] (already seen as other path)\n");
return status;
}
data->donedirs.insert(dirid);
}
#endif
DIR *d = opendir(top.c_str());
SYSPATH(top, systop);
DIRHDL *d = OPENDIR(systop);
if (d == 0) {
data->logsyserr("opendir", top);
switch (errno) {
@ -376,42 +392,38 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top,
}
}
struct dirent *ent;
while ((ent = readdir(d)) != 0) {
struct DIRENT *ent;
while ((ent = READDIR(d)) != 0) {
string fn;
struct stat st;
#ifdef _WIN32
string sdname;
if (!wchartoutf8(ent->d_name, sdname)) {
LOGERR("wchartoutf8 failed in " << top << endl);
continue;
}
const char *dname = sdname.c_str();
#else
const char *dname = ent->d_name;
#endif
// Maybe skip dotfiles
if ((data->options & FtwSkipDotFiles) && ent->d_name[0] == '.')
if ((data->options & FtwSkipDotFiles) && dname[0] == '.')
continue;
// Skip . and ..
if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, ".."))
if (!strcmp(dname, ".") || !strcmp(dname, ".."))
continue;
// Skipped file names match ?
if (!data->skippedNames.empty()) {
if (inSkippedNames(ent->d_name))
if (inSkippedNames(dname))
continue;
}
fn = path_cat(top, ent->d_name);
#ifdef _WIN32
// readdir gets the useful attrs, no inode indirection on windows,
// spare the path_fileprops() call, but make sure we mimick it.
memset(&st, 0, sizeof(st));
st.st_mtime = ent->d_mtime;
st.st_size = ent->d_size;
st.st_mode = ent->d_mode;
// ctime is really creation time on Windows. Just use mtime
// for all. We only use ctime on Unix to catch xattr changes
// anyway.
st.st_ctime = st.st_mtime;
#else
fn = path_cat(top, dname);
int statret = path_fileprops(fn.c_str(), &st, data->options&FtwFollow);
if (statret == -1) {
data->logsyserr("stat", fn);
continue;
}
#endif
if (!data->skippedPaths.empty()) {
// We do not check the ancestors. This means that you can have
@ -461,7 +473,7 @@ FsTreeWalker::Status FsTreeWalker::iwalk(const string &top,
out:
if (d)
closedir(d);
CLOSEDIR(d);
return status;
}

View File

@ -20,15 +20,17 @@
#include <string.h>
#include "md5ut.h"
#include "readfile.h"
using namespace std;
class FileScanMd5 : public FileScanDo {
// Quite incredibly if this class is named FileScanMd5 like the
// different one in readfile.cpp, the vtables get mixed up and mh_xslt
// crashes while calling a virtual function (gcc 6.3 and 7.3)
class FileScanMd5loc : public FileScanDo {
public:
FileScanMd5(string& d) : digest(d) {}
virtual bool init(size_t size, string *)
FileScanMd5loc(string& d) : digest(d) {}
virtual bool init(int64_t, string *)
{
MD5Init(&ctx);
return true;
@ -44,7 +46,7 @@ public:
bool MD5File(const string& filename, string &digest, string *reason)
{
FileScanMd5 md5er(digest);
FileScanMd5loc md5er(digest);
if (!file_scan(filename, &md5er, reason))
return false;
// We happen to know that digest and md5er.digest are the same object

7573
src/utils/miniz.cpp Normal file

File diff suppressed because it is too large Load Diff

1325
src/utils/miniz.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -24,13 +24,19 @@
#include <stdio.h>
#include <math.h>
#include <errno.h>
#include <dirent.h>
#ifdef _WIN32
#include "dirent.h"
#include "safefcntl.h"
#include "safeunistd.h"
#include "safewindows.h"
#include "safesysstat.h"
#include "transcode.h"
#define STAT _wstat
#define LSTAT _wstat
#define STATBUF _stat
#define ACCESS _waccess
#else // Not windows ->
#include <fcntl.h>
@ -39,10 +45,13 @@
#include <pwd.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <dirent.h>
#include <sys/statvfs.h>
#include <sys/types.h>
#define STAT stat
#define LSTAT lstat
#define STATBUF stat
#define ACCESS access
#endif
#include <cstdlib>
@ -506,8 +515,9 @@ bool path_makepath(const string& ipath, int mode)
bool path_isdir(const string& path)
{
struct stat st;
if (lstat(path.c_str(), &st) < 0) {
struct STATBUF st;
SYSPATH(path, syspath);
if (LSTAT(syspath, &st) < 0) {
return false;
}
if (S_ISDIR(st.st_mode)) {
@ -518,8 +528,9 @@ bool path_isdir(const string& path)
long long path_filesize(const string& path)
{
struct stat st;
if (stat(path.c_str(), &st) < 0) {
struct STATBUF st;
SYSPATH(path, syspath);
if (STAT(syspath, &st) < 0) {
return -1;
}
return (long long)st.st_size;
@ -531,8 +542,9 @@ int path_fileprops(const std::string path, struct stat *stp, bool follow)
return -1;
}
memset(stp, 0, sizeof(struct stat));
struct stat mst;
int ret = follow ? stat(path.c_str(), &mst) : lstat(path.c_str(), &mst);
struct STATBUF mst;
SYSPATH(path, syspath);
int ret = follow ? STAT(syspath, &mst) : LSTAT(syspath, &mst);
if (ret != 0) {
return ret;
}
@ -551,7 +563,13 @@ int path_fileprops(const std::string path, struct stat *stp, bool follow)
bool path_exists(const string& path)
{
return access(path.c_str(), 0) == 0;
SYSPATH(path, syspath);
return ACCESS(syspath, 0) == 0;
}
// Test that path can be accessed for reading (R_OK). SYSPATH declares
// syspath as the system form of the path: see the macro definition.
// Returns true if the ACCESS call succeeds.
bool path_readable(const string& path)
{
SYSPATH(path, syspath);
return ACCESS(syspath, R_OK) == 0;
}
// Allowed punctuation in the path part of an URI according to RFC2396

View File

@ -87,10 +87,20 @@ extern int path_fileprops(const std::string path, struct stat *stp,
/// Returns true if last elt could be checked to exist. False may mean that
/// the file/dir does not exist or that an error occurred.
extern bool path_exists(const std::string& path);
/// Same but must be readable
extern bool path_readable(const std::string& path);
/// Return separator for PATH environment variable
extern std::string path_PATHsep();
#ifdef _WIN32
#define SYSPATH(PATH, SPATH) wchar_t PATH ## _buf[2048]; \
utf8towchar(PATH, PATH ## _buf, 2048); \
wchar_t *SPATH = PATH ## _buf;
#else
#define SYSPATH(PATH, SPATH) const char *SPATH = PATH.c_str()
#endif
/// Dump directory
extern bool readdir(const std::string& dir, std::string& reason,
std::set<std::string>& entries);

View File

@ -164,10 +164,15 @@ const string& path_pkgdatadir()
// into either utf-8 if transcoding succeeds, or url-encoded
bool printableUrl(const string& fcharset, const string& in, string& out)
{
#ifdef _WIN32
// On windows our paths are always utf-8
out = in;
#else
// Try to transcode from fcharset to UTF-8. If the call fails or reports
// errors (ecnt not zero), fall back to the url-encoded input.
int ecnt = 0;
if (!transcode(in, out, fcharset, "UTF-8", &ecnt) || ecnt) {
out = url_encode(in, 7);
}
#endif
// Always succeeds: out is set in all cases.
return true;
}

View File

@ -14,43 +14,67 @@
* Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifndef TEST_READFILE
#ifdef BUILDING_RECOLL
#include "autoconfig.h"
#else
#include "config.h"
#endif
#include "readfile.h"
#include <errno.h>
#include <sys/types.h>
#ifdef _WIN32
#include "safefcntl.h"
#include "safesysstat.h"
#include "safeunistd.h"
#include "transcode.h"
#define OPEN _wopen
#else
#define O_BINARY 0
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#define OPEN open
#endif
#include <string>
#include "readfile.h"
#include "smallut.h"
#include "pathut.h"
using std::string;
#ifdef READFILE_ENABLE_MD5
#include "md5.h"
#endif
#ifdef MDU_INCLUDE_LOG
#include MDU_INCLUDE_LOG
#else
#include "log.h"
#endif
using namespace std;
///////////////
// Implementation of basic interface: read whole file to memory buffer
class FileToString : public FileScanDo {
public:
FileToString(string& data) : m_data(data) {}
string& m_data;
bool init(size_t size, string *reason) {
// Note: the fstat() + reserve() (in init()) calls divide cpu
// usage almost by 2 on both linux i586 and macosx (compared to
// just append()) Also tried a version with mmap, but it's
// actually slower on the mac and not faster on linux.
virtual bool init(int64_t size, string *reason) {
if (size > 0) {
m_data.reserve(size);
}
return true;
}
bool data(const char *buf, int cnt, string *reason) {
virtual bool data(const char *buf, int cnt, string *reason) {
try {
m_data.append(buf, cnt);
} catch (...) {
@ -59,248 +83,558 @@ public:
}
return true;
}
string& m_data;
};
// Read a chunk of file into 'data'.
// @param fn file name, passed unchanged to file_scan().
// @param data output string, appended to by the FileToString accumulator.
// @param offs start offset, passed unchanged to file_scan().
// @param cnt maximum byte count, size_t(-1) for no limit.
// @param reason if not null, receives an error message on failure.
// @return the file_scan() status.
bool file_to_string(const string& fn, string& data, int64_t offs, size_t cnt,
                    string *reason)
{
    // file_scan() takes a signed 64 bits count where -1 means "no
    // limit". Map size_t(-1) explicitly: a plain conversion would
    // yield 4294967295 instead of -1 when size_t is 32 bits wide.
    const int64_t cnttoread =
        (cnt == size_t(-1)) ? int64_t(-1) : static_cast<int64_t>(cnt);
    FileToString accum(data);
    return file_scan(fn, &accum, offs, cnttoread, reason
#ifdef READFILE_ENABLE_MD5
                     , nullptr
#endif
        );
}
// Read file into 'data': delegates to the offset/count variant with a
// zero start offset and size_t(-1) as byte count.
bool file_to_string(const string& fn, string& data, string *reason)
{
return file_to_string(fn, data, 0, size_t(-1), reason);
}
bool file_to_string(const string& fn, string& data, int64_t offs, size_t cnt,
string *reason)
/////////////
// Callback/filtering interface
// Abstract class base for both source (origin) and filter
// (midstream). Both have a downstream
class FileScanUpstream {
public:
    // This is a polymorphic base: give it a virtual destructor so that
    // destroying a derived object through a base pointer is well defined.
    virtual ~FileScanUpstream() = default;

    // Set the element which will receive our output
    virtual void setDownstream(FileScanDo *down) {
        m_down = down;
    }
    // Return the current output element (null if none was set)
    virtual FileScanDo *out() {
        return m_down;
    }
protected:
    // Not owned: this class never deletes it
    FileScanDo *m_down{nullptr};
};
// Source element.
class FileScanSource : public FileScanUpstream {
public:
    // Record the first consumer of our data
    FileScanSource(FileScanDo *down) {
        m_down = down;
    }
    // Produce the data and push it downstream. Implemented by the
    // concrete sources.
    virtual bool scan() = 0;
};
// Inside element of a transformation pipe. The idea is that elements
// which don't recognize the data get themselves out of the pipe
// (pop()). Typically, only one of the decompression modules
// (e.g. gzip/bzip2/xz...) would remain. For now there is only gzip,
// it pops itself if the data does not have the right magic number
class FileScanFilter : public FileScanDo, public FileScanUpstream {
public:
    // Link this filter between 'upstream' and 'sink': both neighbours
    // (when not null) are told to talk to us from now on.
    virtual void insertAtSink(FileScanDo *sink, FileScanUpstream *upstream) {
        setDownstream(sink);
        setUpstream(upstream);
        if (nullptr != m_down) {
            m_down->setUpstream(this);
        }
        if (nullptr != m_up) {
            m_up->setDownstream(this);
        }
    }

    // Remove myself from the pipe: connect my neighbours directly to
    // each other. My own links are left as they were.
    virtual void pop() {
        if (nullptr != m_down) {
            m_down->setUpstream(m_up);
        }
        if (nullptr != m_up) {
            m_up->setDownstream(m_down);
        }
    }

    // Remember who is feeding us
    virtual void setUpstream(FileScanUpstream *up) override {
        m_up = up;
    }

private:
    FileScanUpstream *m_up{nullptr};
};
#if defined(READFILE_ENABLE_ZLIB)
#include <zlib.h>
class GzFilter : public FileScanFilter {
public:
virtual ~GzFilter() {
if (m_initdone) {
inflateEnd(&m_stream);
}
}
virtual bool init(int64_t size, string *reason) override {
LOGDEB1("GzFilter::init\n");
if (out()) {
return out()->init(size, reason);
}
return true;
}
virtual bool data(const char *buf, int cnt, string *reason) override {
LOGDEB1("GzFilter::data: cnt " << cnt << endl);
int error;
m_stream.next_in = (Bytef*)buf;
m_stream.avail_in = cnt;
if (m_initdone == false) {
// We do not support a first read cnt < 2. This quite
// probably can't happen with a compressed file (size>2)
// except if we're reading a tty which is improbable. So
// assume this is a regular file.
const unsigned char *ubuf = (const unsigned char *)buf;
if ((cnt < 2) || ubuf[0] != 0x1f || ubuf[1] != 0x8b) {
LOGDEB1("GzFilter::data: not gzip. out() is " << out() << "\n");
pop();
if (out()) {
return out()->data(buf, cnt, reason);
} else {
return false;
}
}
m_stream.opaque = nullptr;
m_stream.zalloc = alloc_func;
m_stream.zfree = free_func;
m_stream.next_out = (Bytef*)m_obuf;
m_stream.avail_out = m_obs;
if ((error = inflateInit2(&m_stream, 15+32)) != Z_OK) {
LOGERR("inflateInit2 error: " << error << endl);
if (reason) {
*reason += " Zlib inflateinit failed";
if (m_stream.msg && *m_stream.msg) {
*reason += string(": ") + m_stream.msg;
}
}
return false;
}
m_initdone = true;
}
while (m_stream.avail_in != 0) {
m_stream.next_out = (Bytef*)m_obuf;
m_stream.avail_out = m_obs;
if ((error = inflate(&m_stream, Z_SYNC_FLUSH)) < Z_OK) {
LOGERR("inflate error: " << error << endl);
if (reason) {
*reason += " Zlib inflate failed";
if (m_stream.msg && *m_stream.msg) {
*reason += string(": ") + m_stream.msg;
}
}
return false;
}
if (out() &&
!out()->data(m_obuf, m_obs - m_stream.avail_out, reason)) {
return false;
}
}
return true;
}
static voidpf alloc_func(voidpf opaque, uInt items, uInt size) {
return malloc(items * size);
}
static void free_func(voidpf opaque, voidpf address) {
free(address);
}
bool m_initdone{false};
z_stream m_stream;
char m_obuf[10000];
const int m_obs{10000};
};
#endif // GZ
#ifdef READFILE_ENABLE_MD5
// Pass-through filter which computes the MD5 of the data it sees.
class FileScanMd5 : public FileScanFilter {
public:
    // @param d string which finish() passes to MD5Final() as the
    //   destination for the result.
    FileScanMd5(string& d) : digest(d) {
        // Initialize the context here too: finish() may be called
        // even if init() never ran (e.g. the source failed before
        // producing anything), and must not read an uninitialized
        // context.
        MD5Init(&ctx);
    }

    // (Re)start the computation and forward the size hint downstream.
    virtual bool init(int64_t size, string *reason) override {
        LOGDEB1("FileScanMd5: init\n");
        MD5Init(&ctx);
        if (out()) {
            return out()->init(size, reason);
        }
        return true;
    }

    // Update the checksum with the chunk, then pass it on unchanged.
    virtual bool data(const char *buf, int cnt, string *reason) override {
        LOGDEB1("FileScanMd5: data. cnt " << cnt << endl);
        MD5Update(&ctx, (const unsigned char*)buf, cnt);
        if (out() && !out()->data(buf, cnt, reason)) {
            return false;
        }
        return true;
    }

    // Terminate the computation, with 'digest' as the MD5Final() output.
    bool finish() {
        LOGDEB1("FileScanMd5: finish\n");
        MD5Final(digest, &ctx);
        return true;
    }

    string &digest;
    MD5_CTX ctx;
};
#endif // MD5
// Source taking data from a regular file
// Source reading from a file opened by name, or from file descriptor
// 0 (stdin) if the file name is empty.
class FileScanSourceFile : public FileScanSource {
public:
    // @param next first consumer of the data.
    // @param fn file name, empty to read from stdin.
    // @param startoffs offset of the first byte to send downstream.
    // @param cnttoread maximum count of bytes to send, -1 for no limit.
    // @param reason if not null, error messages are appended to it.
    FileScanSourceFile(FileScanDo *next, const string& fn, int64_t startoffs,
                       int64_t cnttoread, string *reason)
        : FileScanSource(next), m_fn(fn), m_startoffs(startoffs),
          m_cnttoread(cnttoread), m_reason(reason) { }

    // Open the input, send the requested part downstream, and close
    // what we opened whatever the outcome.
    // @return false on open/stat/seek/read error, or if a downstream
    //   init() or data() call fails.
    virtual bool scan() {
        LOGDEB1("FileScanSourceFile: reading " << m_fn << " offs " <<
                m_startoffs<< " cnt " << m_cnttoread << " out " << out() << endl);
        int fd = 0;
        // We only close descriptors which we opened ourselves (not stdin)
        bool doclose = false;
        struct stat st;
        // Initialize st_size: if fn.empty() , the fstat() call won't happen.
        st.st_size = 0;

        // If we have a file name, open it, else use stdin.
        if (!m_fn.empty()) {
            SYSPATH(m_fn, realpath);
            fd = OPEN(realpath, O_RDONLY | O_BINARY);
            if (fd < 0) {
                catstrerror(m_reason, "open/stat", errno);
                return false;
            }
            doclose = true;
            if (fstat(fd, &st) < 0) {
                catstrerror(m_reason, "open/stat", errno);
                // Don't leak the descriptor which we just opened
                close(fd);
                return false;
            }
        }

        const bool ret = readAndForward(fd, st.st_size);

        if (doclose) {
            close(fd);
        }
        return ret;
    }

protected:
    string m_fn;
    int64_t m_startoffs;
    int64_t m_cnttoread;
    string *m_reason;

private:
    // Read from the open descriptor and push the data downstream.
    // Never closes fd: this is the caller's job, on all paths.
    // @param fd open descriptor.
    // @param fsize file size from fstat(), 0 if unknown.
    bool readAndForward(int fd, int64_t fsize) {
        const int RDBUFSZ = 8192;

#if defined O_NOATIME && O_NOATIME != 0
        if (fcntl(fd, F_SETFL, O_NOATIME) < 0) {
            // perror("fcntl");
        }
#endif

        // Give the consumer a size hint: the requested count if there
        // is one, else the file size, else 0 for unknown. As for the
        // other sources, an init() failure ends the scan.
        if (out()) {
            int64_t hint = 0;
            if (m_cnttoread != -1 && m_cnttoread) {
                hint = m_cnttoread + 1;
            } else if (fsize > 0) {
                hint = fsize + 1;
            }
            if (!out()->init(hint, m_reason)) {
                return false;
            }
        }

        // Position to the start offset. We can only seek on a named
        // file: for stdin the leading data is read and discarded in
        // the loop below.
        int64_t curoffs = 0;
        if (m_startoffs > 0 && !m_fn.empty()) {
            if (lseek(fd, m_startoffs, SEEK_SET) != m_startoffs) {
                catstrerror(m_reason, "lseek", errno);
                return false;
            }
            curoffs = m_startoffs;
        }

        char buf[RDBUFSZ];
        int64_t totread = 0;
        for (;;) {
            size_t toread = RDBUFSZ;
            // While skipping, don't read beyond the start offset, so
            // that each buffer is either all skipped or all used.
            if (m_startoffs > 0 && curoffs < m_startoffs) {
                toread = size_t(MIN(RDBUFSZ, m_startoffs - curoffs));
            }
            if (m_cnttoread != -1) {
                toread = MIN(toread, (uint64_t)(m_cnttoread - totread));
            }
            ssize_t n = static_cast<ssize_t>(read(fd, buf, toread));
            if (n < 0) {
                catstrerror(m_reason, "read", errno);
                return false;
            }
            if (n == 0) {
                break;
            }
            curoffs += n;
            if (curoffs - n < m_startoffs) {
                // This buffer is before the start offset: discard
                continue;
            }
            // There may be no consumer at all: the data is then just
            // read and dropped.
            if (out() && !out()->data(buf, n, m_reason)) {
                return false;
            }
            totread += n;
            if (m_cnttoread > 0 && totread >= m_cnttoread) {
                break;
            }
        }
        return true;
    }
};
#if defined(READFILE_ENABLE_MINIZ)
#include "miniz.h"
// Source taking data from a ZIP archive member
// Source extracting one member from a ZIP archive, which can be
// either a file or a memory buffer.
class FileScanSourceZip : public FileScanSource {
public:
    // Archive in a file.
    // @param next consumer of the uncompressed member data.
    // @param fn archive file name.
    // @param member name of the archive member to extract.
    // @param reason if not null, error messages are appended to it.
    FileScanSourceZip(FileScanDo *next, const string& fn,
                      const string& member, string *reason)
        : FileScanSource(next), m_fn(fn), m_member(member),
          m_reason(reason) {}

    // Archive in memory. The buffer is not copied: the pointer is
    // used by scan(), so the data must still be valid then.
    FileScanSourceZip(const char *data, size_t cnt, FileScanDo *next,
                      const string& member, string *reason)
        : FileScanSource(next), m_data(data), m_cnt(cnt), m_member(member),
          m_reason(reason) {}

    // Locate the member and push its uncompressed data downstream.
    // @return false if the archive can't be opened, the member can't
    //   be found or extracted, or the downstream init() fails.
    virtual bool scan() {
        bool ret = false;
        mz_zip_archive zip;
        mz_zip_zero_struct(&zip);
        void *opaque = this;
        // Declared before the first goto so that no jump crosses them
        mz_uint32 file_index = 0;
        mz_zip_archive_file_stat zstat;

        // The memory variant is used when there is no file name
        bool ret1;
        if (m_fn.empty()) {
            ret1 = mz_zip_reader_init_mem(&zip, m_data, m_cnt, 0);
        } else {
            SYSPATH(m_fn, realpath);
            ret1 = mz_zip_reader_init_file(&zip, realpath, 0);
        }
        if (!ret1) {
            if (m_reason) {
                *m_reason += "mz_zip_reader_init_xx() failed: ";
                *m_reason +=
                    string(mz_zip_get_error_string(zip.m_last_error));
            }
            return false;
        }

        // mz_zip_reader_locate_file_v2() returns a boolean status (the
        // index is returned through the last parameter), so it must be
        // tested for falsity: testing for a negative value would never
        // detect a missing member.
        if (!mz_zip_reader_locate_file_v2(&zip, m_member.c_str(), NULL, 0,
                                          &file_index)) {
            if (m_reason) {
                *m_reason += "mz_zip_reader_locate_file() failed: ";
                *m_reason += string(mz_zip_get_error_string(zip.m_last_error));
            }
            goto out;
        }

        if (!mz_zip_reader_file_stat(&zip, file_index, &zstat)) {
            if (m_reason) {
                *m_reason += "mz_zip_reader_file_stat() failed: ";
                *m_reason += string(mz_zip_get_error_string(zip.m_last_error));
            }
            goto out;
        }
        // The uncompressed size is known: use it as size hint
        if (out()) {
            if (!out()->init(zstat.m_uncomp_size, m_reason)) {
                goto out;
            }
        }

        if (!mz_zip_reader_extract_to_callback(
                &zip, file_index, write_cb, opaque, 0)) {
            if (m_reason) {
                *m_reason += "mz_zip_reader_extract_to_callback() failed: ";
                *m_reason += string(mz_zip_get_error_string(zip.m_last_error));
            }
            goto out;
        }

        ret = true;
    out:
        mz_zip_reader_end(&zip);
        return ret;
    }

    // Extraction callback: forward each chunk of uncompressed data
    // downstream. pOpaque is the FileScanSourceZip object.
    // @return the chunk size if ok, else (size_t)-1 to report that the
    //   downstream element failed.
    static size_t write_cb(void *pOpaque, mz_uint64 file_ofs,
                           const void *pBuf, size_t n) {
        const char *cp = (const char*)pBuf;
        LOGDEB1("write_cb: ofs " << file_ofs << " cnt " << n << " data: " <<
                string(cp, n) << endl);
        FileScanSourceZip *ths = (FileScanSourceZip *)pOpaque;
        if (ths->out()) {
            if (!ths->out()->data(cp, n, ths->m_reason)) {
                return (size_t)-1;
            }
        }
        return n;
    }

protected:
    // In-memory archive. These stay null/0 when reading from a file,
    // so that a scan with neither file name nor data fails in the
    // reader initialisation instead of reading a random address.
    const char *m_data{nullptr};
    size_t m_cnt{0};
    string m_fn;
    string m_member;
    string *m_reason;
};
bool file_scan(const std::string& filename, const std::string& membername,
FileScanDo* doer, std::string *reason)
{
FileToString accum(data);
return file_scan(fn, &accum, offs, cnt, reason);
if (membername.empty()) {
return file_scan(filename, doer, 0, -1, reason
#ifdef READFILE_ENABLE_MD5
, nullptr
#endif
);
} else {
FileScanSourceZip source(doer, filename, membername, reason);
return source.scan();
}
}
bool string_scan(const char *data, size_t cnt, const std::string& membername,
FileScanDo* doer, std::string *reason)
{
if (membername.empty()) {
return string_scan(data, cnt, doer, reason
#ifdef READFILE_ENABLE_MD5
, nullptr
#endif
);
} else {
FileScanSourceZip source(data, cnt, doer, membername, reason);
return source.scan();
}
}
#endif // READFILE_ENABLE_ZIP
bool file_scan(const string& fn, FileScanDo* doer, int64_t startoffs,
int64_t cnttoread, string *reason
#ifdef READFILE_ENABLE_MD5
, string *md5p
#endif
)
{
LOGDEB1("file_scan: doer " << doer << endl);
#if defined(READFILE_ENABLE_ZLIB)
bool nodecomp = startoffs != 0;
#endif
if (startoffs < 0) {
startoffs = 0;
}
FileScanSourceFile source(doer, fn, startoffs, cnttoread, reason);
FileScanUpstream *up = &source;
up = up;
#if defined(READFILE_ENABLE_ZLIB)
GzFilter gzfilter;
if (!nodecomp) {
gzfilter.insertAtSink(doer, up);
up = &gzfilter;
}
#endif
#ifdef READFILE_ENABLE_MD5
// We compute the MD5 on the uncompressed data, so insert this
// right at the source (after the decompressor).
string digest;
FileScanMd5 md5filter(digest);
if (md5p) {
md5filter.insertAtSink(doer, up);
up = &md5filter;
}
#endif
bool ret = source.scan();
#ifdef READFILE_ENABLE_MD5
if (md5p) {
md5filter.finish();
MD5HexPrint(digest, *md5p);
}
#endif
return ret;
}
bool file_scan(const string& fn, FileScanDo* doer, string *reason)
{
return file_scan(fn, doer, 0, size_t(-1), reason);
return file_scan(fn, doer, 0, -1, reason
#ifdef READFILE_ENABLE_MD5
, nullptr
#endif
);
}
const int RDBUFSZ = 8192;
// Note: the fstat() + reserve() (in init()) calls divide cpu usage almost by 2
// on both linux i586 and macosx (compared to just append())
// Also tried a version with mmap, but it's actually slower on the mac and not
// faster on linux.
bool file_scan(const string& fn, FileScanDo* doer, int64_t startoffs,
size_t cnttoread, string *reason)
{
if (startoffs < 0) {
*reason += " file_scan: negative startoffs not allowed";
return false;
}
bool ret = false;
bool noclosing = true;
int fd = 0;
struct stat st;
// Initialize st_size: if fn.empty() , the fstat() call won't happen.
st.st_size = 0;
class FileScanSourceBuffer : public FileScanSource {
public:
FileScanSourceBuffer(FileScanDo *next, const char *data, size_t cnt,
string *reason)
: FileScanSource(next), m_data(data), m_cnt(cnt), m_reason(reason) {}
// If we have a file name, open it, else use stdin.
if (!fn.empty()) {
fd = open(fn.c_str(), O_RDONLY | O_BINARY);
if (fd < 0 || fstat(fd, &st) < 0) {
catstrerror(reason, "open/stat", errno);
return false;
virtual bool scan() {
if (out()) {
if (!out()->init(m_cnt, m_reason)) {
return false;
}
return out()->data(m_data, m_cnt, m_reason);
} else {
return true;
}
noclosing = false;
}
protected:
const char *m_data{nullptr};
size_t m_cnt{0};
string *m_reason{nullptr};
};
#if defined O_NOATIME && O_NOATIME != 0
if (fcntl(fd, F_SETFL, O_NOATIME) < 0) {
// perror("fcntl");
bool string_scan(const char *data, size_t cnt, FileScanDo* doer,
std::string *reason
#ifdef READFILE_ENABLE_MD5
, std::string *md5p
#endif
)
{
FileScanSourceBuffer source(doer, data, cnt, reason);
FileScanUpstream *up = &source;
up = up;
#ifdef READFILE_ENABLE_MD5
string digest;
FileScanMd5 md5filter(digest);
if (md5p) {
md5filter.insertAtSink(doer, up);
up = &md5filter;
}
#endif
bool ret = source.scan();
if (cnttoread != (size_t) - 1 && cnttoread) {
doer->init(cnttoread + 1, reason);
} else if (st.st_size > 0) {
doer->init(size_t(st.st_size + 1), reason);
} else {
doer->init(0, reason);
}
int64_t curoffs = 0;
if (startoffs > 0 && !fn.empty()) {
if (lseek(fd, startoffs, SEEK_SET) != startoffs) {
catstrerror(reason, "lseek", errno);
return false;
}
curoffs = startoffs;
}
char buf[RDBUFSZ];
size_t totread = 0;
for (;;) {
size_t toread = RDBUFSZ;
if (startoffs > 0 && curoffs < startoffs) {
toread = size_t(MIN(RDBUFSZ, startoffs - curoffs));
}
if (cnttoread != size_t(-1)) {
toread = MIN(toread, cnttoread - totread);
}
ssize_t n = static_cast<ssize_t>(read(fd, buf, toread));
if (n < 0) {
catstrerror(reason, "read", errno);
goto out;
}
if (n == 0) {
break;
}
curoffs += n;
if (curoffs - n < startoffs) {
continue;
}
if (!doer->data(buf, n, reason)) {
goto out;
}
totread += n;
if (cnttoread > 0 && totread >= cnttoread) {
break;
}
}
ret = true;
out:
if (fd >= 0 && !noclosing) {
close(fd);
#ifdef READFILE_ENABLE_MD5
if (md5p) {
md5filter.finish();
MD5HexPrint(digest, *md5p);
}
#endif
return ret;
}
#else // Test
#include "autoconfig.h"
#include <stdio.h>
#include <sys/types.h>
#include "safesysstat.h"
#include <stdlib.h>
#include <string>
#include <iostream>
using namespace std;
#include "readfile.h"
#include "fstreewalk.h"
using namespace std;
// Tree walker callback for the test driver: try to read every regular
// file, printing the path if this succeeds, else an error message.
class myCB : public FsTreeWalkerCB {
public:
    FsTreeWalker::Status processone(const string& path,
                                    const struct stat *st,
                                    FsTreeWalker::CbFlag flg) {
        // Nothing to do when entering or leaving a directory, or for
        // any other kind of event.
        if (flg != FsTreeWalker::FtwRegular) {
            return FsTreeWalker::FtwOk;
        }
        string contents, reason;
        if (file_to_string(path, contents, &reason)) {
            cout << path << endl;
        } else {
            cerr << "Failed: " << reason << " : " << path << endl;
        }
        // Errors do not stop the walk
        return FsTreeWalker::FtwOk;
    }
};
static int op_flags;
#define OPT_MOINS 0x1
#define OPT_c 0x2
#define OPT_o 0x4
static const char *thisprog;
static char usage [] =
"trreadfile [-o offs] [-c cnt] topdirorfile\n\n"
;
// Print the program name and the usage string to stderr, then exit
// with status 1. Does not return.
static void
Usage(void)
{
fprintf(stderr, "%s: usage:\n%s", thisprog, usage);
exit(1);
}
// Test driver: trreadfile [-o offs] [-c cnt] topdirorfile
// If the argument is a directory, walk it and try to read every
// regular file inside. Else read the file (or the part of it defined
// by the offset and count options) and write the data to stdout.
int main(int argc, const char **argv)
{
int64_t offs = 0;
size_t cnt = size_t(-1);
thisprog = argv[0];
argc--;
argv++;
// Option parsing. Each argument beginning with '-' is a string of
// option letters. -c and -o take their value from the next argument.
while (argc > 0 && **argv == '-') {
(*argv)++;
if (!(**argv))
/* Case of a lone "-" argument, as in "adb - core" */
{
Usage();
}
while (**argv)
switch (*(*argv)++) {
case 'c':
op_flags |= OPT_c;
if (argc < 2) {
Usage();
}
cnt = atoll(*(++argv));
// Account for the value argument, and go process the next one
argc--;
goto b1;
case 'o':
op_flags |= OPT_o;
if (argc < 2) {
Usage();
}
offs = strtoull(*(++argv), 0, 0);
argc--;
goto b1;
default:
Usage();
break;
}
b1:
argc--;
argv++;
}
// Exactly one non-option argument must remain
if (argc != 1) {
Usage();
}
string top = *argv++;
argc--;
cerr << "filename " << top << " offs " << offs << " cnt " << cnt << endl;
struct stat st;
if (!top.empty() && stat(top.c_str(), &st) < 0) {
perror("stat");
exit(1);
}
if (!top.empty() && S_ISDIR(st.st_mode)) {
// Directory: the callback tries to read each regular file
FsTreeWalker walker;
myCB cb;
walker.walk(top, cb);
if (walker.getErrCnt() > 0) {
cout << walker.getReason();
}
} else {
// Single file: read the requested part and copy it to stdout
string s, reason;
if (!file_to_string(top, s, offs, cnt, &reason)) {
cerr << reason << endl;
exit(1);
} else {
cout << s;
}
}
exit(0);
}
#endif //TEST_READFILE

View File

@ -21,30 +21,85 @@
#include <string>
/**
* Read file in chunks, calling an accumulator for each chunk. Can be used
* for reading in a file, computing an md5...
*/
class FileScanUpstream;
/** Data sink for the file reader. */
class FileScanDo {
public:
virtual ~FileScanDo() {}
virtual bool init(size_t size, std::string *reason) = 0;
virtual bool data(const char *buf, int cnt, std::string* reason) = 0;
/* Initialize and allocate.
* @param size if set, lower bound of data size.
* @param reason[output] set to error message in case of error.
* @return false for error (file_scan will return), true if ok.
*/
virtual bool init(int64_t size, std::string *reason) = 0;
/* Process chunk of data
* @param buf the data buffer.
* @param cnt byte count.
* @param reason[output] set to error message in case of error.
* @return false for error (file_scan will return), true if ok.
*/
virtual bool data(const char *buf, int cnt, std::string *reason) = 0;
virtual void setUpstream(FileScanUpstream*) {}
};
bool file_scan(const std::string& filename, FileScanDo* doer, std::string *reason = 0);
/* Same but only process count cnt from offset offs. Set cnt to size_t(-1)
* for no limit */
bool file_scan(const std::string& fn, FileScanDo* doer, int64_t offs, size_t cnt,
std::string *reason = 0);
/** Open and read file, calling the FileScanDo data() method for each chunk.
*
* @param filename File name. Use empty value for stdin
* @param doer the data processor. The init() method will be called
* initially with a lower bound of the data size (may be used to
* reserve a buffer), or with a 0 size if nothing is known about the
* size. The data() method will be called for every chunk of data
* read.
* @param offs Start offset. If not zero, will disable decompression
* (set to -1 to start at 0 with no decompression).
* @param cnt Max bytes in output. Set cnt to -1 for no limit.
* @param[output] md5p If not null, points to a string to store the hex ascii
* md5 of the uncompressed data.
* @param[output] reason If not null, points to a string for storing an
* error message if the return value is false.
* @return true if the operation ended normally, else false.
*/
bool file_scan(const std::string& fn, FileScanDo* doer, int64_t startoffs,
int64_t cnttoread, std::string *reason
#ifdef READFILE_ENABLE_MD5
, std::string *md5p
#endif
);
/** Same as above, with no offset/cnt/md5 parameters */
bool file_scan(const std::string& filename, FileScanDo* doer,
std::string *reason);
/** Same as file_scan, from a memory buffer. No libz processing */
bool string_scan(const char *data, size_t cnt, FileScanDo* doer,
std::string *reason
#ifdef READFILE_ENABLE_MD5
, std::string *md5p
#endif
);
#if defined(READFILE_ENABLE_MINIZ)
/* Process a zip archive member */
bool file_scan(const std::string& filename, const std::string& membername,
FileScanDo* doer, std::string *reason);
bool string_scan(const char* data, size_t cnt, const std::string& membername,
FileScanDo* doer, std::string *reason);
#endif
/**
* Read file into string.
* @return true for ok, false else
*/
bool file_to_string(const std::string& filename, std::string& data, std::string *reason = 0);
bool file_to_string(const std::string& filename, std::string& data,
std::string *reason = 0);
/** Read file chunk into string. Set cnt to size_t(-1) for whole file */
/** Read file chunk into string. Set cnt to -1 for going to
* eof, offs to -1 for going from the start without decompression */
bool file_to_string(const std::string& filename, std::string& data,
int64_t offs, size_t cnt, std::string *reason = 0);
#endif /* _READFILE_H_INCLUDED_ */

View File

@ -21,14 +21,16 @@
#include <string>
#include <iostream>
#include <mutex>
using std::string;
#include <errno.h>
#include <iconv.h>
#include <wchar.h>
#include "transcode.h"
#include "log.h"
using namespace std;
// We gain approximately 25% exec time for word at a time conversions by
// caching the iconv_open thing.
//
@ -42,7 +44,7 @@ using std::string;
bool transcode(const string &in, string &out, const string &icode,
const string &ocode, int *ecnt)
{
LOGDEB2("Transcode: " << (icode) << " -> " << (ocode) << "\n" );
LOGDEB2("Transcode: " << icode << " -> " << ocode << "\n");
#ifdef ICONV_CACHE_OPEN
static iconv_t ic = (iconv_t)-1;
static string cachedicode;
@ -100,8 +102,9 @@ bool transcode(const string &in, string &out, const string &icode,
" : " + strerror(errno);
#endif
if (errno == EILSEQ) {
LOGDEB1("transcode:iconv: bad input seq.: shift, retry\n" );
LOGDEB1(" Input consumed " << (ip - in) << " output produced " << (out.length() + OBSIZ - osiz) << "\n" );
LOGDEB1("transcode:iconv: bad input seq.: shift, retry\n");
LOGDEB1(" Input consumed " << ip - in << " output produced " <<
out.length() + OBSIZ - osiz << "\n");
out.append(obuf, OBSIZ - osiz);
out += "?";
mecnt++;
@ -144,14 +147,67 @@ error:
}
if (mecnt)
LOGDEB("transcode: [" << (icode) << "]->[" << (ocode) << "] " << (mecnt) << " errors\n" );
LOGDEB("transcode: [" << icode << "]->[" << ocode << "] " <<
mecnt << " errors\n");
if (ecnt)
*ecnt = mecnt;
return ret;
}
// Convert a null-terminated wide character string to UTF-8.
// @param in input string.
// @param out output string. Erased first, then filled with the
//   converted data.
// @return false if the iconv conversion descriptor can't be opened or
//   if the conversion fails, true otherwise.
bool wchartoutf8(const wchar_t *in, std::string& out)
{
// The conversion descriptor is opened on first use and then kept
static iconv_t ic = (iconv_t)-1;
if (ic == (iconv_t)-1) {
if((ic = iconv_open("UTF-8", "WCHAR_T")) == (iconv_t)-1) {
LOGERR("wchartoutf8: iconv_open failed\n");
return false;
}
}
const int OBSIZ = 8192;
char obuf[OBSIZ], *op;
out.erase();
// NOTE(review): the input byte count is computed with 2 bytes per
// character. This is only right where sizeof(wchar_t) is 2 - confirm
// that this is never built elsewhere.
size_t isiz = 2 * wcslen(in);
out.reserve(isiz);
const char *ip = (const char *)in;
// NOTE(review): no matching #if for the following #else is visible in
// this function - confirm against the complete file.
#else
// Convert by chunks of at most OBSIZ output bytes. E2BIG just means
// that the output buffer is full: flush it and go on.
while (isiz > 0) {
size_t osiz;
op = obuf;
osiz = OBSIZ;
if(iconv(ic, (ICONV_CONST char **)&ip, &isiz, &op, &osiz) == (size_t)-1
&& errno != E2BIG) {
LOGERR("wchartoutf8: iconv error, errno: " << errno << endl);
return false;
}
out.append(obuf, OBSIZ - osiz);
}
return true;
}
bool utf8towchar(const std::string& in, wchar_t *out, size_t obytescap)
{
static iconv_t ic = (iconv_t)-1;
if (ic == (iconv_t)-1) {
if((ic = iconv_open("WCHAR_T", "UTF-8")) == (iconv_t)-1) {
LOGERR("utf8towchar: iconv_open failed\n");
return false;
}
}
size_t isiz = in.size();
const char *ip = in.c_str();
size_t osiz = (size_t)obytescap-2;
char *op = (char *)out;
if (iconv(ic, (ICONV_CONST char **)&ip, &isiz, &op, &osiz) == (size_t)-1) {
LOGERR("utf8towchar: iconv error, errno: " << errno << endl);
return false;
}
*op++ = 0;
*op = 0;
return true;
}
#else // -> TEST
#include <stdio.h>
#include <stdlib.h>
@ -222,4 +278,3 @@ int main(int argc, char **argv)
exit(0);
}
#endif

View File

@ -36,4 +36,9 @@ extern bool transcode(const std::string &in, std::string &out,
const std::string &ocode,
int *ecnt = 0);
#ifdef _WIN32
extern bool wchartoutf8(const wchar_t *in, std::string& out);
extern bool utf8towchar(const std::string& in, wchar_t *out, size_t obytescap);
#endif
#endif /* _TRANSCODE_H_INCLUDED_ */

View File

@ -1,154 +0,0 @@
/*
Implementation of POSIX directory browsing functions and types for Win32.
Author: Kevlin Henney (kevlin@acm.org, kevlin@curbralan.com)
History: Created March 1997. Updated June 2003 and July 2012.
Rights: See end of file.
*/
#include <dirent.h>
#include <errno.h>
#include <io.h> /* _findfirst and _findnext set errno iff they return -1 */
#include <stdlib.h>
#include <string.h>
#ifdef __cplusplus
extern "C"
{
#endif
typedef ptrdiff_t handle_type; /* C99's intptr_t not sufficiently portable */
/* State of an open directory stream, built on the _findfirst() /
   _findnext() family of calls */
struct DIR
{
handle_type handle; /* -1 for failed rewind */
struct _finddata_t info; /* filled by _findfirst() / _findnext() */
struct dirent result; /* d_name null iff first time */
char *name; /* null-terminated char string */
};
DIR *opendir(const char *name)
{
DIR *dir = 0;
if(name && name[0])
{
size_t base_length = strlen(name);
const char *all = /* search pattern must end with suitable wildcard */
strchr("/\\", name[base_length - 1]) ? "*" : "/*";
if((dir = (DIR *) malloc(sizeof *dir)) != 0 &&
(dir->name = (char *) malloc(base_length + strlen(all) + 1)) != 0)
{
strcat(strcpy(dir->name, name), all);
if((dir->handle =
(handle_type) _findfirst(dir->name, &dir->info)) != -1)
{
dir->result.d_name = 0;
}
else /* rollback */
{
free(dir->name);
free(dir);
dir = 0;
}
}
else /* rollback */
{
free(dir);
dir = 0;
errno = ENOMEM;
}
}
else
{
errno = EINVAL;
}
return dir;
}
/* Close a directory stream and free its memory.
   Returns the _findclose() status. Returns -1 with errno set to EBADF
   for a null stream, an invalid search handle, or a failed close. */
int closedir(DIR *dir)
{
    int status = -1;

    if(dir != 0)
    {
        /* An invalid handle is reported as an error, but the memory
           is released anyway */
        status = (dir->handle == -1) ? -1 : _findclose(dir->handle);
        free(dir->name);
        free(dir);
    }

    /* map all errors to EBADF */
    if(status == -1)
    {
        errno = EBADF;
    }
    return status;
}
/* Return the next directory entry, or null if _findnext() finds
   nothing more. For a null stream or an invalid search handle,
   returns null with errno set to EBADF.
   The returned structure is part of the stream: it is overwritten by
   the next call. */
struct dirent *readdir(DIR *dir)
{
    struct dirent *entry;

    if(!dir || dir->handle == -1)
    {
        errno = EBADF;
        return 0;
    }

    /* On the first call (null d_name), the data read by _findfirst()
       is still unused: only call _findnext() on the following ones */
    if(dir->result.d_name && _findnext(dir->handle, &dir->info) == -1)
    {
        return 0;
    }

    entry = &dir->result;
    entry->d_mtime = dir->info.time_write;
    entry->d_size = dir->info.size;
    entry->d_name = dir->info.name;
    /* Only two file types are reported */
    entry->d_mode = (dir->info.attrib & _A_SUBDIR) ? S_IFDIR : S_IFREG;
    return entry;
}
/* Restart the directory scan from the beginning.
   Sets errno to EBADF for a null stream or an invalid search handle.
   If the search can't be restarted, the handle is left at -1. */
void rewinddir(DIR *dir)
{
    if(!dir || dir->handle == -1)
    {
        errno = EBADF;
        return;
    }

    /* Close the current search and start a new one with the same
       pattern */
    _findclose(dir->handle);
    dir->handle = (handle_type) _findfirst(dir->name, &dir->info);
    /* Null d_name: the entry just read was not yet returned */
    dir->result.d_name = 0;
}
#ifdef __cplusplus
}
#endif
/*
Copyright Kevlin Henney, 1997, 2003, 2012. All rights reserved.
Permission to use, copy, modify, and distribute this software and its
documentation for any purpose is hereby granted without fee, provided
that this copyright and permissions notice appear in all copies and
derivatives.
This software is supplied "as is" without express or implied warranty.
But that said, if there are any problems please get in touch.
*/

View File

@ -1,57 +0,0 @@
#ifndef DIRENT_INCLUDED
#define DIRENT_INCLUDED
/*
Declaration of POSIX directory browsing functions and types for Win32.
Author: Kevlin Henney (kevlin@acm.org, kevlin@curbralan.com)
History: Created March 1997. Updated June 2003.
Rights: See end of file.
*/
#include <sys/stat.h>
#ifdef __cplusplus
extern "C"
{
#endif
typedef struct DIR DIR;
struct dirent
{
char *d_name;
// The native call we use, findfirst/next return file attributes at once,
// no need for a separate stat() call in most cases
// Note that ctime is actually creation time. No use for posix.
time_t d_mtime;
off_t d_size;
int d_mode; // S_IFREG or S_IFDIR only
};
DIR *opendir(const char *);
int closedir(DIR *);
struct dirent *readdir(DIR *);
void rewinddir(DIR *);
/*
Copyright Kevlin Henney, 1997, 2003. All rights reserved.
Permission to use, copy, modify, and distribute this software and its
documentation for any purpose is hereby granted without fee, provided
that this copyright and permissions notice appear in all copies and
derivatives.
This software is supplied "as is" without express or implied warranty.
But that said, if there are any problems please get in touch.
*/
#ifdef __cplusplus
}
#endif
#endif

View File

@ -32,6 +32,7 @@
#include <psapi.h>
#include "smallut.h"
#include "pathut.h"
#include "transcode.h"
using namespace std;
@ -752,7 +753,7 @@ int ExecCmd::startExec(const string &cmd, const vector<string>& args,
return false;
}
STARTUPINFO siStartInfo;
STARTUPINFOW siStartInfo;
BOOL bSuccess = FALSE;
// Set up members of the PROCESS_INFORMATION structure.
@ -760,8 +761,8 @@ int ExecCmd::startExec(const string &cmd, const vector<string>& args,
// Set up members of the STARTUPINFO structure.
// This structure specifies the STDIN and STDOUT handles for redirection.
ZeroMemory(&siStartInfo, sizeof(STARTUPINFO));
siStartInfo.cb = sizeof(STARTUPINFO);
ZeroMemory(&siStartInfo, sizeof(siStartInfo));
siStartInfo.cb = sizeof(siStartInfo);
if (m->m_flags & EXF_SHOWWINDOW) {
siStartInfo.dwFlags |= STARTF_USESTDHANDLES;
if (m->m_flags & EXF_MAXIMIZED) {
@ -782,12 +783,15 @@ int ExecCmd::startExec(const string &cmd, const vector<string>& args,
// Create the child process.
// Need a writable buffer for the command line, for some reason.
LOGDEB1("ExecCmd:startExec: cmdline [" << (cmdline) << "]\n" );
LOGDEB("ExecCmd:startExec: cmdline [" << cmdline << "]\n");
#if 0
LPSTR buf = (LPSTR)malloc(cmdline.size() + 1);
memcpy(buf, cmdline.c_str(), cmdline.size());
buf[cmdline.size()] = 0;
bSuccess = CreateProcess(NULL,
buf, // command line
#endif
SYSPATH(cmdline, wcmdline);
bSuccess = CreateProcessW(NULL,
wcmdline, // command line
NULL, // process security attributes
NULL, // primary thread security attrs
TRUE, // handles are inherited
@ -798,9 +802,10 @@ int ExecCmd::startExec(const string &cmd, const vector<string>& args,
&m->m_piProcInfo); // PROCESS_INFORMATION
if (!bSuccess) {
printError("ExecCmd::doexec: CreateProcess");
}
}
free(envir);
free(buf);
// free(buf);
// Close child-side handles else we'll never see eofs
if (!CloseHandle(hOutputWrite))
printError("CloseHandle");

View File

@ -47,54 +47,43 @@ application/x-lzma = uncompress python rcluncomp.py 7z %f %t
# each filter, see the examples below (e.g. msword)
[index]
application/msword = execm python rcldoc.py
application/pdf = execm python rclpdf.py
application/vnd.ms-excel = execm python rclxls.py
application/vnd.ms-powerpoint = execm python rclppt.py
application/vnd.openxmlformats-officedocument.wordprocessingml.document = \
execm python rclopxml.py
application/vnd.openxmlformats-officedocument.wordprocessingml.template = \
execm python rclopxml.py
application/vnd.openxmlformats-officedocument.presentationml.template = \
execm python rclopxml.py
application/vnd.openxmlformats-officedocument.presentationml.presentation = \
execm python rclopxml.py
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = \
execm python rclopxml.py
application/vnd.openxmlformats-officedocument.spreadsheetml.template =\
execm python rclopxml.py
application/vnd.oasis.opendocument.text = execm python rclsoff.py
application/vnd.oasis.opendocument.text-template = execm python rclsoff.py
application/vnd.oasis.opendocument.presentation = execm python rclsoff.py
application/vnd.oasis.opendocument.spreadsheet = execm python rclsoff.py
application/vnd.oasis.opendocument.graphics = execm python rclsoff.py
application/vnd.sun.xml.calc = execm python rclsoff.py
application/vnd.sun.xml.calc.template = execm python rclsoff.py
application/vnd.sun.xml.draw = execm python rclsoff.py
application/vnd.sun.xml.draw.template = execm python rclsoff.py
application/vnd.sun.xml.impress = execm python rclsoff.py
application/vnd.sun.xml.impress.template = execm python rclsoff.py
application/vnd.sun.xml.math = execm python rclsoff.py
application/vnd.sun.xml.writer = execm python rclsoff.py
application/vnd.sun.xml.writer.global = execm python rclsoff.py
application/vnd.sun.xml.writer.template = execm python rclsoff.py
application/pdf = execm python rclpdf.py
application/vnd.oasis.opendocument.text = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.text-template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.presentation = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.spreadsheet = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.oasis.opendocument.graphics = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.calc = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.calc.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.draw = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.draw.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.impress = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.impress.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.math = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.writer = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.writer.global = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.sun.xml.writer.template = internal xsltproc meta.xml opendoc-meta.xsl content.xml opendoc-body.xsl
application/vnd.wordperfect = exec wpd/wpd2html;mimetype=text/html
application/x-abiword = execm python rclabw.py
text/x-fictionbook = execm python rclfb2.py
application/x-abiword = internal xsltproc abiword.xsl
text/x-fictionbook = internal xsltproc fb2.xsl
application/vnd.openxmlformats-officedocument.wordprocessingml.document = \
execm python rclopxml.py
internal xsltproc docProps/core.xml openxml-meta.xsl word/document.xml openxml-word-body.xsl
application/vnd.openxmlformats-officedocument.wordprocessingml.template = \
execm python rclopxml.py
internal xsltproc docProps/core.xml openxml-meta.xsl word/document.xml openxml-word-body.xsl
application/vnd.openxmlformats-officedocument.presentationml.template = \
execm python rclopxml.py
application/vnd.openxmlformats-officedocument.presentationml.presentation = \
execm python rclopxml.py
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet = \
execm python rclopxml.py
internal xsltproc docProps/core.xml openxml-meta.xsl xl/sharedStrings.xml openxml-xls-body.xsl
application/vnd.openxmlformats-officedocument.spreadsheetml.template =\
execm python rclopxml.py
internal xsltproc docProps/core.xml openxml-meta.xsl xl/sharedStrings.xml openxml-xls-body.xsl
application/epub+zip = execm python rclepub
# Returned by xdg-mime for .js. Future-proofing
@ -121,17 +110,18 @@ application/x-shellscript = internal text/plain
#application/x-tar = execm python rcltar
application/x-webarchive = execm python rclwar
application/x-7z-compressed = execm python rcl7z
application/zip = execm python rclzip;charset=default
audio/mpeg = execm python rclaudio
audio/mp4 = execm python rclaudio
audio/aac = execm python rclaudio
audio/x-karaoke = execm python rclkar
image/gif = execm python rclimg.py
image/jp2 = execm python rclimg.py
image/jpeg = execm python rclimg.py
image/png = execm python rclimg.py
image/tiff = execm python rclimg.py
image/svg+xml = execm python rclsvg.py
#image/x-xcf = execm perl rclimg
image/gif = execm rclimg.exe
image/jp2 = execm rclimg.exe
image/jpeg = execm rclimg.exe
image/png = execm rclimg.exe
image/tiff = execm rclimg.exe
image/svg+xml = internal xsltproc svg.xsl
#image/x-xcf = execm rclimg.exe
inode/symlink = internal
application/x-zerosize = internal
inode/x-empty = internal application/x-zerosize

View File

@ -24,36 +24,39 @@ test -d $DESTDIR || mkdir $DESTDIR || fatal cant create $DESTDIR
# Recoll src tree
RCL=c:/recoll/src/
RCLW=$RCL/windows/
# Recoll dependencies
RCLDEPS=c:/recolldeps/
ReleaseBuild=y
UNRTF=c:/recolldeps/unrtf
ANTIWORD=c:/recolldeps/antiword
PYXSLT=C:/recolldeps/pyxslt
PYEXIV2=C:/recolldeps/pyexiv2
#LIBXAPIAN=c:/temp/xapian-core-1.2.21/.libs/libxapian-22.dll
LIBXAPIAN=c:/recolldeps/xapian-core-1.4.5/.libs/libxapian-30.dll
MUTAGEN=C:/recolldeps/mutagen-1.32/
EPUB=C:/recolldeps/epub-0.5.2
FUTURE=C:/recolldeps/python2-future
ZLIB=c:/recolldeps/zlib-1.2.8
POPPLER=c:/recolldeps/poppler-0.36/
LIBWPD=c:/recolldeps/libwpd/libwpd-0.10.0/
LIBREVENGE=c:/recolldeps/libwpd/librevenge-0.0.1.jfd/
CHM=c:/recolldeps/pychm
# Where to find libgcc_s_dw2-1.dll for progs which need it copied
gccpath=`which gcc`
MINGWBIN=`dirname $gccpath`
PYTHON=${RCLDEPS}py-python3
UNRTF=${RCLDEPS}unrtf
ANTIWORD=${RCLDEPS}antiword
PYXSLT=${RCLDEPS}pyxslt
PYEXIV2=${RCLDEPS}pyexiv2
LIBXAPIAN=${RCLDEPS}xapian-core-1.4.5/.libs/libxapian-30.dll
MUTAGEN=${RCLDEPS}mutagen-1.32/
EPUB=${RCLDEPS}epub-0.5.2
FUTURE=${RCLDEPS}python2-future
ZLIB=${RCLDEPS}zlib-1.2.8
POPPLER=${RCLDEPS}poppler-0.36/
LIBWPD=${RCLDEPS}libwpd/libwpd-0.10.0/
LIBREVENGE=${RCLDEPS}libwpd/librevenge-0.0.1.jfd/
CHM=${RCLDEPS}pychm
MISC=${RCLDEPS}misc
# Where to copy the Qt Dlls from:
QTBIN=C:/Qt/Qt5.8.0/5.8/mingw53_32/bin
QTGCCBIN=C:/qt/Qt5.8.0/Tools/mingw530_32/bin/
# Where to find libgcc_s_dw2-1.dll for progs which need it copied
MINGWBIN=$QTBIN
PATH=$MINGWBIN:$QTGCCBIN:$PATH
export PATH
# Qt arch
QTA=Desktop_Qt_5_8_0_MinGW_32bit
RCLW=$RCL/windows/
if test X$ReleaseBuild = X'y'; then
qtsdir=release
else
@ -73,9 +76,14 @@ RCLS=$RCLW/build-rclstartw-${QTA}-${qtsdir}/${qtsdir}/rclstartw.exe
################
# Script:
FILTERS=$DESTDIR/Share/filters
# Print an error message on stderr and abort the script with exit status 1.
# Arguments: the words of the message.
fatal()
{
    # Quote the message so that it is printed verbatim (no globbing or
    # whitespace collapsing), and send it to stderr like any diagnostic.
    echo "$*" >&2
    exit 1
}
# checkcopy.
chkcp()
{
@ -115,7 +123,12 @@ copyzlib()
{
chkcp $ZLIB/zlib1.dll $DESTDIR
}
# Copy the Python tree found in $PYTHON to the python subdirectory of the
# filters directory under $DESTDIR. Aborts through fatal if the target
# directory cannot be created or the recursive copy fails.
copypython()
{
    pydest=$DESTDIR/Share/filters/python
    mkdir -p "$pydest" || fatal mkdir $pydest failed
    cp -rp "$PYTHON"/* "$pydest" || fatal copy of $PYTHON to $pydest failed
    # chkcp to check that the interpreter is where we think it is
    chkcp "$PYTHON"/python.exe "$pydest"/python.exe
}
copyrecoll()
{
# bindir=$RCL/windows/$PLATFORM/$CONFIGURATION/
@ -127,6 +140,7 @@ copyrecoll()
chkcp $RCLIDX $DESTDIR
chkcp $RCLQ $DESTDIR
chkcp $RCLS $DESTDIR
chkcp $MINGWBIN/libgcc_s_dw2-1.dll $DESTDIR
chkcp $RCL/COPYING $DESTDIR/COPYING.txt
chkcp $RCL/doc/user/usermanual.html $DESTDIR/Share/doc
@ -143,7 +157,9 @@ copyrecoll()
chkcp $RCL/python/recoll/recoll/rclconfig.py $FILTERS
chkcp $RCL/python/recoll/recoll/conftree.py $FILTERS
chkcp $RCL/filters/* $FILTERS
chkcp $RCL/filters/* $FILTERS
rm $FILTERS/rclimg $FILTERS/rclimg.py
chkcp $RCLDEPS/rclimg/rclimg.exe $FILTERS
chkcp $RCL/qtgui/mtpics/* $DESTDIR/Share/images
chkcp $RCL/qtgui/i18n/*.qm $DESTDIR/Share/translations
}
@ -169,9 +185,8 @@ copyunrtf()
chkcp $bindir/unrtf.exe $FILTERS
chkcp $UNRTF/outputs/*.conf $FILTERS/Share
chkcp $UNRTF/outputs/SYMBOL.charmap $FILTERS/Share
# libiconv2 is not present in qt, get it from mingw direct. is C, should
# be compatible
chkcp c:/MinGW/bin/libiconv-2.dll $FILTERS
# libiconv-2 originally comes from mingw
chkcp $MISC/libiconv-2.dll $FILTERS
}
copymutagen()
@ -187,6 +202,10 @@ copyepub()
# chkcp to check that epub is where we think it is
chkcp $EPUB/build/lib/epub/opf.py $FILTERS/epub
}
# We used to copy the future module to the filters dir, but it is now
# part of the original Python tree in recolldeps (2 dirs:
# site-packages/builtins, site-packages/future).
copyfuture()
{
cp -rp $FUTURE/future $FILTERS/
@ -246,6 +265,18 @@ for d in doc examples filters images translations; do
fatal mkdir $d failed
done
# First check that the config is ok
cmp -s $RCL/common/autoconfig.h $RCL/common/autoconfig-win.h || \
fatal autoconfig.h and autoconfig-win.h differ
VERSION=`cat $RCL/VERSION`
CFVERS=`grep PACKAGE_VERSION $RCL/common/autoconfig.h | \
cut -d ' ' -f 3 | sed -e 's/"//g'`
test "$VERSION" = "$CFVERS" ||
fatal Versions in VERSION and autoconfig.h differ
echo Packaging version $CFVERS
# copyrecoll must stay before copyqt so that windeployqt can do its thing
copyrecoll
copyqt
@ -255,9 +286,10 @@ copypoppler
copyantiword
copyunrtf
copyxslt
copyfuture
#copyfuture
copymutagen
copyepub
copypyexiv2
#copypyexiv2
copywpd
copychm
#copychm
copypython

View File

@ -14,6 +14,13 @@ DEFINES -= UNICODE
DEFINES -= _UNICODE
DEFINES += _MBCS
DEFINES += PSAPI_VERSION=1
DEFINES += READFILE_ENABLE_MINIZ
DEFINES += READFILE_ENABLE_MD5
DEFINES += READFILE_ENABLE_ZLIB
# This is necessary to avoid an undefined impl__xmlFree.
# See comment in libxml/xmlexports.h
DEFINES += LIBXML_STATIC
SOURCES += \
../../aspell/rclaspell.cpp \
@ -50,6 +57,7 @@ SOURCES += \
../../internfile/mh_mail.cpp \
../../internfile/mh_mbox.cpp \
../../internfile/mh_text.cpp \
../../internfile/mh_xslt.cpp \
../../internfile/mimehandler.cpp \
../../internfile/myhtmlparse.cpp \
../../internfile/txtdcode.cpp \
@ -105,6 +113,7 @@ SOURCES += \
../../utils/md5.cpp \
../../utils/md5ut.cpp \
../../utils/mimeparse.cpp \
../../utils/miniz.cpp \
../../utils/pathut.cpp \
../../utils/pxattr.cpp \
../../utils/rclionice.cpp \
@ -114,8 +123,7 @@ SOURCES += \
../../utils/strmatcher.cpp \
../../utils/transcode.cpp \
../../utils/wipedir.cpp \
../../windows/strptime.cpp \
../../windows/dirent.c
../../windows/strptime.cpp
INCLUDEPATH += ../../common ../../index ../../internfile ../../query \
../../unac ../../utils ../../aspell ../../rcldb ../../qtgui \
@ -129,10 +137,16 @@ windows {
contains(QMAKE_CC, cl){
# Visual Studio
}
LIBS += c:/temp/xapian-core-1.4.5/.libs/libxapian-30.dll \
c:/temp/zlib-1.2.8/zlib1.dll -liconv -lshlwapi -lpsapi -lkernel32
LIBS += C:/recolldeps/libxslt/libxslt-1.1.29/win32/bin.mingw/libxslt.a \
C:/recolldeps/libxml2/libxml2-2.9.4+dfsg1/win32/bin.mingw/libxml2.a \
c:/recolldeps/xapian-core-1.4.5/.libs/libxapian-30.dll \
c:/recolldeps/zlib-1.2.8/zlib1.dll \
-liconv -lshlwapi -lpsapi -lkernel32
INCLUDEPATH += ../../windows \
C:/temp/xapian-core-1.4.5/include
C:/recolldeps/xapian-core-1.4.5/include \
C:/recolldeps/libxslt/libxslt-1.1.29/ \
C:/recolldeps/libxml2/libxml2-2.9.4+dfsg1/include
}
unix {

View File

@ -19,6 +19,9 @@
#include <shellapi.h>
#include <stdio.h>
#include <stdlib.h>
#include "safewindows.h"
#include "pathut.h"
#include "transcode.h"
using namespace std;
@ -41,6 +44,10 @@ int op_flags;
int main(int argc, char *argv[])
{
int wargc;
wchar_t **wargv = CommandLineToArgvW(GetCommandLineW(), &wargc);
// Yes we could use wargv
thisprog = argv[0];
argc--; argv++;
int imode = 0;
@ -62,7 +69,9 @@ int main(int argc, char *argv[])
if (argc != 1) {
Usage();
}
char *fn = strdup(argv[0]);
wchar_t *wfn = wargv[1];
// Do we need this ?
//https://msdn.microsoft.com/en-us/library/windows/desktop/bb762153%28v=vs.85%29.aspx
//CoInitializeEx(NULL, COINIT_APARTMENTTHREADED | COINIT_DISABLE_OLE1DDE);
@ -73,9 +82,10 @@ int main(int argc, char *argv[])
default: wmode = SW_SHOWNORMAL; break;
}
int ret = (int)ShellExecute(NULL, "open", fn, NULL, NULL, wmode);
int ret = (int)ShellExecuteW(NULL, L"open", wfn, NULL, NULL, wmode);
if (ret) {
fprintf(stderr, "ShellExecute returned %d\n", ret);
}
LocalFree(wargv);
return ret;
}

View File

@ -2,7 +2,7 @@
; SEE THE DOCUMENTATION FOR DETAILS ON CREATING INNO SETUP SCRIPT FILES!
#define MyAppName "Recoll"
#define MyAppVersion "1.24.1-20180517-96c6fd"
#define MyAppVersion "1.25.0-20190125-540140bd"
#define MyAppPublisher "Recoll.org"
#define MyAppURL "http://www.recoll.org"
#define MyAppExeName "recoll.exe"