more filter conversion to python: svg and xml. Get rid of rclnull
--HG-- branch : WINDOWSPORT
This commit is contained in:
parent
07e2aa68a3
commit
24c77d2984
@ -1,9 +0,0 @@
|
|||||||
#!/bin/sh
|
|
||||||
# It may make sense in some cases to set this null filter (no output)
|
|
||||||
# instead of using recoll_noindex or leaving the default filter in
|
|
||||||
# case one doesn't want to install it: this will avoid endless retries
|
|
||||||
# to reindex the affected files, as recoll will think it has succeeded
|
|
||||||
# indexing them. Downside: the files won't be indexed when one
|
|
||||||
# actually installs the real filter; reindexing with -z will then be needed
|
|
||||||
|
|
||||||
exit 0
|
|
||||||
140
src/filters/rclsvg.py
Executable file
140
src/filters/rclsvg.py
Executable file
@ -0,0 +1,140 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# Copyright (C) 2014 J.F.Dockes
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the
|
||||||
|
# Free Software Foundation, Inc.,
|
||||||
|
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
######################################
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import rclexecm
|
||||||
|
import rclxslt
|
||||||
|
|
||||||
|
# XSLT 1.0 stylesheet turning an SVG document into a small HTML page:
#  - svg:title becomes the HTML <title>
#  - svg:desc and dc:subject become "keywords" meta tags
#  - dc:creator becomes an "author" meta tag
#  - dc:description becomes a "description" meta tag
#  - every svg:text element becomes a <p> in the body
# Both the svg and dc prefixes are only used for matching input nodes, so
# both are listed in exclude-result-prefixes to keep their namespace
# declarations off the generated <html> element.
# The lines inside the string are kept flush-left: the text between
# <xsl:text> and </xsl:text> is emitted verbatim, so indenting the closing
# tag would add spaces to the output.
stylesheet_all = '''<?xml version="1.0"?>
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns:dc="http://purl.org/dc/elements/1.1/"
exclude-result-prefixes="svg dc"
>

<xsl:output method="html" encoding="UTF-8"/>

<xsl:template match="/">
<html>
<head>
<xsl:apply-templates select="svg:svg/svg:title"/>
<xsl:apply-templates select="svg:svg/svg:desc"/>
<xsl:apply-templates select="svg:svg/svg:metadata/descendant::dc:creator"/>
<xsl:apply-templates select="svg:svg/svg:metadata/descendant::dc:subject"/>
<xsl:apply-templates select="svg:svg/svg:metadata/descendant::dc:description"/>
</head>
<body>
<xsl:apply-templates select="//svg:text"/>
</body>
</html>
</xsl:template>

<xsl:template match="svg:desc">
<meta>
<xsl:attribute name="name">keywords</xsl:attribute>
<xsl:attribute name="content">
<xsl:value-of select="."/>
</xsl:attribute>
</meta><xsl:text>
</xsl:text>
</xsl:template>

<xsl:template match="dc:creator">
<meta>
<xsl:attribute name="name">author</xsl:attribute>
<xsl:attribute name="content">
<xsl:value-of select="."/>
</xsl:attribute>
</meta><xsl:text>
</xsl:text>
</xsl:template>

<xsl:template match="dc:subject">
<meta>
<xsl:attribute name="name">keywords</xsl:attribute>
<xsl:attribute name="content">
<xsl:value-of select="."/>
</xsl:attribute>
</meta><xsl:text>
</xsl:text>
</xsl:template>

<xsl:template match="dc:description">
<meta>
<xsl:attribute name="name">description</xsl:attribute>
<xsl:attribute name="content">
<xsl:value-of select="."/>
</xsl:attribute>
</meta><xsl:text>
</xsl:text>
</xsl:template>

<xsl:template match="svg:title">
<title><xsl:value-of select="."/></title><xsl:text>
</xsl:text>
</xsl:template>

<xsl:template match="svg:text">
<p><xsl:value-of select="."/></p><xsl:text>
</xsl:text>
</xsl:template>

</xsl:stylesheet>
'''
|
||||||
|
|
||||||
|
class SVGExtractor:
    """File type handler converting one SVG file to HTML through XSLT.

    The handler applies the module-level ``stylesheet_all`` to the file
    contents with ``rclxslt.apply_sheet_data``. A file yields a single
    document: ``getnext`` returns it once, then reports end of data.
    """

    def __init__(self, em):
        # em: the rclexecm.RclExecM object; used here only for logging
        # through its rclog() method.
        self.em = em
        # Number of documents already returned for the current file.
        self.currentindex = 0

    def extractone(self, params):
        """Transform the file named by params["filename:"].

        Returns a 4-tuple ``(ok, data, "", rclexecm.RclExecM.eofnow)``.
        ``ok`` is False and ``data`` is empty when the file name is missing
        or when reading/transforming the file fails; the problem is logged
        through ``self.em.rclog`` in both cases.
        """
        if "filename:" not in params:
            self.em.rclog("extractone: no mime or file name")
            return (False, "", "", rclexecm.RclExecM.eofnow)
        fn = params["filename:"]

        try:
            # The context manager closes the file even if read() fails.
            with open(fn, 'r') as f:
                data = f.read()
            docdata = rclxslt.apply_sheet_data(stylesheet_all, data)
        except Exception as err:
            # Both the file name and the error need a placeholder: with a
            # single %s the formatting itself raised TypeError inside the
            # handler and the failure was never reported.
            self.em.rclog("%s: bad data: %s" % (fn, err))
            return (False, "", "", rclexecm.RclExecM.eofnow)

        return (True, docdata, "", rclexecm.RclExecM.eofnow)

    ###### File type handler api, used by rclexecm ---------->
    def openfile(self, params):
        """Reset the per-file document counter. Always returns True."""
        self.currentindex = 0
        return True

    def getipath(self, params):
        """Return the document for the file; same result as extractone()."""
        return self.extractone(params)

    def getnext(self, params):
        """Return the file's single document, then end-of-data on later calls."""
        if self.currentindex >= 1:
            return (False, "", "", rclexecm.RclExecM.eofnow)
        else:
            ret = self.extractone(params)
            # Counted whether or not the extraction succeeded, so a bad
            # file is not retried by subsequent getnext() calls.
            self.currentindex += 1
            return ret
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Script entry point: build the rclexecm protocol object, attach an
    # SVG handler to it and hand both over to rclexecm.main().
    protocol = rclexecm.RclExecM()
    handler = SVGExtractor(protocol)
    rclexecm.main(protocol, handler)
|
||||||
98
src/filters/rclxml.py
Executable file
98
src/filters/rclxml.py
Executable file
@ -0,0 +1,98 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# Copyright (C) 2014 J.F.Dockes
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the
|
||||||
|
# Free Software Foundation, Inc.,
|
||||||
|
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
######################################
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import rclexecm
|
||||||
|
import rclxslt
|
||||||
|
|
||||||
|
stylesheet_all = '''<?xml version="1.0"?>
|
||||||
|
<xsl:stylesheet version="1.0"
|
||||||
|
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
|
||||||
|
|
||||||
|
<xsl:output method="html" encoding="UTF-8"/>
|
||||||
|
|
||||||
|
<xsl:template match="/">
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<xsl:if test="//*[local-name() = 'title']">
|
||||||
|
<title>
|
||||||
|
<xsl:value-of select="//*[local-name() = 'title'][1]"/>
|
||||||
|
</title>
|
||||||
|
</xsl:if>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<xsl:apply-templates/>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
<xsl:template match="text()">
|
||||||
|
<xsl:if test="string-length(normalize-space(.)) > 0">
|
||||||
|
<p><xsl:value-of select="."/></p>
|
||||||
|
<xsl:text>
|
||||||
|
</xsl:text>
|
||||||
|
</xsl:if>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
<xsl:template match="*">
|
||||||
|
<xsl:apply-templates/>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
</xsl:stylesheet>
|
||||||
|
'''
|
||||||
|
|
||||||
|
class XMLExtractor:
    """File type handler converting one XML file to HTML through XSLT.

    The handler applies the module-level ``stylesheet_all`` to the file
    contents with ``rclxslt.apply_sheet_data``. A file yields a single
    document: ``getnext`` returns it once, then reports end of data.
    """

    def __init__(self, em):
        # em: the rclexecm.RclExecM object; used here only for logging
        # through its rclog() method.
        self.em = em
        # Number of documents already returned for the current file.
        self.currentindex = 0

    def extractone(self, params):
        """Transform the file named by params["filename:"].

        Returns a 4-tuple ``(ok, data, "", rclexecm.RclExecM.eofnow)``.
        ``ok`` is False and ``data`` is empty when the file name is missing
        or when reading/transforming the file fails; the problem is logged
        through ``self.em.rclog`` in both cases.
        """
        if "filename:" not in params:
            self.em.rclog("extractone: no mime or file name")
            return (False, "", "", rclexecm.RclExecM.eofnow)
        fn = params["filename:"]

        try:
            # The context manager closes the file even if read() fails.
            with open(fn, 'r') as f:
                data = f.read()
            docdata = rclxslt.apply_sheet_data(stylesheet_all, data)
        except Exception as err:
            # Both the file name and the error need a placeholder: with a
            # single %s the formatting itself raised TypeError inside the
            # handler and the failure was never reported.
            self.em.rclog("%s: bad data: %s" % (fn, err))
            return (False, "", "", rclexecm.RclExecM.eofnow)

        return (True, docdata, "", rclexecm.RclExecM.eofnow)

    ###### File type handler api, used by rclexecm ---------->
    def openfile(self, params):
        """Reset the per-file document counter. Always returns True."""
        self.currentindex = 0
        return True

    def getipath(self, params):
        """Return the document for the file; same result as extractone()."""
        return self.extractone(params)

    def getnext(self, params):
        """Return the file's single document, then end-of-data on later calls."""
        if self.currentindex >= 1:
            return (False, "", "", rclexecm.RclExecM.eofnow)
        else:
            ret = self.extractone(params)
            # Counted whether or not the extraction succeeded, so a bad
            # file is not retried by subsequent getnext() calls.
            self.currentindex += 1
            return ret
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Script entry point: build the rclexecm protocol object, attach an
    # XML handler to it and hand both over to rclexecm.main().
    protocol = rclexecm.RclExecM()
    handler = XMLExtractor(protocol)
    rclexecm.main(protocol, handler)
|
||||||
59
src/internfile/mh_null.h
Normal file
59
src/internfile/mh_null.h
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
/* Copyright (C) 2004 J.F.Dockes
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the
|
||||||
|
* Free Software Foundation, Inc.,
|
||||||
|
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
*/
|
||||||
|
#ifndef _MH_NULL_H_INCLUDED_
|
||||||
|
#define _MH_NULL_H_INCLUDED_
|
||||||
|
|
||||||
|
// It may make sense in some cases to set this null filter (no output)
|
||||||
|
// instead of using recoll_noindex or leaving the default filter in
|
||||||
|
// case one doesn't want to install it: this will avoid endless retries
|
||||||
|
// to reindex the affected files, as recoll will think it has succeeded
|
||||||
|
// indexing them. Downside: the files won't be indexed when one
|
||||||
|
// actually installs the real filter; reindexing with -z will then be needed.
|
||||||
|
// Actually used for empty files
|
||||||
|
// Associated to application/x-zerosize, so use
|
||||||
|
// <mimetype> = internal application/x-zerosize
|
||||||
|
// in mimeconf
|
||||||
|
#include <string>
|
||||||
|
#include "cstr.h"
|
||||||
|
#include "mimehandler.h"
|
||||||
|
|
||||||
|
class MimeHandlerNull : public RecollFilter {
|
||||||
|
public:
|
||||||
|
MimeHandlerNull(RclConfig *cnf, const std::string& id)
|
||||||
|
: RecollFilter(cnf, id)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
virtual ~MimeHandlerNull()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
virtual bool set_document_file(const string& mt, const string& fn)
|
||||||
|
{
|
||||||
|
RecollFilter::set_document_file(mt, fn);
|
||||||
|
return m_havedoc = true;
|
||||||
|
}
|
||||||
|
virtual bool next_document()
|
||||||
|
{
|
||||||
|
if (m_havedoc == false)
|
||||||
|
return false;
|
||||||
|
m_havedoc = false;
|
||||||
|
m_metaData[cstr_dj_keycontent] = cstr_null;
|
||||||
|
m_metaData[cstr_dj_keymt] = cstr_textplain;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif /* _MH_NULL_H_INCLUDED_ */
|
||||||
@ -39,6 +39,7 @@ using namespace std;
|
|||||||
#include "mh_text.h"
|
#include "mh_text.h"
|
||||||
#include "mh_symlink.h"
|
#include "mh_symlink.h"
|
||||||
#include "mh_unknown.h"
|
#include "mh_unknown.h"
|
||||||
|
#include "mh_null.h"
|
||||||
#include "ptmutex.h"
|
#include "ptmutex.h"
|
||||||
|
|
||||||
// Performance help: we use a pool of already known and created
|
// Performance help: we use a pool of already known and created
|
||||||
@ -162,6 +163,10 @@ static RecollFilter *mhFactory(RclConfig *config, const string &mime,
|
|||||||
LOGDEB2(("mhFactory(%s): ret MimeHandlerSymlink\n", mime.c_str()));
|
LOGDEB2(("mhFactory(%s): ret MimeHandlerSymlink\n", mime.c_str()));
|
||||||
MD5String("MimeHandlerSymlink", id);
|
MD5String("MimeHandlerSymlink", id);
|
||||||
return nobuild ? 0 : new MimeHandlerSymlink(config, id);
|
return nobuild ? 0 : new MimeHandlerSymlink(config, id);
|
||||||
|
} else if ("application/x-zerosize" == lmime) {
|
||||||
|
LOGDEB(("mhFactory(%s): ret MimeHandlerNull\n", mime.c_str()));
|
||||||
|
MD5String("MimeHandlerNull", id);
|
||||||
|
return nobuild ? 0 : new MimeHandlerNull(config, id);
|
||||||
} else if (lmime.find("text/") == 0) {
|
} else if (lmime.find("text/") == 0) {
|
||||||
// Try to handle unknown text/xx as text/plain. This
|
// Try to handle unknown text/xx as text/plain. This
|
||||||
// only happen if the text/xx was defined as "internal" in
|
// only happen if the text/xx was defined as "internal" in
|
||||||
|
|||||||
@ -133,7 +133,8 @@ image/vnd.djvu = exec rcldjvu
|
|||||||
image/svg+xml = exec rclsvg
|
image/svg+xml = exec rclsvg
|
||||||
image/x-xcf = execm rclimg
|
image/x-xcf = execm rclimg
|
||||||
inode/symlink = internal
|
inode/symlink = internal
|
||||||
inode/x-empty = exec rclnull
|
application/x-zerosize = internal
|
||||||
|
inode/x-empty = internal application/x-zerosize
|
||||||
message/rfc822 = internal
|
message/rfc822 = internal
|
||||||
text/calendar = execm rclics;mimetype=text/plain
|
text/calendar = execm rclics;mimetype=text/plain
|
||||||
text/html = internal
|
text/html = internal
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user