more filter conversion to python: svg and xml. Get rid of rclnull
--HG-- branch : WINDOWSPORT
This commit is contained in:
parent
07e2aa68a3
commit
24c77d2984
@ -1,9 +0,0 @@
|
||||
#!/bin/sh
|
||||
# It may make sense in some cases to set this null filter (no output)
|
||||
# instead of using recoll_noindex or leaving the default filter in
|
||||
# case one doesn't want to install it: this will avoid endless retries
|
||||
# to reindex the affected files, as recoll will think it has succeeded
|
||||
# indexing them. Downside: the files won't be indexed when one
|
||||
# actually installs the real filter, will need a -z
|
||||
|
||||
exit 0
|
||||
140
src/filters/rclsvg.py
Executable file
140
src/filters/rclsvg.py
Executable file
@ -0,0 +1,140 @@
|
||||
#!/usr/bin/env python
|
||||
# Copyright (C) 2014 J.F.Dockes
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the
|
||||
# Free Software Foundation, Inc.,
|
||||
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
######################################
|
||||
|
||||
import sys
|
||||
import rclexecm
|
||||
import rclxslt
|
||||
|
||||
stylesheet_all = '''<?xml version="1.0"?>
|
||||
<xsl:stylesheet version="1.0"
|
||||
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
||||
xmlns:svg="http://www.w3.org/2000/svg"
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
exclude-result-prefixes="svg"
|
||||
>
|
||||
|
||||
<xsl:output method="html" encoding="UTF-8"/>
|
||||
|
||||
<xsl:template match="/">
|
||||
<html>
|
||||
<head>
|
||||
<xsl:apply-templates select="svg:svg/svg:title"/>
|
||||
<xsl:apply-templates select="svg:svg/svg:desc"/>
|
||||
<xsl:apply-templates select="svg:svg/svg:metadata/descendant::dc:creator"/>
|
||||
<xsl:apply-templates select="svg:svg/svg:metadata/descendant::dc:subject"/>
|
||||
<xsl:apply-templates select="svg:svg/svg:metadata/descendant::dc:description"/>
|
||||
</head>
|
||||
<body>
|
||||
<xsl:apply-templates select="//svg:text"/>
|
||||
</body>
|
||||
</html>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="svg:desc">
|
||||
<meta>
|
||||
<xsl:attribute name="name">keywords</xsl:attribute>
|
||||
<xsl:attribute name="content">
|
||||
<xsl:value-of select="."/>
|
||||
</xsl:attribute>
|
||||
</meta><xsl:text>
|
||||
</xsl:text>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="dc:creator">
|
||||
<meta>
|
||||
<xsl:attribute name="name">author</xsl:attribute>
|
||||
<xsl:attribute name="content">
|
||||
<xsl:value-of select="."/>
|
||||
</xsl:attribute>
|
||||
</meta><xsl:text>
|
||||
</xsl:text>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="dc:subject">
|
||||
<meta>
|
||||
<xsl:attribute name="name">keywords</xsl:attribute>
|
||||
<xsl:attribute name="content">
|
||||
<xsl:value-of select="."/>
|
||||
</xsl:attribute>
|
||||
</meta><xsl:text>
|
||||
</xsl:text>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="dc:description">
|
||||
<meta>
|
||||
<xsl:attribute name="name">description</xsl:attribute>
|
||||
<xsl:attribute name="content">
|
||||
<xsl:value-of select="."/>
|
||||
</xsl:attribute>
|
||||
</meta><xsl:text>
|
||||
</xsl:text>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="svg:title">
|
||||
<title><xsl:value-of select="."/></title><xsl:text>
|
||||
</xsl:text>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="svg:text">
|
||||
<p><xsl:value-of select="."/></p><xsl:text>
|
||||
</xsl:text>
|
||||
</xsl:template>
|
||||
|
||||
</xsl:stylesheet>
|
||||
'''
|
||||
|
||||
class SVGExtractor:
    """File type handler converting one SVG file to HTML through XSLT.

    The object is driven by rclexecm (see the ``__main__`` section of this
    file), which calls openfile(), getipath() and getnext(). The actual
    conversion is delegated to rclxslt.apply_sheet_data() with the
    module-level ``stylesheet_all`` sheet.
    """

    def __init__(self, em):
        # Protocol object handed over by the caller; only its rclog()
        # method is used by this class.
        self.em = em
        # Count of documents already returned by getnext() for the
        # current file.
        self.currentindex = 0

    def extractone(self, params):
        """Transform the file named by params["filename:"].

        Returns a 4-tuple whose first element tells success, and whose
        second element is the stylesheet output (empty string on
        failure). The third element is always "" and the fourth is always
        rclexecm.RclExecM.eofnow.
        """
        # 'in' works with both Python 2 and Python 3 dictionaries, which
        # dict.has_key() does not.
        if "filename:" not in params:
            self.em.rclog("extractone: no mime or file name")
            return (False, "", "", rclexecm.RclExecM.eofnow)
        fn = params["filename:"]

        try:
            # Binary mode: pass the file content through untouched (no
            # newline translation on Windows). The 'with' block closes
            # the file even if reading fails.
            # NOTE(review): assumes apply_sheet_data() accepts the raw
            # file content, as it already received it with Python 2 on
            # Unix -- confirm against rclxslt.
            with open(fn, 'rb') as f:
                data = f.read()
            docdata = rclxslt.apply_sheet_data(stylesheet_all, data)
        except Exception as err:
            # Two values need two placeholders: the previous format
            # string had a single one and raised TypeError right here.
            self.em.rclog("%s: bad data: %s" % (fn, err))
            return (False, "", "", rclexecm.RclExecM.eofnow)

        return (True, docdata, "", rclexecm.RclExecM.eofnow)

    ###### File type handler api, used by rclexecm ---------->
    def openfile(self, params):
        """Reset the per-file document counter. Always returns True."""
        self.currentindex = 0
        return True

    def getipath(self, params):
        """Return the result of extractone() for params."""
        return self.extractone(params)

    def getnext(self, params):
        """Return the file's document on the first call after openfile().

        Later calls return a failure tuple: there is exactly one document
        per file.
        """
        if self.currentindex >= 1:
            return (False, "", "", rclexecm.RclExecM.eofnow)
        ret = self.extractone(params)
        self.currentindex += 1
        return ret
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: create the rclexecm protocol object, attach an
    # SVG handler to it, and let rclexecm.main() run with both.
    protocol = rclexecm.RclExecM()
    handler = SVGExtractor(protocol)
    rclexecm.main(protocol, handler)
|
||||
98
src/filters/rclxml.py
Executable file
98
src/filters/rclxml.py
Executable file
@ -0,0 +1,98 @@
|
||||
#!/usr/bin/env python
|
||||
# Copyright (C) 2014 J.F.Dockes
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the
|
||||
# Free Software Foundation, Inc.,
|
||||
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
######################################
|
||||
|
||||
import sys
|
||||
import rclexecm
|
||||
import rclxslt
|
||||
|
||||
stylesheet_all = '''<?xml version="1.0"?>
|
||||
<xsl:stylesheet version="1.0"
|
||||
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
|
||||
|
||||
<xsl:output method="html" encoding="UTF-8"/>
|
||||
|
||||
<xsl:template match="/">
|
||||
<html>
|
||||
<head>
|
||||
<xsl:if test="//*[local-name() = 'title']">
|
||||
<title>
|
||||
<xsl:value-of select="//*[local-name() = 'title'][1]"/>
|
||||
</title>
|
||||
</xsl:if>
|
||||
</head>
|
||||
<body>
|
||||
<xsl:apply-templates/>
|
||||
</body>
|
||||
</html>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="text()">
|
||||
<xsl:if test="string-length(normalize-space(.)) > 0">
|
||||
<p><xsl:value-of select="."/></p>
|
||||
<xsl:text>
|
||||
</xsl:text>
|
||||
</xsl:if>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="*">
|
||||
<xsl:apply-templates/>
|
||||
</xsl:template>
|
||||
|
||||
</xsl:stylesheet>
|
||||
'''
|
||||
|
||||
class XMLExtractor:
    """File type handler converting one generic XML file to HTML via XSLT.

    The object is driven by rclexecm (see the ``__main__`` section of this
    file), which calls openfile(), getipath() and getnext(). The actual
    conversion is delegated to rclxslt.apply_sheet_data() with the
    module-level ``stylesheet_all`` sheet.
    """

    def __init__(self, em):
        # Protocol object handed over by the caller; only its rclog()
        # method is used by this class.
        self.em = em
        # Count of documents already returned by getnext() for the
        # current file.
        self.currentindex = 0

    def extractone(self, params):
        """Transform the file named by params["filename:"].

        Returns a 4-tuple whose first element tells success, and whose
        second element is the stylesheet output (empty string on
        failure). The third element is always "" and the fourth is always
        rclexecm.RclExecM.eofnow.
        """
        # 'in' works with both Python 2 and Python 3 dictionaries, which
        # dict.has_key() does not.
        if "filename:" not in params:
            self.em.rclog("extractone: no mime or file name")
            return (False, "", "", rclexecm.RclExecM.eofnow)
        fn = params["filename:"]

        try:
            # Binary mode: pass the file content through untouched (no
            # newline translation on Windows). The 'with' block closes
            # the file even if reading fails.
            # NOTE(review): assumes apply_sheet_data() accepts the raw
            # file content, as it already received it with Python 2 on
            # Unix -- confirm against rclxslt.
            with open(fn, 'rb') as f:
                data = f.read()
            docdata = rclxslt.apply_sheet_data(stylesheet_all, data)
        except Exception as err:
            # Two values need two placeholders: the previous format
            # string had a single one and raised TypeError right here.
            self.em.rclog("%s: bad data: %s" % (fn, err))
            return (False, "", "", rclexecm.RclExecM.eofnow)

        return (True, docdata, "", rclexecm.RclExecM.eofnow)

    ###### File type handler api, used by rclexecm ---------->
    def openfile(self, params):
        """Reset the per-file document counter. Always returns True."""
        self.currentindex = 0
        return True

    def getipath(self, params):
        """Return the result of extractone() for params."""
        return self.extractone(params)

    def getnext(self, params):
        """Return the file's document on the first call after openfile().

        Later calls return a failure tuple: there is exactly one document
        per file.
        """
        if self.currentindex >= 1:
            return (False, "", "", rclexecm.RclExecM.eofnow)
        ret = self.extractone(params)
        self.currentindex += 1
        return ret
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: create the rclexecm protocol object, attach a
    # generic XML handler to it, and let rclexecm.main() run with both.
    protocol = rclexecm.RclExecM()
    handler = XMLExtractor(protocol)
    rclexecm.main(protocol, handler)
|
||||
59
src/internfile/mh_null.h
Normal file
59
src/internfile/mh_null.h
Normal file
@ -0,0 +1,59 @@
|
||||
/* Copyright (C) 2004 J.F.Dockes
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the
|
||||
* Free Software Foundation, Inc.,
|
||||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
#ifndef _MH_NULL_H_INCLUDED_
|
||||
#define _MH_NULL_H_INCLUDED_
|
||||
|
||||
// It may make sense in some cases to set this null filter (no output)
|
||||
// instead of using recoll_noindex or leaving the default filter in
|
||||
// case one doesn't want to install it: this will avoid endless retries
|
||||
// to reindex the affected files, as recoll will think it has succeeded
|
||||
// indexing them. Downside: the files won't be indexed when one
|
||||
// actually installs the real filter, will need a -z
|
||||
// Actually used for empty files
|
||||
// Associated to application/x-zerosize, so use
|
||||
// <mimetype> = internal application/x-zerosize
|
||||
// in mimeconf
|
||||
#include <string>
|
||||
#include "cstr.h"
|
||||
#include "mimehandler.h"
|
||||
|
||||
class MimeHandlerNull : public RecollFilter {
|
||||
public:
|
||||
MimeHandlerNull(RclConfig *cnf, const std::string& id)
|
||||
: RecollFilter(cnf, id)
|
||||
{
|
||||
}
|
||||
virtual ~MimeHandlerNull()
|
||||
{
|
||||
}
|
||||
virtual bool set_document_file(const string& mt, const string& fn)
|
||||
{
|
||||
RecollFilter::set_document_file(mt, fn);
|
||||
return m_havedoc = true;
|
||||
}
|
||||
virtual bool next_document()
|
||||
{
|
||||
if (m_havedoc == false)
|
||||
return false;
|
||||
m_havedoc = false;
|
||||
m_metaData[cstr_dj_keycontent] = cstr_null;
|
||||
m_metaData[cstr_dj_keymt] = cstr_textplain;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
#endif /* _MH_NULL_H_INCLUDED_ */
|
||||
@ -39,6 +39,7 @@ using namespace std;
|
||||
#include "mh_text.h"
|
||||
#include "mh_symlink.h"
|
||||
#include "mh_unknown.h"
|
||||
#include "mh_null.h"
|
||||
#include "ptmutex.h"
|
||||
|
||||
// Performance help: we use a pool of already known and created
|
||||
@ -162,6 +163,10 @@ static RecollFilter *mhFactory(RclConfig *config, const string &mime,
|
||||
LOGDEB2(("mhFactory(%s): ret MimeHandlerSymlink\n", mime.c_str()));
|
||||
MD5String("MimeHandlerSymlink", id);
|
||||
return nobuild ? 0 : new MimeHandlerSymlink(config, id);
|
||||
} else if ("application/x-zerosize" == lmime) {
|
||||
LOGDEB(("mhFactory(%s): ret MimeHandlerNull\n", mime.c_str()));
|
||||
MD5String("MimeHandlerNull", id);
|
||||
return nobuild ? 0 : new MimeHandlerNull(config, id);
|
||||
} else if (lmime.find("text/") == 0) {
|
||||
// Try to handle unknown text/xx as text/plain. This
|
||||
// only happen if the text/xx was defined as "internal" in
|
||||
|
||||
@ -133,7 +133,8 @@ image/vnd.djvu = exec rcldjvu
|
||||
image/svg+xml = exec rclsvg
|
||||
image/x-xcf = execm rclimg
|
||||
inode/symlink = internal
|
||||
inode/x-empty = exec rclnull
|
||||
application/x-zerosize = internal
|
||||
inode/x-empty = internal application/x-zerosize
|
||||
message/rfc822 = internal
|
||||
text/calendar = execm rclics;mimetype=text/plain
|
||||
text/html = internal
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user