diff --git a/src/filters/rclnull b/src/filters/rclnull deleted file mode 100755 index eba02f4a..00000000 --- a/src/filters/rclnull +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/sh -# It may make sense in some cases to set this null filter (no output) -# instead of using recoll_noindex or leaving the default filter in -# case one doesn't want to install it: this will avoid endless retries -# to reindex the affected files, as recoll will think it has succeeded -# indexing them. Downside: the files won't be indexed when one -# actually installs the real filter, will need a -z - -exit 0 diff --git a/src/filters/rclsvg.py b/src/filters/rclsvg.py new file mode 100755 index 00000000..b005ff86 --- /dev/null +++ b/src/filters/rclsvg.py @@ -0,0 +1,140 @@ +#!/usr/bin/env python +# Copyright (C) 2014 J.F.Dockes +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the +# Free Software Foundation, Inc., +# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +###################################### + +import sys +import rclexecm +import rclxslt + +stylesheet_all = ''' + + + + + + + + + + + + + + + + + + + + + + keywords + + + + + + + + + + author + + + + + + + + + + keywords + + + + + + + + + + description + + + + + + + + + <xsl:value-of select="."/> + + + + +

+ +
+ +
class SVGExtractor:
    """Recoll execm handler producing a single document from an SVG file.

    The file named by the "filename:" request parameter is read and run
    through rclxslt.apply_sheet_data() with the module-level
    ``stylesheet_all`` sheet.  Every file yields at most one document.
    """

    def __init__(self, em):
        # em: the rclexecm.RclExecM protocol object; only its rclog()
        # method is used by this class.
        self.em = em
        # Number of documents already returned for the current file.
        self.currentindex = 0

    def extractone(self, params):
        """Transform the file designated by params["filename:"].

        Returns a 4-tuple.  The first element is True on success and
        False on failure, the second is the transformed data (empty on
        failure).  The last two are always "" and
        rclexecm.RclExecM.eofnow in this handler.
        """
        # 'in' instead of dict.has_key(): same result, and it also works
        # on Python 3 where has_key() no longer exists.
        if "filename:" not in params:
            self.em.rclog("extractone: no mime or file name")
            return (False, "", "", rclexecm.RclExecM.eofnow)
        fn = params["filename:"]

        try:
            # Close the handle deterministically instead of relying on
            # garbage collection.
            with open(fn, 'r') as f:
                data = f.read()
            docdata = rclxslt.apply_sheet_data(stylesheet_all, data)
        except Exception as err:
            # The format needs one placeholder per argument: with a single
            # %s the logging statement itself raised TypeError and hid the
            # real error.
            self.em.rclog("%s: bad data: %s" % (fn, err))
            return (False, "", "", rclexecm.RclExecM.eofnow)

        return (True, docdata, "", rclexecm.RclExecM.eofnow)

    ###### File type handler api, used by rclexecm ---------->
    def openfile(self, params):
        """Reset the per-file document counter; always returns True."""
        self.currentindex = 0
        return True

    def getipath(self, params):
        """Return the document for the request; same as extractone()."""
        return self.extractone(params)

    def getnext(self, params):
        """Return the file's only document once, then a failure tuple."""
        if self.currentindex >= 1:
            return (False, "", "", rclexecm.RclExecM.eofnow)
        ret = self.extractone(params)
        self.currentindex += 1
        return ret


if __name__ == '__main__':
    proto = rclexecm.RclExecM()
    extract = SVGExtractor(proto)
    rclexecm.main(proto, extract)
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the +# Free Software Foundation, Inc., +# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +###################################### + +import sys +import rclexecm +import rclxslt + +stylesheet_all = ''' + + + + + + + + + + <xsl:value-of select="//*[local-name() = 'title'][1]"/> + + + + + + + + + + + +

+ + +
+
+ + + + + +
class XMLExtractor:
    """Recoll execm handler producing a single document from an XML file.

    The file named by the "filename:" request parameter is read and run
    through rclxslt.apply_sheet_data() with the module-level
    ``stylesheet_all`` sheet.  Every file yields at most one document.
    """

    def __init__(self, em):
        # em: the rclexecm.RclExecM protocol object; only its rclog()
        # method is used by this class.
        self.em = em
        # Number of documents already returned for the current file.
        self.currentindex = 0

    def extractone(self, params):
        """Transform the file designated by params["filename:"].

        Returns a 4-tuple.  The first element is True on success and
        False on failure, the second is the transformed data (empty on
        failure).  The last two are always "" and
        rclexecm.RclExecM.eofnow in this handler.
        """
        # 'in' instead of dict.has_key(): same result, and it also works
        # on Python 3 where has_key() no longer exists.
        if "filename:" not in params:
            self.em.rclog("extractone: no mime or file name")
            return (False, "", "", rclexecm.RclExecM.eofnow)
        fn = params["filename:"]

        try:
            # Close the handle deterministically instead of relying on
            # garbage collection.
            with open(fn, 'r') as f:
                data = f.read()
            docdata = rclxslt.apply_sheet_data(stylesheet_all, data)
        except Exception as err:
            # The format needs one placeholder per argument: with a single
            # %s the logging statement itself raised TypeError and hid the
            # real error.
            self.em.rclog("%s: bad data: %s" % (fn, err))
            return (False, "", "", rclexecm.RclExecM.eofnow)

        return (True, docdata, "", rclexecm.RclExecM.eofnow)

    ###### File type handler api, used by rclexecm ---------->
    def openfile(self, params):
        """Reset the per-file document counter; always returns True."""
        self.currentindex = 0
        return True

    def getipath(self, params):
        """Return the document for the request; same as extractone()."""
        return self.extractone(params)

    def getnext(self, params):
        """Return the file's only document once, then a failure tuple."""
        if self.currentindex >= 1:
            return (False, "", "", rclexecm.RclExecM.eofnow)
        ret = self.extractone(params)
        self.currentindex += 1
        return ret


if __name__ == '__main__':
    proto = rclexecm.RclExecM()
    extract = XMLExtractor(proto)
    rclexecm.main(proto, extract)
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the + * Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ +#ifndef _MH_NULL_H_INCLUDED_ +#define _MH_NULL_H_INCLUDED_ + +// It may make sense in some cases to set this null filter (no output) +// instead of using recoll_noindex or leaving the default filter in +// case one doesn't want to install it: this will avoid endless retries +// to reindex the affected files, as recoll will think it has succeeded +// indexing them. Downside: the files won't be indexed when one +// actually installs the real filter, will need a -z +// Actually used for empty files +// Associated to application/x-zerosize, so use +// = internal application/x-zerosize +// in mimeconf +#include +#include "cstr.h" +#include "mimehandler.h" + +class MimeHandlerNull : public RecollFilter { + public: + MimeHandlerNull(RclConfig *cnf, const std::string& id) + : RecollFilter(cnf, id) + { + } + virtual ~MimeHandlerNull() + { + } + virtual bool set_document_file(const string& mt, const string& fn) + { + RecollFilter::set_document_file(mt, fn); + return m_havedoc = true; + } + virtual bool next_document() + { + if (m_havedoc == false) + return false; + m_havedoc = false; + m_metaData[cstr_dj_keycontent] = cstr_null; + m_metaData[cstr_dj_keymt] = cstr_textplain; + return true; + } +}; + +#endif /* _MH_NULL_H_INCLUDED_ */ diff --git a/src/internfile/mimehandler.cpp b/src/internfile/mimehandler.cpp index 05e33223..d7fcce33 100644 --- a/src/internfile/mimehandler.cpp +++ b/src/internfile/mimehandler.cpp @@ -39,6 +39,7 @@ using namespace std; #include "mh_text.h" #include "mh_symlink.h" #include "mh_unknown.h" +#include "mh_null.h" #include "ptmutex.h" // Performance help: we use a pool of already known and created @@ -162,6 +163,10 @@ static RecollFilter *mhFactory(RclConfig *config, const string &mime, LOGDEB2(("mhFactory(%s): ret 
MimeHandlerSymlink\n", mime.c_str())); MD5String("MimeHandlerSymlink", id); return nobuild ? 0 : new MimeHandlerSymlink(config, id); + } else if ("application/x-zerosize" == lmime) { + LOGDEB(("mhFactory(%s): ret MimeHandlerNull\n", mime.c_str())); + MD5String("MimeHandlerNull", id); + return nobuild ? 0 : new MimeHandlerNull(config, id); } else if (lmime.find("text/") == 0) { // Try to handle unknown text/xx as text/plain. This // only happen if the text/xx was defined as "internal" in diff --git a/src/sampleconf/mimeconf b/src/sampleconf/mimeconf index 29695b72..8732d030 100644 --- a/src/sampleconf/mimeconf +++ b/src/sampleconf/mimeconf @@ -133,7 +133,8 @@ image/vnd.djvu = exec rcldjvu image/svg+xml = exec rclsvg image/x-xcf = execm rclimg inode/symlink = internal -inode/x-empty = exec rclnull +application/x-zerosize = internal +inode/x-empty = internal application/x-zerosize message/rfc822 = internal text/calendar = execm rclics;mimetype=text/plain text/html = internal