diff --git a/src/filters/rclnull b/src/filters/rclnull
deleted file mode 100755
index eba02f4a..00000000
--- a/src/filters/rclnull
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/sh
-# It may make sense in some cases to set this null filter (no output)
-# instead of using recoll_noindex or leaving the default filter in
-# case one doesn't want to install it: this will avoid endless retries
-# to reindex the affected files, as recoll will think it has succeeded
-# indexing them. Downside: the files won't be indexed when one
-# actually installs the real filter, will need a -z
-
-exit 0
diff --git a/src/filters/rclsvg.py b/src/filters/rclsvg.py
new file mode 100755
index 00000000..b005ff86
--- /dev/null
+++ b/src/filters/rclsvg.py
@@ -0,0 +1,140 @@
+#!/usr/bin/env python
+# Copyright (C) 2014 J.F.Dockes
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the
+# Free Software Foundation, Inc.,
+# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+######################################
+
+import sys
+import rclexecm
+import rclxslt
+
+stylesheet_all = '''
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ keywords
+
+
+
+
+
+
+
+
+
+ author
+
+
+
+
+
+
+
+
+
+ keywords
+
+
+
+
+
+
+
+
+
+ description
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+'''
+
+class SVGExtractor:
+    """rclexecm handler: one document per SVG file, built by rclxslt."""
+    def __init__(self, em):
+        self.em = em
+        self.currentindex = 0
+
+    def extractone(self, params):
+        """Run the file named by params["filename:"] through stylesheet_all."""
+        if "filename:" not in params:
+            self.em.rclog("extractone: no mime or file name")
+            return (False, "", "", rclexecm.RclExecM.eofnow)
+        fn = params["filename:"]
+        try:
+            with open(fn, 'r') as f:
+                data = f.read()
+            docdata = rclxslt.apply_sheet_data(stylesheet_all, data)
+        except Exception as err:
+            self.em.rclog("%s: bad data: %s" % (fn, err))
+            return (False, "", "", rclexecm.RclExecM.eofnow)
+        return (True, docdata, "", rclexecm.RclExecM.eofnow)
+
+    ###### File type handler api, used by rclexecm ---------->
+    def openfile(self, params):
+        self.currentindex = 0
+        return True
+
+    def getipath(self, params):
+        return self.extractone(params)
+
+    def getnext(self, params):
+        if self.currentindex >= 1:
+            return (False, "", "", rclexecm.RclExecM.eofnow)
+        ret = self.extractone(params)
+        self.currentindex += 1
+        return ret
+
+if __name__ == '__main__':
+    # Script entry point: hand an SVGExtractor over to rclexecm.main().
+    proto = rclexecm.RclExecM()
+    rclexecm.main(proto, SVGExtractor(proto))
diff --git a/src/filters/rclxml.py b/src/filters/rclxml.py
new file mode 100755
index 00000000..f3cecd1e
--- /dev/null
+++ b/src/filters/rclxml.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python
+# Copyright (C) 2014 J.F.Dockes
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the
+# Free Software Foundation, Inc.,
+# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+######################################
+
+import sys
+import rclexecm
+import rclxslt
+
+stylesheet_all = '''
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+'''
+
+class XMLExtractor:
+    """rclexecm handler: one document per XML file, built by rclxslt."""
+    def __init__(self, em):
+        self.em = em
+        self.currentindex = 0
+
+    def extractone(self, params):
+        """Run the file named by params["filename:"] through stylesheet_all."""
+        if "filename:" not in params:
+            self.em.rclog("extractone: no mime or file name")
+            return (False, "", "", rclexecm.RclExecM.eofnow)
+        fn = params["filename:"]
+        try:
+            with open(fn, 'r') as f:
+                data = f.read()
+            docdata = rclxslt.apply_sheet_data(stylesheet_all, data)
+        except Exception as err:
+            self.em.rclog("%s: bad data: %s" % (fn, err))
+            return (False, "", "", rclexecm.RclExecM.eofnow)
+        return (True, docdata, "", rclexecm.RclExecM.eofnow)
+
+    ###### File type handler api, used by rclexecm ---------->
+    def openfile(self, params):
+        self.currentindex = 0
+        return True
+
+    def getipath(self, params):
+        return self.extractone(params)
+
+    def getnext(self, params):
+        if self.currentindex >= 1:
+            return (False, "", "", rclexecm.RclExecM.eofnow)
+        ret = self.extractone(params)
+        self.currentindex += 1
+        return ret
+
+if __name__ == '__main__':
+    # Script entry point: hand an XMLExtractor over to rclexecm.main().
+    proto = rclexecm.RclExecM()
+    rclexecm.main(proto, XMLExtractor(proto))
diff --git a/src/internfile/mh_null.h b/src/internfile/mh_null.h
new file mode 100644
index 00000000..5554d57d
--- /dev/null
+++ b/src/internfile/mh_null.h
@@ -0,0 +1,59 @@
+/* Copyright (C) 2004 J.F.Dockes
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+#ifndef _MH_NULL_H_INCLUDED_
+#define _MH_NULL_H_INCLUDED_
+
+// It may make sense in some cases to set this null filter (no output)
+// instead of using recoll_noindex or leaving the default filter in
+// case one doesn't want to install it: this will avoid endless retries
+// to reindex the affected files, as recoll will think it has succeeded
+// indexing them. Downside: the files won't be indexed when one
+// actually installs the real filter, will need a -z
+// In practice, this handler is used for empty files.
+// It is associated with application/x-zerosize, so to apply it to a type use
+//   some/mimetype = internal application/x-zerosize
+// in mimeconf
+#include <string>
+#include "cstr.h"
+#include "mimehandler.h"
+
+// Yields one document per file: content cstr_null, type cstr_textplain.
+class MimeHandlerNull : public RecollFilter {
+public:
+    MimeHandlerNull(RclConfig *cnf, const std::string& id)
+        : RecollFilter(cnf, id) {}
+    virtual ~MimeHandlerNull() {}
+    // Calls the base class, ignores its result, and always returns true.
+    virtual bool set_document_file(const std::string& mt, const std::string& fn)
+    {
+        RecollFilter::set_document_file(mt, fn);
+        m_havedoc = true;
+        return true;
+    }
+    // Returns true once after set_document_file(), then false.
+    virtual bool next_document()
+    {
+        if (!m_havedoc)
+            return false;
+        m_havedoc = false;
+        m_metaData[cstr_dj_keycontent] = cstr_null;
+        m_metaData[cstr_dj_keymt] = cstr_textplain;
+        return true;
+    }
+};
+
+#endif /* _MH_NULL_H_INCLUDED_ */
diff --git a/src/internfile/mimehandler.cpp b/src/internfile/mimehandler.cpp
index 05e33223..d7fcce33 100644
--- a/src/internfile/mimehandler.cpp
+++ b/src/internfile/mimehandler.cpp
@@ -39,6 +39,7 @@ using namespace std;
#include "mh_text.h"
#include "mh_symlink.h"
#include "mh_unknown.h"
+#include "mh_null.h"
#include "ptmutex.h"
// Performance help: we use a pool of already known and created
@@ -162,6 +163,10 @@ static RecollFilter *mhFactory(RclConfig *config, const string &mime,
LOGDEB2(("mhFactory(%s): ret MimeHandlerSymlink\n", mime.c_str()));
MD5String("MimeHandlerSymlink", id);
return nobuild ? 0 : new MimeHandlerSymlink(config, id);
+ } else if ("application/x-zerosize" == lmime) {
+ LOGDEB(("mhFactory(%s): ret MimeHandlerNull\n", mime.c_str()));
+ MD5String("MimeHandlerNull", id);
+ return nobuild ? 0 : new MimeHandlerNull(config, id);
} else if (lmime.find("text/") == 0) {
// Try to handle unknown text/xx as text/plain. This
// only happen if the text/xx was defined as "internal" in
diff --git a/src/sampleconf/mimeconf b/src/sampleconf/mimeconf
index 29695b72..8732d030 100644
--- a/src/sampleconf/mimeconf
+++ b/src/sampleconf/mimeconf
@@ -133,7 +133,8 @@ image/vnd.djvu = exec rcldjvu
image/svg+xml = exec rclsvg
image/x-xcf = execm rclimg
inode/symlink = internal
-inode/x-empty = exec rclnull
+application/x-zerosize = internal
+inode/x-empty = internal application/x-zerosize
message/rfc822 = internal
text/calendar = execm rclics;mimetype=text/plain
text/html = internal