diff --git a/src/filters/rclorgmode.py b/src/filters/rclorgmode.py index b009d8e8..84254cf3 100755 --- a/src/filters/rclorgmode.py +++ b/src/filters/rclorgmode.py @@ -1,19 +1,37 @@ #!/usr/bin/env python3 -from __future__ import print_function +# Copyright (C) 2020-2022 J.F.Dockes +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the +# Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -# Read an org-mode file, break it into "documents" along the separator lines -# and interface with recoll execm +'''Read an org-mode file, optionally break it into "subdocs" along level 1 headings''' -import rclexecm import sys import re +import rclexecm +import rclconfig +import conftree + class OrgModeExtractor: def __init__(self, em): self.file = "" - self.contents = [] self.em = em self.selftext = "" + self.docs = [] + config = rclconfig.RclConfig() + self.createsubdocs = conftree.valToBool(config.getConfParam("orgmodesubdocs")) def extractone(self, index): if index >= len(self.docs): @@ -34,7 +52,6 @@ class OrgModeExtractor: ###### File type handler api, used by rclexecm ----------> def openfile(self, params): self.file = params["filename"] - try: data = open(self.file, "rb").read() except Exception as e: @@ -42,6 +59,9 @@ class OrgModeExtractor: return False self.currentindex = -1 + if not self.createsubdocs: + self.selftext = data + return True res = rb'''^\* ''' self.docs = 
re.compile(res, flags=re.MULTILINE).split(data) @@ -63,6 +83,8 @@ class OrgModeExtractor: return self.extractone(index) def getnext(self, params): + if not self.createsubdocs: + return (True, self.selftext, "", rclexecm.RclExecM.eofnext) if self.currentindex == -1: # Return "self" doc diff --git a/src/sampleconf/recoll.conf b/src/sampleconf/recoll.conf index 96b4c245..c094ce31 100644 --- a/src/sampleconf/recoll.conf +++ b/src/sampleconf/recoll.conf @@ -1061,8 +1061,15 @@ snippetMaxPosWalk = 1000000 # abbyyocrcmd = /opt/ABBYYOCR11/abbyyocr11 -# Parameters set for specific -# locations +# Parameters for specific handlers + +# +# Index org-mode level 1 sections as separate sub-documents +# This is the default. If set to false, org-mode files will be indexed as plain text +# +orgmodesubdocs = 1 + +# Parameters set for specific locations # You could specify different parameters for a subdirectory like this: #[~/hungariandocs/plain]