Add orgmodesubdocs recoll.conf parameter to switch rclorgmode between indexing the whole text and creating level-1 subdocs (default is subdocs)
This commit is contained in:
parent
561592b618
commit
f2b24cf22d
@ -1,19 +1,37 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import print_function
|
||||
# Copyright (C) 2020-2022 J.F.Dockes
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the
|
||||
# Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
# Read an org-mode file, break it into "documents" along the separator lines
|
||||
# and interface with recoll execm
|
||||
'''Read an org-mode file, optionally break it into "subdocs" along level 1 headings'''
|
||||
|
||||
import rclexecm
|
||||
import sys
|
||||
import re
|
||||
|
||||
import rclexecm
|
||||
import rclconfig
|
||||
import conftree
|
||||
|
||||
class OrgModeExtractor:
|
||||
def __init__(self, em):
|
||||
self.file = ""
|
||||
self.contents = []
|
||||
self.em = em
|
||||
self.selftext = ""
|
||||
self.docs = []
|
||||
config = rclconfig.RclConfig()
|
||||
self.createsubdocs = conftree.valToBool(config.getConfParam("orgmodesubdocs"))
|
||||
|
||||
def extractone(self, index):
|
||||
if index >= len(self.docs):
|
||||
@ -34,7 +52,6 @@ class OrgModeExtractor:
|
||||
###### File type handler api, used by rclexecm ---------->
|
||||
def openfile(self, params):
|
||||
self.file = params["filename"]
|
||||
|
||||
try:
|
||||
data = open(self.file, "rb").read()
|
||||
except Exception as e:
|
||||
@ -42,6 +59,9 @@ class OrgModeExtractor:
|
||||
return False
|
||||
|
||||
self.currentindex = -1
|
||||
if not self.createsubdocs:
|
||||
self.selftext = data
|
||||
return True
|
||||
|
||||
res = rb'''^\* '''
|
||||
self.docs = re.compile(res, flags=re.MULTILINE).split(data)
|
||||
@ -63,6 +83,8 @@ class OrgModeExtractor:
|
||||
return self.extractone(index)
|
||||
|
||||
def getnext(self, params):
|
||||
if not self.createsubdocs:
|
||||
return (True, self.selftext, "", rclexecm.RclExecM.eofnext)
|
||||
|
||||
if self.currentindex == -1:
|
||||
# Return "self" doc
|
||||
|
||||
@ -1061,8 +1061,15 @@ snippetMaxPosWalk = 1000000
|
||||
# </var>
|
||||
abbyyocrcmd = /opt/ABBYYOCR11/abbyyocr11
|
||||
|
||||
# <grouptitle id="SPECLOCATIONS">Parameters set for specific
|
||||
# locations</grouptitle>
|
||||
# <grouptitle id="MISCHANDLERPARAMS">Parameters for specific handlers</grouptitle>
|
||||
|
||||
# <var name="orgmodesubdocs" type="bool">
|
||||
# <brief>Index org-mode level 1 sections as separate sub-documents</brief>
|
||||
# <descr>This is the default. If set to false, each org-mode file will be indexed whole, as a single plain text document.</descr>
|
||||
# </var>
|
||||
orgmodesubdocs = 1
|
||||
|
||||
# <grouptitle id="SPECLOCATIONS">Parameters set for specific locations</grouptitle>
|
||||
|
||||
# You could specify different parameters for a subdirectory like this:
|
||||
#[~/hungariandocs/plain]
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user