Add orgmodesubdocs recoll.conf parameter to switch rclorgmode between using the whole text and creating level-1 subdocs (default is subdocs)
This commit is contained in:
parent
561592b618
commit
f2b24cf22d
@ -1,19 +1,37 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
from __future__ import print_function
|
# Copyright (C) 2020-2022 J.F.Dockes
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the
|
||||||
|
# Free Software Foundation, Inc.,
|
||||||
|
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
# Read an org-mode file, break it into "documents" along the separator lines
|
'''Read an org-mode file, optionally break it into "subdocs" along level 1 headings'''
|
||||||
# and interface with recoll execm
|
|
||||||
|
|
||||||
import rclexecm
|
|
||||||
import sys
|
import sys
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
import rclexecm
|
||||||
|
import rclconfig
|
||||||
|
import conftree
|
||||||
|
|
||||||
class OrgModeExtractor:
|
class OrgModeExtractor:
|
||||||
def __init__(self, em):
|
def __init__(self, em):
|
||||||
self.file = ""
|
self.file = ""
|
||||||
self.contents = []
|
|
||||||
self.em = em
|
self.em = em
|
||||||
self.selftext = ""
|
self.selftext = ""
|
||||||
|
self.docs = []
|
||||||
|
config = rclconfig.RclConfig()
|
||||||
|
self.createsubdocs = conftree.valToBool(config.getConfParam("orgmodesubdocs"))
|
||||||
|
|
||||||
def extractone(self, index):
|
def extractone(self, index):
|
||||||
if index >= len(self.docs):
|
if index >= len(self.docs):
|
||||||
@ -34,7 +52,6 @@ class OrgModeExtractor:
|
|||||||
###### File type handler api, used by rclexecm ---------->
|
###### File type handler api, used by rclexecm ---------->
|
||||||
def openfile(self, params):
|
def openfile(self, params):
|
||||||
self.file = params["filename"]
|
self.file = params["filename"]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
data = open(self.file, "rb").read()
|
data = open(self.file, "rb").read()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@ -42,6 +59,9 @@ class OrgModeExtractor:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
self.currentindex = -1
|
self.currentindex = -1
|
||||||
|
if not self.createsubdocs:
|
||||||
|
self.selftext = data
|
||||||
|
return True
|
||||||
|
|
||||||
res = rb'''^\* '''
|
res = rb'''^\* '''
|
||||||
self.docs = re.compile(res, flags=re.MULTILINE).split(data)
|
self.docs = re.compile(res, flags=re.MULTILINE).split(data)
|
||||||
@ -63,6 +83,8 @@ class OrgModeExtractor:
|
|||||||
return self.extractone(index)
|
return self.extractone(index)
|
||||||
|
|
||||||
def getnext(self, params):
|
def getnext(self, params):
|
||||||
|
if not self.createsubdocs:
|
||||||
|
return (True, self.selftext, "", rclexecm.RclExecM.eofnext)
|
||||||
|
|
||||||
if self.currentindex == -1:
|
if self.currentindex == -1:
|
||||||
# Return "self" doc
|
# Return "self" doc
|
||||||
|
|||||||
@ -1061,8 +1061,15 @@ snippetMaxPosWalk = 1000000
|
|||||||
# </var>
|
# </var>
|
||||||
abbyyocrcmd = /opt/ABBYYOCR11/abbyyocr11
|
abbyyocrcmd = /opt/ABBYYOCR11/abbyyocr11
|
||||||
|
|
||||||
# <grouptitle id="SPECLOCATIONS">Parameters set for specific
|
# <grouptitle id="MISCHANDLERPARAMS">Parameters for specific handlers</grouptitle>
|
||||||
# locations</grouptitle>
|
|
||||||
|
# <var name="orgmodesubdocs" type="bool">
|
||||||
|
# <brief>Index org-mode level 1 sections as separate sub-documents</brief>
|
||||||
|
# <descr>Sub-document creation is enabled by default. If set to false, each org-mode file will be indexed whole, as a single plain text document.</descr>
|
||||||
|
# </var>
|
||||||
|
orgmodesubdocs = 1
|
||||||
|
|
||||||
|
# <grouptitle id="SPECLOCATIONS">Parameters set for specific locations</grouptitle>
|
||||||
|
|
||||||
# You could specify different parameters for a subdirectory like this:
|
# You could specify different parameters for a subdirectory like this:
|
||||||
#[~/hungariandocs/plain]
|
#[~/hungariandocs/plain]
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user