add support for ipython/jupyter notebooks

This commit is contained in:
Jean-Francois Dockes 2021-10-10 08:11:59 +02:00
parent c182f13c96
commit 7b81c16ea0
6 changed files with 70 additions and 2 deletions

View File

@ -706,6 +706,7 @@ filters/rclics \
filters/rclimg \
filters/rclimg.py \
filters/rclinfo \
filters/rclipynb.py \
filters/rclkar \
filters/rclkwd \
filters/rcllatinclass.py \

57
src/filters/rclipynb.py Executable file
View File

@ -0,0 +1,57 @@
#!/usr/bin/env python3
# Copyright (C) 2021 J.F.Dockes
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
# Recoll handler for IPython / Jupyter notebook (.ipynb) files.
from __future__ import print_function
import os
import sys
import json
import rclexecm
from rclbasehandler import RclBaseHandler
class IPYNBextractor(RclBaseHandler):
    """Extract indexable plain text from an IPython / Jupyter notebook.

    The notebook is parsed as JSON and the sources of its markdown and
    code cells are concatenated into a single text document. Cells of
    any other type are skipped.
    """

    def __init__(self, em):
        super(IPYNBextractor, self).__init__(em)

    @staticmethod
    def _source_lines(cell):
        """Return the cell source as a list of lines, line ends preserved.

        The notebook format allows "source" to be either a list of
        strings or one multi-line string. Iterating directly over the
        string form would yield single characters, so it is split into
        lines here.
        """
        source = cell.get("source", [])
        if isinstance(source, (list, tuple)):
            return source
        return source.splitlines(True)

    def html_text(self, fn):
        """Return the text of the notebook stored in file `fn`.

        Markdown cell lines are prefixed with "# ", code cell lines are
        copied verbatim. Sets self.outputmimetype to 'text/plain': despite
        the method name, the returned data is plain text, not HTML.

        Raises whatever open() / json.loads() raise for an unreadable or
        malformed file, and KeyError if the document has no "cells" entry.
        """
        # Read as bytes and let json detect the encoding. The context
        # manager ensures the file is closed even if reading fails.
        with open(fn, 'rb') as f:
            data = json.loads(f.read())

        parts = []
        for cell in data["cells"]:
            cell_type = cell.get("cell_type")
            if cell_type == "markdown":
                parts.append("\n")
                parts.extend("# " + line for line in self._source_lines(cell))
                parts.append("\n")
            elif cell_type == "code":
                parts.append("\n\n")
                parts.extend(self._source_lines(cell))
                parts.append("\n")

        self.outputmimetype = 'text/plain'
        return "".join(parts)
# Main program: create the rclexecm protocol handler, bind an
# IPYNBextractor to it, then hand both to rclexecm.main() to run them.
proto = rclexecm.RclExecM()
extract = IPYNBextractor(proto)
rclexecm.main(proto, extract)

View File

@ -151,6 +151,7 @@ application/vnd.sun.xml.writer.template = \
#application/x-tar = execm rcltar
application/epub+zip = execm rclepub
application/x-ipynb+json = exec jupyter nbconvert --to script --stdout ; mimetype = text/plain
application/javascript = internal text/plain
application/ogg = execm rclaudio
application/pdf = execm rclpdf.py
@ -256,6 +257,7 @@ application/ogg = sownd
application/pdf = pdf
application/postscript = postscript
application/sql = source
application/x-ipynb+json = document
application/vnd.ms-excel = spreadsheet
application/vnd.ms-office = document
application/vnd.ms-outlook = document
@ -407,13 +409,14 @@ text = \
application/x-dvi \
application/x-gnote \
application/x-gnuinfo \
application/x-ipynb+json \
application/x-kword \
application/x-lyx \
application/x-mobipocket-ebook \
application/x-okular-notes \
application/x-perl \
application/x-php \
application/x-ruby \
application/x-ruby \
application/x-scribus \
application/x-shellscript \
application/x-tex \

View File

@ -50,6 +50,8 @@
.sql = application/sql
.tcl = text/x-tcl
.ipynb = application/x-ipynb+json
.xml = text/xml
.note = application/x-gnote

View File

@ -24,7 +24,7 @@
xallexcepts = application/pdf application/postscript application/x-dvi \
text/html|gnuinfo text/html|chm text/html|epub text/html|rclman \
application/x-fsdirectory|parentopen inode/directory|parentopen \
application/vnd.ms-visio.drawing
application/vnd.ms-visio.drawing application/x-ipynb+json
[view]
@ -40,6 +40,8 @@ application/epub+zip = ebook-viewer %f
# html documents. This is almost always what we want.
text/html|epub = ebook-viewer %F;ignoreipath=1
application/x-ipynb+json = jupyter notebook %f
application/x-gnote = gnote %f
application/x-mobipocket-ebook = ebook-viewer %f

View File

@ -135,6 +135,7 @@ application/vnd.sun.xml.writer.template = \
#application/x-tar = execm python rcltar
application/epub+zip = execm python rclepub
application/x-ipynb+json = execm python rclipynb.py
application/javascript = internal text/plain
application/ogg = execm python rclaudio
application/pdf = execm python rclpdf.py
@ -241,6 +242,7 @@ application/x-fsdirectory = folder
application/x-gnote = document
#application/x-gnuinfo = book
application/x-gnumeric = spreadsheet
application/x-ipynb+json = document
application/x-kword = wordprocessing
application/x-lyx = wordprocessing
application/x-mimehtml = message
@ -331,6 +333,7 @@ text = \
application/x-dvi \
application/x-gnote \
application/x-gnuinfo \
application/x-ipynb+json \
application/x-kword \
application/x-lyx \
application/x-mobipocket-ebook \