polyphonic/app/library/pdf_utils.py
2023-03-02 10:05:26 +11:00

66 lines
1.8 KiB
Python

import tempfile
import subprocess
import os.path
import string
# Whitelist of characters allowed in a bookmark title: ASCII letters, digits,
# space, underscore and hyphen. Anything else is dropped before the title is
# written into the pdfmark file.
SAFECHARS = "".join((string.ascii_letters, string.digits, " _-"))
def extract_pages(source, bookmark, start=None, end=None, count=1):
    """Build a PDF from a single source item.

    Convenience wrapper that packs its arguments into one
    ``(source, bookmark, start, end, count)`` tuple and hands a one-element
    list to :func:`extract_and_concat`, returning whatever that call returns.
    """
    single_item = (source, bookmark, start, end, count)
    return extract_and_concat([single_item])
def extract_and_concat(items):
    """Extract page ranges from PDFs with Ghostscript and merge them into one.

    Args:
        items: iterable of ``(source, bookmark, start, end, count)`` tuples.
            ``source`` is the path of the input PDF. ``bookmark`` is the
            outline title for the extracted section (characters outside
            ``SAFECHARS`` are dropped). ``start``/``end`` are the first and
            last page to extract; a falsy ``end`` means "same as ``start``".
            ``count`` is how many times the extracted section is repeated in
            the output; items with ``count == 0`` are skipped. When ``start``
            is falsy the whole ``source`` file is included once, as-is, and
            no bookmark is written for it.

    Returns:
        An open ``tempfile.NamedTemporaryFile`` holding the merged PDF,
        positioned at offset 0. The caller owns it and must close it.

    Raises:
        subprocess.CalledProcessError: if any ``gs`` invocation exits with a
            non-zero status.
        FileNotFoundError: if the ``gs`` executable cannot be found.
    """
    # The context manager removes the scratch directory (section PDFs and the
    # pdfmark file) deterministically, including when gs fails part-way.
    with tempfile.TemporaryDirectory(prefix="polyphonic_") as workdir:
        pdfmarks = os.path.join(workdir, 'pdfmarks.txt')
        sections = []
        current_page = 1  # 1-based page in the merged output for the next bookmark

        with open(pdfmarks, 'w') as marks:
            for i, (source, bookmark, start, end, count) in enumerate(items):
                if count == 0:
                    continue
                if not start:
                    # Whole file requested: pass the source straight through.
                    # NOTE(review): no bookmark is written and current_page is
                    # not advanced here, so bookmarks of later items will
                    # point too early if a whole-file item precedes them;
                    # ``count`` is also ignored. Confirm whether intended.
                    sections.append(source)
                    continue
                if not end:
                    end = start
                dest = os.path.join(workdir, f'section_{i}.pdf')
                # No -q here on purpose: the page count below is taken from
                # the "Page N" progress lines gs prints on stdout.
                cmd = ['gs', '-sDEVICE=pdfwrite', '-dBATCH', '-dNOPAUSE',
                       f'-dFirstPage={start}', f'-dLastPage={end}',
                       f'-sOutputFile={dest}',
                       source]
                # Keep only whitelisted characters so the title cannot break
                # out of the PostScript string literal in the pdfmark file.
                bookmark = "".join(c for c in bookmark if c in SAFECHARS)
                marks.write(f'[/Title ({bookmark}) /Page {current_page} /OUT pdfmark\n')
                p = subprocess.run(cmd, check=True, capture_output=True)
                # Compare as bytes: decoding every line could raise on
                # non-UTF-8 output (e.g. file names echoed by gs).
                pages = sum(1 for line in p.stdout.splitlines()
                            if line.startswith(b'Page '))
                for _ in range(count):
                    sections.append(dest)
                    current_page += pages

        # Concatenate the sections; the pdfmark file goes last so that gs
        # processes it as an input after the pages it refers to.
        output = tempfile.NamedTemporaryFile(prefix="polyphonic_", suffix='.pdf')
        try:
            cmd = ['gs', '-sDEVICE=pdfwrite', '-q', '-dBATCH', '-dNOPAUSE',
                   '-sOutputFile=-']
            cmd.extend(sections)
            cmd.append(pdfmarks)
            # check=True, like the per-section run: never hand back a
            # truncated or empty PDF when the merge itself failed.
            subprocess.run(cmd, stdout=output, check=True)
            output.seek(0)
        except BaseException:
            # Do not leak the temp file (and its descriptor) on failure.
            output.close()
            raise
        return output