polyphonic/app/library/pdf_utils.py
2026-05-12 11:03:11 +10:00

77 lines
2.0 KiB
Python

import tempfile
import subprocess
import os.path
import string
# Whitelist of characters kept in a bookmark title before it is written into
# the pdfmark file: ASCII letters, digits, space, underscore and hyphen.
SAFECHARS = "".join((string.ascii_letters, string.digits, " _-"))
def extract_pages(source, bookmark, start=None, end=None, count=1):
    """Extract one section by delegating to ``extract_and_concat``.

    The five arguments are packed into a single
    ``(source, bookmark, start, end, count)`` tuple and handed over as a
    one-element list; whatever ``extract_and_concat`` returns is passed back
    unchanged.
    """
    single_item = (source, bookmark, start, end, count)
    return extract_and_concat([single_item])
def extract_and_concat(items):
    """Extract page ranges from PDFs with Ghostscript and join them.

    Args:
        items: iterable of ``(source, bookmark, start, end, count)`` tuples.

            * ``source`` -- path of the input PDF, passed to ``gs``.
            * ``bookmark`` -- title written to the pdfmark file for this
              section; characters not in ``SAFECHARS`` are dropped first.
            * ``start`` / ``end`` -- passed to ``gs`` as ``-dFirstPage`` /
              ``-dLastPage``. A falsy ``end`` means "same as ``start``".
              A falsy ``start`` means ``source`` is appended as a whole,
              without running an extraction step.
            * ``count`` -- how many times the extracted section is
              appended in a row; items with ``count == 0`` are skipped.

    Returns:
        An open ``tempfile.NamedTemporaryFile`` (``.pdf`` suffix) holding
        the stdout of the concatenating ``gs`` run, rewound to offset 0.
        The caller is responsible for closing it.

    Raises:
        subprocess.CalledProcessError: if any ``gs`` invocation exits with
            a non-zero status (including the final concatenation, which
            would otherwise hand back an empty or truncated file).

    NOTE(review): whole-file items (falsy ``start``) write no bookmark,
    ignore ``count`` and do not advance the running page number, so the
    page numbers of later bookmarks do not account for those pages --
    confirm whether that is intended.
    """
    # Section files and the pdfmark list live in a scratch directory that is
    # removed as soon as the final document has been produced.
    with tempfile.TemporaryDirectory(prefix="polyphonic_") as workdir:
        pdfmarks = os.path.join(workdir, "pdfmarks.txt")
        sections = []
        current_page = 1

        # ``with`` guarantees the pdfmark file is flushed and closed before
        # the final gs run reads it, and also when an extraction fails.
        with open(pdfmarks, "w") as marks:
            for i, (source, bookmark, start, end, count) in enumerate(items):
                if count == 0:
                    continue
                if not start:
                    sections.append(source)
                    continue
                if not end:
                    end = start
                dest = os.path.join(workdir, f"section_{i}.pdf")
                cmd = [
                    "gs",
                    "-sDEVICE=pdfwrite",
                    "-dBATCH",
                    "-dNOPAUSE",
                    f"-dFirstPage={start}",
                    f"-dLastPage={end}",
                    f"-sOutputFile={dest}",
                    source,
                ]
                # Only whitelisted characters reach the pdfmark file, so the
                # title cannot contain parentheses or backslashes.
                bookmark = "".join(c for c in bookmark if c in SAFECHARS)
                marks.write(
                    f"[/Title ({bookmark}) /Page {current_page} /OUT pdfmark\n"
                )
                p = subprocess.run(cmd, check=True, capture_output=True)
                # The number of stdout lines starting with "Page " is taken
                # as the section's page count. Comparing bytes avoids a
                # UnicodeDecodeError on output that is not valid UTF-8.
                pages = sum(
                    1
                    for line in p.stdout.splitlines()
                    if line.startswith(b"Page ")
                )
                for _ in range(count):
                    sections.append(dest)
                    current_page += pages

        # concat the items
        output = tempfile.NamedTemporaryFile(prefix="polyphonic_", suffix=".pdf")
        try:
            cmd = [
                "gs",
                "-sDEVICE=pdfwrite",
                "-q",
                "-dBATCH",
                "-dNOPAUSE",
                "-sOutputFile=-",
            ]
            cmd.extend(sections)
            cmd.append(pdfmarks)
            subprocess.run(cmd, stdout=output, check=True)
            output.seek(0)
        except BaseException:
            # Do not leak the temporary output file when gs fails.
            output.close()
            raise
    return output