"""Extract page ranges from PDFs and join them into one bookmarked PDF.

All PDF work is delegated to the ``gs`` executable, which must be reachable
through ``PATH``.
"""

import os.path
import string
import subprocess
import tempfile

# Characters allowed to survive in a bookmark title. Anything else is dropped
# before the title is embedded between parentheses in a pdfmark line.
SAFECHARS = string.ascii_letters + string.digits + " _-"


def extract_pages(source, bookmark, start=None, end=None, count=1):
    """Extract a single section; shorthand for a one-item ``extract_and_concat``.

    Args:
        source: Path of the input PDF.
        bookmark: Bookmark title for the section (filtered through SAFECHARS).
        start: First page to extract; falsy means "use the whole file".
        end: Last page to extract; falsy means "same as ``start``".
        count: How many times the section is repeated in the output.

    Returns:
        The open temporary file returned by ``extract_and_concat``.
    """
    return extract_and_concat([(source, bookmark, start, end, count)])


def _sanitize_bookmark(title):
    """Return *title* with every character outside SAFECHARS removed."""
    return "".join(ch for ch in title if ch in SAFECHARS)


def _count_reported_pages(gs_stdout):
    """Count the lines of *gs_stdout* (bytes) that start with ``Page ``.

    The comparison is done on bytes so that a stray non-UTF-8 byte elsewhere
    in the output cannot abort the count with a UnicodeDecodeError.
    """
    return sum(1 for line in gs_stdout.splitlines() if line.startswith(b"Page "))


def _extract_section(source, dest, start, end):
    """Write pages *start*..*end* of *source* to *dest*; return the page count.

    Raises:
        subprocess.CalledProcessError: if ``gs`` exits with a non-zero status.
    """
    cmd = [
        "gs",
        "-sDEVICE=pdfwrite",
        "-dBATCH",
        "-dNOPAUSE",
        f"-dFirstPage={start}",
        f"-dLastPage={end}",
        f"-sOutputFile={dest}",
        source,
    ]
    proc = subprocess.run(cmd, check=True, capture_output=True)
    return _count_reported_pages(proc.stdout)


def extract_and_concat(items):
    """Extract the requested sections and concatenate them into one PDF.

    Args:
        items: Iterable of ``(source, bookmark, start, end, count)`` tuples.
            ``count == 0`` skips the item. A falsy ``start`` appends the
            whole ``source`` file once, without a bookmark. A falsy ``end``
            extracts the single page ``start``.

    Returns:
        An open ``tempfile.NamedTemporaryFile`` holding what the final ``gs``
        run wrote to its standard output, positioned at offset 0.

    Raises:
        subprocess.CalledProcessError: if any ``gs`` invocation fails.
    """
    # The working directory only has to outlive the final gs run: the result
    # is written to a separate temporary file, so it can be removed on exit.
    with tempfile.TemporaryDirectory(prefix="polyphonic_") as workdir:
        pdfmarks = os.path.join(workdir, "pdfmarks.txt")
        sections = []
        current_page = 1

        with open(pdfmarks, "w", encoding="utf-8") as marks:
            for i, (source, bookmark, start, end, count) in enumerate(items):
                if count == 0:
                    continue
                if not start:
                    # NOTE(review): whole-file sections get no bookmark and do
                    # not advance current_page, so bookmarks written after one
                    # are numbered as if its pages were absent.
                    sections.append(source)
                    continue

                dest = os.path.join(workdir, f"section_{i}.pdf")
                title = _sanitize_bookmark(bookmark)
                marks.write(
                    f"[/Title ({title}) /Page {current_page} /OUT pdfmark\n"
                )
                pages = _extract_section(source, dest, start, end or start)

                # A negative count adds no copies, as range(count) would.
                copies = max(count, 0)
                sections.extend([dest] * copies)
                current_page += pages * copies

        # Concatenate the sections; the pdfmarks file goes last on the
        # command line.
        output = tempfile.NamedTemporaryFile(prefix="polyphonic_", suffix=".pdf")
        cmd = [
            "gs",
            "-sDEVICE=pdfwrite",
            "-q",
            "-dBATCH",
            "-dNOPAUSE",
            "-sOutputFile=-",
        ]
        cmd.extend(sections)
        cmd.append(pdfmarks)
        try:
            # check=True: a failed concat must not be returned as if it were
            # a valid (but empty or partial) result.
            subprocess.run(cmd, stdout=output, check=True)
        except BaseException:
            output.close()
            raise
        output.seek(0)
        return output