"""Extract page ranges from PDF files with Ghostscript and join them."""

import os.path
import string
import subprocess
import tempfile

# Characters kept in a bookmark title.  The title is written between
# parentheses in the pdfmark file, so anything outside this set (including
# parentheses and backslashes) is dropped rather than escaped.
SAFECHARS = string.ascii_letters + string.digits + " _-"


def extract_pages(source, bookmark, start=None, end=None, count=1):
    """Extract one page range from *source* and return it as a PDF file.

    Convenience wrapper that calls :func:`extract_and_concat` with a single
    ``(source, bookmark, start, end, count)`` item; see that function for
    the meaning of the arguments, the return value and the errors raised.
    """
    return extract_and_concat([(source, bookmark, start, end, count)])


def extract_and_concat(items):
    """Build one PDF out of page ranges taken from several source PDFs.

    Args:
        items: iterable of ``(source, bookmark, start, end, count)`` tuples.

            * ``source``: path of the input PDF, passed to ``gs`` as is.
            * ``bookmark``: outline title for the section; characters not
              in ``SAFECHARS`` are removed.
            * ``start`` / ``end``: first and last page handed to ``gs`` as
              ``-dFirstPage`` / ``-dLastPage``.  A falsy ``end`` means a
              single page (``end = start``).  A falsy ``start`` means the
              whole ``source`` file is appended unchanged.
            * ``count``: how many times the extracted section is repeated
              in the result; items with ``count == 0`` are skipped.

    Returns:
        An open ``tempfile.NamedTemporaryFile`` positioned at offset 0 that
        holds whatever the concatenating ``gs`` run wrote to stdout.  The
        caller owns the file object and should close it when done.

    Raises:
        subprocess.CalledProcessError: if any ``gs`` invocation exits with
            a non-zero status.
        FileNotFoundError: if no ``gs`` executable can be found.

    NOTE(review): whole-document items (falsy ``start``) get no bookmark,
    ignore ``count`` and do not advance the page counter, so bookmarks of
    later items may point to an earlier page than intended -- confirm
    whether callers rely on this before changing it.
    """
    # Every intermediate file lives in this directory, which is removed as
    # soon as the block is left -- on success and on failure alike.
    with tempfile.TemporaryDirectory(prefix="polyphonic_") as workdir:
        pdfmarks = os.path.join(workdir, 'pdfmarks.txt')
        sections = []
        current_page = 1

        # The pdfmark file must be closed (and therefore flushed) before
        # the concatenating gs run below reads it.
        with open(pdfmarks, 'w') as marks:
            for i, (source, bookmark, start, end, count) in enumerate(items):
                if count == 0:
                    continue
                if not start:
                    # No page range requested: use the source file itself.
                    sections.append(source)
                    continue
                if not end:
                    end = start

                dest = os.path.join(workdir, f'section_{i}.pdf')
                cmd = ['gs', '-sDEVICE=pdfwrite', '-dBATCH', '-dNOPAUSE',
                       f'-dFirstPage={start}', f'-dLastPage={end}',
                       f'-sOutputFile={dest}', source]

                title = "".join(c for c in bookmark if c in SAFECHARS)
                marks.write(
                    f'[/Title ({title}) /Page {current_page} /OUT pdfmark\n')

                p = subprocess.run(cmd, check=True, capture_output=True)
                # The section length is taken from the number of stdout
                # lines starting with "Page ".  Comparing bytes avoids a
                # UnicodeDecodeError on output that is not valid UTF-8.
                pages = sum(1 for line in p.stdout.splitlines()
                            if line.startswith(b'Page '))

                for _ in range(count):
                    sections.append(dest)
                    current_page += pages

        # Concatenate the sections; gs writes the result to stdout
        # ("-sOutputFile=-"), which is redirected into the temporary file.
        output = tempfile.NamedTemporaryFile(prefix="polyphonic_",
                                             suffix='.pdf')
        try:
            cmd = ['gs', '-sDEVICE=pdfwrite', '-q', '-dBATCH', '-dNOPAUSE',
                   '-sOutputFile=-']
            cmd.extend(sections)
            cmd.append(pdfmarks)
            # check=True: a failed run must not be returned as if it were
            # a valid (but empty or truncated) PDF.
            subprocess.run(cmd, stdout=output, check=True)
            output.seek(0)
        except BaseException:
            # Do not leak the temporary file when the result is unusable.
            output.close()
            raise
        return output