From d115bcfaa272eb5a6ef69a616f865ce201864db1 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dockes Date: Sat, 21 Nov 2015 12:46:58 +0100 Subject: [PATCH] rclmpdf.py: p2/3 compat --- src/filters/rclmpdf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/filters/rclmpdf.py b/src/filters/rclmpdf.py index 4d7e9634..100c7622 100755 --- a/src/filters/rclmpdf.py +++ b/src/filters/rclmpdf.py @@ -135,9 +135,9 @@ class PDFExtractor: b'content="text/html; charset=UTF-8">\n' didcs = True - m = re.search(rb'(.*)(.*)(<\/title>.*)', line) + m = re.search(b'''(.*<title>)(.*)(<\/title>.*)''', line) if not m: - m = re.search(rb'(.*content=")(.*)(".*/>.*)', line) + m = re.search(b'''(.*content=")(.*)(".*/>.*)''', line) if m: line = m.group(1) + self.em.htmlescape(m.group(2)) + \ m.group(3)