pdf xmp: pdfextrametafix: add a method which takes the XML element as its argument instead of the text content
This commit is contained in:
parent
3b82aa7927
commit
b43d1b3287
@ -439,14 +439,35 @@ class PDFExtractor:
|
||||
for metanm,rclnm in self.extrameta:
|
||||
for rdfdesc in rdfdesclist:
|
||||
try:
|
||||
elt = rdfdesc.find(metanm, rdfdesc.nsmap)
|
||||
elts = rdfdesc.findall(metanm, rdfdesc.nsmap)
|
||||
except:
|
||||
# We get an exception when this rdf:Description does not
|
||||
# define the required namespace.
|
||||
continue
|
||||
text = None
|
||||
if elt is not None:
|
||||
text = self._xmltreetext(elt)
|
||||
|
||||
if elts:
|
||||
for elt in elts:
|
||||
text = None
|
||||
try:
|
||||
# First try to get text from a custom element handler
|
||||
text = emf.metafixelt(metanm, elt)
|
||||
except:
|
||||
pass
|
||||
|
||||
if text is None:
|
||||
# still nothing here, read the element text
|
||||
text = self._xmltreetext(elt)
|
||||
try:
|
||||
# try to run metafix
|
||||
text = emf.metafix(metanm, text)
|
||||
except:
|
||||
pass
|
||||
|
||||
if text:
|
||||
# Can't use setfield as it only works for
|
||||
# text/plain output at the moment.
|
||||
#self.em.rclog("Appending: (%s,%s)"%(rclnm,text))
|
||||
metaheaders.append((rclnm, text))
|
||||
else:
|
||||
# Some docs define the values as attributes. We don't
|
||||
# know if this is valid, but we handle it anyway.
|
||||
@ -456,17 +477,13 @@ class PDFExtractor:
|
||||
except:
|
||||
fullnm = metanm
|
||||
text = rdfdesc.get(fullnm)
|
||||
# Should we set empty values ?
|
||||
if text:
|
||||
if emf:
|
||||
if text:
|
||||
try:
|
||||
# try to run metafix
|
||||
text = emf.metafix(metanm, text)
|
||||
except:
|
||||
pass
|
||||
# Can't use setfield as it only works for
|
||||
# text/plain output at the moment.
|
||||
#self.em.rclog("Appending: (%s,%s)"%(rclnm,text))
|
||||
metaheaders.append((rclnm, text))
|
||||
metaheaders.append((rclnm, text))
|
||||
if metaheaders:
|
||||
if emf:
|
||||
try:
|
||||
|
||||
@ -1,12 +1,17 @@
|
||||
import sys
|
||||
import re
|
||||
import lxml.etree as ET
|
||||
|
||||
class MetaFixer(object):
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
# def metafixelt(self, nm, xml):
|
||||
# print("MetaFixer: metafixelt: %s" % ET.tostring(xml), file=sys.stderr)
|
||||
# return "hello"
|
||||
|
||||
def metafix(self, nm, txt):
|
||||
#print("Metafixer: mfix: nm [%s] txt [%s]" % (nm, txt), file=sys.stderr)
|
||||
print("Metafixer: mfix: nm [%s] txt [%s]" % (nm, txt), file=sys.stderr)
|
||||
if nm == 'pdf:Producer':
|
||||
txt += " metafixerunique"
|
||||
elif nm == 'someothername':
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user