Index PDF annotations separately under the field name "annotation". Add the annot, pdfannot and pa aliases.
This commit is contained in:
parent
f60851e935
commit
25eda37bc9
@ -443,13 +443,17 @@ class PDFExtractor:
|
|||||||
#self.em.rclog("Annotations: %s" % abypage)
|
#self.em.rclog("Annotations: %s" % abypage)
|
||||||
pagevec = html.split(b"\f")
|
pagevec = html.split(b"\f")
|
||||||
html = b""
|
html = b""
|
||||||
|
annotsfield = b""
|
||||||
pagenum = 1
|
pagenum = 1
|
||||||
for page in pagevec:
|
for page in pagevec:
|
||||||
html += page
|
html += page
|
||||||
if pagenum in abypage:
|
if pagenum in abypage:
|
||||||
html += abypage[pagenum].encode('utf-8')
|
html += abypage[pagenum].encode('utf-8')
|
||||||
|
annotsfield += abypage[pagenum].encode('utf-8') + b" - "
|
||||||
html += b"\f"
|
html += b"\f"
|
||||||
pagenum += 1
|
pagenum += 1
|
||||||
|
if annotsfield:
|
||||||
|
self.em.setfield("pdfannot", annotsfield)
|
||||||
return html
|
return html
|
||||||
|
|
||||||
def _selfdoc(self):
|
def _selfdoc(self):
|
||||||
|
|||||||
@ -71,7 +71,7 @@ rclUnsplitFN = XSFS
|
|||||||
xapyear = Y
|
xapyear = Y
|
||||||
recipient = XTO
|
recipient = XTO
|
||||||
rclbes = XB ; noterms = 1
|
rclbes = XB ; noterms = 1
|
||||||
|
annotation = XA
|
||||||
|
|
||||||
[values]
|
[values]
|
||||||
###########
|
###########
|
||||||
@ -106,6 +106,7 @@ keywords=
|
|||||||
rclaptg=
|
rclaptg=
|
||||||
rclbes=
|
rclbes=
|
||||||
recipient=
|
recipient=
|
||||||
|
annotation=
|
||||||
|
|
||||||
[aliases]
|
[aliases]
|
||||||
##########################
|
##########################
|
||||||
@ -132,6 +133,7 @@ keywords = keyword xesam:keyword tag tags dc:subject xesam:subject \
|
|||||||
mtype = mime mimetype xesam:mimetype contenttype xesam:contenttype dc:format
|
mtype = mime mimetype xesam:mimetype contenttype xesam:contenttype dc:format
|
||||||
recipient = to xesam:recipient
|
recipient = to xesam:recipient
|
||||||
url = dc:identifier xesam:url
|
url = dc:identifier xesam:url
|
||||||
|
annotation = pdfannot
|
||||||
|
|
||||||
##################
|
##################
|
||||||
# The queryaliases section defines aliases which are used exclusively at
|
# The queryaliases section defines aliases which are used exclusively at
|
||||||
@ -140,6 +142,7 @@ url = dc:identifier xesam:url
|
|||||||
[queryaliases]
|
[queryaliases]
|
||||||
filename = fn
|
filename = fn
|
||||||
containerfilename = cfn
|
containerfilename = cfn
|
||||||
|
annotation = annot pa
|
||||||
|
|
||||||
[xattrtofields]
|
[xattrtofields]
|
||||||
######################
|
######################
|
||||||
|
|||||||
22
tests/pdf-annots/pdf-annots.sh
Executable file
22
tests/pdf-annots/pdf-annots.sh
Executable file
@ -0,0 +1,22 @@
|
|||||||
|
#!/bin/sh
# Regression test for PDF annotation indexing.
#
# Runs two recollq queries (a phrase search for annotation text, then a
# field-restricted search that extracts the "annotation" field value) and
# compares the combined output with the reference file ${myname}.txt.
#
# NOTE(review): initvariables and checkresult, as well as the myname,
# mystdout, mystderr and mydiffs variables, are presumably provided by
# shared.sh -- confirm against that file.

topdir=$(dirname "$0")/..
. "$topdir/shared.sh"

initvariables "$0"

(
    recollq '"new test JF annotation using Adobe Acrobat X"'

    # This supposes that the fields file is customized, which is not
    # the case by default
    echo
    echo "Extracting the value for an annotation field:"
    # Only the last output line is kept and base64-decoded.
    recollq -F annotation pdfannot:'"DAVID: Test of a highlight"' | \
        tail -n 1 | base64 -d

) 2> "$mystderr" | grep -v '^Recoll query: ' > "$mystdout"

diff -w "${myname}.txt" "$mystdout" > "$mydiffs" 2>&1

checkresult
|
||||||
5
tests/pdf-annots/pdf-annots.txt
Normal file
5
tests/pdf-annots/pdf-annots.txt
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
1 results
|
||||||
|
application/pdf [file:///home/dockes/projets/fulltext/testrecoll/pdf-annots/Présidentielle en Biélorussie .pdf] [Présidentielle en Biélorussie : la candidate de l’opposition, Svetlana Tikhanovskaïa, s’est réfugiée en Lituanie] 325430 bytes
|
||||||
|
|
||||||
|
Extracting the value for an annotation field:
|
||||||
|
P.: 3, D:20200904094331+00'240', highlight : None P.: 3, D:20200904100158+00'240', highlight : DAVID: Test of a highlight that has a note attached to
|
||||||
Loading…
x
Reference in New Issue
Block a user