Index PDF annotations separately under the field name "annotation". Add the annot, pdfannot and pa aliases.

This commit is contained in:
Jean-Francois Dockes 2020-10-12 10:05:38 +02:00
parent f60851e935
commit 25eda37bc9
4 changed files with 35 additions and 1 deletions

View File

@ -443,13 +443,17 @@ class PDFExtractor:
#self.em.rclog("Annotations: %s" % abypage)
pagevec = html.split(b"\f")
html = b""
annotsfield = b""
pagenum = 1
for page in pagevec:
html += page
if pagenum in abypage:
html += abypage[pagenum].encode('utf-8')
annotsfield += abypage[pagenum].encode('utf-8') + b" - "
html += b"\f"
pagenum += 1
if annotsfield:
self.em.setfield("pdfannot", annotsfield)
return html
def _selfdoc(self):

View File

@ -71,7 +71,7 @@ rclUnsplitFN = XSFS
xapyear = Y
recipient = XTO
rclbes = XB ; noterms = 1
annotation = XA
[values]
###########
@ -106,6 +106,7 @@ keywords=
rclaptg=
rclbes=
recipient=
annotation=
[aliases]
##########################
@ -132,6 +133,7 @@ keywords = keyword xesam:keyword tag tags dc:subject xesam:subject \
mtype = mime mimetype xesam:mimetype contenttype xesam:contenttype dc:format
recipient = to xesam:recipient
url = dc:identifier xesam:url
annotation = pdfannot
##################
# The queryaliases section defines aliases which are used exclusively at
@ -140,6 +142,7 @@ url = dc:identifier xesam:url
[queryaliases]
filename = fn
containerfilename = cfn
annotation = annot pa
[xattrtofields]
######################

22
tests/pdf-annots/pdf-annots.sh Executable file
View File

@ -0,0 +1,22 @@
#!/bin/sh
# Regression test for PDF annotation indexing.
#
# Runs two recollq queries, strips the 'Recoll query: ' echo lines from the
# combined output, and compares the result (ignoring whitespace differences)
# with the reference file ${myname}.txt.
#
# NOTE(review): initvariables and checkresult come from shared.sh;
# initvariables is presumed to set myname, mystdout, mystderr and mydiffs.

# Quote every path expansion so that a checkout located in a directory whose
# name contains whitespace does not break the script.
topdir="$(dirname "$0")/.."
. "$topdir/shared.sh"

initvariables "$0"

(
    # Phrase search for text which only exists inside an annotation.
    recollq '"new test JF annotation using Adobe Acrobat X"'

    # This supposes that the fields file is customized, which is not
    # the case by default
    echo
    echo "Extracting the value for an annotation field:"
    # Only the last output line is kept and base64-decoded: it is expected
    # to carry the encoded value of the requested field.
    recollq -F annotation pdfannot:'"DAVID: Test of a highlight"' | \
        tail -n 1 | base64 -d
) 2> "$mystderr" | grep -E -v '^Recoll query: ' > "$mystdout"
# grep -E is the POSIX spelling of the obsolescent egrep command.

diff -w "${myname}.txt" "$mystdout" > "$mydiffs" 2>&1

checkresult

View File

@ -0,0 +1,5 @@
1 results
application/pdf [file:///home/dockes/projets/fulltext/testrecoll/pdf-annots/Présidentielle en Biélorussie .pdf] [Présidentielle en Biélorussie : la candidate de lopposition, Svetlana Tikhanovskaïa, sest réfugiée en Lituanie] 325430 bytes
Extracting the value for an annotation field:
P.: 3, D:20200904094331+00'240', highlight : None P.: 3, D:20200904100158+00'240', highlight : DAVID: Test of a highlight that has a note attached to