Index PDF annotations separately under the field name "annotation". Add the "annot", "pdfannot" and "pa" aliases.
This commit is contained in:
parent
f60851e935
commit
25eda37bc9
@ -443,13 +443,17 @@ class PDFExtractor:
|
||||
#self.em.rclog("Annotations: %s" % abypage)
|
||||
pagevec = html.split(b"\f")
|
||||
html = b""
|
||||
annotsfield = b""
|
||||
pagenum = 1
|
||||
for page in pagevec:
|
||||
html += page
|
||||
if pagenum in abypage:
|
||||
html += abypage[pagenum].encode('utf-8')
|
||||
annotsfield += abypage[pagenum].encode('utf-8') + b" - "
|
||||
html += b"\f"
|
||||
pagenum += 1
|
||||
if annotsfield:
|
||||
self.em.setfield("pdfannot", annotsfield)
|
||||
return html
|
||||
|
||||
def _selfdoc(self):
|
||||
|
||||
@ -71,7 +71,7 @@ rclUnsplitFN = XSFS
|
||||
xapyear = Y
|
||||
recipient = XTO
|
||||
rclbes = XB ; noterms = 1
|
||||
|
||||
annotation = XA
|
||||
|
||||
[values]
|
||||
###########
|
||||
@ -106,6 +106,7 @@ keywords=
|
||||
rclaptg=
|
||||
rclbes=
|
||||
recipient=
|
||||
annotation=
|
||||
|
||||
[aliases]
|
||||
##########################
|
||||
@ -132,6 +133,7 @@ keywords = keyword xesam:keyword tag tags dc:subject xesam:subject \
|
||||
mtype = mime mimetype xesam:mimetype contenttype xesam:contenttype dc:format
|
||||
recipient = to xesam:recipient
|
||||
url = dc:identifier xesam:url
|
||||
annotation = pdfannot
|
||||
|
||||
##################
|
||||
# The queryaliases section defines aliases which are used exclusively at
|
||||
@ -140,6 +142,7 @@ url = dc:identifier xesam:url
|
||||
[queryaliases]
|
||||
filename = fn
|
||||
containerfilename = cfn
|
||||
annotation = annot pa
|
||||
|
||||
[xattrtofields]
|
||||
######################
|
||||
|
||||
22
tests/pdf-annots/pdf-annots.sh
Executable file
22
tests/pdf-annots/pdf-annots.sh
Executable file
@ -0,0 +1,22 @@
|
||||
#!/bin/sh

# Regression test for PDF annotation indexing: runs two recollq queries,
# filters the output, and compares it with the reference file
# ${myname}.txt.

# Locate the shared test helpers relative to this script. Quoting keeps
# the paths intact if the checkout directory contains whitespace.
topdir=$(dirname "$0")/..
. "$topdir/shared.sh"

# Presumably sets myname, mystdout, mystderr and mydiffs, which are used
# below -- defined in shared.sh, not visible here.
initvariables "$0"

(
    # Phrase search through the main index.
    recollq '"new test JF annotation using Adobe Acrobat X"'

    # This assumes that the fields file is customized, which is not
    # the case by default
    echo
    echo "Extracting the value for an annotation field:"
    # Query through the "pdfannot" field alias, ask for the "annotation"
    # field only (-F), and base64-decode the last output line.
    recollq -F annotation pdfannot:'"DAVID: Test of a highlight"' | \
        tail -n 1 | base64 -d

) 2> "$mystderr" | grep -E -v '^Recoll query: ' > "$mystdout"

# Whitespace-insensitive comparison against the expected output.
diff -w "${myname}.txt" "$mystdout" > "$mydiffs" 2>&1

checkresult
|
||||
5
tests/pdf-annots/pdf-annots.txt
Normal file
5
tests/pdf-annots/pdf-annots.txt
Normal file
@ -0,0 +1,5 @@
|
||||
1 results
|
||||
application/pdf [file:///home/dockes/projets/fulltext/testrecoll/pdf-annots/Présidentielle en Biélorussie .pdf] [Présidentielle en Biélorussie : la candidate de l’opposition, Svetlana Tikhanovskaïa, s’est réfugiée en Lituanie] 325430 bytes
|
||||
|
||||
Extracting the value for an annotation field:
|
||||
P.: 3, D:20200904094331+00'240', highlight : None P.: 3, D:20200904100158+00'240', highlight : DAVID: Test of a highlight that has a note attached to
|
||||
Loading…
x
Reference in New Issue
Block a user