#!/usr/bin/env python3 # Python-based Image Tag extractor for Recoll. This is less thorough # than the Perl-based rclimg script, but useful if you don't want to # have to install Perl (e.g. on Windows). # # Uses pyexiv2. Also tried Pillow, found it useless for tags. # from __future__ import print_function import sys import os import rclexecm import re from rclbasehandler import RclBaseHandler try: import pyexiv2 except: print("RECFILTERROR HELPERNOTFOUND python3:pyexiv2") sys.exit(1); khexre = re.compile('.*\.0[xX][0-9a-fA-F]+$') pyexiv2_titles = { 'Xmp.dc.subject', 'Xmp.lr.hierarchicalSubject', 'Xmp.MicrosoftPhoto.LastKeywordXMP', } # Keys for which we set meta tags meta_pyexiv2_keys = { 'Xmp.dc.subject', 'Xmp.lr.hierarchicalSubject', 'Xmp.MicrosoftPhoto.LastKeywordXMP', 'Xmp.digiKam.TagsList', 'Exif.Photo.DateTimeDigitized', 'Exif.Photo.DateTimeOriginal', 'Exif.Image.DateTime', } exiv2_dates = ['Exif.Photo.DateTimeOriginal', 'Exif.Image.DateTime', 'Exif.Photo.DateTimeDigitized'] class ImgTagExtractor(RclBaseHandler): def __init__(self, em): super(ImgTagExtractor, self).__init__(em) def html_text(self, filename): ok = False metadata = pyexiv2.ImageMetadata(filename) metadata.read() keys = metadata.exif_keys + metadata.iptc_keys + metadata.xmp_keys mdic = {} for k in keys: # we skip numeric keys and undecoded makernote data if k != 'Exif.Photo.MakerNote' and not khexre.match(k): mdic[k] = str(metadata[k].raw_value) docdata = b'
\n' ttdata = set() for k in pyexiv2_titles: if k in mdic: ttdata.add(self.em.htmlescape(mdic[k])) if ttdata: title = "" for v in ttdata: v = v.replace('[', '').replace(']', '').replace("'", "") title += v + " " docdata += rclexecm.makebytes("