""" .. versionadded:: 0.14.0 This module contains the XMP data model classes and namespace registry, in addition to a simplified document metadata model used for automated metadata management. """ import enum from dataclasses import dataclass, field from datetime import datetime from typing import Dict, Iterable, Iterator, List, Optional, Tuple, Union from pyhanko import __version__ from pyhanko.pdf_utils.misc import StringWithLanguage __all__ = [ 'DocumentMetadata', 'VENDOR', 'MetaString', 'ExpandedName', 'Qualifiers', 'XmpValue', 'XmpStructure', 'XmpArrayType', 'XmpArray', 'NS', 'XML_LANG', 'RDF_RDF', 'RDF_SEQ', 'RDF_BAG', 'RDF_ALT', 'RDF_LI', 'RDF_VALUE', 'RDF_RESOURCE', 'RDF_PARSE_TYPE', 'RDF_ABOUT', 'RDF_DESCRIPTION', 'DC_TITLE', 'DC_CREATOR', 'DC_DESCRIPTION', 'PDF_PRODUCER', 'PDF_KEYWORDS', 'X_XMPMETA', 'X_XMPTK', 'XMP_CREATORTOOL', 'XMP_CREATEDATE', 'XMP_MODDATE', ] VENDOR = 'pyHanko ' + __version__ """ pyHanko version identifier in textual form """ MetaString = Union[StringWithLanguage, str, None] """ A regular string, a string with a language code, or nothing at all. """ @dataclass class DocumentMetadata: """ Simple representation of document metadata. All entries are optional. """ title: MetaString = None """ The document's title. """ author: MetaString = None """ The document's author. """ subject: MetaString = None """ The document's subject. """ keywords: List[str] = field(default_factory=list) """ Keywords associated with the document. """ creator: MetaString = None """ The software that was used to author the document. .. note:: This is distinct from the producer, which is typically used to indicate which PDF processor(s) interacted with the file. """ created: Union[str, datetime, None] = None """ The time when the document was created. To set it to the current time, specify ``now``. """ last_modified: Union[str, datetime, None] = "now" """ The time when the document was last modified. Defaults to the current time upon serialisation if not specified. """ xmp_extra: List['XmpStructure'] = field(default_factory=list) """ Extra XMP metadata. """ xmp_unmanaged: bool = False """ Flag metadata as XMP-only. This means that the info dictionary will be cleared out as much as possible, and that all attributes other than :attr:`xmp_extra` will be ignored when updating XMP metadata. .. note:: The last-modified date and producer entries in the info dictionary will still be updated. .. note:: :class:`DocumentMetadata` represents a data model that is much more simple than what XMP is actually capable of. You can use this flag if you need more fine-grained control. """ def view_over(self, base: 'DocumentMetadata'): return DocumentMetadata( title=self.title or base.title, author=self.author or base.author, subject=self.subject or base.subject, keywords=list(self.keywords or base.keywords), creator=self.creator or base.creator, created=self.created or base.created, last_modified=self.last_modified, ) @dataclass(frozen=True) class ExpandedName: """ An expanded XML name. """ ns: str """ The URI of the namespace in which the name resides. """ local_name: str """ The local part of the name. """ def __str__(self): ns = self.ns sep = '' if ns.endswith('/') or ns.endswith('#') else '/' return f"{ns}{sep}{self.local_name}" def __repr__(self): return str(self) NS = { 'xml': 'http://www.w3.org/XML/1998/namespace', 'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#', 'xmp': 'http://ns.adobe.com/xap/1.0/', 'dc': 'http://purl.org/dc/elements/1.1/', 'pdf': 'http://ns.adobe.com/pdf/1.3/', 'pdfaid': 'http://www.aiim.org/pdfa/ns/id/', 'pdfuaid': 'http://www.aiim.org/pdfua/ns/id/', 'pdfaSchema': 'http://www.aiim.org/pdfa/ns/schema#', 'pdfaExtension': 'http://www.aiim.org/pdfa/ns/extension/', 'pdfaProperty': 'http://www.aiim.org/pdfa/ns/property#', 'x': 'adobe:ns:meta/', } """ Known namespaces and their customary prefixes. """ XML_LANG = ExpandedName(ns=NS['xml'], local_name='lang') """ ``lang`` in the ``xml`` namespace. """ RDF_RDF = ExpandedName(ns=NS['rdf'], local_name='RDF') """ ``RDF`` in the ``rdf`` namespace. """ RDF_SEQ = ExpandedName(ns=NS['rdf'], local_name='Seq') """ ``Seq`` in the ``rdf`` namespace. """ RDF_BAG = ExpandedName(ns=NS['rdf'], local_name='Bag') """ ``Bag`` in the ``rdf`` namespace. """ RDF_ALT = ExpandedName(ns=NS['rdf'], local_name='Alt') """ ``Alt`` in the ``rdf`` namespace. """ RDF_LI = ExpandedName(ns=NS['rdf'], local_name='li') """ ``li`` in the ``rdf`` namespace. """ RDF_VALUE = ExpandedName(ns=NS['rdf'], local_name='value') """ ``value`` in the ``rdf`` namespace. """ RDF_RESOURCE = ExpandedName(ns=NS['rdf'], local_name='resource') """ ``resource`` in the ``rdf`` namespace. """ RDF_ABOUT = ExpandedName(ns=NS['rdf'], local_name='about') """ ``about`` in the ``rdf`` namespace. """ RDF_PARSE_TYPE = ExpandedName(ns=NS['rdf'], local_name='parseType') """ ``parseType`` in the ``rdf`` namespace. """ RDF_DESCRIPTION = ExpandedName(ns=NS['rdf'], local_name='Description') """ ``Description`` in the ``rdf`` namespace. """ X_XMPMETA = ExpandedName(ns=NS['x'], local_name='xmpmeta') """ ``xmpmeta`` in the ``x`` namespace. """ X_XMPTK = ExpandedName(ns=NS['x'], local_name='xmptk') """ ``xmptk`` in the ``x`` namespace. """ DC_TITLE = ExpandedName(ns=NS['dc'], local_name='title') """ ``title`` in the ``dc`` namespace. """ DC_CREATOR = ExpandedName(ns=NS['dc'], local_name='creator') """ ``creator`` in the ``dc`` namespace. """ DC_DESCRIPTION = ExpandedName(ns=NS['dc'], local_name='description') """ ``description`` in the ``dc`` namespace. """ PDF_KEYWORDS = ExpandedName(ns=NS['pdf'], local_name='keywords') """ ``keywords`` in the ``pdf`` namespace. """ PDF_PRODUCER = ExpandedName(ns=NS['pdf'], local_name='Producer') """ ``Producer`` in the ``pdf`` namespace. """ XMP_CREATORTOOL = ExpandedName(ns=NS['xmp'], local_name='CreatorTool') """ ``CreatorTool`` in the ``xmp`` namespace. """ XMP_CREATEDATE = ExpandedName(ns=NS['xmp'], local_name='CreateDate') """ ``CreateDate`` in the ``xmp`` namespace. """ XMP_MODDATE = ExpandedName(ns=NS['xmp'], local_name='ModifyDate') """ ``ModifyDate`` in the ``xmp`` namespace. """ class Qualifiers: """ XMP value qualifiers wrapper. Implements ``__getitem__``. Note that ``xml:lang`` gets special treatment. :param quals: The qualifiers to model. """ _quals: Dict[ExpandedName, 'XmpValue'] _lang: Optional[str] def __init__(self, quals: Dict[ExpandedName, 'XmpValue']): self._quals = quals try: lang = quals[XML_LANG] del quals[XML_LANG] if not isinstance(lang.value, str): raise TypeError # pragma: nocover self._lang = lang.value except KeyError: self._lang = None @classmethod def of(cls, *lst: Tuple[ExpandedName, 'XmpValue']) -> 'Qualifiers': """ Construct a :class:`.Qualifiers` object from a list of name-value pairs. :param lst: A list of name-value pairs. :return: A :class:`.Qualifiers` object. """ return Qualifiers({k: v for k, v in lst}) @classmethod def lang_as_qual(cls, lang: Optional[str]) -> 'Qualifiers': """ Construct a :class:`.Qualifiers` object that only wraps a language qualifier. :param lang: A language code. :return: A :class:`.Qualifiers` object. """ quals = Qualifiers({}) if lang: quals._lang = lang return quals def __getitem__(self, item): return self._quals[item] def iter_quals( self, with_lang: bool = True ) -> Iterable[Tuple[ExpandedName, 'XmpValue']]: """ Iterate over all qualifiers. :param with_lang: Include the language qualifier. :return: """ yield from self._quals.items() if with_lang and self._lang is not None: yield XML_LANG, XmpValue(self._lang) @property def lang(self) -> Optional[str]: """ Retrieve the language qualifier, if any. """ return self._lang @property def has_non_lang_quals(self) -> bool: """ Check if there are any non-language qualifiers. """ return bool(self._quals) def __bool__(self): return bool(self._quals or self._lang) def __repr__(self): q = dict(self._quals) if self._lang: q['lang'] = self._lang return f"Qualifiers({q!r})" def __eq__(self, other): return ( isinstance(other, Qualifiers) and self._lang == other._lang and self._quals == other._quals ) @dataclass(frozen=True) class XmpUri: """ An XMP URI value. """ value: str def __str__(self): return self.value @dataclass class XmpValue: """ A general XMP value, potentially with qualifiers. """ value: Union['XmpStructure', 'XmpArray', XmpUri, str] """ The value. """ qualifiers: Qualifiers = field(default_factory=Qualifiers.of) """ Qualifiers that apply to the value. """ class XmpStructure: """ A generic XMP structure value. Implements ``__getitem__`` for field access. :param fields: The structure's fields. """ # isomorphic to Qualifiers, but we keep them separate to stay # closer to the spec (and this one doesn't special-case anything) def __init__(self, fields: Dict[ExpandedName, 'XmpValue']): self._fields: Dict[ExpandedName, XmpValue] = fields @classmethod def of(cls, *lst: Tuple[ExpandedName, 'XmpValue']) -> 'XmpStructure': """ Construct an :class:`.XmpStructure` from a list of name-value pairs. :param lst: A list of name-value pairs. :return: An an :class:`.XmpStructure`. """ return cls({k: v for k, v in lst}) def __getitem__(self, item): return self._fields[item] def __iter__(self) -> Iterator[Tuple[ExpandedName, 'XmpValue']]: yield from self._fields.items() def __repr__(self): return f"XmpStructure({self._fields!r})" def __eq__(self, other): return isinstance(other, XmpStructure) and self._fields == other._fields @enum.unique class XmpArrayType(enum.Enum): """ XMP array types. """ ORDERED = 'Seq' """ Ordered array. """ UNORDERED = 'Bag' """ Unordered array. """ ALTERNATIVE = 'Alt' """ Alternative array. """ def as_rdf(self) -> ExpandedName: """ Render the type as an XML name. """ return ExpandedName(ns=NS['rdf'], local_name=str(self.value)) @dataclass class XmpArray: """ An XMP array. """ array_type: XmpArrayType """ The type of the array. """ entries: List[XmpValue] """ The entries in the array. """ @classmethod def ordered(cls, lst: Iterable[XmpValue]) -> 'XmpArray': """ Convert a list to an ordered XMP array. :param lst: An iterable of XMP values. :return: An ordered :class:`.XmpArray`. """ return cls(XmpArrayType.ORDERED, list(lst)) @classmethod def unordered(cls, lst: Iterable[XmpValue]) -> 'XmpArray': """ Convert a list to an unordered XMP array. :param lst: An iterable of XMP values. :return: An unordered :class:`.XmpArray`. """ return cls(XmpArrayType.UNORDERED, list(lst)) @classmethod def alternative(cls, lst: Iterable[XmpValue]) -> 'XmpArray': """ Convert a list to an alternative XMP array. :param lst: An iterable of XMP values. :return: An alternative :class:`.XmpArray`. """ return cls(XmpArrayType.ALTERNATIVE, list(lst)) def __eq__(self, other): if ( not isinstance(other, XmpArray) or self.array_type != other.array_type ): return False if self.array_type == XmpArrayType.UNORDERED: return all(e in self.entries for e in other.entries) and all( e in other.entries for e in self.entries ) else: return self.entries == other.entries