""" This module implements support for PDF-specific signing functionality. """ import asyncio import enum import logging import uuid import warnings from dataclasses import dataclass, field from datetime import datetime from typing import IO, Any, Dict, List, Optional, Set, Tuple, Union import tzlocal from asn1crypto import algos, cms, crl, keys, ocsp from asn1crypto import pdf as asn1_pdf from cryptography.hazmat.primitives import hashes from pyhanko_certvalidator import CertificateValidator, ValidationContext from pyhanko_certvalidator.errors import PathBuildingError, PathValidationError from pyhanko_certvalidator.path import ValidationPath from pyhanko_certvalidator.validate import ACValidationResult, async_validate_ac from pyhanko.pdf_utils import generic, misc from pyhanko.pdf_utils.generic import pdf_name from pyhanko.pdf_utils.incremental_writer import IncrementalPdfFileWriter from pyhanko.pdf_utils.reader import PdfFileReader from pyhanko.pdf_utils.writer import BasePdfFileWriter from pyhanko.sign.ades.api import CAdESSignedAttrSpec from pyhanko.sign.fields import ( FieldMDPSpec, InvisSigSettings, MDPPerm, SeedLockDocument, SigAuthType, SigFieldSpec, SigSeedSubFilter, SigSeedValFlags, SigSeedValueSpec, enumerate_sig_fields, ) from pyhanko.sign.general import ( SigningError, get_cms_hash_algo_for_mechanism, get_pyca_cryptography_hash, ) from pyhanko.sign.timestamps import TimeStamper from pyhanko.stamp import BaseStampStyle from . import constants from .cms_embedder import ( PdfCMSEmbedder, SigAppearanceSetup, SigIOSetup, SigMDPSetup, SigObjSetup, ) from .pdf_byterange import ( BuildProps, DocumentTimestamp, PreparedByteRangeDigest, SignatureObject, ) from .pdf_cms import PdfCMSSignedAttributes, Signer, select_suitable_signing_md __all__ = [ 'PdfSignatureMetadata', 'DSSContentSettings', 'TimestampDSSContentSettings', 'GeneralDSSContentSettings', 'SigDSSPlacementPreference', 'PdfTimeStamper', 'PdfSigner', 'PdfSigningSession', 'PdfTBSDocument', 'PdfPostSignatureDocument', 'PreSignValidationStatus', 'PostSignInstructions', ] from ...pdf_utils.crypt import SerialisedCredential logger = logging.getLogger(__name__) @dataclass(frozen=True) class GeneralDSSContentSettings: """ .. versionadded:: 0.8.0 Settings that govern DSS creation and updating in general. """ include_vri: bool = True """ Flag to control whether to create and update entries in the VRI dictionary. The default is to always update the VRI dictionary. .. note:: The VRI dictionary is a relic of the past that is effectively deprecated in the current PAdES standards, and most modern validators don't rely on it being there. That said, there's no real harm in creating these entries, other than that it occasionally forces DSS updates where none would otherwise be necessary, and that it prevents the DSS from being updated prior to signing (as opposed to after signing). """ skip_if_unneeded: bool = True """ Do not perform a write if updating the DSS would not add any new information. .. note:: This setting is only used if the DSS update would happen in its own revision. """ class SigDSSPlacementPreference(enum.Enum): """ .. versionadded:: 0.8.0 Preference for where to perform a DSS update with validation information for a specific signature. """ TOGETHER_WITH_SIGNATURE = enum.auto() """ Update the DSS in the revision that contains the signature. Doing so can be useful to create a PAdES-B-LT signature in a single revision. Such signatures can be processed by a validator that isn't capable of incremental update analysis. .. warning:: This setting can only be used if :attr:`include_vri` is ``False``. """ SEPARATE_REVISION = enum.auto() """ Always perform the DSS update in a separate revision, after the signature, but before any timestamps are added. .. note:: This is the old default behaviour. """ TOGETHER_WITH_NEXT_TS = enum.auto() """ If the signing workflow includes a document timestamp after the signature, update the DSS in the same revision as the timestamp. In the absence of document timestamps, this is equivalent to :attr:`SEPARATE_REVISION`. .. warning:: This option controls the addition of validation info for the signature and its associated signature timestamp, not the validation info for the document timestamp itself. See :attr:`.DSSContentSettings.next_ts_settings`. In most practical situations, the distinction is only relevant in interrupted signing workflows (see :ref:`interrupted-signing`), where the lifecycle of the validation context is out of pyHanko's hands. """ @dataclass(frozen=True) class TimestampDSSContentSettings(GeneralDSSContentSettings): """ .. versionadded:: 0.8.0 Settings for a DSS update with validation information for a document timestamp. .. note:: In most workflows, adding a document timestamp doesn't trigger any DSS updates beyond VRI additions, because the same TSA is used for signature timestamps and for document timestamps. """ update_before_ts: bool = False """ Perform DSS update before creating the timestamp, instead of after. .. warning:: This setting can only be used if :attr:`include_vri` is ``False``. """ def assert_viable(self): """ Check settings for consistency, and raise :class:`.SigningError` otherwise. """ if self.include_vri and self.update_before_ts: raise SigningError( "If VRI entries are to be included, DSS updates can only be " "performed after the timestamp in question was created." ) @dataclass(frozen=True) class DSSContentSettings(GeneralDSSContentSettings): """ .. versionadded:: 0.8.0 Settings for a DSS update with validation information for a signature. """ placement: SigDSSPlacementPreference = ( SigDSSPlacementPreference.TOGETHER_WITH_NEXT_TS ) """ Preference for where to perform a DSS update with validation information for a specific signature. See :class:`.SigDSSPlacementPreference`. The default is :attr:`.SigDSSPlacementPreference.TOGETHER_WITH_NEXT_TS`. """ next_ts_settings: Optional[TimestampDSSContentSettings] = None """ Explicit settings for DSS updates pertaining to a document timestamp added as part of the same signing workflow, if applicable. If ``None``, a default will be generated based on the values of this settings object. .. note:: When consuming :class:`.DSSContentSettings` objects, you should call :meth:`get_settings_for_ts` instead of relying on the value of this field. """ def get_settings_for_ts(self) -> TimestampDSSContentSettings: """ Retrieve DSS update settings for document timestamps that are part of our signing workflow, if there are any. """ ts_settings = self.next_ts_settings if ts_settings is not None: return ts_settings update_before_ts = ( self.placement == SigDSSPlacementPreference.TOGETHER_WITH_SIGNATURE ) return TimestampDSSContentSettings( include_vri=self.include_vri, skip_if_unneeded=self.skip_if_unneeded, update_before_ts=update_before_ts, ) def assert_viable(self): """ Check settings for consistency, and raise :class:`.SigningError` otherwise. """ pre_sign = ( self.placement == SigDSSPlacementPreference.TOGETHER_WITH_SIGNATURE ) if self.include_vri and pre_sign: raise SigningError( "If VRI entries are to be included, DSS updates can only be " "performed after the signature in question was created." ) self.get_settings_for_ts().assert_viable() @dataclass(frozen=True) class PdfSignatureMetadata: """ Specification for a PDF signature. """ field_name: Optional[str] = None """ The name of the form field to contain the signature. If there is only one available signature field, the name may be inferred. """ md_algorithm: Optional[str] = None """ The name of the digest algorithm to use. It should be supported by `pyca/cryptography`. If ``None``, :func:`.select_suitable_signing_md` will be invoked to generate a suitable default, unless a seed value dictionary happens to be available. """ location: Optional[str] = None """ Location of signing. """ reason: Optional[str] = None """ Reason for signing (textual). """ contact_info: Optional[str] = None """ Information provided by the signer to enable the receiver to contact the signer to verify the signature. """ name: Optional[str] = None """ Name of the signer. This value is usually not necessary to set, since it should appear on the signer's certificate, but there are cases where it might be useful to specify it here (e.g. in situations where signing is delegated to a trusted third party). """ app_build_props: Optional[BuildProps] = None """ Properties of the application that created the signature. If specified, this data will be recorded in the **Prop_Build** dictionary of the signature. """ prop_auth_time: Optional[int] = None """ Number of seconds since signer was last authenticated. """ prop_auth_type: Optional[SigAuthType] = None """ Signature /Prop_AuthType to use. This should be one of :attr:`~.fields.SigAuthType.PIN` or :attr:`~.fields.SigAuthType.PASSWORD` or :attr:`~.fields.SigAuthType.FINGERPRINT` If not specified, this property won't be set on the signature dictionary. """ certify: bool = False """ Sign with an author (certification) signature, as opposed to an approval signature. A document can contain at most one such signature, and it must be the first one. """ # TODO Does this restriction also apply to prior document timestamps? subfilter: Optional[SigSeedSubFilter] = None """ Signature subfilter to use. This should be one of :attr:`~.fields.SigSeedSubFilter.ADOBE_PKCS7_DETACHED` or :attr:`~.fields.SigSeedSubFilter.PADES`. If not specified, the value may be inferred from the signature field's seed value dictionary. Failing that, :attr:`~.fields.SigSeedSubFilter.ADOBE_PKCS7_DETACHED` is used as the default value. """ embed_validation_info: bool = False """ Flag indicating whether validation info (OCSP responses and/or CRLs) should be embedded or not. This is necessary to be able to validate signatures long after they have been made. This flag requires :attr:`validation_context` to be set. The precise manner in which the validation info is embedded depends on the (effective) value of :attr:`subfilter`: * With :attr:`~.fields.SigSeedSubFilter.ADOBE_PKCS7_DETACHED`, the validation information will be embedded inside the CMS object containing the signature. * With :attr:`~.fields.SigSeedSubFilter.PADES`, the validation information will be embedded into the document security store (DSS). """ use_pades_lta: bool = False """ If ``True``, the signer will append an additional document timestamp after writing the signature's validation information to the document security store (DSS). This flag is only meaningful if :attr:`subfilter` is :attr:`~.fields.SigSeedSubFilter.PADES`. The PAdES B-LTA profile solves the long-term validation problem by adding a timestamp chain to the document after the regular signatures, which is updated with new timestamps at regular intervals. This provides an audit trail that ensures the long-term integrity of the validation information in the DSS, since OCSP responses and CRLs also have a finite lifetime. See also :meth:`.PdfTimeStamper.update_archival_timestamp_chain`. """ timestamp_field_name: Optional[str] = None """ Name of the timestamp field created when :attr:`use_pades_lta` is ``True``. If not specified, a unique name will be generated using :mod:`uuid`. """ validation_context: Optional[ValidationContext] = None """ The validation context to use when validating signatures. If provided, the signer's certificate and any timestamp certificates will be validated before signing. This parameter is mandatory when :attr:`embed_validation_info` is ``True``. """ docmdp_permissions: MDPPerm = MDPPerm.FILL_FORMS """ Indicates the document modification policy that will be in force after this signature is created. Only relevant for certification signatures or signatures that apply locking. .. warning:: For non-certification signatures, this is only explicitly allowed since PDF 2.0 (ISO 32000-2), so older software may not respect this setting on approval signatures. """ signer_key_usage: Set[str] = field( default_factory=lambda: constants.DEFAULT_SIGNER_KEY_USAGE ) """ Key usage extensions required for the signer's certificate. Defaults to ``non_repudiation`` only, but sometimes ``digital_signature`` or a combination of both may be more appropriate. See :class:`x509.KeyUsage` for a complete list. Only relevant if a validation context is also provided. """ cades_signed_attr_spec: Optional[CAdESSignedAttrSpec] = None """ .. versionadded:: 0.5.0 Specification for CAdES-specific attributes. """ dss_settings: DSSContentSettings = DSSContentSettings() """ .. versionadded:: 0.8.0 DSS output settings. See :class:`.DSSContentSettings`. """ tight_size_estimates: bool = False """ .. versionadded:: 0.8.0 When estimating the size of a signature container, do not add safety margins. .. note:: This should be OK if the entire CMS object is produced by pyHanko, and the signing scheme produces signatures of a fixed size. However, if the signature container includes unsigned attributes such as signature timestamps, the size of the signature is never entirely predictable. """ ac_validation_context: Optional[ValidationContext] = None """ .. versionadded:: 0.11.0 Validation context for attribute certificates """ def _ensure_esic_ext(pdf_writer: BasePdfFileWriter): """ Helper function to ensure that the output PDF is at least PDF 1.7, and that the relevant ESIC extension for PAdES is enabled if the version lower than 2.0. """ pdf_writer.ensure_output_version(version=(1, 7)) if pdf_writer.output_version < (2, 0): pdf_writer.register_extension(constants.ESIC_EXTENSION_1) def _ensure_iso32001_ext(pdf_writer: BasePdfFileWriter): pdf_writer.ensure_output_version(version=(2, 0)) pdf_writer.register_extension(constants.ISO32001) def _ensure_iso32002_ext(pdf_writer: BasePdfFileWriter): pdf_writer.ensure_output_version(version=(2, 0)) pdf_writer.register_extension(constants.ISO32002) def _is_iso32002_curve(pubkey: keys.PublicKeyInfo): kind, curve_id = pubkey.curve return kind == 'named' and curve_id in constants.ISO32002_CURVE_NAMES class PdfTimeStamper: """ Class to encapsulate the process of appending document timestamps to PDF files. """ def __init__( self, timestamper: TimeStamper, field_name: Optional[str] = None, invis_settings: InvisSigSettings = InvisSigSettings(), readable_field_name: str = "Timestamp", ): self.default_timestamper = timestamper self._field_name = field_name self._readable_field_name = readable_field_name self._invis_settings = invis_settings @property def field_name(self) -> str: """ Retrieve or generate the field name for the signature field to contain the document timestamp. :return: The field name, as a (Python) string. """ return self._field_name or ('Timestamp-' + str(uuid.uuid4())) # TODO I'm not entirely sure that allowing validation_paths to be cached # is wise. In principle, the TSA could issue their next timestamp with a # different certificate (e.g. due to load balancing), which would require # validation regardless. def timestamp_pdf( self, pdf_out: IncrementalPdfFileWriter, md_algorithm, validation_context=None, bytes_reserved=None, validation_paths=None, timestamper: Optional[TimeStamper] = None, *, in_place=False, output=None, dss_settings: TimestampDSSContentSettings = TimestampDSSContentSettings(), chunk_size=misc.DEFAULT_CHUNK_SIZE, tight_size_estimates: bool = False, ): """ .. versionchanged:: 0.9.0 Wrapper around :meth:`async_timestamp_pdf`. Timestamp the contents of ``pdf_out``. Note that ``pdf_out`` should not be written to after this operation. :param pdf_out: An :class:`.IncrementalPdfFileWriter`. :param md_algorithm: The hash algorithm to use when computing message digests. :param validation_context: The :class:`.pyhanko_certvalidator.ValidationContext` against which the TSA response should be validated. This validation context will also be used to update the DSS. :param bytes_reserved: Bytes to reserve for the CMS object in the PDF file. If not specified, make an estimate based on a dummy signature. .. warning:: Since the CMS object is written to the output file as a hexadecimal string, you should request **twice** the (estimated) number of bytes in the DER-encoded version of the CMS object. :param validation_paths: If the validation path(s) for the TSA's certificate are already known, you can pass them using this parameter to avoid having to run the validation logic again. :param timestamper: Override the default :class:`.TimeStamper` associated with this :class:`.PdfTimeStamper`. :param output: Write the output to the specified output stream. If ``None``, write to a new :class:`.BytesIO` object. Default is ``None``. :param in_place: Sign the original input stream in-place. This parameter overrides ``output``. :param chunk_size: Size of the internal buffer (in bytes) used to feed data to the message digest function if the input stream does not support ``memoryview``. :param dss_settings: DSS output settings. See :class:`.TimestampDSSContentSettings`. :param tight_size_estimates: When estimating the size of a document timestamp container, do not add safety margins. .. note:: External TSAs cannot be relied upon to always produce the exact same output length, which makes this option risky to use. :return: The output stream containing the signed output. """ result = asyncio.run( self.async_timestamp_pdf( pdf_out, md_algorithm, validation_context=validation_context, bytes_reserved=bytes_reserved, validation_paths=validation_paths, timestamper=timestamper, in_place=in_place, output=output, chunk_size=chunk_size, dss_settings=dss_settings, tight_size_estimates=tight_size_estimates, ) ) return result async def async_timestamp_pdf( self, pdf_out: IncrementalPdfFileWriter, md_algorithm, validation_context=None, bytes_reserved=None, validation_paths=None, timestamper: Optional[TimeStamper] = None, *, in_place=False, output=None, dss_settings: TimestampDSSContentSettings = TimestampDSSContentSettings(), chunk_size=misc.DEFAULT_CHUNK_SIZE, tight_size_estimates: bool = False, embed_roots: bool = True, ): """ .. versionadded:: 0.9.0 Timestamp the contents of ``pdf_out``. Note that ``pdf_out`` should not be written to after this operation. :param pdf_out: An :class:`.IncrementalPdfFileWriter`. :param md_algorithm: The hash algorithm to use when computing message digests. :param validation_context: The :class:`.pyhanko_certvalidator.ValidationContext` against which the TSA response should be validated. This validation context will also be used to update the DSS. :param bytes_reserved: Bytes to reserve for the CMS object in the PDF file. If not specified, make an estimate based on a dummy signature. .. warning:: Since the CMS object is written to the output file as a hexadecimal string, you should request **twice** the (estimated) number of bytes in the DER-encoded version of the CMS object. :param validation_paths: If the validation path(s) for the TSA's certificate are already known, you can pass them using this parameter to avoid having to run the validation logic again. :param timestamper: Override the default :class:`.TimeStamper` associated with this :class:`.PdfTimeStamper`. :param output: Write the output to the specified output stream. If ``None``, write to a new :class:`.BytesIO` object. Default is ``None``. :param in_place: Sign the original input stream in-place. This parameter overrides ``output``. :param chunk_size: Size of the internal buffer (in bytes) used to feed data to the message digest function if the input stream does not support ``memoryview``. :param dss_settings: DSS output settings. See :class:`.TimestampDSSContentSettings`. :param tight_size_estimates: When estimating the size of a document timestamp container, do not add safety margins. .. note:: External TSAs cannot be relied upon to always produce the exact same output length, which makes this option risky to use. :param embed_roots: Option that controls whether the root certificate of each validation path should be embedded into the DSS. The default is ``True``. .. note:: Trust roots are configured by the validator, so embedding them typically does nothing in a typical validation process. Therefore they can be safely omitted in most cases. Nonetheless, embedding the roots can be useful for documentation purposes. :return: The output stream containing the signed output. """ _ensure_esic_ext(pdf_out) from pyhanko.sign import validation timestamper = timestamper or self.default_timestamper if validation_context is not None: paths_coro = timestamper.validation_paths(validation_context) if validation_paths is None: validation_paths = [] async for path in paths_coro: validation_paths.append(path) dss_settings.assert_viable() if dss_settings.update_before_ts: # NOTE: we have to disable VRI in this scenario validation.DocumentSecurityStore.supply_dss_in_writer( pdf_out, sig_contents=None, paths=validation_paths, validation_context=validation_context, embed_roots=embed_roots, ) field_name = self.field_name if bytes_reserved is None: test_signature_cms = await timestamper.async_dummy_response( md_algorithm ) test_len = len(test_signature_cms.dump()) * 2 if tight_size_estimates: bytes_reserved = test_len else: # see sign_pdf comments bytes_reserved = test_len + 2 * (test_len // 4) timestamp_obj = DocumentTimestamp(bytes_reserved=bytes_reserved) field_spec = SigFieldSpec( sig_field_name=field_name, invis_sig_settings=self._invis_settings, readable_field_name=self._readable_field_name, ) cms_writer = PdfCMSEmbedder(new_field_spec=field_spec).write_cms( field_name=field_name, writer=pdf_out, # for LTA, requiring existing_fields_only doesn't make sense # since we should in principle be able to add document timestamps # ad infinitum. existing_fields_only=False, ) next(cms_writer) cms_writer.send(SigObjSetup(sig_placeholder=timestamp_obj)) sig_io = SigIOSetup( md_algorithm=md_algorithm, in_place=in_place, output=output, chunk_size=chunk_size, ) prep_digest: PreparedByteRangeDigest prep_digest, res_output = cms_writer.send(sig_io) timestamp_cms = await timestamper.async_timestamp( prep_digest.document_digest, md_algorithm ) sig_contents = cms_writer.send(timestamp_cms) # update the DSS if necessary if validation_context is not None and not dss_settings.update_before_ts: if not dss_settings.include_vri: sig_contents = None # FIXME in this case, the ser/deser step is unnecessary # and inefficient; should probably rewrite # using supply_dss_in_writer credential_ser: Optional[SerialisedCredential] = None if pdf_out.security_handler: credential = pdf_out.security_handler.extract_credential() if credential: credential_ser = credential.serialise() validation.DocumentSecurityStore.add_dss( output_stream=res_output, sig_contents=sig_contents, paths=validation_paths, validation_context=validation_context, force_write=not dss_settings.skip_if_unneeded, embed_roots=embed_roots, file_credential=credential_ser, ) return misc.finalise_output(output, res_output) def update_archival_timestamp_chain( self, reader: PdfFileReader, validation_context, in_place=True, output=None, chunk_size=misc.DEFAULT_CHUNK_SIZE, default_md_algorithm=constants.DEFAULT_MD, ): """ .. versionchanged:: 0.9.0 Wrapper around :meth:`async_update_archival_timestamp_chain`. Validate the last timestamp in the timestamp chain on a PDF file, and write an updated version to an output stream. :param reader: A :class:`PdfReader` encapsulating the input file. :param validation_context: :class:`.pyhanko_certvalidator.ValidationContext` object to validate the last timestamp. :param output: Write the output to the specified output stream. If ``None``, write to a new :class:`.BytesIO` object. Default is ``None``. :param in_place: Sign the original input stream in-place. This parameter overrides ``output``. :param chunk_size: Size of the internal buffer (in bytes) used to feed data to the message digest function if the input stream does not support ``memoryview``. :param default_md_algorithm: Message digest to use if there are no preceding timestamps in the file. :return: The output stream containing the signed output. """ coro = self.async_update_archival_timestamp_chain( reader=reader, validation_context=validation_context, in_place=in_place, output=output, chunk_size=chunk_size, default_md_algorithm=default_md_algorithm, ) return asyncio.run(coro) async def async_update_archival_timestamp_chain( self, reader: PdfFileReader, validation_context, in_place=True, output=None, chunk_size=misc.DEFAULT_CHUNK_SIZE, default_md_algorithm=constants.DEFAULT_MD, embed_roots: bool = True, ): """ .. versionadded:: 0.9.0 Validate the last timestamp in the timestamp chain on a PDF file, and write an updated version to an output stream. :param reader: A :class:`PdfReader` encapsulating the input file. :param validation_context: :class:`.pyhanko_certvalidator.ValidationContext` object to validate the last timestamp. :param output: Write the output to the specified output stream. If ``None``, write to a new :class:`.BytesIO` object. Default is ``None``. :param in_place: Sign the original input stream in-place. This parameter overrides ``output``. :param chunk_size: Size of the internal buffer (in bytes) used to feed data to the message digest function if the input stream does not support ``memoryview``. :param default_md_algorithm: Message digest to use if there are no preceding timestamps in the file. :param embed_roots: Option that controls whether the root certificate of each validation path should be embedded into the DSS. The default is ``True``. .. note:: Trust roots are configured by the validator, so embedding them typically does nothing in a typical validation process. Therefore they can be safely omitted in most cases. Nonetheless, embedding the roots can be useful for documentation purposes. :return: The output stream containing the signed output. """ # TODO expose DSS fine-tuning here as well # In principle, we only have to validate that the last timestamp token # in the current chain is valid. # TODO: add an option to validate the entire timestamp chain # plus all signatures from .. import validation from ..validation.status import TimestampSignatureStatus timestamps = validation.get_timestamp_chain(reader) try: last_timestamp = next(timestamps) except StopIteration: logger.warning( "Document does not have any document timestamps yet. " "This may cause unexpected results." ) last_timestamp = None # Validate the previous timestamp if present tst_status: Optional[TimestampSignatureStatus] = None if last_timestamp is None: md_algorithm = default_md_algorithm else: expected_imprint = last_timestamp.compute_digest() tst_token = last_timestamp.signed_data # run validation logic last_tst_status = await validation.ltv.establish_timestamp_trust( tst_token, validation_context, expected_imprint ) md_algorithm = last_tst_status.md_algorithm tst_status = last_tst_status # Prepare output if in_place: res_output = reader.stream else: res_output = misc.prepare_rw_output_stream(output) reader.stream.seek(0) misc.chunked_write(bytearray(chunk_size), reader.stream, res_output) pdf_out = IncrementalPdfFileWriter(res_output) if last_timestamp is not None: assert tst_status is not None # update the DSS validation.DocumentSecurityStore.supply_dss_in_writer( pdf_out, last_timestamp.pkcs7_content, paths=(tst_status.validation_path,), validation_context=validation_context, embed_roots=embed_roots, ) # append a new timestamp await self.async_timestamp_pdf( pdf_out, md_algorithm, validation_context, in_place=True, embed_roots=embed_roots, ) return misc.finalise_output(output, res_output) def _signatures_exist(handler): try: next(enumerate_sig_fields(handler, filled_status=True)) return True except StopIteration: return False class PdfSigner: """ .. versionchanged: 0.7.0 This class is no longer a subclass of :class:`.PdfTimeStamper`. Class to handle PDF signatures in general. :param signature_meta: The specification of the signature to add. :param signer: :class:`.Signer` object to use to produce the signature object. :param timestamper: :class:`.TimeStamper` object to use to produce any time stamp tokens that might be required. :param stamp_style: Stamp style specification to determine the visible style of the signature, typically an object of type :class:`.TextStampStyle` or :class:`.QRStampStyle`. Defaults to :const:`constants.DEFAULT_SIGNING_STAMP_STYLE`. :param new_field_spec: If a new field is to be created, this parameter allows the caller to specify the field's properties in the form of a :class:`.SigFieldSpec`. This parameter is only meaningful if ``existing_fields_only`` is ``False``. """ _ignore_sv = False def __init__( self, signature_meta: PdfSignatureMetadata, signer: Signer, *, timestamper: Optional[TimeStamper] = None, stamp_style: Optional[BaseStampStyle] = None, new_field_spec: Optional[SigFieldSpec] = None, ): self.signature_meta = signature_meta if ( new_field_spec is not None and new_field_spec.sig_field_name != signature_meta.field_name ): raise SigningError( "Field names specified in SigFieldSpec and " "PdfSignatureMetadata do not agree." ) self.signer = signer stamp_style = stamp_style or constants.DEFAULT_SIGNING_STAMP_STYLE self.stamp_style: BaseStampStyle = stamp_style self.signer_hash_algo: Optional[str] try: mech = self.signer.get_signature_mechanism_for_digest(None) self.signer_hash_algo = get_cms_hash_algo_for_mechanism(mech) except ValueError: self.signer_hash_algo = None self.new_field_spec = new_field_spec self.default_timestamper = timestamper @property def default_md_for_signer(self) -> Optional[str]: """ Name of the default message digest algorithm for this signer, if there is one. This method will try the :attr:`~.PdfSignatureMetadata.md_algorithm` attribute on the signer's :attr:`signature_meta`, or try to retrieve the digest algorithm associated with the underlying :class:`~pyhanko.sign.signers.pdf_cms.Signer`. :return: The name of the message digest algorithm, or ``None``. """ return self.signature_meta.md_algorithm or self.signer_hash_algo def _enforce_certification_constraints(self, reader: PdfFileReader): # TODO we really should take into account the /DocMDP constraints # of _all_ previous signatures, i.e. also approval signatures with # locking instructions etc. if self.signature_meta.certify and _signatures_exist(reader): raise SigningError( "Certification signatures must be the first signature " "in a given document." ) from pyhanko.sign.validation import read_certification_data cd = read_certification_data(reader) # if there is no author signature, we don't have to do anything if cd is None: return if cd.permission == MDPPerm.NO_CHANGES: raise SigningError("Author signature forbids all changes") def _retrieve_seed_value_spec( self, sig_field ) -> Optional[SigSeedValueSpec]: # for testing & debugging if self._ignore_sv: return None sv_dict = sig_field.get('/SV') if sv_dict is None: return None return SigSeedValueSpec.from_pdf_object(sv_dict) def _select_md_algorithm(self, sv_spec: Optional[SigSeedValueSpec]) -> str: signature_meta = self.signature_meta # priority order for the message digest algorithm # (1) If signature_meta specifies a message digest algorithm, use it # (it has been cleared by the SV dictionary checker already) # (2) Use the first algorithm specified in the seed value dictionary, # if a suggestion is present # (3) fall back to select_suitable_signing_md() if sv_spec is not None and sv_spec.digest_methods: sv_md_algorithm = sv_spec.digest_methods[0] else: sv_md_algorithm = None if self.default_md_for_signer is not None: md_algorithm = self.default_md_for_signer elif sv_md_algorithm is not None: md_algorithm = sv_md_algorithm elif self.signer.signing_cert is not None: md_algorithm = select_suitable_signing_md( self.signer.signing_cert.public_key ) else: raise SigningError( "Could not select a default digest algorithm. Please supply " "a value in the signature settings, or configure the signer " "with an explicit signature mechanism that includes a digest " "algorithm specification." ) algorithm_policy = ( signature_meta.validation_context.algorithm_policy if signature_meta.validation_context is not None else None ) if algorithm_policy is not None: now = datetime.now() md_algo_obj = algos.DigestAlgorithm({'algorithm': md_algorithm}) if not algorithm_policy.digest_algorithm_allowed(md_algo_obj, now): raise SigningError( f"The hash algorithm {md_algorithm} is not allowed in the " f"specified validation context (usage policy of type " f"{algorithm_policy.__class__.__name__})." ) return md_algorithm def register_extensions( self, pdf_out: BasePdfFileWriter, *, md_algorithm: str ): if self.signature_meta.subfilter == SigSeedSubFilter.PADES: _ensure_esic_ext(pdf_out) try: sig_mech = self.signer.get_signature_mechanism_for_digest( md_algorithm ) sig_algo = sig_mech.signature_algo except (SigningError, ValueError) as e: logger.debug( f"Failed to introspect signature mechanism: {str(e)}. " f"Will forgo algorithm-based automatic extension registration.", ) return if sig_algo == 'ed25519': _ensure_iso32002_ext(pdf_out) elif sig_algo == 'ed448': _ensure_iso32001_ext(pdf_out) _ensure_iso32002_ext(pdf_out) else: if md_algorithm.startswith('sha3') or md_algorithm == 'shake256': _ensure_iso32001_ext(pdf_out) cert = self.signer.signing_cert if ( cert and sig_algo == 'ecdsa' and _is_iso32002_curve(cert.public_key) ): _ensure_iso32002_ext(pdf_out) def init_signing_session( self, pdf_out: BasePdfFileWriter, existing_fields_only=False ) -> 'PdfSigningSession': """ Initialise a signing session with this :class:`.PdfSigner` for a specified PDF file writer. This step in the signing process handles all field-level operations prior to signing: it creates the target form field if necessary, and makes sure the seed value dictionary gets processed. See also :meth:`digest_doc_for_signing` and :meth:`sign_pdf`. :param pdf_out: The writer containing the PDF file to be signed. :param existing_fields_only: If ``True``, never create a new empty signature field to contain the signature. If ``False``, a new field may be created if no field matching :attr:`~.PdfSignatureMetadata.field_name` exists. :return: A :class:`.PdfSigningSession` object modelling the signing session in its post-setup stage. """ if isinstance(pdf_out, IncrementalPdfFileWriter): # ensure we're not signing a hybrid reference doc prev = pdf_out.prev if prev.strict and prev.xrefs.hybrid_xrefs_present: raise SigningError( "Attempting to sign document with hybrid cross-reference " "sections while hybrid xrefs are disabled" ) timestamper = self.default_timestamper signature_meta: PdfSignatureMetadata = self.signature_meta cms_writer = PdfCMSEmbedder( new_field_spec=self.new_field_spec ).write_cms( field_name=signature_meta.field_name, writer=pdf_out, existing_fields_only=existing_fields_only, ) # let the CMS writer put in a field for us, if necessary sig_field_ref = next(cms_writer) sig_field = sig_field_ref.get_object() # Fetch seed values (if present) to prepare for signing sv_spec = self._retrieve_seed_value_spec(sig_field) # Check DocMDP settings to see if we're allowed to add a signature if isinstance(pdf_out, IncrementalPdfFileWriter): self._enforce_certification_constraints(pdf_out.prev) md_algorithm = self._select_md_algorithm(sv_spec) self.register_extensions(pdf_out, md_algorithm=md_algorithm) # check if a timestamp is required without a timestamper being # available already, and furnish one if necessary if ( sv_spec is not None and sv_spec.timestamp_required and timestamper is None ): timestamper = sv_spec.build_timestamper() # subfilter: try signature_meta and SV dict, fall back # to /adbe.pkcs7.detached by default subfilter = signature_meta.subfilter if subfilter is None: if sv_spec is not None and sv_spec.subfilters: subfilter = sv_spec.subfilters[0] else: subfilter = SigSeedSubFilter.ADOBE_PKCS7_DETACHED session = PdfSigningSession( self, pdf_out, cms_writer, sig_field, md_algorithm, timestamper, subfilter, sv_spec=sv_spec, ) return session def digest_doc_for_signing( self, pdf_out: BasePdfFileWriter, existing_fields_only=False, bytes_reserved=None, *, appearance_text_params=None, in_place=False, output=None, chunk_size=misc.DEFAULT_CHUNK_SIZE, ) -> Tuple[PreparedByteRangeDigest, 'PdfTBSDocument', IO]: """ .. deprecated:: 0.9.0 Use :meth:`async_digest_doc_for_signing` instead. Set up all stages of the signing process up to and including the point where the signature placeholder is allocated, and the document's ``/ByteRange`` digest is computed. See :meth:`sign_pdf` for a less granular, more high-level approach. .. note:: This method is useful in remote signing scenarios, where you might want to free up resources while waiting for the remote signer to respond. The :class:`.PreparedByteRangeDigest` object returned allows you to keep track of the required state to fill the signature container at some later point in time. :param pdf_out: A PDF file writer (usually an :class:`.IncrementalPdfFileWriter`) containing the data to sign. :param existing_fields_only: If ``True``, never create a new empty signature field to contain the signature. If ``False``, a new field may be created if no field matching :attr:`~.PdfSignatureMetadata.field_name` exists. :param bytes_reserved: Bytes to reserve for the CMS object in the PDF file. If not specified, make an estimate based on a dummy signature. .. warning:: Since the CMS object is written to the output file as a hexadecimal string, you should request **twice** the (estimated) number of bytes in the DER-encoded version of the CMS object. :param appearance_text_params: Dictionary with text parameters that will be passed to the signature appearance constructor (if applicable). :param output: Write the output to the specified output stream. If ``None``, write to a new :class:`.BytesIO` object. Default is ``None``. :param in_place: Sign the original input stream in-place. This parameter overrides ``output``. :param chunk_size: Size of the internal buffer (in bytes) used to feed data to the message digest function if the input stream does not support ``memoryview``. :return: A tuple containing a :class:`.PreparedByteRangeDigest` object, a :class:`.PdfTBSDocument` object and an output handle to which the document in its current state has been written. """ warnings.warn( "'digest_doc_for_signing' is deprecated, use " "'async_digest_doc_for_signing' instead", DeprecationWarning, ) result = asyncio.run( self.async_digest_doc_for_signing( pdf_out, existing_fields_only=existing_fields_only, bytes_reserved=bytes_reserved, appearance_text_params=appearance_text_params, in_place=in_place, output=output, chunk_size=chunk_size, ) ) return result async def async_digest_doc_for_signing( self, pdf_out: BasePdfFileWriter, existing_fields_only=False, bytes_reserved=None, *, appearance_text_params=None, in_place=False, output=None, chunk_size=misc.DEFAULT_CHUNK_SIZE, ) -> Tuple[PreparedByteRangeDigest, 'PdfTBSDocument', IO]: """ .. versionadded:: 0.9.0 Set up all stages of the signing process up to and including the point where the signature placeholder is allocated, and the document's ``/ByteRange`` digest is computed. See :meth:`sign_pdf` for a less granular, more high-level approach. .. note:: This method is useful in remote signing scenarios, where you might want to free up resources while waiting for the remote signer to respond. The :class:`.PreparedByteRangeDigest` object returned allows you to keep track of the required state to fill the signature container at some later point in time. :param pdf_out: A PDF file writer (usually an :class:`.IncrementalPdfFileWriter`) containing the data to sign. :param existing_fields_only: If ``True``, never create a new empty signature field to contain the signature. If ``False``, a new field may be created if no field matching :attr:`~.PdfSignatureMetadata.field_name` exists. :param bytes_reserved: Bytes to reserve for the CMS object in the PDF file. If not specified, make an estimate based on a dummy signature. .. warning:: Since the CMS object is written to the output file as a hexadecimal string, you should request **twice** the (estimated) number of bytes in the DER-encoded version of the CMS object. :param appearance_text_params: Dictionary with text parameters that will be passed to the signature appearance constructor (if applicable). :param output: Write the output to the specified output stream. If ``None``, write to a new :class:`.BytesIO` object. Default is ``None``. :param in_place: Sign the original input stream in-place. This parameter overrides ``output``. :param chunk_size: Size of the internal buffer (in bytes) used to feed data to the message digest function if the input stream does not support ``memoryview``. :return: A tuple containing a :class:`.PreparedByteRangeDigest` object, a :class:`.PdfTBSDocument` object and an output handle to which the document in its current state has been written. """ signing_session = self.init_signing_session( pdf_out, existing_fields_only=existing_fields_only, ) validation_info = await signing_session.perform_presign_validation( pdf_out ) if bytes_reserved is None: estimation = signing_session.estimate_signature_container_size( validation_info=validation_info, tight=self.signature_meta.tight_size_estimates, ) bytes_reserved = await estimation tbs_document = signing_session.prepare_tbs_document( validation_info=validation_info, bytes_reserved=bytes_reserved, appearance_text_params=appearance_text_params, ) prepared_br_digest, res_output = tbs_document.digest_tbs_document( in_place=in_place, chunk_size=chunk_size, output=output ) return ( prepared_br_digest, tbs_document, misc.finalise_output(output, res_output), ) def sign_pdf( self, pdf_out: BasePdfFileWriter, existing_fields_only=False, bytes_reserved=None, *, appearance_text_params=None, in_place=False, output=None, chunk_size=misc.DEFAULT_CHUNK_SIZE, ): """ .. versionchanged:: 0.9.0 Wrapper around :meth:`async_sign_pdf`. Sign a PDF file using the provided output writer. :param pdf_out: A PDF file writer (usually an :class:`.IncrementalPdfFileWriter`) containing the data to sign. :param existing_fields_only: If ``True``, never create a new empty signature field to contain the signature. If ``False``, a new field may be created if no field matching :attr:`~.PdfSignatureMetadata.field_name` exists. :param bytes_reserved: Bytes to reserve for the CMS object in the PDF file. If not specified, make an estimate based on a dummy signature. :param appearance_text_params: Dictionary with text parameters that will be passed to the signature appearance constructor (if applicable). :param output: Write the output to the specified output stream. If ``None``, write to a new :class:`.BytesIO` object. Default is ``None``. :param in_place: Sign the original input stream in-place. This parameter overrides ``output``. :param chunk_size: Size of the internal buffer (in bytes) used to feed data to the message digest function if the input stream does not support ``memoryview``. :return: The output stream containing the signed data. """ result = asyncio.run( self.async_sign_pdf( pdf_out, existing_fields_only=existing_fields_only, bytes_reserved=bytes_reserved, appearance_text_params=appearance_text_params, in_place=in_place, output=output, chunk_size=chunk_size, ) ) return result async def async_sign_pdf( self, pdf_out: BasePdfFileWriter, existing_fields_only=False, bytes_reserved=None, *, appearance_text_params=None, in_place=False, output=None, chunk_size=misc.DEFAULT_CHUNK_SIZE, ): """ .. versionadded:: 0.9.0 Sign a PDF file using the provided output writer. :param pdf_out: A PDF file writer (usually an :class:`.IncrementalPdfFileWriter`) containing the data to sign. :param existing_fields_only: If ``True``, never create a new empty signature field to contain the signature. If ``False``, a new field may be created if no field matching :attr:`~.PdfSignatureMetadata.field_name` exists. :param bytes_reserved: Bytes to reserve for the CMS object in the PDF file. If not specified, make an estimate based on a dummy signature. :param appearance_text_params: Dictionary with text parameters that will be passed to the signature appearance constructor (if applicable). :param output: Write the output to the specified output stream. If ``None``, write to a new :class:`.BytesIO` object. Default is ``None``. :param in_place: Sign the original input stream in-place. This parameter overrides ``output``. :param chunk_size: Size of the internal buffer (in bytes) used to feed data to the message digest function if the input stream does not support ``memoryview``. :return: The output stream containing the signed data. """ signing_session = self.init_signing_session( pdf_out, existing_fields_only=existing_fields_only, ) validation_info = await signing_session.perform_presign_validation( pdf_out ) if bytes_reserved is None: estimation = signing_session.estimate_signature_container_size( validation_info, tight=self.signature_meta.tight_size_estimates ) bytes_reserved = await estimation tbs_document = signing_session.prepare_tbs_document( validation_info=validation_info, bytes_reserved=bytes_reserved, appearance_text_params=appearance_text_params, ) prepared_br_digest, res_output = tbs_document.digest_tbs_document( in_place=in_place, chunk_size=chunk_size, output=output ) post_signing_doc = await tbs_document.perform_signature( document_digest=prepared_br_digest.document_digest, pdf_cms_signed_attrs=PdfCMSSignedAttributes( signing_time=signing_session.system_time, adobe_revinfo_attr=( None if validation_info is None else validation_info.adobe_revinfo_attr ), cades_signed_attrs=self.signature_meta.cades_signed_attr_spec, ), ) await post_signing_doc.post_signature_processing( res_output, chunk_size=chunk_size ) # we put the finalisation step after the DSS manipulations, since # otherwise we'd also run into issues with non-seekable output buffers return misc.finalise_output(output, res_output) @dataclass(frozen=True) class PreSignValidationStatus: """ .. versionadded:: 0.7.0 Container for validation data collected prior to creating a signature, e.g. for later inclusion in a document's DSS, or as a signed attribute on the signature. """ signer_path: ValidationPath """ Validation path for the signer's certificate. """ validation_paths: List[ValidationPath] """ List of other relevant validation paths. """ ts_validation_paths: Optional[List[ValidationPath]] = None """ List of validation paths relevant for embedded timestamps. """ adobe_revinfo_attr: Optional[asn1_pdf.RevocationInfoArchival] = None """ Preformatted revocation info attribute to include, if requested by the settings. """ ocsps_to_embed: List[ocsp.OCSPResponse] = field(default_factory=list) """ List of OCSP responses collected so far. """ crls_to_embed: List[crl.CertificateList] = field(default_factory=list) """ List of CRLS collected so far. """ ac_validation_paths: List[ValidationPath] = field(default_factory=list) """ List of validation paths relevant for embedded attribute certificates. """ class PdfSigningSession: """ .. versionadded:: 0.7.0 Class modelling a PDF signing session in its initial state. The ``__init__`` method is internal API, get an instance using :meth:`.PdfSigner.init_signing_session`. """ def __init__( self, pdf_signer: PdfSigner, pdf_out: BasePdfFileWriter, cms_writer, sig_field, md_algorithm: str, timestamper: Optional[TimeStamper], subfilter: SigSeedSubFilter, system_time: Optional[datetime] = None, sv_spec: Optional[SigSeedValueSpec] = None, ): self.pdf_signer = pdf_signer self.pdf_out = pdf_out self.sig_field = sig_field self.cms_writer = cms_writer self.md_algorithm = md_algorithm self.timestamper = timestamper self.subfilter = subfilter self.use_pades = subfilter == SigSeedSubFilter.PADES self.system_time = system_time or datetime.now( tz=tzlocal.get_localzone() ) self.sv_spec = sv_spec async def perform_presign_validation( self, pdf_out: Optional[BasePdfFileWriter] = None ) -> Optional[PreSignValidationStatus]: """ Perform certificate validation checks for the signer's certificate, including any necessary revocation checks. This function will also attempt to validate & collect revocation information for the relevant TSA (by requesting a dummy timestamp). :param pdf_out: Current PDF writer. Technically optional; only used to look for the end of the timestamp chain in the previous revision when producing a PAdES-LTA signature in a document that is already signed (to ensure that the timestamp chain is uninterrupted). :return: A :class:`PreSignValidationStatus` object, or ``None`` if there is no validation context available. """ pdf_signer = self.pdf_signer validation_paths = [] signature_meta = pdf_signer.signature_meta validation_context = signature_meta.validation_context if signature_meta.embed_validation_info: if self.pdf_signer.signer.signing_cert is None: raise SigningError( "A signer's certificate must be provided if " "validation/revocation info is to be embedded into the " "signature." ) elif validation_context is None: raise SigningError( "A validation context must be provided if " "validation/revocation info is to be embedded into the " "signature." ) elif not validation_context.fetching_allowed: logger.warning( "Validation/revocation info will be embedded, but " "fetching is not allowed. This may give rise to unexpected " "results." ) validation_context = signature_meta.validation_context # if there's no validation context, bail early if validation_context is None: return None signer_path = await self._perform_presign_signer_validation( validation_context, signature_meta.signer_key_usage ) validation_paths.append(signer_path) # If LTA: # if the original document already included a document timestamp, # we need to collect revocation information for it, to preserve # the integrity of the timestamp chain if signature_meta.use_pades_lta and isinstance( pdf_out, IncrementalPdfFileWriter ): prev_tsa_path = await self._perform_prev_ts_validation( validation_context, pdf_out.prev ) if prev_tsa_path is not None: validation_paths.append(prev_tsa_path) timestamper = self.timestamper # Finally, fetch validation information for the TSA that we're going to # use for our own TS if timestamper is not None: async_ts_paths = timestamper.validation_paths(validation_context) ts_paths = [] async for ts_path in async_ts_paths: validation_paths.append(ts_path) ts_paths.append(ts_path) else: ts_paths = None aa_paths: List[ValidationPath] # fetch attribute certificate validation paths if signature_meta.ac_validation_context is not None: async_aa_paths = self._perform_presign_ac_validation( signature_meta.ac_validation_context ) aa_paths = [] async for aa_path in async_aa_paths: validation_paths.append(aa_path) aa_paths.append(aa_path) else: aa_paths = [] # do we need adobe-style revocation info? if signature_meta.embed_validation_info and not self.use_pades: assert validation_context is not None # checked earlier revinfo = Signer.format_revinfo( ocsp_responses=validation_context.ocsps, crls=validation_context.crls, ) else: # PAdES prescribes another mechanism for embedding revocation info revinfo = None return PreSignValidationStatus( validation_paths=validation_paths, signer_path=signer_path, ts_validation_paths=ts_paths, adobe_revinfo_attr=revinfo, ocsps_to_embed=validation_context.ocsps, crls_to_embed=validation_context.crls, ac_validation_paths=aa_paths, ) async def _perform_presign_ac_validation( self, validation_context: ValidationContext ): signer = self.pdf_signer.signer attr_certs = list(signer.attribute_certs) cades_attr_spec = self.pdf_signer.signature_meta.cades_signed_attr_spec # also make sure to pull in the validation chains for all attribute # certificates included in the signer-attributes-v2 attr, if there is # one. if ( cades_attr_spec is not None and cades_attr_spec.signer_attributes is not None ): attr_certs.extend(cades_attr_spec.signer_attributes.certified_attrs) ac_jobs = [ async_validate_ac( ac, validation_context, holder_cert=signer.signing_cert ) for ac in attr_certs ] for ac_job in asyncio.as_completed(ac_jobs): result: ACValidationResult = await ac_job yield result.aa_path async def _perform_presign_signer_validation( self, validation_context, key_usage ): signer = self.pdf_signer.signer # validate cert # (this also keeps track of any validation data automagically) validator = CertificateValidator( signer.signing_cert, intermediate_certs=signer.cert_registry, validation_context=validation_context, ) try: signer_cert_validation_path = await validator.async_validate_usage( key_usage ) except (PathBuildingError, PathValidationError) as e: raise SigningError( "The signer's certificate could not be validated" ) from e return signer_cert_validation_path async def _perform_prev_ts_validation( self, validation_context, prev_reader ): signer = self.pdf_signer.signer from pyhanko.sign.validation import get_timestamp_chain # try to grab the most recent document timestamp last_ts = None try: last_ts = next(get_timestamp_chain(prev_reader)) except StopIteration: pass last_ts_validation_path = None if last_ts is not None: ts_validator = CertificateValidator( last_ts.signer_cert, intermediate_certs=signer.cert_registry, validation_context=validation_context, ) try: validate_coro = ts_validator.async_validate_usage( set(), extended_key_usage={"time_stamping"} ) last_ts_validation_path = await validate_coro except (PathBuildingError, PathValidationError) as e: raise SigningError( "Requested a PAdES-LTA signature on an existing " "document, but the most recent timestamp " "could not be validated.", e, ) return last_ts_validation_path def _apply_locking_rules(self) -> SigMDPSetup: # TODO allow equivalent functionality to the /Lock dictionary # to be specified in PdfSignatureMetadata # this helper method handles /Lock dictionary and certification # semantics. # The fallback rules are messy and ad-hoc; behaviour is mostly # documented by tests. # read recommendations and/or requirements from the SV dictionary sv_spec = self.sv_spec sig_field = self.sig_field signature_meta = self.pdf_signer.signature_meta if sv_spec is not None: sv_lock_lut: Dict[Any, List[MDPPerm]] = { SeedLockDocument.LOCK: [MDPPerm.NO_CHANGES], SeedLockDocument.DO_NOT_LOCK: [ MDPPerm.FILL_FORMS, MDPPerm.ANNOTATE, ], } sv_lock_values: List[MDPPerm] = sv_lock_lut.get( sv_spec.lock_document, [] ) sv_lock_value_req = sv_lock_values is not None and bool( sv_spec.flags & SigSeedValFlags.LOCK_DOCUMENT ) else: sv_lock_values = [] sv_lock_value_req = False lock = lock_dict = None # init the DocMDP value with what the /LockDocument setting in the SV # dict recommends. If the constraint is mandatory, it might conflict # with the /Lock dictionary, but we'll deal with that later. docmdp_perms = sv_lock_values[0] if sv_lock_values else None try: lock_dict = sig_field['/Lock'] lock = FieldMDPSpec.from_pdf_object(lock_dict) docmdp_value = lock_dict['/P'] docmdp_perms = MDPPerm(docmdp_value) if sv_lock_value_req and docmdp_perms not in sv_lock_values: raise SigningError( "Inconsistency in form field data. " "The field lock dictionary imposes the DocMDP policy " f"'{docmdp_perms}', but the seed value " "dictionary's /LockDocument does not allow that." ) except KeyError: pass except ValueError as e: raise SigningError("Failed to read /Lock dictionary", e) meta_perms = signature_meta.docmdp_permissions meta_certify = signature_meta.certify # only pull meta_perms into the validation if we're trying to make a # cert sig, or there already is some other docmdp_perms value available. # (in other words, if there's no SV dict or /Lock, and we're not # certifying, this will be skipped) if meta_perms is not None and ( meta_certify or docmdp_perms is not None ): if sv_lock_value_req and meta_perms not in sv_lock_values: # in this case, we have to override docmdp_perms = sv_lock_values[0] else: # choose the stricter option if both are available docmdp_perms = ( meta_perms if docmdp_perms is None else (min(docmdp_perms, meta_perms)) ) if docmdp_perms != meta_perms: logger.warning( f"DocMDP policy '{meta_perms}', was requested, " f"but the signature field settings do " f"not allow that. Setting '{docmdp_perms}' instead." ) # if not certifying and docmdp_perms is not None, ensure the # appropriate permission in the Lock dictionary is set if not meta_certify and docmdp_perms is not None: if lock_dict is None: # set a field lock that doesn't do anything sig_field['/Lock'] = lock_dict = generic.DictionaryObject( { pdf_name('/Action'): pdf_name('/Include'), pdf_name('/Fields'): generic.ArrayObject(), } ) lock_dict['/P'] = generic.NumberObject(docmdp_perms.value) return SigMDPSetup( certify=meta_certify, field_lock=lock, docmdp_perms=docmdp_perms, md_algorithm=self.md_algorithm, ) def _enforce_seed_value_constraints( self, validation_path: Optional[ValidationPath] ): sv_spec = self.sv_spec pdf_signer = self.pdf_signer signature_meta = pdf_signer.signature_meta assert sv_spec is not None # Enforce mandatory seed values (except LOCK_DOCUMENT, which is handled # elsewhere) flags: SigSeedValFlags = sv_spec.flags if sv_spec.cert is not None: if pdf_signer.signer.signing_cert is None: raise SigningError( "Cannot verify seed value constraints on the signer's " "certificate since it is not available" ) sv_spec.cert.satisfied_by( pdf_signer.signer.signing_cert, validation_path ) if sv_spec.seed_signature_type is not None: sv_certify = sv_spec.seed_signature_type.certification_signature() if sv_certify != signature_meta.certify: def _type(certify): return 'a certification' if certify else 'an approval' raise SigningError( "The seed value dictionary's /MDP entry specifies that " f"this field should contain {_type(sv_certify)} " f"signature, but {_type(signature_meta.certify)} " "was requested." ) sv_mdp_perm = sv_spec.seed_signature_type.mdp_perm if sv_certify and sv_mdp_perm != signature_meta.docmdp_permissions: raise SigningError( "The seed value dictionary specified that this " "certification signature should use the MDP policy " f"'{sv_mdp_perm}', " f"but '{signature_meta.docmdp_permissions}' was " "requested." ) if not flags: return sv_spec selected_sf = signature_meta.subfilter if ( flags & SigSeedValFlags.SUBFILTER ) and sv_spec.subfilters is not None: # empty array = no supported subfilters if not sv_spec.subfilters: raise NotImplementedError( "The signature encodings mandated by the seed value " "dictionary are not supported." ) # standard mandates that we take the first available subfilter mandated_sf: SigSeedSubFilter = sv_spec.subfilters[0] if selected_sf is not None and mandated_sf != selected_sf: raise SigningError( "The seed value dictionary mandates subfilter '%s', " "but '%s' was requested." % (mandated_sf.value, selected_sf.value) ) # SV dict serves as a source of defaults as well if selected_sf is None and sv_spec.subfilters is not None: selected_sf = sv_spec.subfilters[0] if ( flags & SigSeedValFlags.APPEARANCE_FILTER ) and sv_spec.appearance is not None: raise SigningError( "pyHanko does not define any named appearances, but " "the seed value dictionary requires that the named appearance " f"'{sv_spec.appearance}' be used." ) if ( flags & SigSeedValFlags.ADD_REV_INFO ) and sv_spec.add_rev_info is not None: if sv_spec.add_rev_info != signature_meta.embed_validation_info: raise SigningError( "The seed value dict mandates that revocation info %sbe " "added; adjust PdfSignatureMetadata settings accordingly." % ("" if sv_spec.add_rev_info else "not ") ) if ( sv_spec.add_rev_info and selected_sf != SigSeedSubFilter.ADOBE_PKCS7_DETACHED ): raise SigningError( "The seed value dict mandates that Adobe-style revocation " "info be added; this requires subfilter '%s'" % (SigSeedSubFilter.ADOBE_PKCS7_DETACHED.value) ) if ( flags & SigSeedValFlags.DIGEST_METHOD ) and sv_spec.digest_methods is not None: selected_md = pdf_signer.default_md_for_signer if selected_md is not None: selected_md = selected_md.lower() if selected_md not in sv_spec.digest_methods: raise SigningError( "The selected message digest %s is not allowed by the " "seed value dictionary. Please select one of %s." % (selected_md, ", ".join(sv_spec.digest_methods)) ) if flags & SigSeedValFlags.REASONS: reasons = sv_spec.reasons # standard says that omission of the /Reasons key amounts to # a prohibition in this case must_omit = not reasons or reasons == ["."] reason_given = signature_meta.reason if must_omit and reason_given is not None: raise SigningError( "The seed value dictionary prohibits giving a reason " "for signing." ) if not must_omit and reasons and reason_given not in reasons: raise SigningError( "Reason \"%s\" is not a valid reason for signing, " "please choose one of the following: %s." % ( reason_given, ", ".join("\"%s\"" % s for s in reasons), ) ) async def estimate_signature_container_size( self, validation_info: Optional[PreSignValidationStatus], tight=False ): md_algorithm = self.md_algorithm signature_meta = self.pdf_signer.signature_meta signer = self.pdf_signer.signer if signer.signing_cert is None: raise SigningError( "Automatic signature size estimation is not available without " "a signer's certificate. Space must be allocated manually " "using bytes_reserved=..." ) # estimate bytes_reserved by creating a fake CMS object md_spec = get_pyca_cryptography_hash(md_algorithm) test_md = hashes.Hash(md_spec).finalize() signed_attrs = PdfCMSSignedAttributes( signing_time=self.system_time, adobe_revinfo_attr=( None if validation_info is None else validation_info.adobe_revinfo_attr ), cades_signed_attrs=signature_meta.cades_signed_attr_spec, ) test_signature_cms = await signer.async_sign( test_md, md_algorithm, use_pades=self.use_pades, dry_run=True, timestamper=self.timestamper, signed_attr_settings=signed_attrs, ) # Note: multiply by 2 to account for the fact that this byte dump # will be embedded into the resulting PDF as a hexadecimal # string test_len = len(test_signature_cms.dump()) * 2 if tight: bytes_reserved = test_len else: # External actors such as timestamping servers can't be relied on to # always return exactly the same response, so we build in a 50% # error margin (+ ensure that bytes_reserved is even) bytes_reserved = test_len + 2 * (test_len // 4) return bytes_reserved def prepare_tbs_document( self, validation_info: Optional[PreSignValidationStatus], bytes_reserved, appearance_text_params=None, ) -> 'PdfTBSDocument': """ Set up the signature appearance (if necessary) and signature dictionary in the PDF file, to put the document in its final pre-signing state. :param validation_info: Validation information collected prior to signing. :param bytes_reserved: Bytes to reserve for the signature container. :param appearance_text_params: Optional text parameters for the signature appearance content. :return: A :class:`.PdfTBSDocument` describing the document in its final pre-signing state. """ pdf_signer = self.pdf_signer signature_meta = self.pdf_signer.signature_meta if self.sv_spec is not None: # process the field's seed value constraints self._enforce_seed_value_constraints( None if validation_info is None else validation_info.signer_path ) signer = pdf_signer.signer embed_roots = signer.embed_roots # take care of DSS updates, if they have to happen now dss_settings = signature_meta.dss_settings if self.use_pades and validation_info is not None: # Check consistency of settings dss_settings.assert_viable() if ( dss_settings.placement == SigDSSPlacementPreference.TOGETHER_WITH_SIGNATURE ): from pyhanko.sign import validation pdf_out = self.pdf_out # source info directly from the validation_info object # for consistency # NOTE: we have to disable VRI in this scenario validation.DocumentSecurityStore.supply_dss_in_writer( pdf_out, sig_contents=None, paths=validation_info.validation_paths, ocsps=validation_info.ocsps_to_embed, crls=validation_info.crls_to_embed, embed_roots=embed_roots, ) md_algorithm = self.md_algorithm sig_mdp_setup = self._apply_locking_rules() # Prepare instructions to the CMS writer to set up the # (PDF) signature object and its appearance system_time = self.system_time name_specified = signature_meta.name sig_appearance = SigAppearanceSetup( style=pdf_signer.stamp_style, name=name_specified or signer.subject_name, timestamp=system_time, text_params=appearance_text_params, ) sig_obj = SignatureObject( bytes_reserved=bytes_reserved, subfilter=self.subfilter, timestamp=system_time, name=name_specified if name_specified else None, location=signature_meta.location, reason=signature_meta.reason, contact_info=signature_meta.contact_info, prop_auth_time=signature_meta.prop_auth_time, prop_auth_type=signature_meta.prop_auth_type, app_build_props=signature_meta.app_build_props, ) # Pass in the SignatureObject settings self.cms_writer.send( SigObjSetup( sig_placeholder=sig_obj, mdp_setup=sig_mdp_setup, appearance_setup=sig_appearance, ) ) # At this point, the document is in its final pre-signing state # Last job: prepare instructions for the post-signing workflow signature_meta = pdf_signer.signature_meta validation_context = signature_meta.validation_context post_signing_instr = doc_timestamper = None if self.use_pades and signature_meta.embed_validation_info: if validation_info is None: raise SigningError( "Requested embedding of validation info, " "but none was collected" ) if signature_meta.use_pades_lta: doc_timestamper = self.timestamper # if necessary/supported, extract a file access credential # to perform post-signing operations later credential_ser: Optional[SerialisedCredential] = None if self.pdf_out.security_handler is not None: credential = self.pdf_out.security_handler.extract_credential() if credential is not None: credential_ser = credential.serialise() post_signing_instr = PostSignInstructions( validation_info=validation_info, # use the same algorithm # TODO make this configurable? Some TSAs only allow one choice # of MD, and forcing our signers to use the same one to handle # might be overly restrictive (esp. for things like EdDSA where # the MD is essentially fixed) timestamp_md_algorithm=md_algorithm, timestamper=doc_timestamper, timestamp_field_name=signature_meta.timestamp_field_name, dss_settings=signature_meta.dss_settings, tight_size_estimates=signature_meta.tight_size_estimates, embed_roots=embed_roots, file_credential=credential_ser, ) return PdfTBSDocument( cms_writer=self.cms_writer, signer=pdf_signer.signer, md_algorithm=md_algorithm, timestamper=self.timestamper, use_pades=self.use_pades, post_sign_instructions=post_signing_instr, validation_context=validation_context, ) @dataclass(frozen=True) class PostSignInstructions: """ .. versionadded:: 0.7.0 Container class housing instructions for incremental updates to the document after the signature has been put in place. Necessary for PAdES-LT and PAdES-LTA workflows. """ validation_info: PreSignValidationStatus """ Validation information to embed in the DSS (if not already present). """ timestamper: Optional[TimeStamper] = None """ Timestamper to use for produce document timestamps. If ``None``, no timestamp will be added. """ timestamp_md_algorithm: Optional[str] = None """ Digest algorithm to use when producing timestamps. Defaults to :const:`~pyhanko.sign.signers.constants.DEFAULT_MD`. """ timestamp_field_name: Optional[str] = None """ Name of the timestamp field to use. If not specified, a field name will be generated. """ dss_settings: DSSContentSettings = DSSContentSettings() """ .. versionadded:: 0.8.0 Settings to fine-tune DSS generation. """ tight_size_estimates: bool = False """ .. versionadded:: 0.8.0 When estimating the size of a document timestamp container, do not add safety margins. .. note:: External TSAs cannot be relied upon to always produce the exact same output length, which makes this option risky to use. """ embed_roots: bool = True """ .. versionadded:: 0.9.0 Option that controls whether the root certificate of each validation path should be embedded into the DSS. The default is ``True``. .. note:: Trust roots are configured by the validator, so embedding them typically does nothing in a typical validation process. Therefore they can be safely omitted in most cases. Nonetheless, embedding the roots can be useful for documentation purposes. .. note:: This setting is not part of :class:`.DSSContentSettings` because its value is taken from the corresponding property on the :class:`.Signer` involved, not from the initial configuration. """ file_credential: Optional[SerialisedCredential] = None """ .. versionadded:: 0.13.0 Serialised file credential, to update encrypted files. """ class PdfTBSDocument: """ .. versionadded:: 0.7.0 A PDF document in its final pre-signing state. The ``__init__`` method is internal API, get an instance using :meth:`.PdfSigningSession.prepare_tbs_document`. Alternatively, use :meth:`resume_signing` or :meth:`finish_signing` to continue a previously interrupted signing process without instantiating a new :class:`.PdfTBSDocument` object. """ def __init__( self, cms_writer, signer: Signer, md_algorithm: str, use_pades: bool, timestamper: Optional[TimeStamper] = None, post_sign_instructions: Optional[PostSignInstructions] = None, validation_context: Optional[ValidationContext] = None, ): self.cms_writer = cms_writer self.signer = signer self.md_algorithm = md_algorithm self.timestamper = timestamper self.use_pades = use_pades self.post_sign_instructions = post_sign_instructions self.validation_context = validation_context def digest_tbs_document( self, *, output: Optional[IO] = None, in_place: bool = False, chunk_size=misc.DEFAULT_CHUNK_SIZE, ) -> Tuple[PreparedByteRangeDigest, IO]: """ Write the document to an output stream and compute the digest, while keeping track of the (future) location of the signature contents in the output stream. The digest can then be passed to the next part of the signing pipeline. .. warning:: This method can only be called once. :param output: Write the output to the specified output stream. If ``None``, write to a new :class:`.BytesIO` object. Default is ``None``. :param in_place: Sign the original input stream in-place. This parameter overrides ``output``. :param chunk_size: Size of the internal buffer (in bytes) used to feed data to the message digest function if the input stream does not support ``memoryview``. :return: A tuple containing a :class:`.PreparedByteRangeDigest` and the output stream to which the output was written. """ # pass in I/O parameters, get back a hash return self.cms_writer.send( SigIOSetup( md_algorithm=self.md_algorithm, in_place=in_place, chunk_size=chunk_size, output=output, ) ) async def perform_signature( self, document_digest: bytes, pdf_cms_signed_attrs: PdfCMSSignedAttributes, ) -> 'PdfPostSignatureDocument': """ Perform the relevant cryptographic signing operations on the document digest, and write the resulting CMS object to the appropriate location in the output stream. .. warning:: This method can only be called once, and must be invoked after :meth:`digest_tbs_document`. :param document_digest: Digest of the document, as computed over the relevant ``/ByteRange``. :param pdf_cms_signed_attrs: Description of the signed attributes to include. :return: A :class:`.PdfPostSignatureDocument` object. """ signer = self.signer signature_cms = await signer.async_sign( document_digest, self.md_algorithm, use_pades=self.use_pades, timestamper=self.timestamper, signed_attr_settings=pdf_cms_signed_attrs, ) # ... and feed it to the CMS writer sig_contents = self.cms_writer.send(signature_cms) return PdfPostSignatureDocument( sig_contents, post_sign_instr=self.post_sign_instructions, validation_context=self.validation_context, ) @classmethod def resume_signing( cls, output: IO, prepared_digest: PreparedByteRangeDigest, signature_cms: Union[bytes, cms.ContentInfo], post_sign_instr: Optional[PostSignInstructions] = None, validation_context: Optional[ValidationContext] = None, ) -> 'PdfPostSignatureDocument': """ Resume signing after obtaining a CMS object from an external source. This is a class method; it doesn't require a :class:`.PdfTBSDocument` instance. Contrast with :meth:`perform_signature`. :param output: Output stream housing the document in its final pre-signing state. This stream must at least be writable and seekable, and also readable if post-signature processing is required. :param prepared_digest: The prepared digest returned by a prior call to :meth:`digest_tbs_document`. :param signature_cms: CMS object to embed in the signature dictionary. :param post_sign_instr: Instructions for post-signing processing (DSS updates and document timestamps). :param validation_context: Validation context to use in post-signing operations. This is mainly intended for TSA certificate validation, but it can also contain additional validation data to embed in the DSS. :return: A :class:`PdfPostSignatureDocument`. """ sig_contents = prepared_digest.fill_with_cms(output, signature_cms) return PdfPostSignatureDocument( sig_contents, post_sign_instr=post_sign_instr, validation_context=validation_context, ) @classmethod def finish_signing( cls, output: IO, prepared_digest: PreparedByteRangeDigest, signature_cms: Union[bytes, cms.ContentInfo], post_sign_instr: Optional[PostSignInstructions] = None, validation_context: Optional[ValidationContext] = None, chunk_size=misc.DEFAULT_CHUNK_SIZE, ): """ Finish signing after obtaining a CMS object from an external source, and perform any required post-signature processing. This is a class method; it doesn't require a :class:`.PdfTBSDocument` instance. Contrast with :meth:`perform_signature`. :param output: Output stream housing the document in its final pre-signing state. :param prepared_digest: The prepared digest returned by a prior call to :meth:`digest_tbs_document`. :param signature_cms: CMS object to embed in the signature dictionary. :param post_sign_instr: Instructions for post-signing processing (DSS updates and document timestamps). :param validation_context: Validation context to use in post-signing operations. This is mainly intended for TSA certificate validation, but it can also contain additional validation data to embed in the DSS. :param chunk_size: Size of the internal buffer (in bytes) used to feed data to the message digest function if the input stream does not support ``memoryview``. """ asyncio.run( cls.async_finish_signing( output, prepared_digest, signature_cms, post_sign_instr=post_sign_instr, validation_context=validation_context, chunk_size=chunk_size, ) ) @classmethod async def async_finish_signing( cls, output: IO, prepared_digest: PreparedByteRangeDigest, signature_cms: Union[bytes, cms.ContentInfo], post_sign_instr: Optional[PostSignInstructions] = None, validation_context: Optional[ValidationContext] = None, chunk_size=misc.DEFAULT_CHUNK_SIZE, ): """ Finish signing after obtaining a CMS object from an external source, and perform any required post-signature processing. This is a class method; it doesn't require a :class:`.PdfTBSDocument` instance. Contrast with :meth:`perform_signature`. :param output: Output stream housing the document in its final pre-signing state. :param prepared_digest: The prepared digest returned by a prior call to :meth:`digest_tbs_document`. :param signature_cms: CMS object to embed in the signature dictionary. :param post_sign_instr: Instructions for post-signing processing (DSS updates and document timestamps). :param validation_context: Validation context to use in post-signing operations. This is mainly intended for TSA certificate validation, but it can also contain additional validation data to embed in the DSS. :param chunk_size: Size of the internal buffer (in bytes) used to feed data to the message digest function if the input stream does not support ``memoryview``. """ # TODO at this point, the output stream no longer needs to be readable, # just seekable, unless there's a timestamp requirement. # Might want to factor that out for speed at some point. rw_output = misc.prepare_rw_output_stream(output) post_sign = cls.resume_signing( rw_output, prepared_digest=prepared_digest, signature_cms=signature_cms, post_sign_instr=post_sign_instr, validation_context=validation_context, ) await post_sign.post_signature_processing( rw_output, chunk_size=chunk_size ) class PdfPostSignatureDocument: """ .. versionadded:: 0.7.0 Represents the final phase of the PDF signing process """ def __init__( self, sig_contents: bytes, post_sign_instr: Optional[PostSignInstructions] = None, validation_context: Optional[ValidationContext] = None, ): self.sig_contents = sig_contents self.post_sign_instructions = post_sign_instr self.validation_context = validation_context async def post_signature_processing( self, output: IO, chunk_size=misc.DEFAULT_CHUNK_SIZE ): """ Handle DSS updates and LTA timestamps, if applicable. :param output: I/O buffer containing the signed document. Must support reading, writing and seeking. :param chunk_size: Chunk size to use for I/O operations that do not support the buffer protocol. """ instr = self.post_sign_instructions if instr is None: return validation_context = self.validation_context validation_info = instr.validation_info dss_settings = instr.dss_settings from pyhanko.sign import validation # If we're resuming a signing operation, the (new) validation context # might not have all relevant OCSP responses / CRLs available. # Hence why we also pass in the data from the pre-signing check. # The DSS handling code will deal with deduplication. dss_op_kwargs: Dict[str, Any] dss_op_kwargs = dict( paths=validation_info.validation_paths, validation_context=validation_context, ocsps=validation_info.ocsps_to_embed, crls=validation_info.crls_to_embed, embed_roots=instr.embed_roots, ) if dss_settings.include_vri: dss_op_kwargs['sig_contents'] = self.sig_contents else: dss_op_kwargs['sig_contents'] = None timestamper = instr.timestamper # Separate DSS revision if no TS that would otherwise be bundled with it # or explicitly requested as separate dss_placement = dss_settings.placement separate_dss_revision = False if dss_placement == SigDSSPlacementPreference.SEPARATE_REVISION: separate_dss_revision = True elif dss_placement == SigDSSPlacementPreference.TOGETHER_WITH_NEXT_TS: separate_dss_revision = timestamper is None if separate_dss_revision: if not dss_settings.skip_if_unneeded: dss_op_kwargs['force_write'] = True validation.DocumentSecurityStore.add_dss( output_stream=output, **dss_op_kwargs, file_credential=instr.file_credential, ) if timestamper is not None: # append a document timestamp after the DSS update w = IncrementalPdfFileWriter(output) if ( w.security_handler is not None and instr.file_credential is not None ): w.security_handler.authenticate(instr.file_credential) # we let the SH throw errors on access as necessary pdf_timestamper = PdfTimeStamper( timestamper, field_name=instr.timestamp_field_name ) if dss_placement == SigDSSPlacementPreference.TOGETHER_WITH_NEXT_TS: validation.DocumentSecurityStore.supply_dss_in_writer( w, **dss_op_kwargs ) await pdf_timestamper.async_timestamp_pdf( w, instr.timestamp_md_algorithm or constants.DEFAULT_MD, validation_context, validation_paths=validation_info.validation_paths, in_place=True, timestamper=timestamper, chunk_size=chunk_size, dss_settings=dss_settings.get_settings_for_ts(), tight_size_estimates=instr.tight_size_estimates, embed_roots=instr.embed_roots, )