338 lines
9.4 KiB
Python
338 lines
9.4 KiB
Python
import binascii
|
|
import uuid
|
|
from enum import Enum
|
|
from typing import Optional
|
|
|
|
from .generic import (
|
|
DictionaryObject,
|
|
NameObject,
|
|
PdfObject,
|
|
StreamObject,
|
|
pdf_name,
|
|
)
|
|
from .layout import BoxConstraints
|
|
from .reader import PdfFileReader
|
|
from .writer import BasePdfFileWriter
|
|
|
|
# Names exported by ``from <this module> import *``; private helpers such as
# ``_res_merge_helper`` are intentionally left out.
__all__ = [
    'ResourceType',
    'ResourceManagementError',
    'PdfResources',
    'PdfContent',
    'RawContent',
    'ImportedPdfPage',
]
|
|
|
|
# TODO have the merge_resources helper in incremental_writer rely on some
|
|
# of the idioms established here
|
|
|
|
|
|
class ResourceType(Enum):
    """
    Enum listing resources that can be used as keys in a resource dictionary.

    Each member's value is the PDF name (built with ``pdf_name``) under which
    that category is stored in a resource dictionary.
    :class:`.PdfResources` looks categories up by the lower-cased member
    name, so member names must stay in sync with its attribute names.

    See ISO 32000-1, § 7.8.3 Table 34.
    """

    EXT_G_STATE = pdf_name('/ExtGState')
    """
    External graphics state specifications.
    See ISO 32000-1, § 8.4.5.
    """

    COLOR_SPACE = pdf_name('/ColorSpace')
    """
    Colour space definitions.
    See ISO 32000-1, § 8.6.
    """

    PATTERN = pdf_name('/Pattern')
    """
    Pattern definitions.
    See ISO 32000-1, § 8.7.
    """

    SHADING = pdf_name('/Shading')
    """
    Shading definitions.
    See ISO 32000-1, § 8.7.4.3.
    """

    XOBJECT = pdf_name('/XObject')
    """
    External object definitions (images and form XObjects).
    See ISO 32000-1, § 8.8.
    """

    FONT = pdf_name('/Font')
    """
    Font specifications.
    See ISO 32000-1, § 9.
    """

    PROPERTIES = pdf_name('/Properties')
    """
    Marked content properties.
    See ISO 32000-1, § 14.6.2.
    """
|
|
|
|
|
|
class ResourceManagementError(ValueError):
    """
    Signals a problem while manipulating resource dictionaries, such as two
    resources claiming the same name during a merge.

    Being a :class:`ValueError` subclass, it is also caught by handlers
    written for :class:`ValueError`.
    """
|
|
|
|
|
|
def _res_merge_helper(dict1, dict2):
|
|
for k, v2 in dict2.items():
|
|
if k in dict1:
|
|
raise ResourceManagementError(
|
|
f"Resource with name {k} occurs in both dictionaries."
|
|
)
|
|
dict1[k] = v2
|
|
return dict1
|
|
|
|
|
|
class PdfResources:
    """
    In-memory model of a PDF resource dictionary.

    There is one attribute per :class:`.ResourceType` member, each holding
    the dictionary for that resource category. Indexing an instance with a
    :class:`.ResourceType` (``res[ResourceType.FONT]``) returns the
    corresponding attribute.

    Two instances can be merged with the in-place addition operator:

    .. code-block:: python

        res1 += res2

    *Note:* Merging two resource dictionaries with conflicting resource names
    will produce a :class:`.ResourceManagementError`.

    *Note:* This class is currently only used for new resource dictionaries.
    """

    def __init__(self):
        # One (initially empty) dictionary per resource category; attribute
        # names are the lower-cased ResourceType member names.
        self.ext_g_state = DictionaryObject()
        self.color_space = DictionaryObject()
        self.pattern = DictionaryObject()
        self.shading = DictionaryObject()
        self.xobject = DictionaryObject()
        self.font = DictionaryObject()
        self.properties = DictionaryObject()

    def __getitem__(self, item: ResourceType):
        """
        Look up the dictionary for a resource category.

        :param item:
            The resource category.
        :return:
            The attribute whose name is the lower-cased name of ``item``.
        """
        attr_name = item.name.lower()
        return getattr(self, attr_name)

    def as_pdf_object(self) -> DictionaryObject:
        """
        Render this instance of :class:`.PdfResources` to an actual resource
        dictionary.

        :return:
            A :class:`~.generic.DictionaryObject` mapping each category's
            PDF name to its dictionary. Empty categories are left out.
        """
        populated = {}
        for res_type in ResourceType:
            entries = self[res_type]
            if entries:
                populated[res_type.value] = entries
        return DictionaryObject(populated)

    def __iadd__(self, other):
        """
        Merge another resource dictionary into this one, category by
        category.

        :param other:
            Another instance of :class:`.PdfResources`
        :return:
            Always returns ``self``
        :raises ResourceManagementError:
            Raised when there is a resource name conflict.
        """
        for res_type in ResourceType:
            _res_merge_helper(self[res_type], other[res_type])
        return self
|
|
|
|
|
|
class PdfContent:
    """
    Abstract representation of part of a PDF content stream.

    A content fragment bundles three things: the resources its graphics
    operators refer to, a box describing its dimensions, and (optionally)
    the writer it will eventually be emitted through. Subclasses supply the
    actual operators by implementing :meth:`render`.

    .. warning::

        Whether :class:`.PdfContent` instances can be reused or not
        is left up to the subclasses.
    """

    writer = None
    """
    The :meth:`__init__` method comes with an optional ``writer``
    parameter that can be used to let subclasses register external resources
    with the writer by themselves.

    It can also be set after the fact by calling :meth:`set_writer`.
    """

    def __init__(
        self,
        resources: Optional[PdfResources] = None,
        box: Optional[BoxConstraints] = None,
        writer: Optional[BasePdfFileWriter] = None,
    ):
        """
        :param resources:
            Resources to start from; a fresh :class:`.PdfResources` is
            created when omitted.
        :param box:
            Box constraints for the content; a fresh
            :class:`.BoxConstraints` is created when omitted.
        :param writer:
            Writer to associate with this content fragment, if any.
        """
        self._resources: PdfResources = (
            resources if resources else PdfResources()
        )
        self.box: BoxConstraints = box if box else BoxConstraints()
        self.writer = writer

    @property
    def _ensure_writer(self) -> BasePdfFileWriter:
        """
        :return:
            The writer associated with this content fragment.
        :raises ValueError:
            Raised when no writer has been set.
        """
        current = self.writer
        if current is None:
            raise ValueError("PDF writer is not set")
        return current

    # TODO support a set-if-not-taken mechanism, that suggests alternative names
    #  if necessary.
    def set_resource(
        self, category: ResourceType, name: NameObject, value: PdfObject
    ):
        """Register a resource with this content fragment, replacing any
        resource already stored under the same name in that category.

        :param category:
            The resource category to which the resource belongs.
        :param name:
            The resource's (internal) name.
        :param value:
            The resource's value.
        """
        category_dict = self._resources[category]
        category_dict[name] = value

    def import_resources(self, resources: PdfResources):
        """Merge another resource dictionary into the one associated with
        this content fragment.

        :param resources:
            An instance of :class:`.PdfResources`.
        :raises ResourceManagementError:
            Raised when there is a resource name conflict.
        """
        self._resources += resources

    @property
    def resources(self) -> PdfResources:
        """
        :return:
            The :class:`.PdfResources` instance associated with this
            content fragment.
        """
        return self._resources

    def render(self) -> bytes:
        """
        Compile the content to graphics operators.

        The base class provides no implementation; subclasses must
        override this method.

        :return:
            The graphics operators, as a byte string.
        """
        raise NotImplementedError

    # TODO allow the bounding box to be overridden/refitted
    #  (using matrix transforms)
    def as_form_xobject(self) -> StreamObject:
        """
        Render the object to a form XObject to be referenced by another
        content stream. See ISO 32000-1, § 8.8.

        *Note:* Even if :attr:`writer` is set, the resulting form XObject will
        not be registered. This is left up to the caller.

        :return:
            A :class:`~.generic.StreamObject` instance representing
            the resulting form XObject.
        """
        from pyhanko.pdf_utils.writer import init_xobject_dictionary

        # Render first: a subclass's render() may still adjust the box and
        # the resources (ImportedPdfPage.render does both), so those must
        # only be read afterwards.
        operators = self.render()
        bounds = self.box
        return init_xobject_dictionary(
            command_stream=operators,
            box_width=bounds.width,
            box_height=bounds.height,
            resources=self._resources.as_pdf_object(),
        )

    def set_writer(self, writer):
        """
        Override the currently registered writer object.

        :param writer:
            An instance of :class:`~.writer.BasePdfFileWriter`.
        """
        self.writer = writer

    def add_to_page(
        self, writer: BasePdfFileWriter, page_ix: int, prepend: bool = False
    ):
        """
        Convenience wrapper around :meth:`.BasePdfFileWriter.add_stream_to_page`
        to turn a :class:`.PdfContent` instance into a page
        content stream.

        :param writer:
            A PDF file writer.
        :param page_ix:
            Index of the page to modify.
            The first page has index `0`.
        :param prepend:
            Prepend the content stream to the list of content streams, as
            opposed to appending it to the end.
            This has the effect of causing the stream to be rendered
            underneath the already existing content on the page.
        :return:
            An :class:`~.generic.IndirectObject` reference to the page object
            that was modified.
        """
        # Rendering happens before the resources are serialised, because
        # render() is allowed to add resources along the way.
        content_stream = StreamObject({}, stream_data=self.render())
        stream_ref = writer.add_object(content_stream)
        resource_dict = self.resources.as_pdf_object()
        return writer.add_stream_to_page(
            page_ix,
            stream_ref,
            resources=resource_dict,
            prepend=prepend,
        )
|
|
|
|
|
|
class RawContent(PdfContent):
    """
    PDF content given directly as a pre-built sequence of bytes.

    The bytes are emitted as-is by :meth:`render`. No writer is passed on
    to the parent class by the constructor.
    """

    def __init__(
        self,
        data: bytes,
        resources: Optional[PdfResources] = None,
        box: Optional[BoxConstraints] = None,
    ):
        """
        :param data:
            The raw content bytes.
        :param resources:
            Resources referenced by ``data``, if any.
        :param box:
            Box constraints for the content, if any.
        """
        super().__init__(resources=resources, box=box)
        self.data = data

    def render(self) -> bytes:
        """
        :return:
            The bytes supplied at construction time, unmodified.
        """
        return self.data
|
|
|
|
|
|
class ImportedPdfPage(PdfContent):
    """
    Import a page from another PDF file (lazily).

    Nothing is read at construction time; the source file is only opened
    when :meth:`render` is called. A writer must have been set by then
    (e.g. through :meth:`set_writer`).
    """

    def __init__(self, file_name, page_ix=0):
        """
        :param file_name:
            Path of the PDF file to import from.
        :param page_ix:
            Index of the page to import. Defaults to ``0``.
        """
        self.file_name = file_name
        self.page_ix = page_ix
        super().__init__()

    def render(self) -> bytes:
        """
        Import the page as a form XObject through the associated writer and
        produce the operator that paints it.

        As side effects, the XObject is registered in this fragment's
        resources under a freshly generated ``/Import<hex>`` name, and
        :attr:`box` is replaced by the dimensions of the XObject's ``/BBox``.
        Every call repeats the import and generates a new name.

        :return:
            The ``<name> Do`` operator, as bytes.
        :raises ValueError:
            Raised when no writer has been set.
        """
        # Resolve the writer before touching the file, so a missing writer
        # is reported without opening anything.
        pdf_writer = self._ensure_writer
        with open(self.file_name, 'rb') as source:
            page_xobj = pdf_writer.import_page_as_xobject(
                PdfFileReader(source), page_ix=self.page_ix
            )

        # A random UUID in hex form keeps the resource name unique.
        name = '/Import' + uuid.uuid4().hex
        self.resources.xobject[name] = page_xobj

        # make sure to take the bounding box (i.e. the page's MediaBox)
        # into account when doing layout computations
        xa, ya, xb, yb = page_xobj.get_object()['/BBox']
        self.box = BoxConstraints(width=abs(xa - xb), height=abs(ya - yb))
        return name.encode('ascii') + b' Do'
|