225 lines
8.7 KiB
Python
225 lines
8.7 KiB
Python
import struct
|
|
from hashlib import md5
|
|
from typing import Union
|
|
|
|
from pyhanko.pdf_utils import generic
|
|
from pyhanko.pdf_utils.crypt._util import rc4_encrypt
|
|
|
|
# 32-byte padding string of the PDF standard security handler. Passwords
# shorter than 32 bytes are padded with bytes taken from the start of this
# string; it is also used as RC4/MD5 input when computing the /U entry.
_encryption_padding = b''.join(
    (
        b'\x28\xbf\x4e\x5e\x4e\x75\x8a\x41\x64\x00\x4e\x56',
        b'\xff\xfa\x01\x08\x2e\x2e\x00\xb6\xd0\x68\x3e\x80\x2f\x0c',
        b'\xa9\xfe\x64\x53\x69\x7a',
    )
)
|
|
|
|
|
|
def derive_legacy_file_key(
    password, rev, keylen, owner_entry, p_entry, id1_entry, encrypt_metadata
):
    """
    Implementation of algorithm 3.2 of the PDF standard security handler,
    section 3.5.2 of the PDF 1.6 reference.

    :param password:
        Password as a byte string. It is padded or truncated to exactly
        32 bytes by this function.
    :param rev:
        Revision number of the standard security handler.
    :param keylen:
        Length of the encryption key, in bytes.
    :param owner_entry:
        Value of the encryption dictionary's /O entry, as bytes.
    :param p_entry:
        Value of the /P entry, as an integer. Both the signed 32-bit
        representation and the equivalent unsigned 32-bit representation
        are accepted; they produce the same four bytes.
    :param id1_entry:
        First element of the file's file identifier array, as bytes.
    :param encrypt_metadata:
        Whether document metadata is being encrypted. Only taken into
        account for revision 4 or greater.
    :return:
        The first ``keylen`` bytes of the final MD5 hash.
    :raises struct.error:
        If ``p_entry`` does not fit into 32 bits.
    """

    # 1. Pad or truncate the password string to exactly 32 bytes. If the
    # password string is more than 32 bytes long, use only its first 32 bytes;
    # if it is less than 32 bytes long, pad it by appending the required number
    # of additional bytes from the beginning of the padding string
    # (_encryption_padding).
    password = (password + _encryption_padding)[:32]
    # 2. Initialize the MD5 hash function and pass the result of step 1 as
    # input to this function.
    # NOTE: Suppress LGTM warning here, we have to do what the spec says
    m = md5(password)  # lgtm
    # 3. Pass the value of the encryption dictionary's /O entry to the MD5 hash
    # function.
    m.update(owner_entry)
    # 4. Treat the value of the /P entry as an unsigned 4-byte integer and pass
    # these bytes to the MD5 hash function, low-order byte first.
    # Negative values are packed as signed two's complement; non-negative
    # values are packed as unsigned, so a /P value supplied in its unsigned
    # form (>= 2**31) yields the same bytes instead of raising struct.error.
    # Values that do not fit in 32 bits still raise struct.error.
    p_format = '<i' if p_entry < 0 else '<I'
    m.update(struct.pack(p_format, p_entry))
    # 5. Pass the first element of the file's file identifier array to the MD5
    # hash function.
    m.update(id1_entry)
    # 6. (Revision 4 or greater) If document metadata is not being encrypted,
    # pass 4 bytes with the value 0xFFFFFFFF to the MD5 hash function.
    if rev >= 4 and not encrypt_metadata:
        m.update(b"\xff\xff\xff\xff")
    # 7. Finish the hash.
    md5_hash = m.digest()
    # 8. (Revision 3 or greater) Do the following 50 times: Take the output
    # from the previous MD5 hash and pass the first n bytes of the output as
    # input into a new MD5 hash, where n is the number of bytes of the
    # encryption key as defined by the value of the encryption dictionary's
    # /Length entry.
    if rev >= 3:
        for _ in range(50):
            md5_hash = md5(md5_hash[:keylen]).digest()
    # 9. Set the encryption key to the first n bytes of the output from the
    # final MD5 hash, where n is always 5 for revision 2 but, for revision 3 or
    # greater, depends on the value of the encryption dictionary's /Length
    # entry.
    return md5_hash[:keylen]
|
|
|
|
|
|
def legacy_normalise_pw(password: Union[str, bytes]) -> bytes:
    """
    Normalise a password for use with the legacy security handlers.

    :param password:
        The password, either as text or as raw bytes.
    :return:
        For non-text input, the first 32 items of ``password``.
        For text input, the first 32 characters of ``password``, encoded
        with ``generic.encode_pdfdocencoding``.
    """
    # Anything that is not a text string is only truncated.
    if not isinstance(password, str):
        return password[:32]
    # Text is truncated first and encoded afterwards.
    return generic.encode_pdfdocencoding(password[:32])
|
|
|
|
|
|
def compute_o_value_legacy(owner_pwd, user_pwd, rev, keylen):
    """
    Compute the /O entry of the encryption dictionary (algorithm 3.3 of the
    PDF standard security handler, section 3.5.2 of the PDF 1.6 reference).

    :param owner_pwd:
        Owner password as a byte string.
    :param user_pwd:
        User password as a byte string.
    :param rev:
        Revision number of the standard security handler.
    :param keylen:
        Length of the encryption key, in bytes.
    :return:
        The output of the final RC4 invocation, to be stored as /O.
    """

    # Steps 1 - 4: derive the RC4 key from the owner password.
    rc4_key = compute_o_value_legacy_prep(owner_pwd, rev, keylen)

    # Step 5: pad or truncate the user password string as described in
    # step 1 of algorithm 3.2.
    padded_user_pwd = (user_pwd + _encryption_padding)[:32]

    # Step 6: encrypt the padded user password with RC4, using the key
    # obtained in step 4.
    o_value = rc4_encrypt(rc4_key, padded_user_pwd)

    # Revision 2 stops here; step 7 only applies to revision 3 or greater.
    if rev < 3:
        return o_value

    # Step 7: run 19 further RC4 rounds. In each round, the previous output
    # is encrypted with a key formed by XORing every byte of the step 4 key
    # with the iteration counter (1 to 19).
    for counter in range(1, 20):
        round_key = bytes(key_byte ^ counter for key_byte in rc4_key)
        o_value = rc4_encrypt(round_key, o_value)

    # Step 8: the output of the final RC4 invocation is the /O value.
    return o_value
|
|
|
|
|
|
def compute_o_value_legacy_prep(password, rev, keylen):
    """
    Steps 1-4 of algorithm 3.3: derive the RC4 key used to compute the
    /O entry.

    :param password:
        Password as a byte string (the owner password or, if there is none,
        the user password).
    :param rev:
        Revision number of the standard security handler.
    :param keylen:
        Length of the encryption key, in bytes.
    :return:
        The first ``keylen`` bytes of the final MD5 hash.
    """
    # Step 1: pad or truncate the password string as described in step 1 of
    # algorithm 3.2.
    padded = (password + _encryption_padding)[:32]

    # Step 2: hash the padded password with MD5.
    # NOTE: Suppress LGTM warning here, we have to do what the spec says
    digest = md5(padded).digest()  # lgtm

    # Step 3: (revision 3 or greater) re-hash the previous MD5 output
    # 50 times. For revision 2, no extra rounds are performed.
    extra_rounds = 50 if rev >= 3 else 0
    for _ in range(extra_rounds):
        digest = md5(digest).digest()

    # Step 4: the RC4 key consists of the first n bytes of the final hash,
    # where n is always 5 for revision 2 but, for revision 3 or greater,
    # depends on the value of the encryption dictionary's /Length entry.
    return digest[:keylen]
|
|
|
|
|
|
def compute_u_value_r2(password, owner_entry, p_entry, id1_entry):
    """
    Compute the /U entry for revision 2 of the standard security handler
    (algorithm 3.4, section 3.5.2 of the PDF 1.6 reference).

    :param password:
        User password as a byte string.
    :param owner_entry:
        Value of the encryption dictionary's /O entry.
    :param p_entry:
        Value of the encryption dictionary's /P entry.
    :param id1_entry:
        First element of the file's file identifier array.
    :return:
        A tuple ``(u_value, file_key)`` with the computed /U value and the
        encryption key it was derived from.
    """
    # Step 1: create an encryption key based on the user password string,
    # as described in algorithm 3.2. Revision 2 always uses a 5-byte key.
    file_key = derive_legacy_file_key(
        password,
        rev=2,
        keylen=5,
        owner_entry=owner_entry,
        p_entry=p_entry,
        id1_entry=id1_entry,
        encrypt_metadata=True,
    )

    # Step 2: encrypt the 32-byte padding string with RC4, using the key
    # from the preceding step.
    u_value = rc4_encrypt(file_key, _encryption_padding)

    # Step 3: the result of step 2 is the value of the /U entry.
    return u_value, file_key
|
|
|
|
|
|
def compute_u_value_r34(
    password,
    rev,
    keylen,
    owner_entry,
    p_entry,
    id1_entry,
    encrypt_metadata: bool,
):
    """
    Compute the /U entry for revision 3 or greater of the standard security
    handler (algorithm 3.5, section 3.5.2 of the PDF 1.6 reference).

    :param password:
        User password as a byte string.
    :param rev:
        Revision number of the standard security handler.
    :param keylen:
        Length of the encryption key, in bytes.
    :param owner_entry:
        Value of the encryption dictionary's /O entry.
    :param p_entry:
        Value of the encryption dictionary's /P entry.
    :param id1_entry:
        First element of the file's file identifier array.
    :param encrypt_metadata:
        Whether document metadata is being encrypted.
    :return:
        A tuple ``(u_value, file_key)`` with the computed /U value and the
        encryption key it was derived from.
    """

    # Step 1: create an encryption key based on the user password string,
    # as described in algorithm 3.2.
    file_key = derive_legacy_file_key(
        password,
        rev,
        keylen,
        owner_entry,
        p_entry,
        id1_entry,
        encrypt_metadata=encrypt_metadata,
    )

    # Step 2: feed the 32-byte padding string shown in step 1 of
    # algorithm 3.2 to a fresh MD5 hash.
    hasher = md5(_encryption_padding)

    # Step 3: add the first element of the file's file identifier array and
    # finish the hash.
    hasher.update(id1_entry)
    digest = hasher.digest()

    # Step 4: encrypt the 16-byte hash with RC4, using the key from step 1.
    cipher = rc4_encrypt(file_key, digest)

    # Step 5: run 19 further RC4 rounds. In each round, the previous output
    # is encrypted with a key formed by XORing every byte of the original
    # encryption key (from step 1) with the iteration counter (1 to 19).
    for counter in range(1, 20):
        round_key = bytes(key_byte ^ counter for key_byte in file_key)
        cipher = rc4_encrypt(round_key, cipher)

    # Step 6: append 16 bytes of arbitrary padding to the output of the
    # final RC4 invocation; the 32-byte result is the /U value.
    # (implementer note: the meaning of "arbitrary padding" is unclear, so
    # null bytes are used. This seems to match a few other implementations.)
    u_value = cipher + (b'\x00' * 16)
    return u_value, file_key
|
|
|
|
|
|
def legacy_derive_object_key(
    shared_key: bytes, idnum: int, generation: int, use_aes=False
) -> bytes:
    """
    Function that does the key derivation for PDF's legacy security handlers.

    :param shared_key:
        Global file encryption key.
    :param idnum:
        ID of the object being written.
    :param generation:
        Generation number of the object being written.
    :param use_aes:
        Boolean indicating whether the security handler uses RC4 or AES(-128).
    :return:
        The first ``min(16, len(shared_key) + 5)`` bytes of the MD5 digest
        of the shared key extended with the object-specific suffix.
    """
    # Object-specific suffix: the 3 low-order bytes of the object ID followed
    # by the 2 low-order bytes of the generation number (both little-endian).
    id_bytes = struct.pack("<i", idnum)[:3]
    generation_bytes = struct.pack("<i", generation)[:2]

    hasher = md5(shared_key + id_bytes + generation_bytes)
    if use_aes:
        # AES mode mixes in an additional fixed 4-byte salt.
        hasher.update(b'sAlT')

    # The derived key is 5 bytes longer than the shared key, capped at the
    # 16 bytes of an MD5 digest.
    derived_len = min(16, len(shared_key) + 5)
    return hasher.digest()[:derived_len]
|