import struct
from hashlib import md5
from typing import Union

from pyhanko.pdf_utils import generic
from pyhanko.pdf_utils.crypt._util import rc4_encrypt

# The 32-byte padding string referred to in step 1 of algorithm 3.2.
_encryption_padding = (
    b'\x28\xbf\x4e\x5e\x4e\x75\x8a\x41\x64\x00\x4e\x56'
    b'\xff\xfa\x01\x08\x2e\x2e\x00\xb6\xd0\x68\x3e\x80\x2f\x0c'
    b'\xa9\xfe\x64\x53\x69\x7a'
)


def derive_legacy_file_key(
    password, rev, keylen, owner_entry, p_entry, id1_entry, encrypt_metadata
):
    """
    Implementation of algorithm 3.2 of the PDF standard security handler,
    section 3.5.2 of the PDF 1.6 reference.

    :param password:
        Password bytes; padded/truncated to 32 bytes in step 1.
    :param rev:
        Revision of the security handler.
    :param keylen:
        Number of bytes of the resulting key.
    :param owner_entry:
        Value of the encryption dictionary's /O entry, as bytes.
    :param p_entry:
        Value of the /P entry, as an integer.
    :param id1_entry:
        First element of the file identifier array, as bytes.
    :param encrypt_metadata:
        Whether document metadata is being encrypted.
    :return:
        The first ``keylen`` bytes of the final MD5 digest.
    """

    # 1. Pad or truncate the password string to exactly 32 bytes. If the
    # password string is more than 32 bytes long, use only its first 32 bytes;
    # if it is less than 32 bytes long, pad it by appending the required number
    # of additional bytes from the beginning of the padding string
    # (_encryption_padding).
    password = (password + _encryption_padding)[:32]
    # 2. Initialize the MD5 hash function and pass the result of step 1 as
    # input to this function.
    # NOTE: Suppress LGTM warning here, we have to do what the spec says
    m = md5(password)  # lgtm
    # 3. Pass the value of the encryption dictionary's /O entry to the MD5 hash
    # function.
    m.update(owner_entry)
    # 4. Treat the value of the /P entry as an unsigned 4-byte integer and pass
    # these bytes to the MD5 hash function, low-order byte first.
    # NOTE(review): steps 4-6 were damaged in the source this was restored
    # from (text between "struct.pack('" and "= 4 and not ..." was lost);
    # they are reconstructed from the algorithm description. Masking to 32
    # bits makes the signed and unsigned representations of the same flags
    # yield identical bytes; values outside 32 bits wrap instead of raising.
    p_entry = struct.pack('<I', p_entry & 0xFFFFFFFF)
    m.update(p_entry)
    # 5. Pass the first element of the file's file identifier array to the
    # MD5 hash function.
    m.update(id1_entry)
    # 6. (Revision 4 or greater) If document metadata is not being encrypted,
    # pass 4 bytes with the value 0xFFFFFFFF to the MD5 hash function.
    if rev >= 4 and not encrypt_metadata:
        m.update(b"\xff\xff\xff\xff")
    # 7. Finish the hash.
    md5_hash = m.digest()
    # 8. (Revision 3 or greater) Do the following 50 times: Take the output
    # from the previous MD5 hash and pass the first n bytes of the output as
    # input into a new MD5 hash, where n is the number of bytes of the
    # encryption key as defined by the value of the encryption dictionary's
    # /Length entry.
    if rev >= 3:
        for _ in range(50):
            md5_hash = md5(md5_hash[:keylen]).digest()
    # 9. Set the encryption key to the first n bytes of the output from the
    # final MD5 hash, where n is always 5 for revision 2 but, for revision 3 or
    # greater, depends on the value of the encryption dictionary's /Length
    # entry.
    return md5_hash[:keylen]


def legacy_normalise_pw(password: Union[str, bytes]) -> bytes:
    """
    Normalise a password for use with the legacy security handlers.

    The input is cut down to its first 32 characters (for ``str``) or
    32 bytes (for ``bytes``). String passwords are additionally encoded
    using :func:`generic.encode_pdfdocencoding`.

    :param password:
        The password, as a string or as raw bytes.
    :return:
        The truncated password as bytes.
    """
    if isinstance(password, str):
        return generic.encode_pdfdocencoding(password[:32])
    else:
        return password[:32]


def compute_o_value_legacy(owner_pwd, user_pwd, rev, keylen):
    """
    Implementation of algorithm 3.3 of the PDF standard security handler,
    section 3.5.2 of the PDF 1.6 reference.

    :param owner_pwd:
        Owner password bytes.
    :param user_pwd:
        User password bytes.
    :param rev:
        Revision of the security handler.
    :param keylen:
        Number of bytes of the RC4 key.
    :return:
        The value to store in the /O entry.
    """

    # steps 1 - 4
    key = compute_o_value_legacy_prep(owner_pwd, rev, keylen)
    # 5. Pad or truncate the user password string as described in step 1 of
    # algorithm 3.2.
    user_pwd = (user_pwd + _encryption_padding)[:32]
    # 6. Encrypt the result of step 5, using an RC4 encryption function with
    # the encryption key obtained in step 4.
    val = rc4_encrypt(key, user_pwd)
    # 7. (Revision 3 or greater) Do the following 19 times: Take the output
    # from the previous invocation of the RC4 function and pass it as input to
    # a new invocation of the function; use an encryption key generated by
    # taking each byte of the encryption key obtained in step 4 and performing
    # an XOR operation between that byte and the single-byte value of the
    # iteration counter (from 1 to 19).
    if rev >= 3:
        for i in range(1, 20):
            new_key = bytes(b ^ i for b in key)
            val = rc4_encrypt(new_key, val)
    # 8. Store the output from the final invocation of the RC4 as the value of
    # the /O entry in the encryption dictionary.
    return val


def compute_o_value_legacy_prep(password, rev, keylen):
    """
    Steps 1-4 of algorithm 3.3

    :param password:
        Owner password bytes.
    :param rev:
        Revision of the security handler.
    :param keylen:
        Number of bytes of the RC4 key.
    :return:
        The RC4 key: the first ``keylen`` bytes of the final MD5 digest.
    """

    # 1. Pad or truncate the owner password string as described in step 1 of
    # algorithm 3.2. If there is no owner password, use the user password
    # instead.
    password = (password + _encryption_padding)[:32]
    # 2. Initialize the MD5 hash function and pass the result of step 1 as
    # input to this function.
    # NOTE: Suppress LGTM warning here, we have to do what the spec says
    m = md5(password)  # lgtm
    # 3. (Revision 3 or greater) Do the following 50 times: Take the output
    # from the previous MD5 hash and pass it as input into a new MD5 hash.
    md5_hash = m.digest()
    if rev >= 3:
        for _ in range(50):
            md5_hash = md5(md5_hash).digest()
    # 4. Create an RC4 encryption key using the first n bytes of the output
    # from the final MD5 hash, where n is always 5 for revision 2 but, for
    # revision 3 or greater, depends on the value of the encryption
    # dictionary's /Length entry.
    key = md5_hash[:keylen]
    return key


def compute_u_value_r2(password, owner_entry, p_entry, id1_entry):
    """
    Implementation of algorithm 3.4 of the PDF standard security handler,
    section 3.5.2 of the PDF 1.6 reference.

    :return:
        A tuple ``(u, key)`` with the /U entry value and the file key
        it was computed with.
    """

    # 1. Create an encryption key based on the user password string, as
    # described in algorithm 3.2.
    key = derive_legacy_file_key(
        password, 2, 5, owner_entry, p_entry, id1_entry, encrypt_metadata=True
    )
    # 2. Encrypt the 32-byte padding string shown in step 1 of algorithm 3.2,
    # using an RC4 encryption function with the encryption key from the
    # preceding step.
    u = rc4_encrypt(key, _encryption_padding)
    # 3. Store the result of step 2 as the value of the /U entry in the
    # encryption dictionary.
    return u, key


def compute_u_value_r34(
    password,
    rev,
    keylen,
    owner_entry,
    p_entry,
    id1_entry,
    encrypt_metadata: bool,
):
    """
    Implementation of algorithm 3.5 of the PDF standard security handler
    (the /U computation for revision 3 or greater), section 3.5.2 of the
    PDF 1.6 reference.

    :return:
        A tuple ``(u, key)`` with the 32-byte /U entry value and the file
        key it was computed with.
    """

    # 1. Create an encryption key based on the user password string, as
    # described in Algorithm 3.2.
    key = derive_legacy_file_key(
        password,
        rev,
        keylen,
        owner_entry,
        p_entry,
        id1_entry,
        encrypt_metadata=encrypt_metadata,
    )
    # 2. Initialize the MD5 hash function and pass the 32-byte padding string
    # shown in step 1 of Algorithm 3.2 as input to this function.
    m = md5()
    m.update(_encryption_padding)
    # 3. Pass the first element of the file's file identifier array (the value
    # of the ID entry in the document's trailer dictionary; see Table 3.13 on
    # page 73) to the hash function and finish the hash. (See implementation
    # note 25 in Appendix H.)
    m.update(id1_entry)
    md5_hash = m.digest()
    # 4. Encrypt the 16-byte result of the hash, using an RC4 encryption
    # function with the encryption key from step 1.
    val = rc4_encrypt(key, md5_hash)
    # 5. Do the following 19 times: Take the output from the previous
    # invocation of the RC4 function and pass it as input to a new invocation
    # of the function; use an encryption key generated by taking each byte of
    # the original encryption key (obtained in step 1) and performing an XOR
    # operation between that byte and the single-byte value of the iteration
    # counter (from 1 to 19).
    for i in range(1, 20):
        new_key = bytes(b ^ i for b in key)
        val = rc4_encrypt(new_key, val)
    # 6. Append 16 bytes of arbitrary padding to the output from the final
    # invocation of the RC4 function and store the 32-byte result as the value
    # of the U entry in the encryption dictionary.
    # (implementer note: I don't know what "arbitrary padding" is supposed to
    # mean, so I have used null bytes. This seems to match a few other
    # people's implementations)
    return val + (b'\x00' * 16), key


def legacy_derive_object_key(
    shared_key: bytes, idnum: int, generation: int, use_aes=False
) -> bytes:
    """
    Function that does the key derivation for PDF's legacy security handlers.

    :param shared_key:
        Global file encryption key.
    :param idnum:
        ID of the object being written.
    :param generation:
        Generation number of the object being written.
    :param use_aes:
        Boolean indicating whether the security handler uses RC4 or AES(-128).
    :return:
        The per-object key: the first ``min(16, len(shared_key) + 5)`` bytes
        of the MD5 digest of the extended key.
    """
    # NOTE(review): the body of this function was truncated in the source it
    # was restored from (everything after ``struct.pack("`` was lost). It is
    # reconstructed from algorithm 3.1 of the PDF reference -- confirm
    # against the upstream implementation.

    # Low-order 3 bytes of the object number and low-order 2 bytes of the
    # generation number, low-order byte first.
    pack1 = struct.pack("<i", idnum)[:3]
    pack2 = struct.pack("<i", generation)[:2]
    key = shared_key + pack1 + pack2
    if use_aes:
        # AES variant of the algorithm appends a fixed 4-byte salt.
        key += b'sAlT'
    md5_hash = md5(key).digest()
    # Use the first n + 5 bytes of the digest, capped at 16 bytes.
    return md5_hash[: min(16, len(shared_key) + 5)]