
templates.data-delivery-pyspark.encryption.py.vm Maven / Gradle / Ivy
${step.encryptionSignature}:
    """
    Checks for encryption policies and applies encryption to the designated fields.

    Every policy whose encryptPhase matches this step's phase (case-insensitive) is
    applied in turn, each one building on the result of the previous one. Policies
    without an encryptPhase are ignored.

    :param inbound: the data to check against the encryption policies
    :return: the (possibly encrypted) data; the original data when no policy applies
    """
#if (${step.hasMessagingInbound()} && !${step.hasInboundRecordType()})
    ${step.capitalizedName}Base.logger.warn('Encryption is not yet supported for messaging without specifying an inbound record type!')
    ${step.capitalizedName}Base.logger.warn('If desired, please add encryption manually by overriding check_and_apply_encryption_policy()!')
    return inbound
#else
    return_payload = inbound
    ${step.capitalizedName}Base.logger.info('Checking encryption policies')

    # Check if the KRAUSENING_BASE is set in the environment and use a default if it isn't
    if not os.environ.get('KRAUSENING_BASE'):
        ${step.capitalizedName}Base.logger.warn('KRAUSENING_BASE environment variable was not set. Using default path -> resources/krausening/base/')
        os.environ['KRAUSENING_BASE'] = 'resources/krausening/base/'

    # NOTE(review): the returned location is not used here; the call is retained in case
    # constructing/reading the policy configuration has side effects -- confirm.
    PolicyConfiguration().policiesLocation()
    policy_manager = DataEncryptionPolicyManager.getInstance()
    retrieved_policies = policy_manager.policies

    for encrypt_policy in retrieved_policies.values():
        # Encryption policies have a property called encryptPhase.
        # If that property is missing then we should ignore the policy.
        if not encrypt_policy.encryptPhase:
            continue

        if self.step_phase.lower() != encrypt_policy.encryptPhase.lower():
            ${step.capitalizedName}Base.logger.info('Encryption policy does not apply to this phase: ' + self.step_phase)
            continue

        encrypt_fields = encrypt_policy.encryptFields
        input_fields = self.get_fields_list(return_payload)
        field_intersection = list(set(encrypt_fields) & set(input_fields))

        # Nothing to encrypt for this policy: keep the current payload untouched
        if not field_intersection:
            continue

        # Feed the running payload (not the original inbound) into each policy so that
        # the result of an earlier matching policy is not discarded by a later one
        return_payload = self.apply_encryption_to_dataset(return_payload, field_intersection, encrypt_policy.encryptAlgorithm)

    return return_payload
#end
#if (!$step.hasInboundRecordType())
def aissemble_encrypt_aes_udf(self):
    """
    Builds a UDF that passes each value it receives to aissemble_encrypt_simple_aes.

    :return: the result of wrapping the encryption call with udf()
    """
    encrypt_value = lambda value: aissemble_encrypt_simple_aes(value)
    return udf(encrypt_value)
def aissemble_encrypt_vault_udf(self, key):
    """
    Builds a UDF that passes the given key and each value it receives to
    aissemble_encrypt_with_vault_key.

    :param key: the key handed to aissemble_encrypt_with_vault_key on every call
    :return: the result of wrapping the encryption call with udf()
    """
    encrypt_value = lambda value: aissemble_encrypt_with_vault_key(key, value)
    return udf(encrypt_value)
#end
${step.applyEncryptionSignature}:
    '''
    This method applies encryption to the given fields.

    Uses the vault-based encryption when algorithm is 'VAULT_ENCRYPT' and the
    AES-based encryption otherwise.

    :param inbound: the data whose fields should be encrypted
    :param fields_to_update: names of the fields to encrypt
    :param algorithm: name of the encryption algorithm to use
    :return: the data with the given fields encrypted
    '''
    ${step.capitalizedName}Base.logger.info('applying encryption')
#if ($step.hasInboundNativeCollectionType() && !$step.hasInboundRecordType())
## If inbound is a set of dataframes we will create another method to loop over the dataframes
## and call a method within the loop to process a single dataframe
    # Encrypt every dataframe of the inbound collection and gather the results in a new set
    return_dataframe_set = set()
    for dataframe in inbound:
        encrypted_dataframe = self.apply_encryption_when_native_collection_is_supplied(dataframe, fields_to_update, algorithm)
        return_dataframe_set.add(encrypted_dataframe)
    return return_dataframe_set

def apply_encryption_when_native_collection_is_supplied(self, inbound: DataFrame, fields_to_update: List[str], algorithm: str):
    '''
    Applies encryption to the given fields of a single dataframe taken from the inbound collection.

    :param inbound: one dataframe of the inbound collection
    :param fields_to_update: names of the columns to encrypt
    :param algorithm: name of the encryption algorithm to use
    :return: the dataframe with the given columns encrypted
    '''
#end
#if ($step.hasInboundNativeCollectionType() && $step.hasInboundRecordType())
## process a set[CustomRecord]
    # return type is a set
    return_payload = set()
    aissemble_encrypt = AesCbcEncryptionStrategy()
    if algorithm == 'VAULT_ENCRYPT':
        aissemble_encrypt = VaultRemoteEncryptionStrategy()

    # Records are updated in place and then collected into the returned set
    for record in inbound:
        for column in fields_to_update:
            encrypted_column_value = aissemble_encrypt.encrypt(getattr(record, column))
            # Depending on the encryption algorithm the return value may be bytes or bytesarray which requires decoding
            try:
                encrypted_column_value = encrypted_column_value.decode('utf-8')
            except (UnicodeDecodeError, AttributeError):
                # Deliberate best effort: values that cannot be decoded (or have no
                # decode(), e.g. str) are stored as returned by the strategy
                pass
            setattr(record, column, encrypted_column_value)
        return_payload.add(record)
#elseif (!$step.hasInboundNativeCollectionType() && $step.hasInboundRecordType())
## process a CustomRecord
    # return type is a single custom data type
    aissemble_encrypt = AesCbcEncryptionStrategy()
    if algorithm == 'VAULT_ENCRYPT':
        aissemble_encrypt = VaultRemoteEncryptionStrategy()

    # The inbound record is updated in place
    for column in fields_to_update:
        encrypted_column_value = aissemble_encrypt.encrypt(getattr(inbound, column))
        # Depending on the encryption algorithm the return value may be bytes or bytesarray which requires decoding
        try:
            encrypted_column_value = encrypted_column_value.decode('utf-8')
        except (UnicodeDecodeError, AttributeError):
            # Deliberate best effort: values that cannot be decoded (or have no
            # decode(), e.g. str) are stored as returned by the strategy
            pass
        setattr(inbound, column, encrypted_column_value)
    return_payload = inbound
#elseif (!$step.hasInboundRecordType())
## process a dataframe or set[dataframe]
    # Start from the inbound dataframe so it is returned unchanged when there is nothing to encrypt
    return_payload = inbound
    if fields_to_update:
        if algorithm == 'VAULT_ENCRYPT':
            # Because of the restrictive nature of PySpark UDF's we have to retrieve the Vault key outside of
            # the udf call to avoid threading errors
            vault_key = VaultKeyUtil.get_instance().get_vault_key_encoded()
            encrypt_udf = self.aissemble_encrypt_vault_udf(vault_key)
        else:
            encrypt_udf = self.aissemble_encrypt_aes_udf()

        for encrypt_field in fields_to_update:
            # Chain on the running result so that every requested column ends up encrypted,
            # not only the last one
            return_payload = return_payload.withColumn(encrypt_field, encrypt_udf(col(encrypt_field)))
            # NOTE(review): show() prints the dataframe on every iteration -- looks like
            # leftover debug output; kept to preserve the existing behavior
            return_payload.show()
#end
    return return_payload
${step.fieldListSignature}:
    '''
    This method gets the field names from the given data type.

    :param inbound: the data whose field names are wanted
    :return: the list of field names
    '''
#if ($step.hasInboundRecordType())
    # The fields are the attributes of the record class that are declared as properties
    return [name for name in dir(${step.inbound.recordType.name}) if isinstance(getattr(${step.inbound.recordType.name}, name), property)]
#else
## No further branch is needed: a step either has an inbound record type or it does not
    # Get the column names
    return inbound.columns
#end
© 2015 - 2025 Weber Informatics LLC | Privacy Policy