templates.general-mlflow.training.impl.py.vm Maven / Gradle / Ivy
"""
Implementation of this pipeline.
GENERATED STUB CODE - PLEASE ***DO*** MODIFY
Originally generated from: ${templateName}
"""
import json
from aissemble_security.pdp_client import AissembleSecurityException
from typing import List
from ..generated.${pipeline.getSnakeCaseName()}_base import ${pipeline.capitalizedName}Base, DataFrame, DatasetSplits
import mlflow
class ${pipeline.capitalizedName}(${pipeline.capitalizedName}Base):
    """
    Implementation of this pipeline.

    Generated stub: each method below is one stage of the training
    workflow defined by ${pipeline.capitalizedName}Base (load, prep,
    feature selection, split, train, evaluate, save, deploy).
    """

    def __init__(self):
        """Initialize the pipeline under its generated snake_case name."""
        super().__init__('${pipeline.getSnakeCaseName()}')
#if ($pipeline.trainingStep.isModelLineageEnabled())
        # Set information about the ownership, documentation, and hardware details of the pipeline for tagging purposes.
#end
        # Implement any additional initialization for the pipeline.

#if ($autoTrain)
    def acknowledge_training_alert(self, alert: any) -> None:
        """Record a training alert and verify the requester is authorized.

        The alert payload is expected to be a JSON string carrying a
        'token' field (presumably a security token issued to the caller —
        confirm against the alert producer).

        Raises:
            AissembleSecurityException: if the policy decision for the
                token is anything other than 'PERMIT'.
        """
        # Implement acknowledgment of the training alert.
        self.training_alert = alert
        message_json = json.loads(alert.value)
        # TODO: if you don't want to use authorization in your pipeline just remove the following lines
        decision = self.authorize(message_json['token'], 'data-access')
        if 'PERMIT' == decision:
            print('User is authorized to run training')
        else:
            raise AissembleSecurityException('User is not authorized')
#end

    def load_dataset(self) -> DataFrame:
        """Load and return the dataset used for training as a DataFrame.

        The stub also records the dataset origin (currently None) for
        lineage/tagging purposes via set_dataset_origin.
        """
        # Implement loading of the dataset for training,
        # and return the dataset as a DataFrame.
        # TODO: load dataset and convert to DataFrame, and set information about the origin of the dataset for tagging purposes.
        dataset = DataFrame()
        self.set_dataset_origin(None)
        return dataset

    def prep_dataset(self, dataset: DataFrame) -> DataFrame:
        """Apply last-mile data preparation and return the prepped dataset.

        The stub is a pass-through: it returns the input unchanged.
        """
        # Implement last-mile data preparation on the loaded dataset that has been
        # passed through the dataset parameter, and return the prepped dataset.
        # If last-mile data preparation is not required, then just return the dataset.
        return dataset

    def select_features(self, dataset: DataFrame) -> List[str]:
        """Return the list of feature (column) names to train on.

        The stub selects every column of the prepped dataset.
        """
        # Implement feature selection using the prepped dataset that has been passed
        # through the dataset parameter, and return the list of selected features.
        # If feature selection is not required, then return all features in the dataset.
        # TODO: select features from the dataset
        selected_features = dataset.columns.tolist()
        return selected_features

    def split_dataset(self, dataset: DataFrame) -> DatasetSplits:
        """Split the dataset into train/test partitions.

        Returns a DatasetSplits built from the two splits; the stub
        leaves both as None until implemented.
        """
        # Implement the creation of the training and testing splits using the dataset
        # with selected features that has been passed in through the dataset parameter,
        # and return a DatasetSplits object containing the splits.
        # TODO: create training/testing splits
        train_split = None
        test_split = None
        return DatasetSplits(train_split, test_split)

    # TODO: Update the return type of this to be match your expected model type such as LogisticRegression or Sequential.
    def train_model(self, train_dataset: any) -> any:
        """Train and return a model from the training split.

        The stub records model information (currently None, None) for
        tagging purposes via set_model_information before training.
        """
        # Implement the training of the model using the training split
        # that has been passed in through the train_dataset parameter,
        # and return the trained model.
        # TODO: create model based on what return type is needed and set model information for tagging purposes.
        self.set_model_information(None, None)
        model = None
        # TODO: train model using model.fit(...) on the training split
        return model

    # TODO: Update the model type to match what is returned in the train_model method
    def evaluate_model(self, model: any, test_dataset: any) -> float:
        """Evaluate the trained model on the test split and return its score.

        The score is logged to the active MLflow run under the metric
        name 'score'. The stub always reports 0.
        """
        # Implement the evaluation of the trained model using the testing split that has
        # been passed through the test_dataset parameter, and return the evaluation score.
        # TODO: evaluate model using model.predict(...) on the testing split
        # TODO: obtain the evaluation score using model.score(...) on the testing split
        score = 0
        # Log the score of the model
        mlflow.log_metric('score', score)
        return score

    # TODO: Update the model type to match what is returned in the train_model method
    def save_model(self, model: any) -> None:
        """Persist the trained model (no-op in the stub)."""
        # Implement saving of the trained model.
        # TODO: save model using the correct mlflow module such as sklearn or keras. Below is an example using sklearn:
        # mlflow.sklearn.log_model(model, "model")
        pass

    # TODO: Update the model type to match what is returned in the train_model method
    def deploy_model(self, score: float, model: any) -> None:
        """Deploy the model based on its evaluation score (no-op in the stub)."""
        # Implement deploying of the trained model based on the evaluation score, if needed.
        pass
© 2015 - 2025 Weber Informatics LLC | Privacy Policy