org.jpmml.sparkml.ModelConverter Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of pmml-sparkml Show documentation
Show all versions of pmml-sparkml Show documentation
JPMML Apache Spark ML to PMML converter
The newest version!
/*
* Copyright (c) 2016 Villu Ruusmann
*
* This file is part of JPMML-SparkML
*
* JPMML-SparkML is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* JPMML-SparkML is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with JPMML-SparkML. If not, see <http://www.gnu.org/licenses/>.
*/
package org.jpmml.sparkml;
import java.util.List;
import org.apache.spark.ml.Model;
import org.apache.spark.ml.param.shared.HasPredictionCol;
import org.dmg.pmml.MiningFunction;
import org.dmg.pmml.Output;
import org.dmg.pmml.OutputField;
import org.jpmml.converter.Feature;
import org.jpmml.converter.FeatureUtil;
import org.jpmml.converter.Label;
import org.jpmml.converter.ModelUtil;
import org.jpmml.converter.ScalarLabel;
import org.jpmml.converter.Schema;
import org.jpmml.converter.mining.MiningModelUtil;
abstract
public class ModelConverter & HasPredictionCol> extends TransformerConverter {
public ModelConverter(T model){
super(model);
}
abstract
public MiningFunction getMiningFunction();
abstract
public List getFeatures(SparkMLEncoder encoder);
abstract
public org.dmg.pmml.Model encodeModel(Schema schema);
public Schema encodeSchema(SparkMLEncoder encoder){
Label label = getLabel(encoder);
List features = getFeatures(encoder);
Schema result = new Schema(encoder, label, features);
checkSchema(result);
return result;
}
public Label getLabel(SparkMLEncoder encoder){
return null;
}
public void checkSchema(Schema schema){
Label label = schema.getLabel();
List extends Feature> features = schema.getFeatures();
MiningFunction miningFunction = getMiningFunction();
switch(miningFunction){
case ASSOCIATION_RULES:
case CLUSTERING:
if(label != null){
throw new IllegalArgumentException("Expected no label, got " + label);
}
break;
case CLASSIFICATION:
case REGRESSION:
if(label == null){
throw new IllegalArgumentException("Expected a label, got no label");
}
break;
default:
break;
}
if(label instanceof ScalarLabel){
ScalarLabel scalarLabel = (ScalarLabel)label;
Feature labelFeature = FeatureUtil.findLabelFeature(features, scalarLabel);
if(labelFeature != null){
throw new IllegalArgumentException("Label column '" + scalarLabel.getName() + "' is contained in the list of feature columns");
}
}
}
public List registerOutputFields(Label label, org.dmg.pmml.Model model, SparkMLEncoder encoder){
return null;
}
public org.dmg.pmml.Model registerModel(SparkMLEncoder encoder){
Schema schema = encodeSchema(encoder);
Label label = schema.getLabel();
org.dmg.pmml.Model model = encodeModel(schema);
List sparkOutputFields = registerOutputFields(label, model, encoder);
if(sparkOutputFields != null && !sparkOutputFields.isEmpty()){
org.dmg.pmml.Model finalModel = MiningModelUtil.getFinalModel(model);
Output output = ModelUtil.ensureOutput(finalModel);
List outputFields = output.getOutputFields();
outputFields.addAll(sparkOutputFields);
}
return model;
}
public T getModel(){
return getObject();
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy