
// org.jpmml.model.visitors.DataDictionaryCleaner (Maven / Gradle / Ivy)
/*
* Copyright (c) 2016 Villu Ruusmann
*/
package org.jpmml.model.visitors;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import org.dmg.pmml.DataDictionary;
import org.dmg.pmml.DataField;
import org.dmg.pmml.Field;
import org.dmg.pmml.FieldName;
import org.dmg.pmml.FieldUsageType;
import org.dmg.pmml.MiningField;
import org.dmg.pmml.MiningSchema;
import org.dmg.pmml.Model;
import org.dmg.pmml.PMML;
import org.dmg.pmml.PMMLObject;
import org.dmg.pmml.Visitable;
import org.jpmml.model.FieldUtil;
/**
 * A Visitor that removes redundant {@link DataField data fields} from the {@link DataDictionary data dictionary}.
 *
 * <p>
 * While parents are popped during traversal, the target fields of every visited {@link Model} are collected.
 * When the {@link PMML} element itself is popped, its data dictionary is reduced to the fields
 * returned by {@link #getUsedDataFields()}.
 * </p>
 */
public class DataDictionaryCleaner extends ModelCleaner {

    /** Fields that some visited model declares with a TARGET or PREDICTED usage type. */
    private final Set<Field> targetFields = new HashSet<>();

    /**
     * Clears the target fields collected by a previous run, then delegates to the superclass.
     *
     * @param visitable the object handed on to {@code super.applyTo}
     */
    @Override
    public void applyTo(Visitable visitable){
        this.targetFields.clear();

        super.applyTo(visitable);
    }

    /**
     * Pops the parent via the superclass and post-processes it: a {@link Model} contributes
     * its target fields, a {@link PMML} gets its data dictionary (if any) cleaned.
     *
     * @return the parent returned by {@code super.popParent()}, unchanged
     */
    @Override
    public PMMLObject popParent(){
        PMMLObject parent = super.popParent();

        if(parent instanceof Model){
            processModel((Model)parent);
        } else if(parent instanceof PMML){
            DataDictionary dataDictionary = ((PMML)parent).getDataDictionary();

            if(dataDictionary != null){
                processDataDictionary(dataDictionary);
            }
        }

        return parent;
    }

    /**
     * Records the fields that the mining schema of the given model marks as TARGET or PREDICTED.
     *
     * @param model the model whose mining schema is inspected
     */
    private void processModel(Model model){
        MiningSchema miningSchema = model.getMiningSchema();

        if(miningSchema == null || !miningSchema.hasMiningFields()){
            return;
        }

        // Insertion-ordered set of the names of all target-like mining fields.
        Set<FieldName> targetFieldNames = new LinkedHashSet<>();

        List<MiningField> miningFields = miningSchema.getMiningFields();
        for(MiningField miningField : miningFields){
            FieldName name = miningField.getName();

            FieldUsageType fieldUsage = miningField.getUsageType();
            switch(fieldUsage){
                case TARGET:
                case PREDICTED:
                    targetFieldNames.add(name);
                    break;
                default:
                    break;
            }
        }

        if(!targetFieldNames.isEmpty()){
            // Resolve the collected names against the fields visible to this model.
            Set<Field> modelFields = getFields(model);

            // getTargetFields() hands out the live backing set, so additions are kept.
            getTargetFields().addAll(FieldUtil.selectAll(modelFields, targetFieldNames));
        }
    }

    /**
     * Drops every data field that is not contained in the set of used data fields.
     *
     * @param dataDictionary the data dictionary whose field list is modified in place
     */
    private void processDataDictionary(DataDictionary dataDictionary){

        if(dataDictionary.hasDataFields()){
            List<DataField> dataFields = dataDictionary.getDataFields();

            dataFields.retainAll(getUsedDataFields());
        }
    }

    /**
     * Builds the set of fields to keep: the active fields plus the target fields, expanded
     * through the local and then the global derived fields of the field dependency resolver.
     *
     * @return the fields that must stay in the data dictionary
     */
    private Set<DataField> getUsedDataFields(){
        FieldDependencyResolver fieldDependencyResolver = getFieldDependencyResolver();

        Set<Field> usedFields = new HashSet<>(getActiveFields());
        usedFields.addAll(getTargetFields());

        fieldDependencyResolver.expand(usedFields, fieldDependencyResolver.getLocalDerivedFields());
        fieldDependencyResolver.expand(usedFields, fieldDependencyResolver.getGlobalDerivedFields());

        // Deliberate raw cast: the only caller uses the result as a membership filter for
        // retainAll, which relies on contains() and never reads elements as DataField.
        @SuppressWarnings({"rawtypes", "unchecked"})
        Set<DataField> usedDataFields = (Set)usedFields;

        return usedDataFields;
    }

    /**
     * Returns the target fields collected so far.
     *
     * @return the live, mutable backing set (not a copy)
     */
    public Set<Field> getTargetFields(){
        return this.targetFields;
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy