org.jpmml.converter.visitors.DataDictionaryCleaner Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of pmml-converter Show documentation
Show all versions of pmml-converter Show documentation
JPMML class model converters
/*
* Copyright (c) 2016 Villu Ruusmann
*
* This file is part of JPMML-Converter
*
* JPMML-Converter is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* JPMML-Converter is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with JPMML-Converter. If not, see <https://www.gnu.org/licenses/>.
*/
package org.jpmml.converter.visitors;
import java.util.Collection;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import org.dmg.pmml.DataDictionary;
import org.dmg.pmml.DataField;
import org.dmg.pmml.Field;
import org.dmg.pmml.MiningField;
import org.dmg.pmml.MiningSchema;
import org.dmg.pmml.Model;
import org.dmg.pmml.PMML;
import org.dmg.pmml.PMMLObject;
/**
*
* A Visitor that removes redundant {@link DataField data fields} from the {@link DataDictionary data dictionary}.
*
*/
public class DataDictionaryCleaner extends ActiveFieldFinder {
private Set> nonActiveFields = new LinkedHashSet<>();
@Override
public void reset(){
super.reset();
this.nonActiveFields.clear();
}
@Override
public PMMLObject popParent(){
PMMLObject parent = super.popParent();
if(parent instanceof Model){
Model model = (Model)parent;
processModel(model);
} else
if(parent instanceof PMML){
PMML pmml = (PMML)parent;
DataDictionary dataDictionary = pmml.requireDataDictionary();
processDataDictionary(dataDictionary);
}
return parent;
}
private void processModel(Model model){
Set> nonActiveFields = getNonActiveFields();
MiningSchema miningSchema = model.requireMiningSchema();
if(miningSchema.hasMiningFields()){
Set fieldNames = new LinkedHashSet<>();
List miningFields = miningSchema.getMiningFields();
for(MiningField miningField : miningFields){
String fieldName = miningField.getName();
MiningField.UsageType usageType = miningField.getUsageType();
switch(usageType){
case ACTIVE:
break;
default:
fieldNames.add(fieldName);
break;
}
}
if(!fieldNames.isEmpty()){
Collection> modelFields = getFields(model);
nonActiveFields.addAll(FieldUtil.selectAll(modelFields, fieldNames));
}
}
}
private void processDataDictionary(DataDictionary dataDictionary){
if(dataDictionary.hasDataFields()){
List dataFields = dataDictionary.getDataFields();
Set referencedDataFields = getReferencedDataFields();
dataFields.retainAll(referencedDataFields);
}
}
private Set getReferencedDataFields(){
FieldDependencyResolver fieldDependencyResolver = getFieldDependencyResolver();
Set> fields = new HashSet<>(getActiveFields());
fields.addAll(getNonActiveFields());
fieldDependencyResolver.expand(fields, fieldDependencyResolver.getLocalDerivedFields());
fieldDependencyResolver.expand(fields, fieldDependencyResolver.getGlobalDerivedFields());
return (Set)fields;
}
private Set> getNonActiveFields(){
return this.nonActiveFields;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy