// net.librec.data.model.AbstractDataModel - Maven / Gradle / Ivy
// Show all versions of librec-core - Show documentation
/**
* Copyright (C) 2016 LibRec
*
* This file is part of LibRec.
* LibRec is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* LibRec is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with LibRec. If not, see <http://www.gnu.org/licenses/>.
*/
package net.librec.data.model;
import com.google.common.collect.BiMap;
import net.librec.common.LibrecException;
import net.librec.conf.Configured;
import net.librec.data.*;
import net.librec.math.structure.DataFrame;
import net.librec.math.structure.DataSet;
import net.librec.util.DriverClassUtil;
import net.librec.util.ReflectionUtil;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import java.io.IOException;
/**
* An AbstractDataModel is the base data access class for the input
* file.
*
* @author WangYuFeng
*/
public abstract class AbstractDataModel extends Configured implements DataModel {
    /**
     * Logger obtained for the runtime class of this instance
     * ({@code this.getClass()}), not for this abstract base class.
     */
    protected final Log LOG = LogFactory.getLog(this.getClass());

    /**
     * Data context, created from the configuration in {@link #buildDataModel()}.
     */
    protected DataContext context;

    /**
     * Train data set, taken from the data splitter.
     */
    protected DataSet trainDataSet;

    /**
     * Test data set, taken from the data splitter.
     */
    protected DataSet testDataSet;

    /**
     * Valid data set. Only assigned by {@link #nextFold()}.
     */
    protected DataSet validDataSet;

    /**
     * The convertor of the model {@link net.librec.data.DataConvertor}.
     * It is not assigned anywhere in this class.
     */
    protected DataConvertor dataConvertor;

    /**
     * Data Splitter {@link net.librec.data.DataSplitter}. If it is already
     * set when {@link #buildSplitter()} runs, that instance is used instead
     * of creating one from the configuration.
     */
    public DataSplitter dataSplitter;

    /**
     * Data Appender {@link DataAppender}, created by {@link #buildFeature()}.
     */
    public DataAppender dataAppender;

    /**
     * Feature Appender {@link FeatureAppender}, created by
     * {@link #buildFeatureAppender()}.
     */
    public FeatureAppender featureAppender;

    /**
     * Build Convert.
     *
     * @throws LibrecException if error occurs when building convert.
     */
    protected abstract void buildConvert() throws LibrecException;

    /**
     * Build Splitter.
     * <p>
     * Creates the splitter named by the configuration key
     * {@code data.model.splitter} unless {@link #dataSplitter} is already
     * set, hands it the data convertor, splits the data and stores the
     * resulting train and test sets.
     *
     * @throws LibrecException if error occurs when building splitter.
     */
    protected void buildSplitter() throws LibrecException {
        String splitter = conf.get("data.model.splitter");
        try {
            if (dataSplitter == null) {
                dataSplitter = (DataSplitter) ReflectionUtil.newInstance(DriverClassUtil.getClass(splitter), conf);
            }
            // The splitter may still be absent after the reflective creation,
            // in which case the data sets are left untouched.
            if (dataSplitter != null) {
                dataSplitter.setDataConvertor(dataConvertor);
                dataSplitter.splitData();
                trainDataSet = dataSplitter.getTrainData();
                testDataSet = dataSplitter.getTestData();
                // NOTE(review): validDataSet is not assigned here; it is only
                // populated by nextFold().
            }
        } catch (ClassNotFoundException e) {
            throw new LibrecException(e);
        }
    }

    /**
     * Build appender data.
     * <p>
     * Does nothing when the configuration key {@code data.appender.class}
     * is blank. Otherwise the appender is instantiated, given the user and
     * item mappings, and asked to process its data.
     *
     * @throws LibrecException if error occurs when building appender.
     */
    protected void buildFeature() throws LibrecException {
        String feature = conf.get("data.appender.class");
        if (StringUtils.isNotBlank(feature)) {
            try {
                dataAppender = (DataAppender) ReflectionUtil.newInstance(DriverClassUtil.getClass(feature), conf);
                dataAppender.setUserMappingData(getUserMappingData());
                dataAppender.setItemMappingData(getItemMappingData());
                dataAppender.processData();
            } catch (ClassNotFoundException e) {
                throw new LibrecException(e);
            } catch (IOException e) {
                throw new LibrecException(e);
            }
        }
    }

    /**
     * Build feature appender.
     * <p>
     * Does nothing when the configuration key {@code feature.appender.class}
     * is blank. Otherwise the feature appender is instantiated, given the
     * user and item mappings, and asked to process its data.
     *
     * @throws LibrecException if error occurs when building feature appender.
     */
    protected void buildFeatureAppender() throws LibrecException {
        String featureAppenderClass = conf.get("feature.appender.class");
        if (StringUtils.isNotBlank(featureAppenderClass)) {
            try {
                featureAppender = (FeatureAppender) ReflectionUtil.newInstance(DriverClassUtil.getClass(featureAppenderClass), conf);
                featureAppender.setUserMappingData(getUserMappingData());
                featureAppender.setItemMappingData(getItemMappingData());
                featureAppender.processData();
            } catch (ClassNotFoundException e) {
                throw new LibrecException(e);
            } catch (IOException e) {
                throw new LibrecException(e);
            }
        }
    }

    /**
     * Build data model.
     * <p>
     * Steps, in order: create the data context; build the convertor unless
     * {@code data.convert.read.ready} is already set; build the splitter;
     * build the data appender when {@code data.appender.class} is configured
     * and {@code data.appender.read.ready} is not set; log the train/test
     * cardinalities when both sets are non-empty; build the feature appender
     * when {@code feature.appender.class} is configured and
     * {@code feature.appender.read.ready} is not set. Each {@code *.read.ready}
     * flag is set to {@code true} once its step has completed.
     *
     * @throws LibrecException if error occurs when building model.
     */
    @Override
    public void buildDataModel() throws LibrecException {
        context = new DataContext(conf);
        if (!conf.getBoolean("data.convert.read.ready")) {
            buildConvert();
            LOG.info("Transform data to Convertor successfully!");
            conf.setBoolean("data.convert.read.ready", true);
        }
        buildSplitter();
        if (StringUtils.isNotBlank(conf.get("data.appender.class")) && !conf.getBoolean("data.appender.read.ready")) {
            buildFeature();
            LOG.info("Transform data to Feature successfully!");
            conf.setBoolean("data.appender.read.ready", true);
        }
        LOG.info("Split data to train Set and test Set successfully!");
        if (trainDataSet != null && trainDataSet.size() > 0 && testDataSet != null && testDataSet.size() > 0) {
            LOG.info("Data cardinality of training is " + trainDataSet.size());
            LOG.info("Data cardinality of testing is " + testDataSet.size());
        }
        if (StringUtils.isNotBlank(conf.get("feature.appender.class")) && !conf.getBoolean("feature.appender.read.ready")) {
            buildFeatureAppender();
            LOG.info("Feature appender loaded successfully!");
            conf.setBoolean("feature.appender.read.ready", true);
        }
    }

    /**
     * Check whether there is another fold.
     * <p>
     * The decision is delegated to {@link DataSplitter#nextFold()}. When no
     * splitter is available (for example because {@link #buildDataModel()}
     * has not been called yet) there is nothing to iterate over, so
     * {@code false} is returned instead of failing with a
     * {@link NullPointerException}.
     *
     * @return {@code true} if the splitter reports another fold,
     *         {@code false} otherwise or when no splitter is set.
     */
    @Override
    public boolean hasNextFold() {
        // Whether or not there is a next fold is decided by the splitter.
        return dataSplitter != null && dataSplitter.nextFold();
    }

    /**
     * Load the train, test and valid data sets of the current fold from the
     * splitter into this model.
     *
     * @throws IllegalStateException if no data splitter is available.
     */
    @Override
    public void nextFold() {
        if (dataSplitter == null) {
            // Fail with an explicit message instead of a bare NullPointerException.
            throw new IllegalStateException(
                    "Data splitter is not initialized; call buildDataModel() before nextFold()");
        }
        trainDataSet = dataSplitter.getTrainData();
        testDataSet = dataSplitter.getTestData();
        validDataSet = dataSplitter.getValidData();
    }

    /**
     * Load data model. This base implementation does nothing.
     *
     * @throws LibrecException if error occurs during loading
     */
    @Override
    public void loadDataModel() throws LibrecException {
        // TODO Auto-generated method stub
    }

    /**
     * Save data model. This base implementation does nothing.
     *
     * @throws LibrecException if error occurs during saving
     */
    @Override
    public void saveDataModel() throws LibrecException {
        // TODO Auto-generated method stub
    }

    /**
     * Get train data set.
     *
     * @return the train data set of data model.
     */
    @Override
    public DataSet getTrainDataSet() {
        return trainDataSet;
    }

    /**
     * Get test data set.
     *
     * @return the test data set of data model.
     */
    @Override
    public DataSet getTestDataSet() {
        return testDataSet;
    }

    /**
     * Get valid data set.
     *
     * @return the valid data set of data model.
     */
    @Override
    public DataSet getValidDataSet() {
        return validDataSet;
    }

    /**
     * Get data splitter.
     *
     * @return the splitter of data model.
     */
    @Override
    public DataSplitter getDataSplitter() {
        return dataSplitter;
    }

    /**
     * Get data appender.
     *
     * @return the appender of data model.
     */
    @Override
    public DataAppender getDataAppender() {
        return dataAppender;
    }

    /**
     * Get feature appender.
     *
     * @return feature appender of data model
     */
    public FeatureAppender getFeatureAppender() {
        return featureAppender;
    }

    /**
     * Get data context.
     *
     * @return the context see {@link net.librec.data.DataContext}.
     */
    @Override
    public DataContext getContext() {
        return context;
    }

    /**
     * Get datetime data set.
     *
     * @return always {@code null} in this base implementation.
     */
    @Override
    public DataSet getDatetimeDataSet() {
        return null;
    }

    /**
     * Get user mapping data.
     *
     * @return the inner mapping that {@link DataFrame} holds under the
     *         attribute name {@code "user"}.
     */
    @Override
    public BiMap getUserMappingData() {
        return DataFrame.getInnerMapping("user");
    }

    /**
     * Get item mapping data.
     *
     * @return the inner mapping that {@link DataFrame} holds under the
     *         attribute name {@code "item"}.
     */
    @Override
    public BiMap getItemMappingData() {
        return DataFrame.getInnerMapping("item");
    }
}