
package cc.mallet.fst;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
import java.util.Random;
import java.util.logging.Logger;
import cc.mallet.optimize.LimitedMemoryBFGS;
import cc.mallet.optimize.Optimizer;
import cc.mallet.types.ExpGain;
import cc.mallet.types.FeatureInducer;
import cc.mallet.types.FeatureSelection;
import cc.mallet.types.FeatureVector;
import cc.mallet.types.GradientGain;
import cc.mallet.types.InfoGain;
import cc.mallet.types.Instance;
import cc.mallet.types.InstanceList;
import cc.mallet.types.Label;
import cc.mallet.types.LabelAlphabet;
import cc.mallet.types.LabelSequence;
import cc.mallet.types.LabelVector;
import cc.mallet.types.RankedFeatureVector;
import cc.mallet.types.Sequence;
import cc.mallet.util.MalletLogger;
/**
* Unlike ClassifierTrainer, TransducerTrainer is not "stateless" between calls to train: a TransducerTrainer is
* constructed paired with a specific Transducer and can only train that Transducer. The CRF itself stores, and has
* methods for, FeatureSelection and weight freezing. This trainer stores, and has methods for determining, the
* contents, dimensions, sparsity, and feature induction of the CRF's weights, as determined by the training data.
*
* Note: In the future this class may go away in favor of some default version of CRFTrainerByValueGradients.
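*
* <p>
* A minimal usage sketch (hypothetical data; a real application builds its own InstanceList):
*
* <pre>{@code
* InstanceList training = ...; // sequence-labeled training instances
* CRF crf = new CRF(training.getPipe(), null);
* crf.addStatesForLabelsConnectedAsIn(training);
* CRFTrainerByLabelLikelihood trainer = new CRFTrainerByLabelLikelihood(crf);
* trainer.setGaussianPriorVariance(1.0);
* boolean converged = trainer.train(training, 500);
* }</pre>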
*/
public class CRFTrainerByLabelLikelihood extends TransducerTrainer implements TransducerTrainer.ByOptimization {
/*
* KT: 14.08.2008 the number of rounds of successive convergence used by the trainOptimized* methods
*/
private int minConvRounds = 5;
public void setMinConvRounds(int rounds) {
this.minConvRounds = rounds;
}
public int getMinConvRounds() {
return this.minConvRounds;
}
private static Logger logger = MalletLogger.getLogger(CRFTrainerByLabelLikelihood.class.getName());
static final double DEFAULT_GAUSSIAN_PRIOR_VARIANCE = 1.0;
static final double DEFAULT_HYPERBOLIC_PRIOR_SLOPE = 0.2;
static final double DEFAULT_HYPERBOLIC_PRIOR_SHARPNESS = 10.0;
CRF crf;
// OptimizableCRF ocrf;
CRFOptimizableByLabelLikelihood ocrf;
Optimizer opt;
int iterationCount = 0;
boolean converged;
boolean usingHyperbolicPrior = false;
double gaussianPriorVariance = DEFAULT_GAUSSIAN_PRIOR_VARIANCE;
double hyperbolicPriorSlope = DEFAULT_HYPERBOLIC_PRIOR_SLOPE;
double hyperbolicPriorSharpness = DEFAULT_HYPERBOLIC_PRIOR_SHARPNESS;
boolean useSparseWeights = true;
boolean useNoWeights = false; // TODO remove this; it is just for debugging
private transient boolean useSomeUnsupportedTrick = true;
// Various values from CRF acting as indicators of when we need to ...
private int cachedValueWeightsStamp = -1; // ... re-calculate expectations and values to getValue() because weights'
// values changed
private int cachedGradientWeightsStamp = -1; // ... re-calculate to getValueGradient() because weights' values
// changed
private int cachedWeightsStructureStamp = -1; // ... re-allocate crf.weights, expectations & constraints because new
// states, transitions
// Use ocrf.trainingSet to see when we need to re-allocate crf.weights, expectations & constraints because we are
// using a different training list than last time
// xxx temporary hack. This is quite useful to have, though!! -cas
public boolean printGradient = false;
public CRFTrainerByLabelLikelihood(CRF crf) {
this.crf = crf;
}
@Override
public Transducer getTransducer() {
return crf;
}
public CRF getCRF() {
return crf;
}
@Override
public Optimizer getOptimizer() {
return opt;
}
public boolean isConverged() {
return converged;
}
@Override
public boolean isFinishedTraining() {
return converged;
}
@Override
public int getIteration() {
return iterationCount;
}
/**
* Use this method to specify whether or not factors are added to the CRF by this trainer. If you have already set
* up the factors in your CRF, you may not want the trainer to add additional factors.
*
* @param flag
* If true, this trainer adds no factors to the CRF.
*/
public void setAddNoFactors(boolean flag) {
this.useNoWeights = flag;
}
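/**
* Returns a CRFOptimizableByLabelLikelihood for this CRF and training set, creating and caching one if necessary.
* Whenever the CRF's weight structure has changed since the last call, the weights are first (re)allocated,
* sparsely or densely according to setUseSparseWeights.
*/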
public CRFOptimizableByLabelLikelihood getOptimizableCRF(InstanceList trainingSet) {
if (cachedWeightsStructureStamp != crf.weightsStructureChangeStamp) {
if (!useNoWeights) {
if (useSparseWeights) {
crf.setWeightsDimensionAsIn(trainingSet, useSomeUnsupportedTrick);
} else {
crf.setWeightsDimensionDensely();
}
}
// reallocateSufficientStatistics(); // Not necessary here because it is done in the constructor for
// OptimizableCRF
ocrf = null;
cachedWeightsStructureStamp = crf.weightsStructureChangeStamp;
}
if (ocrf == null || ocrf.trainingSet != trainingSet) {
// ocrf = new OptimizableCRF (crf, trainingSet);
ocrf = new CRFOptimizableByLabelLikelihood(crf, trainingSet);
ocrf.setGaussianPriorVariance(gaussianPriorVariance);
ocrf.setHyperbolicPriorSharpness(hyperbolicPriorSharpness);
ocrf.setHyperbolicPriorSlope(hyperbolicPriorSlope);
ocrf.setUseHyperbolicPrior(usingHyperbolicPrior);
opt = null;
}
return ocrf;
}
public Optimizer getOptimizer(InstanceList trainingSet) {
getOptimizableCRF(trainingSet); // this will set this.ocrf if necessary
if (opt == null || ocrf != opt.getOptimizable()) {
opt = new LimitedMemoryBFGS(ocrf); // Alternative: opt = new ConjugateGradient (0.001);
}
return opt;
}
// Java question:
// If I make a non-static inner class CRF.Trainer,
// can that class be subclassed in another .java file,
// and can that subclass still have access to all the CRF's
// instance variables?
// ANSWER: Yes and yes, but you have to use special syntax in the subclass ctor (see mallet-dev archive) -cas
public boolean trainIncremental(InstanceList training) {
return train(training, Integer.MAX_VALUE);
}
/**
* KT, 14.10.2008
*
* "optimized" training where termination criterion is later that in "normal" train functions. Optimizer must
* converge n successive rounds. This avoid early stopping due to flip on gradient.
*
* This method is an extention of the respective train method.
*
* Number of successive rounds can be set by setMinConvRounds(int rounds). Default is set to 5!
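*
* <p>
* A minimal sketch (trainer and training list as in the class-level example):
*
* <pre>{@code
* trainer.setMinConvRounds(10); // require more successive converged rounds than the default 5
* boolean converged = trainer.trainOptimized(training);
* }</pre>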
*/
public boolean trainOptimized(InstanceList trainingSet) {
return trainOptimized(trainingSet, Integer.MAX_VALUE);
}
public boolean trainOptimized(InstanceList trainingSet, int numIterations) {
if (numIterations <= 0) {
return false;
}
assert trainingSet.size() > 0;
getOptimizableCRF(trainingSet); // This will set this.ocrf if necessary
getOptimizer(trainingSet); // This will set this.opt if necessary
int succConv = 0;
boolean convergedByOptimizer = false;
boolean convergedByException = false;
logger.info("CRF about to train with " + numIterations + " iterations");
for (int i = 0; i < numIterations; i++) {
try {
convergedByOptimizer = opt.optimize(1);
iterationCount++;
logger.info("CRF finished one iteration of maximizer, i=" + i);
runEvaluators();
} catch (IllegalArgumentException e) {
e.printStackTrace();
logger.info("Catching exception; saying converged.");
convergedByException = true;
}
if (convergedByOptimizer) {
if (succConv >= this.minConvRounds) {
logger.info("CRF training has converged by optimizer after " + succConv
+ " successive convergence rounds, i=" + i);
converged = true;
break;
} else {
logger.info("CRF optimizer converged, but need more successive convergence rounds, succConv="
+ succConv);
succConv++;
}
} else {
// if in a round optimizer has not converged, reset variable
succConv = 0;
}
if (convergedByException) {
logger.info("CRF training has converged by exception, i=" + i);
converged = true;
break;
}
}
return converged;
}
public boolean trainOptimized(InstanceList training, int numIterationsPerProportion, double[] trainingProportions) {
int trainingIteration = 0;
assert trainingProportions.length > 0;
boolean converged = false;
for (int i = 0; i < trainingProportions.length; i++) {
assert trainingProportions[i] <= 1.0;
logger.info("Training on " + trainingProportions[i] + "% of the data this round.");
if (trainingProportions[i] == 1.0) {
converged = this.trainOptimized(training, numIterationsPerProportion);
} else {
converged = this.trainOptimized(
training.split(new Random(1),
new double[] { trainingProportions[i], 1 - trainingProportions[i] })[0],
numIterationsPerProportion);
}
trainingIteration += numIterationsPerProportion;
}
return converged;
}
@Override
public boolean train(InstanceList trainingSet, int numIterations) {
if (numIterations <= 0) {
return false;
}
assert trainingSet.size() > 0;
getOptimizableCRF(trainingSet); // This will set this.ocrf if necessary
getOptimizer(trainingSet); // This will set this.opt if necessary
boolean converged = false;
logger.info("CRF about to train with " + numIterations + " iterations");
for (int i = 0; i < numIterations; i++) {
try {
converged = opt.optimize(1);
iterationCount++;
logger.info("CRF finished one iteration of maximizer, i=" + i);
runEvaluators();
} catch (Exception e) {
// Any exception from the optimizer (e.g. an IllegalArgumentException from the line search) is
// treated as convergence.
e.printStackTrace();
logger.info("Catching exception; saying converged.");
converged = true;
}
if (converged) {
logger.info("CRF training has converged, i=" + i);
break;
}
}
return converged;
}
/**
* Train a CRF on various-sized subsets of the data. This method is typically used to accelerate training by quickly
* getting to reasonable parameters on only a subset of the data first, then training on progressively more data.
*
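* <p>
* A minimal sketch (trainer and training list as in the class-level example): up to 100 iterations per stage, on
* 20%, then 50%, then all of the data:
*
* <pre>{@code
* boolean converged = trainer.train(training, 100, new double[] { 0.2, 0.5, 1.0 });
* }</pre>
*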
* @param training
* The training Instances.
* @param numIterationsPerProportion
* Maximum number of Maximizer iterations per training proportion.
* @param trainingProportions
* If non-null, train on increasingly larger portions of the data, e.g. new double[] {0.2, 0.5, 1.0}.
* This can sometimes speed up convergence. Be sure to end with 1.0 if you want to train on all the data
* in the end.
* @return True if training has converged.
*/
public boolean train(InstanceList training, int numIterationsPerProportion, double[] trainingProportions) {
int trainingIteration = 0;
assert trainingProportions.length > 0;
boolean converged = false;
for (int i = 0; i < trainingProportions.length; i++) {
assert trainingProportions[i] <= 1.0;
logger.info("Training on " + trainingProportions[i] + "% of the data this round.");
if (trainingProportions[i] == 1.0) {
converged = this.train(training, numIterationsPerProportion);
} else {
converged = this.train(
training.split(new Random(1),
new double[] { trainingProportions[i], 1 - trainingProportions[i] })[0],
numIterationsPerProportion);
}
trainingIteration += numIterationsPerProportion;
}
return converged;
}
public boolean trainWithFeatureInduction(InstanceList trainingData, InstanceList validationData,
InstanceList testingData, TransducerEvaluator eval, int numIterations,
int numIterationsBetweenFeatureInductions, int numFeatureInductions, int numFeaturesPerFeatureInduction,
double trueLabelProbThreshold, boolean clusteredFeatureInduction, double[] trainingProportions) {
return trainWithFeatureInduction(trainingData, validationData, testingData, eval, numIterations,
numIterationsBetweenFeatureInductions, numFeatureInductions, numFeaturesPerFeatureInduction,
trueLabelProbThreshold, clusteredFeatureInduction, trainingProportions, "exp");
}
/**
* Train a CRF using feature induction to generate conjunctions of features. Feature induction is run periodically
* during training. The features are added to improve performance on the mislabeled instances, with the specific
* scoring criterion given by the {@link FeatureInducer} specified by gainName.
*
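* <p>
* A minimal sketch (names as in the class-level example), via the convenience overload that defaults gainName to
* "exp": up to 10 induction rounds, 50 features per round, 20 optimizer iterations between rounds, treating an
* instance as an error when the true label's probability falls below 0.5:
*
* <pre>{@code
* trainer.trainWithFeatureInduction(training, null, null, null, 500, 20, 10, 50, 0.5, false, null);
* }</pre>
*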
* @param trainingData
* The training Instances.
* @param validationData
* The validation Instances.
* @param testingData
* The testing Instances.
* @param eval
* For evaluation during training.
* @param numIterations
* Maximum number of Maximizer iterations.
* @param numIterationsBetweenFeatureInductions
* Number of maximizer iterations between each call to the Feature Inducer.
* @param numFeatureInductions
* Maximum number of rounds of feature induction.
* @param numFeaturesPerFeatureInduction
* Maximum number of features to induce at each round of induction.
* @param trueLabelProbThreshold
* If the model's probability of the true Label of an Instance is less than this value, it is added as an
* error instance to the {@link FeatureInducer}.
* @param clusteredFeatureInduction
* If true, a separate {@link FeatureInducer} is constructed for each label pair. This can avoid inducing
* a disproportionate number of features for a single label.
* @param trainingProportions
* If non-null, train on increasingly larger portions of the data (e.g. [0.2, 0.5, 1.0]). This can
* sometimes speed up convergence.
* @param gainName
* The type of {@link FeatureInducer} to use. One of "exp", "grad", or "info" for {@link ExpGain},
* {@link GradientGain}, or {@link InfoGain}.
* @return True if training has converged.
*/
public boolean trainWithFeatureInduction(InstanceList trainingData, InstanceList validationData,
InstanceList testingData, TransducerEvaluator eval, int numIterations,
int numIterationsBetweenFeatureInductions, int numFeatureInductions, int numFeaturesPerFeatureInduction,
double trueLabelProbThreshold, boolean clusteredFeatureInduction, double[] trainingProportions,
String gainName) {
int trainingIteration = 0;
int numLabels = crf.outputAlphabet.size();
crf.globalFeatureSelection = trainingData.getFeatureSelection();
if (crf.globalFeatureSelection == null) {
// Mask out all features; some will be added later by FeatureInducer.induceFeaturesFor(.)
crf.globalFeatureSelection = new FeatureSelection(trainingData.getDataAlphabet());
trainingData.setFeatureSelection(crf.globalFeatureSelection);
}
// TODO Careful! If validationData and testingData get removed as arguments to this method
// then the next two lines of work will have to be done somewhere.
if (validationData != null) {
validationData.setFeatureSelection(crf.globalFeatureSelection);
}
if (testingData != null) {
testingData.setFeatureSelection(crf.globalFeatureSelection);
}
for (int featureInductionIteration = 0; featureInductionIteration < numFeatureInductions; featureInductionIteration++) {
// Print out some feature information
logger.info("Feature induction iteration " + featureInductionIteration);
// Train the CRF
InstanceList theTrainingData = trainingData;
if (trainingProportions != null && featureInductionIteration < trainingProportions.length) {
logger.info("Training on " + (100 * trainingProportions[featureInductionIteration])
+ "% of the data this round.");
InstanceList[] sampledTrainingData = trainingData.split(new Random(1),
new double[] { trainingProportions[featureInductionIteration],
1 - trainingProportions[featureInductionIteration] });
theTrainingData = sampledTrainingData[0];
theTrainingData.setFeatureSelection(crf.globalFeatureSelection); // xxx necessary?
logger.info(" which is " + theTrainingData.size() + " instances");
}
boolean converged = false;
if (featureInductionIteration != 0) {
// Don't train until we have added some features
converged = this.train(theTrainingData, numIterationsBetweenFeatureInductions);
}
trainingIteration += numIterationsBetweenFeatureInductions;
logger.info("Starting feature induction with " + crf.inputAlphabet.size() + " features.");
// Create the list of error tokens, for both unclustered and clustered feature induction
InstanceList errorInstances = new InstanceList(trainingData.getDataAlphabet(),
trainingData.getTargetAlphabet());
// This errorInstances.featureSelection will get examined by FeatureInducer,
// so it can know how to add "new" singleton features
errorInstances.setFeatureSelection(crf.globalFeatureSelection);
ArrayList errorLabelVectors = new ArrayList();
InstanceList clusteredErrorInstances[][] = new InstanceList[numLabels][numLabels];
ArrayList clusteredErrorLabelVectors[][] = new ArrayList[numLabels][numLabels];
for (int i = 0; i < numLabels; i++) {
for (int j = 0; j < numLabels; j++) {
clusteredErrorInstances[i][j] = new InstanceList(trainingData.getDataAlphabet(),
trainingData.getTargetAlphabet());
clusteredErrorInstances[i][j].setFeatureSelection(crf.globalFeatureSelection);
clusteredErrorLabelVectors[i][j] = new ArrayList();
}
}
for (int i = 0; i < theTrainingData.size(); i++) {
logger.info("instance=" + i);
Instance instance = theTrainingData.get(i);
Sequence input = (Sequence) instance.getData();
Sequence trueOutput = (Sequence) instance.getTarget();
assert input.size() == trueOutput.size();
SumLattice lattice = crf.sumLatticeFactory.newSumLattice(crf, input, (Sequence) null,
(Transducer.Incrementor) null, (LabelAlphabet) theTrainingData.getTargetAlphabet());
int prevLabelIndex = 0; // This will put extra error instances in this cluster
for (int j = 0; j < trueOutput.size(); j++) {
Label label = ((LabelSequence) trueOutput).getLabelAtPosition(j);
assert label != null;
// System.out.println ("Instance="+i+" position="+j+"
// fv="+lattice.getLabelingAtPosition(j).toString(true));
LabelVector latticeLabeling = lattice.getLabelingAtPosition(j);
double trueLabelProb = latticeLabeling.value(label.getIndex());
int labelIndex = latticeLabeling.getBestIndex();
// System.out.println ("position="+j+" trueLabelProb="+trueLabelProb);
if (trueLabelProb < trueLabelProbThreshold) {
logger.info("Adding error: instance=" + i + " position=" + j + " prtrue=" + trueLabelProb
+ (label == latticeLabeling.getBestLabel() ? " " : " *") + " truelabel=" + label
+ " predlabel=" + latticeLabeling.getBestLabel() + " fv="
+ ((FeatureVector) input.get(j)).toString(true));
errorInstances.add(input.get(j), label, null, null);
errorLabelVectors.add(latticeLabeling);
clusteredErrorInstances[prevLabelIndex][labelIndex].add(input.get(j), label, null, null);
clusteredErrorLabelVectors[prevLabelIndex][labelIndex].add(latticeLabeling);
}
prevLabelIndex = labelIndex;
}
}
logger.info("Error instance list size = " + errorInstances.size());
if (clusteredFeatureInduction) {
FeatureInducer[][] klfi = new FeatureInducer[numLabels][numLabels];
for (int i = 0; i < numLabels; i++) {
for (int j = 0; j < numLabels; j++) {
// Note that we may see some "impossible" transitions here (like O->I in an OIB model)
// because we are using lattice gammas to get the predicted label, not Viterbi.
// I don't believe this does any harm, and may do some good.
logger.info("Doing feature induction for " + crf.outputAlphabet.lookupObject(i) + " -> "
+ crf.outputAlphabet.lookupObject(j) + " with " + clusteredErrorInstances[i][j].size()
+ " instances");
if (clusteredErrorInstances[i][j].size() < 20) {
logger.info(
"..skipping because only " + clusteredErrorInstances[i][j].size() + " instances.");
continue;
}
int s = clusteredErrorLabelVectors[i][j].size();
LabelVector[] lvs = new LabelVector[s];
for (int k = 0; k < s; k++) {
lvs[k] = (LabelVector) clusteredErrorLabelVectors[i][j].get(k);
}
RankedFeatureVector.Factory gainFactory = null;
if (gainName.equals("exp")) {
gainFactory = new ExpGain.Factory(lvs, gaussianPriorVariance);
} else if (gainName.equals("grad")) {
gainFactory = new GradientGain.Factory(lvs);
} else if (gainName.equals("info")) {
gainFactory = new InfoGain.Factory();
}
klfi[i][j] = new FeatureInducer(gainFactory, clusteredErrorInstances[i][j],
numFeaturesPerFeatureInduction, 2 * numFeaturesPerFeatureInduction,
2 * numFeaturesPerFeatureInduction);
crf.featureInducers.add(klfi[i][j]);
}
}
for (int i = 0; i < numLabels; i++) {
for (int j = 0; j < numLabels; j++) {
logger.info("Adding new induced features for " + crf.outputAlphabet.lookupObject(i) + " -> "
+ crf.outputAlphabet.lookupObject(j));
if (klfi[i][j] == null) {
logger.info("...skipping because no features induced.");
continue;
}
// Note that this adds features globally, but not on a per-transition basis
klfi[i][j].induceFeaturesFor(trainingData, false, false);
if (testingData != null) {
klfi[i][j].induceFeaturesFor(testingData, false, false);
}
}
}
klfi = null;
} else {
int s = errorLabelVectors.size();
LabelVector[] lvs = new LabelVector[s];
for (int i = 0; i < s; i++) {
lvs[i] = (LabelVector) errorLabelVectors.get(i);
}
RankedFeatureVector.Factory gainFactory = null;
if (gainName.equals("exp")) {
gainFactory = new ExpGain.Factory(lvs, gaussianPriorVariance);
} else if (gainName.equals("grad")) {
gainFactory = new GradientGain.Factory(lvs);
} else if (gainName.equals("info")) {
gainFactory = new InfoGain.Factory();
}
FeatureInducer klfi = new FeatureInducer(gainFactory, errorInstances, numFeaturesPerFeatureInduction,
2 * numFeaturesPerFeatureInduction, 2 * numFeaturesPerFeatureInduction);
crf.featureInducers.add(klfi);
// Note that this adds features globally, but not on a per-transition basis
klfi.induceFeaturesFor(trainingData, false, false);
if (testingData != null) {
klfi.induceFeaturesFor(testingData, false, false);
}
logger.info(
"CRF4 FeatureSelection now includes " + crf.globalFeatureSelection.cardinality() + " features");
klfi = null;
}
// This is done in CRF4.train() anyway
// this.setWeightsDimensionAsIn (trainingData);
//// this.growWeightsDimensionToInputAlphabet ();
}
return this.train(trainingData, numIterations - trainingIteration);
}
public void setUseHyperbolicPrior(boolean f) {
usingHyperbolicPrior = f;
}
public void setHyperbolicPriorSlope(double p) {
hyperbolicPriorSlope = p;
}
public void setHyperbolicPriorSharpness(double p) {
hyperbolicPriorSharpness = p;
}
public double getUseHyperbolicPriorSlope() {
return hyperbolicPriorSlope;
}
public double getUseHyperbolicPriorSharpness() {
return hyperbolicPriorSharpness;
}
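/**
* Sets the variance of the Gaussian prior used to regularize training. Smaller values regularize the weights more
* strongly; the default is 1.0.
*/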
public void setGaussianPriorVariance(double p) {
gaussianPriorVariance = p;
}
public double getGaussianPriorVariance() {
return gaussianPriorVariance;
}
// public int getDefaultFeatureIndex () { return defaultFeatureIndex;}
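/**
* Sets whether the CRF's weights are allocated sparsely, covering only the feature and transition combinations
* observed in the training data, rather than densely for all combinations. The default is true.
*/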
public void setUseSparseWeights(boolean b) {
useSparseWeights = b;
}
public boolean getUseSparseWeights() {
return useSparseWeights;
}
/**
* Sets whether to use the 'some unsupported trick.' The trick is, when training a CRF for which some training has
* already been done and sparse weights are used, to add a few weights for features that do not occur in the
* training data.
*
* This generally leads to better accuracy at only a small memory cost.
*
* @param b
* Whether to use the trick
*/
public void setUseSomeUnsupportedTrick(boolean b) {
useSomeUnsupportedTrick = b;
}
// Serialization for CRFTrainerByLabelLikelihood
private static final long serialVersionUID = 1;
private static final int CURRENT_SERIAL_VERSION = 1;
static final int NULL_INTEGER = -1;
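// NOTE: Serialization is deliberately unfinished: writeObject and readObject below handle only part of the
// state and then throw IllegalStateException. (readObject also does not read the three cached*Stamp ints that
// writeObject writes.)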
/* Need to check for null pointers. */
private void writeObject(ObjectOutputStream out) throws IOException {
int i, size;
out.writeInt(CURRENT_SERIAL_VERSION);
// out.writeInt(defaultFeatureIndex);
out.writeBoolean(usingHyperbolicPrior);
out.writeDouble(gaussianPriorVariance);
out.writeDouble(hyperbolicPriorSlope);
out.writeDouble(hyperbolicPriorSharpness);
out.writeInt(cachedGradientWeightsStamp);
out.writeInt(cachedValueWeightsStamp);
out.writeInt(cachedWeightsStructureStamp);
out.writeBoolean(printGradient);
out.writeBoolean(useSparseWeights);
throw new IllegalStateException("Implementation not yet complete.");
}
private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
int size, i;
int version = in.readInt();
// defaultFeatureIndex = in.readInt();
usingHyperbolicPrior = in.readBoolean();
gaussianPriorVariance = in.readDouble();
hyperbolicPriorSlope = in.readDouble();
hyperbolicPriorSharpness = in.readDouble();
printGradient = in.readBoolean();
useSparseWeights = in.readBoolean();
throw new IllegalStateException("Implementation not yet complete.");
}
}