// Downloading a project consumes significant server resources, so please understand that we have to cover our server costs. Thank you in advance. Project price: only $1.
// Once purchased, you can download and modify this project as often as you like.
/*******************************************************************************
* Copyright (c) 2015-2018 Skymind, Inc.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/
package org.nd4j.autodiff.validation;
import lombok.extern.slf4j.Slf4j;
import org.nd4j.autodiff.functions.DifferentialFunction;
import org.nd4j.autodiff.samediff.SDVariable;
import org.nd4j.autodiff.samediff.SameDiff;
import org.nd4j.autodiff.samediff.VariableType;
import org.nd4j.autodiff.samediff.internal.SameDiffOp;
import org.nd4j.autodiff.samediff.internal.Variable;
import org.nd4j.base.Preconditions;
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.iter.NdIndexIterator;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import java.lang.reflect.Field;
import java.util.*;
/**
* Gradient check utility
*
* @author Adam Gibson
*/
@Slf4j
public class GradCheckUtil {
//Defaults used by the convenience overloads of checkGradients(...) when the caller does not supply a value.
//Default "print" flag: when true, the result for every checked parameter (and a final summary) is logged
private static final boolean DEFAULT_PRINT = true;
//Default "exitOnFirstFailure" flag: when false, checking continues after a failure and all failures are counted
private static final boolean DEFAULT_EXIT_FIRST_FAILURE = false;
//Default "debugMode" flag: when false, SameDiff debug mode is not enabled for the duration of the check
private static final boolean DEFAULT_DEBUG_MODE = false;
//Default epsilon: each element is perturbed by +eps and -eps for the central difference (scorePlus - scoreMinus) / (2 * eps)
private static final double DEFAULT_EPS = 1e-5;
//Default maximum relative error: an element is only examined further when its relative error exceeds this value
private static final double DEFAULT_MAX_REL_ERROR = 1e-5;
//Default minimum absolute error: an element exceeding the relative error threshold still passes if its absolute error is below this value
private static final double DEFAULT_MIN_ABS_ERROR = 1e-6;
/**
 * Run a gradient check configured entirely from a {@link TestCase}.
 * <p>
 * Every setting is read from the test case and forwarded to the full
 * {@code checkGradients} overload; internal state validation is always performed
 * ({@code skipValidation} is passed as {@code false}).
 *
 * @param t test case supplying the SameDiff instance, placeholder values and gradient check settings
 * @return the result of the delegated gradient check
 */
public static boolean checkGradients(TestCase t){
    //Read the settings in the same order in which they are passed on
    SameDiff graph = t.sameDiff();
    Map<String, INDArray> placeholders = t.placeholderValues();
    double epsilon = t.gradCheckEpsilon();
    double maxRelativeError = t.gradCheckMaxRelativeError();
    double minAbsoluteError = t.gradCheckMinAbsError();
    boolean printResults = t.gradCheckPrint();
    boolean stopOnFirstFailure = t.gradCheckDefaultExitFirstFailure();
    boolean debug = t.gradCheckDebugMode();
    Set<String> variablesToSkip = t.gradCheckSkipVariables();
    Map<String, INDArray> mask = t.gradCheckMask();

    return checkGradients(graph, placeholders, epsilon, maxRelativeError, minAbsoluteError,
            printResults, stopOnFirstFailure, false, debug, variablesToSkip, mask);
}
/**
 * Run a gradient check with the default epsilon, error thresholds, print, exit-on-first-failure and
 * debug settings, optionally excluding some variables by name.
 *
 * @param sd                SameDiff instance to check
 * @param placeholderValues placeholder arrays, keyed by placeholder name, used for every execution
 * @param skipVariables     names of variables that should not be gradient checked; may be null or empty,
 *                          in which case no variable is excluded by name
 * @return the result of the delegated gradient check
 */
public static boolean checkGradients(SameDiff sd, Map<String, INDArray> placeholderValues, String... skipVariables){
    //A null varargs array means "no skip set"; an empty array yields an empty (non-null) set
    Set<String> skip = null;
    if(skipVariables != null){
        skip = new HashSet<>(Arrays.asList(skipVariables));
    }
    return checkGradients(sd, placeholderValues, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, DEFAULT_PRINT, DEFAULT_EXIT_FIRST_FAILURE,
            false, DEFAULT_DEBUG_MODE, skip, null);
}
/**
 * Run a gradient check with the default epsilon and error thresholds.
 *
 * @param sd                 SameDiff instance to check
 * @param placeholderValues  placeholder arrays, keyed by placeholder name, used for every execution
 * @param print              if true, log the result for each checked parameter
 * @param exitOnFirstFailure if true, return false as soon as the first failing parameter is found
 * @return the result of the delegated gradient check
 */
public static boolean checkGradients(SameDiff sd, Map<String, INDArray> placeholderValues, boolean print, boolean exitOnFirstFailure){
    return checkGradients(sd, placeholderValues, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, print, exitOnFirstFailure);
}
/**
 * Run a gradient check with explicit epsilon and error thresholds. Internal state validation is performed,
 * the default debug mode setting is used, and no variables are skipped or masked.
 *
 * @param sd                 SameDiff instance to check
 * @param placeholderValues  placeholder arrays, keyed by placeholder name, used for every execution
 * @param eps                perturbation applied (as +eps and -eps) to each element
 * @param maxRelError        relative error above which an element is examined further
 * @param minAbsError        absolute error below which an element passes regardless of its relative error
 * @param print              if true, log the result for each checked parameter
 * @param exitOnFirstFailure if true, return false as soon as the first failing parameter is found
 * @return the result of the delegated gradient check
 */
public static boolean checkGradients(SameDiff sd, Map<String, INDArray> placeholderValues, double eps, double maxRelError, double minAbsError, boolean print,
                                     boolean exitOnFirstFailure) {
    return checkGradients(sd, placeholderValues, eps, maxRelError, minAbsError, print, exitOnFirstFailure, false, DEFAULT_DEBUG_MODE, null, null);
}
/**
 * Check the analytic (SameDiff) gradients against central-difference numerical gradients.
 * <p>
 * For each variable that is not an op output, every element is perturbed by {@code +eps} and {@code -eps},
 * the loss variables are re-executed and their values summed, and
 * {@code (scorePlus - scoreMinus) / (2 * eps)} is compared with the analytic gradient obtained from
 * {@code execBackwards}. An element counts as a failure only if its relative error exceeds
 * {@code maxRelError} AND its absolute error is not below {@code minAbsError}.
 * <p>
 * If {@code debugMode} is set, debug mode is enabled on {@code sd} for the duration of the check and, if it
 * was not enabled beforehand, disabled again on every exit path (normal return, early return on first
 * failure, or exception).
 *
 * @param sd                 SameDiff instance to check
 * @param placeholderValues  placeholder arrays, keyed by placeholder name, used for every execution
 * @param eps                perturbation applied (as +eps and -eps) to each element
 * @param maxRelError        relative error above which an element is examined further
 * @param minAbsError        absolute error below which an element passes regardless of its relative error
 * @param print              if true, log the result for each checked parameter and a final summary
 * @param exitOnFirstFailure if true, return false as soon as the first failing parameter is found
 * @param skipValidation     if true, {@link #validateInternalState(SameDiff, boolean)} is not run first
 * @param debugMode          if true, enable SameDiff debug mode while checking
 * @param skipVariables      names of variables to exclude from the check; may be null
 * @param gradCheckMask      optional per-variable BOOL masks (same shape as the variable); elements whose
 *                           mask value is 0 are not checked. May be null
 * @return true if no checked element failed, false otherwise
 * @throws IllegalStateException if the global data type is not double, a non-ARRAY variable has no array,
 *                               a gradient variable/array is missing, a gradient shape does not match its
 *                               variable, or a numerical/analytic gradient is NaN or infinite
 */
public static boolean checkGradients(SameDiff sd, Map<String, INDArray> placeholderValues, double eps, double maxRelError, double minAbsError, boolean print,
                                     boolean exitOnFirstFailure, boolean skipValidation, boolean debugMode, Set<String> skipVariables, Map<String, INDArray> gradCheckMask){
    boolean debugBefore = sd.isDebugMode();
    if(debugMode){
        sd.enableDebugMode();
    }
    try {
        return runGradientCheck(sd, placeholderValues, eps, maxRelError, minAbsError, print, exitOnFirstFailure,
                skipValidation, skipVariables, gradCheckMask);
    } finally {
        //Restore the previous debug setting on every exit path, including early return and exceptions
        if(debugMode && !debugBefore){
            sd.disableDebugging();
        }
    }
}

/** Performs the actual gradient check; debug mode handling is done by the public entry point. */
private static boolean runGradientCheck(SameDiff sd, Map<String, INDArray> placeholderValues, double eps, double maxRelError, double minAbsError,
                                        boolean print, boolean exitOnFirstFailure, boolean skipValidation,
                                        Set<String> skipVariables, Map<String, INDArray> gradCheckMask){
    //Validation sanity checks:
    if(!skipValidation){
        validateInternalState(sd, true);
    }

    //Check data type:
    if(Nd4j.dataType() != DataType.DOUBLE){
        throw new IllegalStateException("Data type must be set to double");
    }

    //Names of all op outputs: these are not inputs to the graph, so they are never perturbed
    Set<String> fnOutputs = new HashSet<>();
    for(DifferentialFunction f : sd.functions()){
        for(SDVariable s : f.outputVariables()){
            fnOutputs.add(s.getVarName());
        }
    }

    //Check that all non-Array type SDVariables have arrays associated with them
    for(Variable v : sd.getVariables().values()){
        if(v.getVariable().getVariableType() == VariableType.ARRAY){
            //OK if variable is not available for this, it'll be created during forward pass
            continue;
        }
        if(v.getVariable().getArr(true) == null){
            throw new IllegalStateException("Variable \"" + v.getName() + "\" does not have array associated with it");
        }
    }

    //At least one loss variable is required; their values are summed to form the score
    List<String> lossFnVariables = sd.getLossVariables();
    Preconditions.checkState(lossFnVariables != null && !lossFnVariables.isEmpty(), "Expected 1 or more loss function variables for gradient check, got %s", lossFnVariables);

    //TODO also check that all inputs are non-zero (otherwise: consider out = sum(x * y) with all x and y being 0
    // in this case, gradients of x and y are all 0 too

    //Collect variables to get gradients for - we want placeholders AND variables
    Set<String> gradVarNames = new HashSet<>();
    for(Variable v : sd.getVariables().values()){
        SDVariable var = v.getVariable();
        boolean variableOrPlaceholder = var.getVariableType() == VariableType.VARIABLE || var.getVariableType() == VariableType.PLACEHOLDER;
        if(var.dataType().isFPType() && variableOrPlaceholder){
            SDVariable g = var.getGradient();
            Preconditions.checkNotNull(g, "No gradient variable found for variable %s", var);
            gradVarNames.add(g.getVarName());
        }
    }

    sd.execBackwards(placeholderValues, new ArrayList<>(gradVarNames));

    //Snapshot the analytic gradients (dup) before the forward pass is re-executed for the numerical gradients
    Map<String, INDArray> grad = new HashMap<>();
    for(SDVariable v : sd.variables()){
        if (fnOutputs.contains(v.getVarName())) {
            //This is not an input to the graph
            continue;
        }
        if(!v.hasGradient()){
            //Skip non-fp variables, or variables that don't impact loss function value
            continue;
        }
        SDVariable g = sd.grad(v.getVarName());
        if(g == null){
            throw new IllegalStateException("Null gradient variable for \"" + v.getVarName() + "\"");
        }
        INDArray ga = g.getArr();
        if(ga == null){
            throw new IllegalStateException("Null gradient array encountered for variable: " + v.getVarName());
        }
        if(!Arrays.equals(v.getArr().shape(), ga.shape())){
            throw new IllegalStateException("Gradient shape does not match variable shape for variable \"" +
                    v.getVarName() + "\": shape " + Arrays.toString(v.getArr().shape()) + " vs. gradient shape " +
                    Arrays.toString(ga.shape()));
        }
        grad.put(v.getVarName(), ga.dup());
    }

    //Validate gradients for each variable:
    int totalNFailures = 0;
    int totalCount = 0;
    double maxError = 0.0;
    for(SDVariable s : sd.variables()){
        String name = s.getVarName();
        if (fnOutputs.contains(name)) {
            //This is not an input to the graph
            continue;
        }
        if(skipVariables != null && skipVariables.contains(name)){
            log.info("Grad check: skipping variable \"{}\"", name);
            continue;
        }
        INDArray aGrad = grad.get(name);
        if(aGrad == null){
            //No analytic gradient was collected above (variable has no gradient), so there is nothing to compare against
            log.info("Grad check: skipping variable \"{}\" - no gradient available", name);
            continue;
        }

        INDArray a = s.getArr();
        long n = a.length();
        if(print){
            log.info("Starting test for variable \"{}\" with {} values", name, n);
        }

        INDArray varMask = (gradCheckMask == null ? null : gradCheckMask.get(name));
        if(varMask != null){
            Preconditions.checkState(a.equalShapes(varMask), "Variable \"%s\": Gradient check mask and array shapes must be equal: got %s vs. mask shape %s", name, a.shape(), varMask.shape());
            Preconditions.checkState(varMask.dataType() == DataType.BOOL, "Variable \"%s\": Gradient check mask must be BOOLEAN datatype, got %s", name, varMask.dataType());
        }

        NdIndexIterator iter = new NdIndexIterator('c', a.shape());
        //i is advanced by the for-loop update, so it is also incremented for masked-out entries
        for(int i = 0; iter.hasNext(); i++){
            long[] idx = iter.next();
            if(varMask != null && varMask.getDouble(idx) == 0){
                //Skip this specific entry (masked out)
                continue;
            }
            totalCount++;

            //Central difference: evaluate the summed loss at orig+eps and orig-eps, always restoring the original value
            double orig = a.getDouble(idx);
            double scorePlus;
            double scoreMinus;
            try {
                a.putScalar(idx, orig + eps);
                scorePlus = sumLoss(sd, placeholderValues, lossFnVariables);
                a.putScalar(idx, orig - eps);
                scoreMinus = sumLoss(sd, placeholderValues, lossFnVariables);
            } finally {
                a.putScalar(idx, orig);
            }

            double numericalGrad = (scorePlus - scoreMinus) / (2 * eps);
            double analyticGrad = aGrad.getDouble(idx);

            if (Double.isInfinite(numericalGrad) || Double.isNaN(numericalGrad)) {
                throw new IllegalStateException("Numerical gradient was " + numericalGrad + " for variable \"" + name
                        + "\", parameter " + i + " of " + n + " (position: " + formatIndex(idx) + ")");
            }
            if (Double.isInfinite(analyticGrad) || Double.isNaN(analyticGrad)) {
                throw new IllegalStateException("Analytic (SameDiff) gradient was " + analyticGrad + " for variable \"" + name
                        + "\", parameter " + i + " of " + n + " (position: " + formatIndex(idx) + ")");
            }

            double relError;
            if(numericalGrad == 0.0 && analyticGrad == 0.0){
                //Avoid 0/0 when both gradients are exactly zero
                relError = 0.0;
            } else {
                relError = Math.abs(analyticGrad - numericalGrad) / (Math.abs(analyticGrad) + Math.abs(numericalGrad));
            }

            if (relError > maxError) {
                maxError = relError;
            }

            //Index string is only needed for log output
            String strIdx = print ? formatIndex(idx) : null;
            if (relError > maxRelError || Double.isNaN(relError)) {
                double absError = Math.abs(analyticGrad - numericalGrad);
                if (absError < minAbsError) {
                    if(print) {
                        log.info("Param " + i + " (" + name + strIdx + ") passed: grad= " + analyticGrad
                                + ", numericalGrad= " + numericalGrad + ", relError= " + relError
                                + "; absolute error = " + absError + " < minAbsoluteError = " + minAbsError);
                    }
                } else {
                    if (print) {
                        log.info("Param " + i + " (" + name + strIdx + ") FAILED: grad= " + analyticGrad
                                + ", numericalGrad= " + numericalGrad + ", relError= " + relError
                                + ", absError=" + absError
                                + ", scorePlus=" + scorePlus + ", scoreMinus= " + scoreMinus);
                    }
                    if (exitOnFirstFailure) {
                        return false;
                    }
                    totalNFailures++;
                }
            } else if (print) {
                log.info("Param " + i + " (" + name + strIdx + ") passed: grad= " + analyticGrad + ", numericalGrad= "
                        + numericalGrad + ", relError= " + relError);
            }
        }
    }

    if (print) {
        int nPass = totalCount - totalNFailures;
        log.info("GradCheckUtil.checkGradients(): " + totalCount + " params checked, " + nPass + " passed, "
                + totalNFailures + " failed. Largest relative error = " + maxError);
    }

    return totalNFailures == 0;
}

/** Executes the loss variables and returns the sum of all elements of all resulting arrays. */
private static double sumLoss(SameDiff sd, Map<String, INDArray> placeholderValues, List<String> lossFnVariables){
    double score = 0.0;
    Map<String, INDArray> out = sd.exec(placeholderValues, lossFnVariables);
    for(INDArray arr : out.values()){
        score += arr.sumNumber().doubleValue();
    }
    return score;
}

/** Formats an element index without spaces, e.g. {@code [0,1,2]}. */
private static String formatIndex(long[] idx){
    return Arrays.toString(idx).replace(" ", "");
}
/**
 * Sanity-check the internal bookkeeping of a SameDiff instance.
 * <p>
 * Conditions verified:
 * <ol>
 *     <li>{@code variables()} contains no duplicate variables and no duplicate variable names</li>
 *     <li>The ops map has exactly one entry per differential function, and every op input/output name is a
 *     known variable name</li>
 *     <li>No variable name appears as the output of more than one op</li>
 *     <li>The variable map has the same size as {@code variables()}, and each key equals its variable's name</li>
 *     <li>If requested: the "grad" function (created if absent) passes checks 1-4 and contains every
 *     differential function of the original instance</li>
 * </ol>
 *
 * @param sd                     SameDiff instance to validate
 * @param generateAndCheckGradFn if true, also create (if necessary) and validate the gradient function
 * @throws IllegalStateException if a duplicate variable name or a duplicated op output is found; the
 *                               {@code Preconditions} checks are expected to fail in the same way
 *                               (NOTE(review): confirm exception types thrown by Preconditions)
 */
public static void validateInternalState(SameDiff sd, boolean generateAndCheckGradFn){
    DifferentialFunction[] dfs = sd.functions();
    List<SDVariable> vars = sd.variables();

    Set<SDVariable> varsSet = new HashSet<>(vars);
    Preconditions.checkState(vars.size() == varsSet.size(), "Duplicate variables in variables() list");
    Set<String> varSetStr = new HashSet<>();
    for(SDVariable v : vars){
        //Set.add returns false if the name was already present
        if(!varSetStr.add(v.getVarName())){
            throw new IllegalStateException("Variable with name " + v.getVarName() + " already encountered");
        }
    }

    //1. Check the ops map: one entry per function, with inputs/outputs that are known variable names
    Map<String, SameDiffOp> ops = sd.getOps();
    Preconditions.checkState(dfs.length == ops.size(), "All functions not present in incomingArgsReverse");
    for(DifferentialFunction df : dfs){
        String opName = df.getOwnName();
        SameDiffOp op = ops.get(opName);
        Preconditions.checkState(op != null, "%s not present in ops map", opName);

        List<String> inputs = op.getInputsToOp();
        if(inputs != null) {
            for (String s : inputs) {
                Preconditions.checkState(varSetStr.contains(s), "Variable %s in op inputs not a known variable name", s);
            }
        }

        List<String> outputs = op.getOutputsOfOp();
        if(outputs != null) {
            for (String s : outputs) {
                Preconditions.checkState(varSetStr.contains(s), "Variable %s in op outputs not a known variable name", s);
            }
        }
    }

    //Also check that op outputs are unique: i.e., a variable shouldn't be the output of more than one op
    Map<String, String> seen = new HashMap<>();     //Output variable name -> name of the op producing it
    for(Map.Entry<String, SameDiffOp> e : ops.entrySet()){
        List<String> varNames = e.getValue().getOutputsOfOp();
        if(varNames != null) {
            for (String s : varNames) {
                if (seen.containsKey(s)) {
                    throw new IllegalStateException("Already saw variable \"" + s + "\" as output for op \"" + seen.get(s)
                            + "\": expected variables to be present as an output only once; also seen as output for op \"" +
                            e.getKey() + "\"");
                }
                seen.put(s, e.getKey());
            }
        }
    }

    //2. Check variableMap
    Map<String, Variable> variableMap = sd.getVariables();
    Preconditions.checkState(vars.size() == variableMap.size(), "Variable map size check failed");
    for(Map.Entry<String, Variable> e : variableMap.entrySet()){
        Preconditions.checkState(e.getKey().equals(e.getValue().getVariable().getVarName()), "Name not equal");
    }

    if(generateAndCheckGradFn) {
        //3. Check gradient function
        if(sd.getFunction("grad") == null){
            sd.createGradFunction();
        }

        SameDiff gradFn = sd.getFunction("grad");
        //Run same validation for gradient fn... (false: don't recurse into a gradient-of-gradient function)
        validateInternalState(gradFn, false);

        //Check that all original functions are present in the gradient function
        for(DifferentialFunction dfOrig : dfs){
            Preconditions.checkNotNull(gradFn.getFunctionById(dfOrig.getOwnName()),
                    "DifferentialFunction %s from original SameDiff instance not present in grad fn", dfOrig.getOwnName());
        }
    }
}
/**
 * Read the value of a (possibly non-public) field via reflection.
 *
 * @param fieldName name of the field, as declared directly on {@code fromClass}
 * @param from      instance to read the field from (ignored by reflection for static fields)
 * @param fromClass class that declares the field
 * @param <T>       type the caller expects; the cast is unchecked, so a wrong expectation only surfaces as a
 *                  {@link ClassCastException} at the call site
 * @return the current value of the field
 * @throws RuntimeException if the field does not exist or cannot be read (the reflective exception is the cause)
 */
@SuppressWarnings("unchecked")  //Field.get returns Object; the caller determines T
private static <T> T getObject(String fieldName, Object from, Class<?> fromClass){
    try {
        Field f = fromClass.getDeclaredField(fieldName);
        f.setAccessible(true);
        return (T) f.get(from);
    } catch (ReflectiveOperationException e){
        throw new RuntimeException("Unable to read field \"" + fieldName + "\" of " + fromClass.getName(), e);
    }
}
}