com.expleague.ml.models.gpf.GPFGbrtModel Maven / Gradle / Ivy
package com.expleague.ml.models.gpf;
import com.expleague.commons.math.vectors.Mx;
import com.expleague.commons.math.vectors.VecIterator;
import com.expleague.commons.math.vectors.impl.vectors.ArrayVec;
import org.jetbrains.annotations.NotNull;
import com.expleague.commons.math.vectors.Vec;
import com.expleague.commons.math.vectors.impl.mx.VecBasedMx;
import com.expleague.commons.math.vectors.impl.vectors.SparseVec;
import java.util.*;
/**
* User: irlab
* Date: 14.05.14
*/
public class GPFGbrtModel extends GPFModel.Stub implements GPFModel {
// parameters for eval_L_and_Gradient
public double PRUNE_A_THRESHOLD = 1E-5; //0.01;
private ClickProbabilityModel clickProbabilityModel;
private AttractivenessModel attractivenessModel;
public GPFGbrtModel() {
}
public GPFGbrtModel(final GPFGbrtModel model) {
this.MAX_PATH_LENGTH = model.MAX_PATH_LENGTH;
this.PRUNE_A_THRESHOLD = model.PRUNE_A_THRESHOLD;
this.clickProbabilityModel = model.clickProbabilityModel;
this.attractivenessModel = model.attractivenessModel;
}
public static class SessionFeatureRepresentation {
final Session ses;
public final int f_count; // number of different attractiveness functions "f", roughly it is (number of edges) * (2 = click_s)
final ArrayList keys; // f_count rows, 4 columns; keys.get(i) == [start, end, click_s]
public final VecBasedMx features; // f_count rows, NFEATS columns; features.row(i) is a feature representation of attractiveness
final int blocks_length; // ses.getBlocks().length;
private final Map keys_hash; // i == keys_hash.get(keys.get(i))
static class FeatureKey {
int s;
int e;
int click_s;
FeatureKey(final int s, final int e, final int click_s) {
this.s = s;
this.e = e;
this.click_s = click_s;
}
@Override
public String toString() {
return "FeatureKey{(" + s + "," + click_s + "->" + e + '}';
}
@Override
public boolean equals(final Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
final FeatureKey that = (FeatureKey) o;
if (click_s != that.click_s) return false;
if (e != that.e) return false;
if (s != that.s) return false;
return true;
}
@Override
public int hashCode() {
int result = s;
result = 31 * result + e;
result = 31 * result + click_s;
return result;
}
}
public SessionFeatureRepresentation(final Session ses, final AttractivenessModel fmodel) {
this.ses = ses;
keys = new ArrayList();
for (int i = 0; i < ses.getBlocks().length; i++) {
for (final int j: ses.getEdgesFrom(i))
keys.add(new FeatureKey(i, j, 0));
if (ses.hasClickOn(i)) {
for (final int j: ses.getEdgesFrom(i))
keys.add(new FeatureKey(i, j, 1));
}
}
keys.trimToSize();
f_count = keys.size();
features = new VecBasedMx(f_count, fmodel.getEdgeFeatCount());
for (int i = 0; i < keys.size(); i++) {
final FeatureKey key = keys.get(i);
final SparseVec edgeFeatures = fmodel.feats(ses, key.s, key.e, key.click_s);
for (final VecIterator it = edgeFeatures.nonZeroes(); it.advance(); )
features.set(i, it.index(), it.value());
}
keys_hash = new HashMap();
for (int i = 0; i < keys.size(); i++) {
keys_hash.put(keys.get(i), i);
}
blocks_length = ses.getBlocks().length;
}
}
static class SessionGradientValue {
double loglikelihood = 0.; // log(P(session)) == sum of loglikelihood over observations
ArrayVec gradient;
int nObservations = 0;
}
SessionGradientValue eval_L_and_dL_df(final SessionFeatureRepresentation sesf, final boolean do_eval_gradient, @NotNull final Vec f) {
final SessionGradientValue ret = new SessionGradientValue();
final Session ses = sesf.ses;
ret.nObservations = ses.getClick_indexes().length + 1;
if (do_eval_gradient)
ret.gradient = new ArrayVec(sesf.f_count);
// 1 & для каждой пары блоков $i$, $j$ вычислить $f(i,j)$; третья координата - наличие клика c_i
if (f.dim() != sesf.f_count)
throw new IllegalArgumentException("f.xdim() != sesf.f_count:" + f.dim() + " != " + sesf.f_count);
// f = new ArrayVec(sesf.f_count);
// for (int i = 0; i < sesf.f_count; i++)
// f.set(i, f_model.value(sesf.features.row(i)));
// 2 & для каждого блока $i$ вычислить норму $\sum_k f(i, k)$; Вторая координата - наличие клика c_i
final Mx sum_f_i_k = new VecBasedMx(sesf.blocks_length, 2);
for (int i = 0; i < sesf.f_count; i++) {
final SessionFeatureRepresentation.FeatureKey key = sesf.keys.get(i);
sum_f_i_k.adjust(key.s, key.click_s, f.get(i));
}
// 3 & для каждого блока $i$ вычислить $P(c=0|r_i)$
final Session.Block[] blocks = ses.getBlocks();
final double[] P_noclick_i = new double[sesf.blocks_length];
for (int i = 0; i < sesf.blocks_length; i++)
P_noclick_i[i] = 1. - getClickGivenViewProbability((Blk)blocks[i]);
// 4 & для каждой пары блоков $i$, $j$ вычислить $P(i \to j)$; третья координата - наличие клика c_i
final ArrayVec P_i_j = new ArrayVec(sesf.f_count);
for (int i = 0; i < sesf.f_count; i++) {
final SessionFeatureRepresentation.FeatureKey key = sesf.keys.get(i);
P_i_j.set(i, f.get(i) / sum_f_i_k.get(key.s, key.click_s));
}
VecBasedMx dPji_dfjm = null; // dPji_dfjm(pi, fi) = dP(sesf.keys[pi]{ s, click_s -> e }) / df(sesf.keys[fi]{ s, click_s -> e })
if (do_eval_gradient) {
// 8 & для каждой пары блоков $i$, $j$ вычислить $\frac{d}{df}P(i,j)$
dPji_dfjm = new VecBasedMx(sesf.f_count, sesf.f_count); // fPji_dfjm(pi, fi) = dP(sesf.keys[pi]{ s, click_s -> e }) / df(sesf.keys[fi]{ s, click_s -> e })
for (int pi = 0; pi < sesf.f_count; pi++) {
final double p_pi = P_i_j.get(pi);
final double val2 = - p_pi * p_pi / f.get(pi);
final SessionFeatureRepresentation.FeatureKey pi_key = sesf.keys.get(pi);
for (final int j: ses.getEdgesFrom(pi_key.s)) {
final int fi = sesf.keys_hash.get(new SessionFeatureRepresentation.FeatureKey(pi_key.s, j, pi_key.click_s));
dPji_dfjm.set(pi, fi, val2);
}
dPji_dfjm.adjust(pi, pi, p_pi / f.get(pi));
}
}
// далее -- вычисления для каждого клика в отдельности
final int[] observations = new int[ses.getClick_indexes().length + 2];
observations[0] = Session.Q_INDEX;
for (int i = 0; i < ses.getClick_indexes().length; i++)
observations[i+1] = ses.getClick_indexes()[i];
observations[observations.length - 1] = Session.E_INDEX;
for (int eindex = 1; eindex < observations.length; eindex++) {
// 5 & для всех блоков $i$ и длин $t$ вычислить $A(s,i,t)$
final int s = observations[eindex - 1];
final int e = observations[eindex];
final int click_s = s == 0 ? 0 : 1;
// $A(i,j,t)$ вероятность пройти без из блока $i$ в блок $j$ за $t$ шагов не сделав по пути ни одного клика
final VecBasedMx A = new VecBasedMx(blocks.length, MAX_PATH_LENGTH + 1);
// Критерий ранней остановки вычисления матрицы $A$: если
// A(s,i,t+1) < \varepsilon \max_{i,u<=t} A(s,i,u)
// то все пути длины $>t$ маловероятны по сравнению с путями длины $\le t$, и значения $A$ и $\frac{dA}{d\Theta}$ можно не вычислять
int max_path_length_pruned = MAX_PATH_LENGTH;
// A(s,i,1) =& P(s\to i) \cdot P(c=0 | i) &\quad \forall i
double max_A_lte_t = 0.;
for (final int i: ses.getEdgesFrom(s)) {
final double val = P_i_j.get(sesf.keys_hash.get(new SessionFeatureRepresentation.FeatureKey(s, i, click_s))) * P_noclick_i[i];
A.set(i, 1, val);
max_A_lte_t = Math.max(max_A_lte_t, val);
}
for (int t = 1; t < MAX_PATH_LENGTH; t++) {
double max_A_tp1 = 0;
for (int i = 0; i < blocks.length; i++) {
double val = 0;
for (final int j: ses.getEdgesTo(i))
val += A.get(j, t) * P_i_j.get(sesf.keys_hash.get(new SessionFeatureRepresentation.FeatureKey(j, i, 0)));
val *= P_noclick_i[i];
A.set(i, t + 1, val);
max_A_tp1 = Math.max(max_A_tp1, val);
}
if (max_A_lte_t * PRUNE_A_THRESHOLD > max_A_tp1) {
max_path_length_pruned = t;
break;
}
}
// 6 & вычислить $P(O_{d,\nu})$
double sumA_e_t = 0.;
for (int t = 1; t <= max_path_length_pruned; t++)
sumA_e_t += A.get(e, t);
final double observation_prob = sumA_e_t * (1 - P_noclick_i[e]) / P_noclick_i[e];
ret.loglikelihood += Math.log(observation_prob);
if (do_eval_gradient) {
// 9 & для всех блоков $i$ и длин $t$ вычислить $\frac{dA}{df_{fi}}(s,i,t)$
final Tensor3 dA_df = new Tensor3(blocks.length, max_path_length_pruned + 1, sesf.f_count);
// \frac{dA}{d\Theta}(s,i,1) =& P(c=0 | i) \cdot \frac{d}{d\Theta}P(s\to i) + P(s\to i) \cdot \frac{d}{d\Theta} P(c=0|i) \qquad \forall i
for (final int i: ses.getEdgesFrom(s)) {
final ArrayVec val = (ArrayVec)dPji_dfjm.row(sesf.keys_hash.get(new SessionFeatureRepresentation.FeatureKey(s, i, click_s)));
val.scale(P_noclick_i[i]);
dA_df.setRow(i, 1, val);
}
// \frac{dA}{df}(s,i,t+1) =&
// P(c=0|i) \sum_j \left( \frac{dA}{df}(s,j,t) \cdot P(j\to i)
// + A(s,j,t) \cdot \frac{d}{d\Theta} P(j\to i) \right)
final ArrayVec sum = new ArrayVec(sesf.f_count);
final ArrayVec val1 = new ArrayVec(sesf.f_count);
final ArrayVec val2 = new ArrayVec(sesf.f_count);
for (int t = 1; t < max_path_length_pruned; t++) {
for (int i = 0; i < blocks.length; i++) {
sum.fill(0);
for (final int j: ses.getEdgesTo(i)) {
final int f_ji0_index = sesf.keys_hash.get(new SessionFeatureRepresentation.FeatureKey(j, i, 0));
// sum += A.get(j, t) * P_i_j.get(j, i, 0);
val1.assign(dA_df.getRow(j, t));
val1.scale(P_i_j.get(f_ji0_index));
val2.assign((ArrayVec)dPji_dfjm.row(f_ji0_index));
val2.scale(A.get(j, t));
sum.add(val1);
sum.add(val2);
}
sum.scale(P_noclick_i[i]);
// A.set(i, t + 1, sum * P_noclick_i[i]);
dA_df.setRow(i, t + 1, sum);
}
}
// 10 & вычислить градиент $\frac{d}{d\Theta} \log P(O_{d,\nu})$
final ArrayVec dPlogO_df = new ArrayVec(sesf.f_count);
// for (int t = 1; t <= MAX_PATH_LENGTH; t++)
// observation_prob += A.get(e, t);
// observation_prob *= (1 - P_noclick_i[e]) / P_noclick_i[e];
// ret.observation_probabilities.set(e-1, observation_prob);
for (int t = 1; t <= max_path_length_pruned; t++)
dPlogO_df.add(dA_df.getRow(e, t));
dPlogO_df.scale( 1./sumA_e_t );
ret.gradient.add(dPlogO_df);
}
}
return ret;
}
@Override
public String explainTheta() {
return "NOT IMPLEMENTED";
}
@Override
public String toString() {
return "NOT IMPLEMENTED";
}
public ClickProbabilityModel getClickProbabilityModel() {
return clickProbabilityModel;
}
public void setClickProbabilityModel(final ClickProbabilityModel clickProbabilityModel) {
this.clickProbabilityModel = clickProbabilityModel;
}
public AttractivenessModel getAttractivenessModel() {
return attractivenessModel;
}
public void setAttractivenessModel(final AttractivenessModel attractivenessModel) {
this.attractivenessModel = attractivenessModel;
}
@Override
public double getClickGivenViewProbability(final Blk b) {
return clickProbabilityModel.getClickGivenViewProbability(b);
}
@Override
public double eval_f(final Session ses, final int s, final int e, final int click_s) {
return attractivenessModel.eval_f(ses, s, e, click_s);
}
@Override
public SparseVec feats(final Session ses, final int s, final int e, final int click_s) {
return attractivenessModel.feats(ses, s, e, click_s);
}
@Override
public int getEdgeFeatCount() {
return attractivenessModel.getEdgeFeatCount();
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy