// Listing of com.github.steveash.jg2p.align.AlignerInferencer (Maven / Gradle / Ivy), the newest version.
/*
* Copyright 2014 Steve Ash
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.github.steveash.jg2p.align;
import com.google.common.collect.Lists;
import com.google.common.collect.Ordering;
import com.google.common.math.DoubleMath;
import com.github.steveash.jg2p.Word;
import java.util.Collections;
import java.util.List;
/**
 * Runs inference on a sequence X to determine the top-k probable alignment(s).
 *
 * <p>Only the X side of the word is segmented here: candidate grams are scored
 * with the X marginals obtained from the supplied {@link ProbTable}, and every
 * gram is appended to the resulting {@link Alignment} with an empty Y side.
 *
 * @author Steve Ash
 */
public class AlignerInferencer {

    private final GramOptions opts;
    private final ProbTable probs;
    private final ProbTable.Marginals margs;

    /**
     * Creates an inferencer and computes the marginals of {@code probs} once, up front.
     *
     * @param opts  gram options; {@code getMaxXGram()} bounds the length of each candidate X gram
     * @param probs probability table whose marginals are used to score X grams
     */
    public AlignerInferencer(GramOptions opts, ProbTable probs) {
        this.opts = opts;
        this.probs = probs;
        this.margs = probs.calculateMarginals();
    }

    /**
     * Finds the best segmentations of {@code x} into X grams.
     *
     * <p>A {@link PathXTable} with one row per position {@code 0..x.unigramCount()} is seeded
     * at position 0 and then filled left to right: for every end position {@code xx} and every
     * gram length {@code i} (up to {@code opts.getMaxXGram()} and never reaching before the
     * start of the word) the path ending at {@code xx - i} is extended by the gram
     * {@code x.gram(xx - i, i)}, scored as {@code log2(marginal probability of the gram) * i}.
     *
     * @param x             the word to segment
     * @param bestPathCount passed to the path table as its per-position path count and used
     *                      as the initial capacity of the result list
     * @return the decoded alignments, sorted by the reverse of their natural ordering
     */
    public List<Alignment> bestGraphemes(Word x, int bestPathCount) {
        // One table row per boundary position: 0 (before the first unigram) .. unigramCount.
        int positionCount = x.unigramCount() + 1;
        PathXTable t = new PathXTable(positionCount, bestPathCount);
        t.offer(0, t.make(0, -1, -1));

        for (int xx = 1; xx < positionCount; xx++) {
            for (int i = 1; (i <= opts.getMaxXGram()) && (xx - i >= 0); i++) {
                String xGram = x.gram(xx - i, i);
                double margX = margs.probX(xGram);
                // Weight the log-probability by the gram length.
                double score = DoubleMath.log2(margX) * i;
                t.extendPath(xx, xx - i, PathXTable.Entry.sample(score, i));
            }
        }
        return createAlignments(x, t, bestPathCount);
    }

    /**
     * Decodes every sufficiently probable path that ends at the last position of the table.
     *
     * @param x             the word the table was built for
     * @param t             the filled path table
     * @param bestPathCount initial capacity of the result list
     * @return alignments for all final entries whose score is not below
     *         {@code ProbTable.minLogProb}, sorted by reverse natural ordering
     */
    private List<Alignment> createAlignments(Word x, PathXTable t, int bestPathCount) {
        List<Alignment> results = Lists.newArrayListWithCapacity(bestPathCount);
        Iterable<PathXTable.Entry> lastEntries = t.get(x.unigramCount());
        for (PathXTable.Entry lastEntry : lastEntries) {
            if (lastEntry.score < ProbTable.minLogProb) {
                // Skip paths scoring below the table's minimum log-probability.
                continue;
            }
            results.add(decodePathFrom(x, t, lastEntry));
        }
        // NOTE(review): presumably Alignment's natural order is by score, making this best-first; confirm.
        Collections.sort(results, Ordering.natural().reverse());
        return results;
    }

    /**
     * Follows back-references from a final table entry to position 0, collecting the grams
     * along the way.
     *
     * @param x     the word the table was built for
     * @param t     the filled path table
     * @param entry the entry at the last position from which to start walking back
     * @return the finished alignment, created with the score of the starting {@code entry}
     */
    private Alignment decodePathFrom(Word x, PathXTable t, PathXTable.Entry entry) {
        int xx = x.unigramCount();
        Alignment a = new Alignment(x, entry.score);
        // Grams are appended last-to-first; the loop only terminates if each visited
        // entry has a positive xBackRef.
        // NOTE(review): presumably finish() restores left-to-right order; confirm in Alignment.
        while (xx > 0) {
            String xGram = x.gram(xx - entry.xBackRef, entry.xBackRef);
            a.append(xGram, "");
            xx -= entry.xBackRef;
            entry = t.get(xx, entry.pathBackRef);
        }
        return a.finish();
    }
}