dist.edu.umd.hooka.Alignment Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of cloud9 Show documentation
Show all versions of cloud9 Show documentation
University of Maryland's Hadoop Library
package edu.umd.hooka;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ShortBuffer;
import java.util.ArrayList;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
public class Alignment implements org.apache.hadoop.io.Writable,
java.lang.Iterable,
Cloneable {
/**
 * A pair of integer indices, {@code f} and {@code e}, rendered as
 * {@code "f-e"} by {@link #toString()}.
 */
public class IntPair {
    public int f;
    public int e;

    /**
     * @param f value stored in {@link #f}
     * @param e value stored in {@link #e}
     */
    public IntPair(int f, int e) {
        this.e = e;
        this.f = f;
    }

    /** @return the two indices joined by a dash, e.g. {@code "3-7"} */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append(f);
        text.append("-");
        text.append(e);
        return text.toString();
    }
}
/**
 * Iterator over the set cells of an alignment's flattened matrix.
 *
 * Cells are visited in increasing index order of the backing array. Each
 * set cell at index {@code cur} is reported as an {@link IntPair} with
 * {@code f = cur % w} and {@code e = cur / w}, where {@code w} is the
 * matrix row width.
 */
public class AIterator implements java.util.Iterator<IntPair> {
    /** Index of the next set cell, or {@code d.length} when exhausted. */
    int cur;
    /** Backing array of the alignment matrix (shared, not copied). */
    boolean[] d;
    /** Row width of the matrix. */
    int w;

    /**
     * Positions the iterator on the first set cell of {@code a}.
     *
     * @param a alignment whose matrix is traversed
     */
    protected AIterator(Alignment a) {
        this.d = a._aligned._data;
        this.w = a._aligned._w;
        this.cur = 0;
        advance();
    }

    /** Moves {@code cur} forward until it points at a set cell or past the end. */
    protected void advance() {
        while (cur < d.length && !d[cur]) {
            cur++;
        }
    }

    /** @return {@code true} while a set cell remains to be reported */
    @Override
    public boolean hasNext() {
        return cur < d.length;
    }

    /**
     * Returns the current set cell and advances to the following one.
     *
     * @return the (f, e) coordinates of the current set cell
     * @throws java.util.NoSuchElementException if no set cell remains
     */
    @Override
    public IntPair next() {
        // Without this guard an exhausted iterator would fabricate a pair for
        // an index past the end (or divide by zero when the width is 0).
        if (cur >= d.length) {
            throw new java.util.NoSuchElementException();
        }
        IntPair res = new IntPair(cur % w, cur / w);
        cur++;
        advance();
        return res;
    }

    /** Does nothing; the underlying matrix is never modified by this iterator. */
    @Override
    public void remove() {
        return;
    }
}
protected final static class M2 implements Cloneable{
/** Row width: the cell for (f, e) is stored at index {@code _w*e + f} of {@link #_data}. */
public short _w;
/** Flattened matrix of flags, one row of {@code _w} cells per e index; may be null before initialization. */
public boolean[] _data;
/**
 * Returns an independent copy of this matrix: same width and a copied
 * backing array.
 *
 * @return a new {@code M2}; its {@code _data} is null when this matrix
 *         has no backing array (as left by the no-argument constructor)
 */
@Override
public Object clone() {
    M2 res = new M2();
    res._w = _w;
    // The no-argument constructor leaves _data null; copy only when present
    // so cloning such an instance does not throw NullPointerException.
    res._data = (_data == null) ? null : _data.clone();
    return res;
}
/** Creates a matrix with width 0 and no backing array. */
public M2() {
    this._data = null;
    this._w = 0;
}
public M2(int f, int e) {
//System.err.println("x:"+x+"y:"+y);
_data = new boolean[f*e];
_w = (short)f;
}
/**
 * Removes the first row (e index 0) from the matrix, shifting every
 * remaining row down by one.
 *
 * Row e occupies cells {@code [_w*e, _w*e + _w)}, so the first row is the
 * first {@code _w} cells of the backing array.
 */
void eraseFirstEWord() {
    final int remaining = _data.length - _w;
    boolean[] nd = new boolean[remaining];
    // Copy FROM the old array (skipping the first row) INTO the new one.
    // The previous code had source and destination swapped, which wiped
    // the old data and left the new array entirely false.
    System.arraycopy(_data, _w, nd, 0, remaining);
    _data = nd;
}
/**
 * Compares this matrix with another by width and cell contents.
 *
 * Note this is an overload taking {@code M2}, not an override of
 * {@code Object.equals(Object)}.
 *
 * @param other matrix to compare against; may be null
 * @return {@code true} if {@code other} is non-null, has the same width,
 *         and its backing array has equal contents (two null arrays
 *         compare equal)
 */
boolean equals(M2 other) {
    // A null argument is simply "not equal" rather than a NullPointerException.
    if (other == null) { return false; }
    if (other._w != _w) { return false; }
    return java.util.Arrays.equals(_data, other._data);
}
/**
 * Reads the flag stored for the cell (f, e).
 *
 * @param f column index within a row
 * @param e row index
 * @return the value at index {@code _w*e + f} of the backing array
 */
public boolean get(int f, int e) {
    final int cell = _w * e + f;
    return _data[cell];
}
/**
 * Sets the flag for the cell (f, e) to true.
 *
 * @param f column index within a row
 * @param e row index
 * @throws RuntimeException if the computed index {@code _w*e + f} lies
 *         outside the backing array; the original
 *         {@link ArrayIndexOutOfBoundsException} is attached as the cause
 */
public void set(int f, int e)
{
    try {
        _data[_w*e + f] = true;
    } catch (ArrayIndexOutOfBoundsException ee) {
        // Keep the message unchanged but chain the cause so the original
        // stack trace is not lost.
        throw new RuntimeException("Set(" + f + ", " + e + "): caught " + ee, ee);
    }
}
/**
 * Clears the flag for the cell (f, e).
 *
 * @param f column index within a row
 * @param e row index
 */
public void reset(int f, int e) {
    final int cell = _w * e + f;
    _data[cell] = false;
}
public void readFields(DataInput in) throws IOException {
_w = in.readShort();
int size = in.readChar();
if (size < 1)
throw new RuntimeException("Error: " + size + " is not good for alignment!");
_data = new boolean[size];
int bbLen = in.readInt();
short[] faps = new short[bbLen/2];
short[] eaps = new short[bbLen/2];
ByteBuffer bb=ByteBuffer.allocate(bbLen);
in.readFully(bb.array());
ShortBuffer sb = bb.asShortBuffer();
sb.get(faps);
bb.clear();
in.readFully(bb.array());
sb = bb.asShortBuffer();
sb.get(eaps);
for (int i = 0; i i) ee--;
res.align(a, ee);
}
return res;
}
/**
 * Builds a new alignment with one extra e position, in which e position
 * {@code i} has been split into positions {@code i} and {@code i + 1}.
 *
 * Links to e positions below {@code i} are copied unchanged, links to
 * position {@code i} are copied to both {@code i} and {@code i + 1}, and
 * links to positions above {@code i} are shifted up by one. This alignment
 * is not modified.
 *
 * @param i e position to split
 * @return the new, wider alignment
 */
public Alignment splitEnglishWords(int i) {
    final Alignment widened = new Alignment(_flen, _elen + 1);
    for (int f = 0; f < _flen; f++) {
        for (int e = 0; e < _elen; e++) {
            if (!this.aligned(f, e)) {
                continue;
            }
            if (e == i) {
                // First half of the split word keeps the original index.
                widened.align(f, i);
            }
            final int target = (e >= i) ? e + 1 : e;
            widened.align(f, target);
        }
    }
    return widened;
}
/**
 * Builds a new alignment with one extra f position, in which f position
 * {@code j} has been split into positions {@code j} and {@code j + 1}.
 *
 * Links from f positions below {@code j} are copied unchanged, links from
 * position {@code j} are copied to both {@code j} and {@code j + 1}, and
 * links from positions above {@code j} are shifted up by one. This
 * alignment is not modified.
 *
 * @param j f position to split
 * @return the new, wider alignment
 */
public Alignment splitForeignWords(int j) {
    final Alignment widened = new Alignment(_flen + 1, _elen);
    for (int f = 0; f < _flen; f++) {
        for (int e = 0; e < _elen; e++) {
            if (!this.aligned(f, e)) {
                continue;
            }
            if (f == j) {
                // First half of the split word keeps the original index.
                widened.align(j, e);
            }
            final int target = (f >= j) ? f + 1 : f;
            widened.align(target, e);
        }
    }
    return widened;
}
/**
 * Restores this alignment from {@code in}.
 *
 * The matrix is read by {@code M2.readFields}; the f length is taken from
 * the matrix width, the e length is derived as cell count divided by
 * width, and the per-position "is aligned" flags are rebuilt from the
 * matrix contents.
 *
 * @param in source of the serialized alignment
 * @throws IOException if reading fails or the width read from the stream
 *         is not positive
 */
public void readFields(DataInput in) throws IOException {
    if (_aligned == null) {
        _aligned = new M2();
    }
    _aligned.readFields(in);

    // The width is used as a divisor and as an array size below; reject a
    // non-positive value as corrupt input instead of failing with an
    // ArithmeticException or NegativeArraySizeException.
    final int width = _aligned._w;
    if (width <= 0) {
        throw new IOException("Invalid alignment width " + width + " read from input");
    }

    _flen = _aligned._w;
    _elen = (short)(_aligned._data.length / _flen);
    faligned = new boolean[_flen];
    ealigned = new boolean[_elen];
    for (int f = 0; f < _flen; f++) {
        for (int e = 0; e < _elen; e++) {
            if (aligned(f, e)) {
                faligned[f] = true;
                ealigned[e] = true;
            }
        }
    }
}
/** @return the constant type code {@code 1} */
public byte getType() {
    final byte typeCode = 1;
    return typeCode;
}
/**
 * Returns an independent copy of this alignment: the matrix and both
 * per-position flag arrays are copied, not shared.
 *
 * @return a new {@code Alignment} with the same lengths and links; parts
 *         that are null here (as after the no-argument constructor) are
 *         null in the copy
 */
@Override
public Object clone() {
    Alignment res = new Alignment();
    res._elen = _elen;
    res._flen = _flen;
    // The no-argument constructor leaves the matrix unset, so each part is
    // copied only when present to avoid a NullPointerException.
    res._aligned = (_aligned == null) ? null : (M2) _aligned.clone();
    res.ealigned = (ealigned == null) ? null : ealigned.clone();
    res.faligned = (faligned == null) ? null : faligned.clone();
    return res;
}
/**
 * Serializes this alignment by delegating to the matrix's own
 * {@code write} method.
 *
 * @param out destination for the serialized form
 * @throws IOException if the underlying write fails
 */
public void write(DataOutput out) throws IOException {
    final M2 matrix = _aligned;
    matrix.write(out);
}
/**
 * Two alignments are equal when their matrices are equal (same width and
 * same cell contents).
 *
 * NOTE(review): no matching {@code hashCode} override is visible in this
 * part of the file; confirm one exists.
 *
 * @param o object to compare against
 * @return {@code true} if {@code o} is an {@code Alignment} with an equal
 *         matrix; two alignments that both lack a matrix compare equal
 */
@Override
public boolean equals(Object o) {
    if (this == o) { return true; }
    if (!(o instanceof Alignment)) { return false; }
    final M2 mine = _aligned;
    final M2 theirs = ((Alignment) o)._aligned;
    // A default-constructed Alignment has no matrix; handle that here
    // rather than dereferencing null.
    if (mine == null || theirs == null) {
        return mine == theirs;
    }
    return mine.equals(theirs);
}
/**
 * Tests whether {@code countNeighbors} reports at least one hit around
 * (i, j) for the {@code DIAG_NEIGHBORS} offset table.
 *
 * @param i f position
 * @param j e position
 * @return {@code true} if the neighbor count is greater than zero
 */
public boolean neighborAligned(int i, int j) {
    final int hits = countNeighbors(i, j, DIAG_NEIGHBORS);
    return hits > 0;
}
public boolean lneighborAligned(int i, int j)
{
for (int x=0;x= 2)
return true;
}
return false;
}
/**
 * @return a new {@code AIterator} over this alignment
 */
public java.util.Iterator iterator() {
    final AIterator cells = new AIterator(this);
    return cells;
}
public final int countNeighbors(int f, int e, int[][] rels)
{
int res = 0;
for (int x=0; x= 0 && cf < _flen &&
ce >= 0 && ce < _elen && aligned(cf, ce)) {
res++; }
}
return res;
}
/**
 * Tests whether f position {@code i} or e position {@code j} is flagged
 * as aligned.
 *
 * @param i f position
 * @param j e position (not examined when {@code i} is already flagged)
 * @return {@code true} if either flag is set
 */
public final boolean rookAligned(int i, int j) {
    if (faligned[i]) {
        return true;
    }
    return ealigned[j];
}
/**
 * Tests whether f position {@code i} and e position {@code j} are both
 * flagged as aligned.
 *
 * @param i f position
 * @param j e position (not examined when {@code i} is not flagged)
 * @return {@code true} only if both flags are set
 */
public final boolean doubleRookAligned(int i, int j) {
    if (!faligned[i]) {
        return false;
    }
    return ealigned[j];
}
/** @return the number of e positions in this alignment */
public final int getELength() {
    final int length = _elen;
    return length;
}
/** @return the number of f positions in this alignment */
public final int getFLength() {
    final int length = _flen;
    return length;
}
/**
 * Creates an empty alignment with both lengths 0 and no matrix; the
 * matrix is created later by {@code readFields} when needed.
 */
public Alignment() {
    this._aligned = null;
    this._flen = 0;
    this._elen = 0;
}
/**
 * Creates an alignment of the given dimensions with no links.
 *
 * @param flen number of f positions; narrowed to {@code short}
 * @param elen number of e positions; narrowed to {@code short}
 */
public Alignment(int flen, int elen) {
    this._flen = (short) flen;
    this._elen = (short) elen;
    alloc();
}
/**
 * Creates an alignment of the given dimensions and populates it from a
 * whitespace-separated list of {@code f-e} pairs, e.g. {@code "0-0 1-2"}.
 *
 * @param flen number of f positions; narrowed to {@code short}
 * @param elen number of e positions; narrowed to {@code short}
 * @param pa   alignment points; null, empty, or whitespace-only yields an
 *             alignment with no links. Leading and trailing whitespace is
 *             ignored.
 * @throws IllegalArgumentException if a token is not of the form
 *         {@code f-e} (including non-numeric parts, via
 *         {@link NumberFormatException})
 * @throws IndexOutOfBoundsException if a pair lies outside the dimensions
 */
public Alignment(int flen, int elen, String pa) {
    _elen = (short)elen;
    _flen = (short)flen;
    alloc();
    if (pa == null) return;
    // Strip surrounding whitespace first: splitting " 0-0" on \s+ would
    // otherwise produce an empty leading token that is rejected as malformed.
    String points = pa.trim();
    if (points.length() == 0) return;
    String[] aps = points.split("\\s+");
    for (String ap : aps) {
        String[] pair = ap.split("-");
        if (pair.length != 2)
            throw new IllegalArgumentException("Malformed alignment string: " + pa);
        int f = Integer.parseInt(pair[0]);
        int e = Integer.parseInt(pair[1]);
        if (f >= _flen || e >= _elen)
            throw new IndexOutOfBoundsException("out of bounds: " + f + "," + e);
        align(f, e);
    }
}
/**
 * Allocates the per-position flag arrays and the matrix for the current
 * {@code _flen} and {@code _elen}; everything starts out unaligned.
 */
private void alloc() {
    this.faligned = new boolean[this._flen];
    this.ealigned = new boolean[this._elen];
    this._aligned = new M2(this._flen, this._elen);
}
/**
 * @param f f position
 * @param e e position
 * @return whether the matrix cell for (f, e) is set
 */
public final boolean aligned(int f, int e) {
    final boolean linked = this._aligned.get(f, e);
    return linked;
}
/**
 * Links f position {@code f} with e position {@code e}: sets the matrix
 * cell and flags both positions as aligned.
 *
 * @param f f position
 * @param e e position
 */
public final void align(int f, int e) {
    // The matrix is updated first so a bad index fails before the flags change.
    this._aligned.set(f, e);
    this.faligned[f] = true;
    this.ealigned[e] = true;
}
/**
 * @param e e position
 * @return the "is aligned" flag recorded for that e position
 */
public final boolean isEAligned(int e) {
    final boolean flagged = this.ealigned[e];
    return flagged;
}
/**
 * @param f f position
 * @return the "is aligned" flag recorded for that f position
 */
public final boolean isFAligned(int f) {
    final boolean flagged = this.faligned[f];
    return flagged;
}
/**
 * Removes every link involving f position {@code f}.
 *
 * Besides clearing the matrix cells and the f-side flag, the e-side flag
 * of each e position that was linked to {@code f} is recomputed, so that
 * an e position left with no links is no longer reported as aligned. This
 * keeps the flags consistent with what {@code readFields} rebuilds from
 * the matrix.
 *
 * @param f f position to unalign
 */
public final void unalignF(int f) {
    faligned[f] = false;
    for (int e = 0; e < _elen; e++) {
        if (!_aligned.get(f, e)) {
            continue;
        }
        _aligned.reset(f, e);
        // This e position just lost a link; it stays flagged only if some
        // other f position is still linked to it.
        boolean stillLinked = false;
        for (int other = 0; other < _flen && !stillLinked; other++) {
            stillLinked = _aligned.get(other, e);
        }
        ealigned[e] = stillLinked;
    }
}
/**
 * Removes every link involving e position {@code e}.
 *
 * Besides clearing the matrix cells and the e-side flag, the f-side flag
 * of each f position that was linked to {@code e} is recomputed, so that
 * an f position left with no links is no longer reported as aligned. This
 * keeps the flags consistent with what {@code readFields} rebuilds from
 * the matrix.
 *
 * @param e e position to unalign
 */
public final void unalignE(int e) {
    ealigned[e] = false;
    for (int f = 0; f < _flen; f++) {
        if (!_aligned.get(f, e)) {
            continue;
        }
        _aligned.reset(f, e);
        // This f position just lost a link; it stays flagged only if some
        // other e position is still linked to it.
        boolean stillLinked = false;
        for (int other = 0; other < _elen && !stillLinked; other++) {
            stillLinked = _aligned.get(f, other);
        }
        faligned[f] = stillLinked;
    }
}
public static Alignment fromGiza(String eline, String fline, boolean transpose) {
Matcher es = eline_re.matcher(fline);
es.find();
boolean skipNull = false;
if (es.group(1).equals("NULL")) {
skipNull = true;
} else {
es.reset();
}
ArrayList afwords = new ArrayList();
while (es.find()) {
// System.out.format("Str: %s aligns: '%s'\n", es.group(1), es.group(2));
afwords.add(es.group(1));
}
String[] ewords = eline.split("\\s+");
Alignment al = null;
if (transpose) {
al = new Alignment(ewords.length, afwords.size());
} else {
al = new Alignment(afwords.size(), ewords.length);
}
es.reset();
if (skipNull) { es.find(); }
int i = 0;
while (es.find()) {
String saligns = es.group(2);
if (!saligns.matches("^\\s*$")) {
String[] aligns = saligns.split("\\s+");
for (int k=0; k 0)
sb.delete(sb.length()-1, sb.length());
return sb.toString();
}
public static Alignment intersect(Alignment a1, Alignment a2)
{
Alignment a = new Alignment(a1._flen, a1._elen);
for (int i=0; i