All downloads are free. Search and download functionality uses the official Maven repository.

dist.edu.umd.hooka.Alignment Maven / Gradle / Ivy

There is a newer version: 2.0.1
Show newest version
package edu.umd.hooka;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ShortBuffer;
import java.util.ArrayList;
import java.util.regex.Pattern;
import java.util.regex.Matcher;

public class Alignment implements org.apache.hadoop.io.Writable, 
  java.lang.Iterable,
  Cloneable {
	/**
	 * A mutable pair of indices, {@code f} and {@code e}.
	 * Its string form is the two values joined by a dash, e.g. {@code 3-7}.
	 */
	public class IntPair {
		public int f;
		public int e;

		/**
		 * @param f value stored in {@link #f}
		 * @param e value stored in {@link #e}
		 */
		public IntPair(int f, int e) {
			this.e = e;
			this.f = f;
		}

		/** Renders the pair as {@code f-e}. */
		@Override
		public String toString() {
			StringBuilder text = new StringBuilder();
			text.append(f);
			text.append("-");
			text.append(e);
			return text.toString();
		}
	}
	/**
	 * Iterates over the set cells of an alignment's matrix in increasing order
	 * of flat array index, yielding each one as an {@link IntPair} whose
	 * {@code f} is {@code index % width} and whose {@code e} is
	 * {@code index / width}.
	 */
	public class AIterator implements java.util.Iterator<IntPair> {
		/** Flat index of the next set cell, or {@code d.length} when exhausted. */
		int cur;
		/** The cell array of the alignment being iterated (shared, not copied). */
		boolean[] d;
		/** Row width of the cell array. */
		int w;

		/**
		 * Positions the iterator on the first set cell of {@code a}.
		 *
		 * @param a alignment whose matrix is traversed
		 */
		protected AIterator(Alignment a) {
			this.d = a._aligned._data;
			this.w = a._aligned._w;
			this.cur = 0;
			advance();
		}

		/** Moves {@code cur} forward until it rests on a set cell or runs off the end. */
		protected void advance() {
			while (cur < d.length && !d[cur]) { cur++; }
		}

		@Override
		public boolean hasNext() {
			return cur < d.length;
		}

		/**
		 * Returns the next set cell and advances past it.
		 *
		 * @throws java.util.NoSuchElementException if no set cell remains
		 */
		@Override
		public IntPair next() {
			// Without this guard an exhausted iterator would hand back a pair built
			// from an out-of-range index (or divide by zero when the width is 0).
			if (cur >= d.length) {
				throw new java.util.NoSuchElementException("no more aligned points");
			}
			IntPair res = new IntPair(cur % w, cur / w);
			cur++;
			advance();
			return res;
		}

		/** Does nothing: removal through the iterator is not implemented. */
		@Override
		public void remove() {
			return;
		}

	}
	
	protected final static class M2 implements Cloneable{
		public short _w;
		public boolean[] _data;
		/**
		 * Returns a new M2 with the same width and an independent copy of the
		 * cell array. An M2 whose cell array is still {@code null} (as left by
		 * the no-arg constructor) is cloned with a {@code null} array instead of
		 * failing with a NullPointerException.
		 */
		@Override
		public Object clone() {
			M2 res = new M2();
			res._w = _w;
			res._data = (_data == null) ? null : _data.clone();
			return res;
		}
		/** Creates an empty matrix: width 0 and no cell array. */
		public M2() {
			_data = null;
			_w = 0;
		}
		public M2(int f, int e) {
			//System.err.println("x:"+x+"y:"+y);
			_data = new boolean[f*e];
			_w = (short)f;
		}
		/**
		 * Removes the first row of the matrix (the {@code _w} cells with
		 * {@code e == 0}), so that every remaining row moves down by one.
		 */
		void eraseFirstEWord() {
			final int remaining = _data.length - _w;
			boolean[] nd = new boolean[remaining];
			// Copy FROM the old array, starting just past the first row, INTO the
			// new one. The arguments were previously reversed, which discarded
			// every alignment point and left the result all-false.
			System.arraycopy(_data, _w, nd, 0, remaining);
			_data = nd;
		}
		/**
		 * Compares this matrix with another M2.
		 *
		 * @param other matrix to compare against
		 * @return true when both have the same width and their cell arrays have
		 *         equal contents
		 */
		boolean equals(M2 other) {
			final boolean sameWidth = (_w == other._w);
			return sameWidth && java.util.Arrays.equals(_data, other._data);
		}
		/**
		 * Reads one cell.
		 *
		 * @param f column within the row
		 * @param e row
		 * @return the value stored at flat index {@code _w*e + f}
		 */
		public boolean get(int f, int e) {
			final int index = _w * e + f;
			return _data[index];
		}
		/**
		 * Sets the cell at column {@code f}, row {@code e} to true.
		 *
		 * @param f column within the row
		 * @param e row
		 * @throws RuntimeException if the flat index {@code _w*e + f} is outside
		 *         the cell array; the underlying
		 *         ArrayIndexOutOfBoundsException is attached as the cause
		 */
		public void set(int f, int e)
		{
			try {
				_data[_w*e + f] = true;
			} catch (ArrayIndexOutOfBoundsException ee) {
				// Message is unchanged; the cause is now preserved so the stack
				// trace of the failing array access is not lost.
				throw new RuntimeException("Set(" + f + ", " + e + "): caught " + ee, ee);
			}
		}
		/**
		 * Clears one cell.
		 *
		 * @param f column within the row
		 * @param e row
		 */
		public void reset(int f, int e) {
			final int index = _w * e + f;
			_data[index] = false;
		}
		public void readFields(DataInput in) throws IOException {
			_w = in.readShort();
			int size = in.readChar();
			if (size < 1)
				throw new RuntimeException("Error: " + size + " is not good for alignment!");
			_data = new boolean[size];
			int bbLen = in.readInt();
			short[] faps = new short[bbLen/2];
			short[] eaps = new short[bbLen/2];
			ByteBuffer bb=ByteBuffer.allocate(bbLen);
			in.readFully(bb.array());
			ShortBuffer sb = bb.asShortBuffer();
			sb.get(faps);
			bb.clear();
			in.readFully(bb.array());
			sb = bb.asShortBuffer();
			sb.get(eaps);
			for (int i = 0; i i) ee--;
					res.align(a, ee);
				}
		return res;
	}
	/**
	 * Builds a new alignment with one extra E position in which E word
	 * {@code i} is split in two: every link to word {@code i} is copied to both
	 * {@code i} and {@code i+1}, links to later E words move up by one, and
	 * links to earlier E words are kept as they are.
	 *
	 * @param i index of the E word to split
	 * @return the new alignment; this alignment is not modified
	 */
	public Alignment splitEnglishWords(int i) {
		final Alignment split = new Alignment(_flen, _elen + 1);
		for (int f = 0; f < _flen; f++) {
			for (int e = 0; e < _elen; e++) {
				if (!aligned(f, e)) {
					continue;
				}
				if (e == i) {
					split.align(f, i);
				}
				final int target = (e >= i) ? e + 1 : e;
				split.align(f, target);
			}
		}
		return split;
	}
	/**
	 * Builds a new alignment with one extra F position in which F word
	 * {@code j} is split in two: every link to word {@code j} is copied to both
	 * {@code j} and {@code j+1}, links to later F words move up by one, and
	 * links to earlier F words are kept as they are.
	 *
	 * @param j index of the F word to split
	 * @return the new alignment; this alignment is not modified
	 */
	public Alignment splitForeignWords(int j) {
		final Alignment split = new Alignment(_flen + 1, _elen);
		for (int f = 0; f < _flen; f++) {
			for (int e = 0; e < _elen; e++) {
				if (!aligned(f, e)) {
					continue;
				}
				if (f == j) {
					split.align(j, e);
				}
				final int target = (f >= j) ? f + 1 : f;
				split.align(target, e);
			}
		}
		return split;
	}
	/**
	 * Reads the alignment matrix from {@code in} and rebuilds the state derived
	 * from it: {@code _flen} is taken from the matrix width, {@code _elen} is
	 * the cell count divided by that width, and the per-word aligned flags are
	 * recomputed from the matrix cells.
	 *
	 * @param in source to read from
	 * @throws IOException if reading fails, or if the width read from the
	 *         stream is not positive (which would otherwise surface as a
	 *         division by zero or a negative array size)
	 */
	public void readFields(DataInput in) throws IOException {
		if (_aligned == null)
			_aligned = new M2();
		_aligned.readFields(in);
		_flen = _aligned._w;
		if (_flen <= 0)
			throw new IOException("Corrupt alignment: non-positive width " + _flen);
		_elen = (short)(_aligned._data.length / _flen);
		faligned = new boolean[_flen];
		ealigned = new boolean[_elen];
		for (int f=0; f<_flen; f++)
			for (int e=0; e<_elen; e++)
				if (aligned(f,e)) {
					faligned[f]=true;
					ealigned[e]=true;
				}

	}
	
	/** Returns the type tag of this class, which is always 1. */
	public byte getType() {
		final byte typeTag = 1;
		return typeTag;
	}
	
	/**
	 * Returns an independent copy of this alignment: the matrix and both flag
	 * arrays are duplicated, the lengths are copied.
	 *
	 * The no-arg constructor sets {@code _aligned} to null and does not assign
	 * the flag arrays, so any of those that are still null are copied as null
	 * rather than causing a NullPointerException.
	 */
	@Override
	public Object clone() {
		Alignment res = new Alignment();
		res._elen = _elen;
		res._flen = _flen;
		res._aligned = (_aligned == null) ? null : (M2) _aligned.clone();
		res.ealigned = (ealigned == null) ? null : ealigned.clone();
		res.faligned = (faligned == null) ? null : faligned.clone();
		return res;
	}

	/**
	 * Serializes this alignment by delegating to the matrix's own
	 * {@code write}.
	 *
	 * @param out destination to write to
	 * @throws IOException if the matrix fails to write
	 */
	public void write(DataOutput out) throws IOException {
		final M2 matrix = _aligned;
		matrix.write(out);
	}

	/**
	 * Two alignments are equal when their matrices are equal; anything that is
	 * not an Alignment is never equal.
	 */
	@Override
	public boolean equals(Object o) {
		if (o instanceof Alignment) {
			final Alignment that = (Alignment) o;
			return _aligned.equals(that._aligned);
		}
		return false;
	}
	/**
	 * Reports whether {@link #countNeighbors} finds at least one aligned
	 * neighbor of (i, j) using the {@code DIAG_NEIGHBORS} offsets.
	 */
	public boolean neighborAligned(int i, int j) {
		final int neighbors = countNeighbors(i, j, DIAG_NEIGHBORS);
		return neighbors > 0;
	}
	// NOTE(review): the loop header below is garbled in this copy of the file --
	// the text between "x" and "= 2)" appears to be missing, so the method does
	// not compile as shown. What survives suggests it returns true as soon as
	// some count reaches at least 2 and false otherwise; restore the loop from
	// the upstream source and confirm before relying on this description.
	public boolean lneighborAligned(int i, int j)
	{
		for (int x=0;x= 2)
				return true;
		}
		return false;
	}
	
	/** Returns a fresh {@link AIterator} over this alignment. */
	public java.util.Iterator iterator() {
		final AIterator points = new AIterator(this);
		return points;
	}

	// Returns a count, accumulated in res, of candidate positions that pass the
	// bounds-and-aligned test in the loop below.
	// NOTE(review): the loop header is garbled in this copy of the file -- the
	// text between "x" and "= 0 && cf < _flen" appears to be missing, including
	// how cf and ce are derived from f, e and rels. The surviving condition
	// presumably requires 0 <= cf < _flen and 0 <= ce < _elen before calling
	// aligned(cf, ce); restore from the upstream source and confirm.
	public final int countNeighbors(int f, int e, int[][] rels)
	{
		int res = 0;
		for (int x=0; x= 0 && cf < _flen && 
				ce >= 0 && ce < _elen && aligned(cf, ce)) {
				res++; }
		} 
		return res;
	}
	/**
	 * Returns true when F word {@code i} is flagged as aligned or E word
	 * {@code j} is flagged as aligned. The E flag is consulted only when the F
	 * flag is false.
	 */
	public final boolean rookAligned(int i, int j) {
		if (faligned[i]) {
			return true;
		}
		return ealigned[j];
	}
	/**
	 * Returns true only when both F word {@code i} and E word {@code j} are
	 * flagged as aligned. The E flag is consulted only when the F flag is true.
	 */
	public final boolean doubleRookAligned(int i, int j) {
		if (!faligned[i]) {
			return false;
		}
		return ealigned[j];
	}
	/** Returns the stored E-side length, {@code _elen}. */
	public final int getELength() {
		final int eLength = _elen;
		return eLength;
	}
	/** Returns the stored F-side length, {@code _flen}. */
	public final int getFLength() {
		final int fLength = _flen;
		return fLength;
	}
	/**
	 * Creates an empty alignment: both lengths are 0 and no matrix is
	 * allocated ({@code _aligned} is null).
	 */
	public Alignment() {
		_aligned = null;
		_flen = 0;
		_elen = 0;
	}
	/**
	 * Creates an alignment with no links for the given lengths.
	 *
	 * @param flen F-side length; narrowed to a {@code short} when stored
	 * @param elen E-side length; narrowed to a {@code short} when stored
	 */
	public Alignment(int flen, int elen) {
		_flen = (short) flen;
		_elen = (short) elen;
		alloc();
	}
	/**
	 * Creates an alignment of the given lengths and populates it from a
	 * whitespace-separated list of {@code f-e} pairs, e.g. {@code "0-0 1-2"}.
	 * A null, empty, or all-whitespace string yields an alignment with no
	 * links. Leading and trailing whitespace is ignored.
	 *
	 * @param flen F-side length; narrowed to a {@code short} when stored
	 * @param elen E-side length; narrowed to a {@code short} when stored
	 * @param pa   alignment points as text, may be null
	 * @throws IllegalArgumentException if a token is not exactly two
	 *         dash-separated parts (a non-numeric part raises
	 *         NumberFormatException, a subclass)
	 * @throws IndexOutOfBoundsException if a point lies at or beyond either
	 *         length
	 */
	public Alignment(int flen, int elen, String pa) {
		_elen = (short)elen;
		_flen = (short)flen;
		alloc();
		if (pa == null) return;
		// Trim first: String.split keeps a leading empty token when the input
		// starts with whitespace, which used to be rejected as malformed.
		String points = pa.trim();
		if (points.length() == 0) return;
		String[] aps = points.split("\\s+");
		for (String ap : aps) {
			String[] pair = ap.split("-");
			if (pair.length != 2)
				throw new IllegalArgumentException("Malformed alignment string: " + pa);
			int f = Integer.parseInt(pair[0]);
			int e = Integer.parseInt(pair[1]);
			if (f >= _flen || e >= _elen)
				throw new IndexOutOfBoundsException("out of bounds: " + f + "," + e);
			align(f, e);
		}
	}
	/**
	 * Allocates the per-word flag arrays and the matrix, all sized from the
	 * current {@code _flen} and {@code _elen}.
	 */
	private void alloc() {
		this.faligned = new boolean[_flen];
		this.ealigned = new boolean[_elen];
		this._aligned = new M2(_flen, _elen);
	}
	/**
	 * Reports whether F word {@code f} is linked to E word {@code e}, as
	 * recorded in the matrix.
	 */
	public final boolean aligned(int f, int e) {
		final boolean linked = _aligned.get(f, e);
		return linked;
	}
	/**
	 * Links F word {@code f} to E word {@code e}: sets the matrix cell, then
	 * flags both words as aligned.
	 */
	public final void align(int f, int e) {
		// The matrix is updated first, so a rejected index leaves the flags untouched.
		this._aligned.set(f, e);
		this.faligned[f] = true;
		this.ealigned[e] = true;
	}
	/** Returns the aligned flag stored for E word {@code e}. */
	public final boolean isEAligned(int e) {
		final boolean flag = ealigned[e];
		return flag;
	}
	/** Returns the aligned flag stored for F word {@code f}. */
	public final boolean isFAligned(int f) {
		final boolean flag = faligned[f];
		return flag;
	}
	
	/**
	 * Removes every link of F word {@code f} and clears its aligned flag.
	 *
	 * For each E word that was linked to {@code f}, its aligned flag is
	 * recomputed so that it stays true only if some other F word is still
	 * linked to it. Previously those flags were left set, so
	 * {@link #isEAligned} could report true for an E word with no links.
	 *
	 * @param f index of the F word to unalign
	 */
	public final void unalignF(int f) {
		faligned[f] = false;
		for (int e = 0; e < _elen; e++) {
			if (!_aligned.get(f, e)) {
				continue; // no link here, so this E word's flag is unaffected
			}
			_aligned.reset(f, e);
			boolean stillLinked = false;
			for (int other = 0; other < _flen && !stillLinked; other++) {
				stillLinked = _aligned.get(other, e);
			}
			ealigned[e] = stillLinked;
		}
	}

	/**
	 * Removes every link of E word {@code e} and clears its aligned flag.
	 *
	 * For each F word that was linked to {@code e}, its aligned flag is
	 * recomputed so that it stays true only if some other E word is still
	 * linked to it. Previously those flags were left set, so
	 * {@link #isFAligned} could report true for an F word with no links.
	 *
	 * @param e index of the E word to unalign
	 */
	public final void unalignE(int e) {
		ealigned[e] = false;
		for (int f = 0; f < _flen; f++) {
			if (!_aligned.get(f, e)) {
				continue; // no link here, so this F word's flag is unaffected
			}
			_aligned.reset(f, e);
			boolean stillLinked = false;
			for (int other = 0; other < _elen && !stillLinked; other++) {
				stillLinked = _aligned.get(f, other);
			}
			faligned[f] = stillLinked;
		}
	}

	public static Alignment fromGiza(String eline, String fline, boolean transpose) {
		Matcher es = eline_re.matcher(fline);
		es.find();
		boolean skipNull = false;
		if (es.group(1).equals("NULL")) {
			skipNull = true;
		} else {
			es.reset();
		}
		ArrayList afwords = new ArrayList();
		while (es.find()) {
//			System.out.format("Str: %s  aligns: '%s'\n", es.group(1), es.group(2));
			afwords.add(es.group(1));
		}
		String[] ewords = eline.split("\\s+");
		Alignment al = null;
		if (transpose) {
			al = new Alignment(ewords.length, afwords.size());
		} else {
			al = new Alignment(afwords.size(), ewords.length);
		}
		es.reset();
		if (skipNull) { es.find(); }
		int i = 0;
		while (es.find()) {
			String saligns = es.group(2);
			if (!saligns.matches("^\\s*$")) {
				String[] aligns = saligns.split("\\s+");
				for (int k=0; k 0)
			sb.delete(sb.length()-1, sb.length());
		return sb.toString();
	}
	
	public static Alignment intersect(Alignment a1, Alignment a2)
	{
		Alignment a = new Alignment(a1._flen, a1._elen);
		for (int i=0; i




© 2015 - 2024 Weber Informatics LLC | Privacy Policy