All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.sf.okapi.steps.gcaligner.DpMatrix Maven / Gradle / Ivy

There is a newer version: 1.47.0
Show newest version
/**
 *  Copyright 2009 Welocalize, Inc. 
 *  
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  
 *  You may obtain a copy of the License at 
 *  http://www.apache.org/licenses/LICENSE-2.0
 *  
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *  
 */

/*===========================================================================
 Additional changes Copyright (C) 2009 by the Okapi Framework contributors
 ===========================================================================*/

package net.sf.okapi.steps.gcaligner;

import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;

/**
 * DpMatrix is the matrix used by a dynamic programming alignment. The two sequences to be aligned are passed to this
 * class as Lists. Each cell (DpMatrixCell) of the matrix records the alignment score of the two elements. The score
 * is calculated by the DpFunction that is passed to the constructor. The alignment result is returned as a List of
 * DpMatrixCell.
 *
 * @param <T>
 *            type of the elements of the two sequences being aligned
 */

public class DpMatrix<T> {
	// matrix of (sequenceX.size() + 1) by (sequenceY.size() + 1) cells; index 0 on either axis does not map to a
	// sequence element (see getAlignmentElementX/Y)
	private final DpMatrixCell[][] m_matrix;

	// sequences to be aligned
	private final List<T> sequenceX;
	private final List<T> sequenceY;

	// Function that scores the cells.
	// NOTE(review): the declaration of DpFunction is not visible from this file, so the type is used exactly as
	// written here. If DpFunction is itself generic, parameterize it with <T>.
	private final DpFunction m_dpFunction;

	/**
	 * Creates the matrix for the two given sequences and puts a new DpMatrixCell in every position. No score is
	 * computed until {@link #align()} is called.
	 *
	 * @param sequenceX
	 *            sequence laid out along the X axis
	 * @param sequenceY
	 *            sequence laid out along the Y axis
	 * @param p_dpFunction
	 *            function asked to score every cell of the matrix
	 */
	public DpMatrix(List<T> sequenceX, List<T> sequenceY, DpFunction p_dpFunction) {
		this.sequenceX = sequenceX;
		this.sequenceY = sequenceY;
		m_dpFunction = p_dpFunction;
		m_matrix = new DpMatrixCell[sequenceX.size() + 1][sequenceY.size() + 1];
		initMatrix();
	}

	/**
	 * Align the two sequences. The result is returned as a List of DpMatrixCell in order from the start to the end.
	 * Each DpMatrixCell may indicate the match, insertion or deletion of the elements. Some may even indicate the 1:n,
	 * n:1 or n:n match. The match state should be queried from the DpMatrixCell.
	 * 
	 * @return List of DpMatrixCell
	 */
	public List<DpMatrixCell> align() {
		fillMatrix();
		return alignedResult();
	}

	/**
	 * Returns an element of X sequence.
	 * 
	 * @param p_xIdx
	 *            index of X sequence. The index starts with 1. The rule of thumb is to specify the same index as the
	 *            cell's X index, then you'll get what you want.
	 * @return the element of the X sequence stored at position {@code p_xIdx - 1}
	 * @throws IndexOutOfBoundsException
	 *             if 0 or any other index outside the X sequence is specified
	 */
	public T getAlignmentElementX(int p_xIdx) {
		return sequenceX.get(p_xIdx - 1);
	}

	/**
	 * Returns an element of Y sequence.
	 * 
	 * @param p_yIdx
	 *            index of Y sequence. The index starts with 1. The rule of thumb is to specify the same index as the
	 *            cell's Y index, then you'll get what you want.
	 * @return the element of the Y sequence stored at position {@code p_yIdx - 1}
	 * @throws IndexOutOfBoundsException
	 *             if 0 or any other index outside the Y sequence is specified
	 */
	public T getAlignmentElementY(int p_yIdx) {
		return sequenceY.get(p_yIdx - 1);
	}

	/**
	 * Returns a List of X sequence elements starting at p_xFromIdx (inclusive) and ending at p_xToIdx (exclusive).
	 * The returned List is a new copy; changing it does not affect the X sequence.
	 * 
	 * @param p_xFromIdx
	 *            start index of X sequence (inclusive). The index starts with 1. Usually, a return value of
	 *            DpMatrixCell#getMultiMatchXIndexBegin() is specified.
	 * @param p_xToIdx
	 *            end index of X sequence (exclusive). The index starts with 1. Usually, a return value of
	 *            DpMatrixCell#getMultiMatchXIndexEnd() is specified.
	 * @return List of X sequence elements
	 */
	public List<T> getAlignmentElementsX(int p_xFromIdx, int p_xToIdx) {
		return new ArrayList<>(sequenceX.subList(p_xFromIdx - 1, p_xToIdx - 1));
	}

	/**
	 * Returns a List of Y sequence elements starting at p_yFromIdx (inclusive) and ending at p_yToIdx (exclusive).
	 * The returned List is a new copy; changing it does not affect the Y sequence.
	 * 
	 * @param p_yFromIdx
	 *            start index of Y sequence (inclusive). The index starts with 1. Usually, a return value of
	 *            DpMatrixCell#getMultiMatchYIndexBegin() is specified.
	 * @param p_yToIdx
	 *            end index of Y sequence (exclusive). The index starts with 1. Usually, a return value of
	 *            DpMatrixCell#getMultiMatchYIndexEnd() is specified.
	 * @return List of Y sequence elements
	 */
	public List<T> getAlignmentElementsY(int p_yFromIdx, int p_yToIdx) {
		return new ArrayList<>(sequenceY.subList(p_yFromIdx - 1, p_yToIdx - 1));
	}

	/**
	 * Return a cell of the matrix.
	 * 
	 * @param p_xIdx
	 *            Index of X axis. The index starts with 0.
	 * @param p_yIdx
	 *            Index of Y axis. The index starts with 0.
	 * @return DpMatrixCell object
	 * @throws ArrayIndexOutOfBoundsException
	 *             if either index lies outside the matrix
	 */
	public DpMatrixCell getCell(int p_xIdx, int p_yIdx) {
		return m_matrix[p_xIdx][p_yIdx];
	}

	// Initialize the matrix: put a new DpMatrixCell in every position. No score or link is set here, not even
	// for the border cells (0, y) and (x, 0); fillMatrix() hands every cell to the DpFunction.
	private void initMatrix() {
		for (int x = 0; x < m_matrix.length; x++) {
			for (int y = 0; y < m_matrix[x].length; y++) {
				m_matrix[x][y] = new DpMatrixCell(x, y);
			}
		}
	}

	// Fill the matrix with scores. The initial stage of dynamic programming. The DpFunction is called for
	// every cell from (0, 0) to (n, m), column by column, so the border cells (0, y) and (x, 0) are passed
	// to it as well.
	private void fillMatrix() {
		for (int x = 0; x < m_matrix.length; x++) {
			for (int y = 0; y < m_matrix[x].length; y++) {
				m_dpFunction.setCellScore(x, y, this);
			}
		}
	}

	// Get the alignment result. Starting at the bottom right cell, follow the nextCell() links for as long as
	// hasNext() is true. Each visited cell is added at the front, so the result reads from the start of the
	// alignment to its end. The cell on which hasNext() is false is not part of the result.
	private List<DpMatrixCell> alignedResult() {
		LinkedList<DpMatrixCell> result = new LinkedList<>();

		// get the right bottom cell
		DpMatrixCell cell = m_matrix[sequenceX.size()][sequenceY.size()];

		while (cell.hasNext()) {
			result.addFirst(cell);
			cell = cell.nextCell();
		}

		return result;
	}

	/**
	 * Dumps the scores of the matrix. Each Y index produces one line, which starts with "\r\n" and holds the score
	 * of every cell of that line, each followed by a tab.
	 * 
	 * @return textual dump of the cell scores
	 */
	@Override
	public String toString() {
		// the buffer never leaves this method, so the unsynchronized StringBuilder is sufficient
		StringBuilder sb = new StringBuilder();

		for (int y = 0; y < m_matrix[0].length; y++) {
			for (int x = 0; x < m_matrix.length; x++) {
				if (x == 0) {
					sb.append("\r\n");
				}

				DpMatrixCell cell = m_matrix[x][y];
				sb.append(cell.getScore()).append("\t");
			}
		}

		return sb.toString();
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy