All Downloads are FREE. Search and download functionalities are using the official Maven repository.

de.citec.scie.pdf.structure.Paragraph Maven / Gradle / Ivy

/*
 * SCIE -- Spinal Cord Injury Information Extraction
 * Copyright (C) 2013, 2014
 * Raphael Dickfelder, Jan Göpfert, Benjamin Paaßen, Andreas Stöckel
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package de.citec.scie.pdf.structure;

import java.util.ArrayList;
import java.util.Objects;

/**
 * This represents a paragraph of text that is defined as a sequence of Text
 * objects that syntactically were grouped in a paragraph.
 *
 * @author Benjamin Paassen - bpaassen(at)techfak.uni-bielefeld.de
 */
public class Paragraph extends LineSegment {

	/**
	 * This is the Text content of this Paragraph, in reading order.
	 *
	 * The element type is declared explicitly (instead of using a raw
	 * ArrayList) because the methods of this class call indexedToString and
	 * toXML on the elements, which are not available on plain Objects.
	 */
	public final ArrayList<Text> content = new ArrayList<>();

	/**
	 * Creates an empty paragraph. Text objects are added by appending them to
	 * the public content list.
	 */
	public Paragraph() {
	}

	/**
	 * Computes the hash code from the content list only, consistent with
	 * equals.
	 *
	 * @return the hash code of this paragraph.
	 */
	@Override
	public int hashCode() {
		int hash = 5;
		hash = 61 * hash + Objects.hashCode(this.content);
		return hash;
	}

	/**
	 * Two paragraphs are equal if they are of exactly the same class and
	 * their content lists are equal. No other state is compared.
	 *
	 * @param obj the object to compare with, may be null.
	 * @return true if obj is a Paragraph with equal content.
	 */
	@Override
	public boolean equals(Object obj) {
		if (this == obj) {
			// Same instance: no need to compare the lists element by element.
			return true;
		}
		if (obj == null || getClass() != obj.getClass()) {
			return false;
		}
		final Paragraph other = (Paragraph) obj;
		return Objects.equals(this.content, other.content);
	}

	/**
	 * Converts this object to a string by going through the underlying text
	 * objects and calling their respective toString methods. The individual
	 * strings are joined by single spaces.
	 *
	 * @return a string showing the plain text content of this paragraph.
	 */
	@Override
	public String toString() {
		final StringBuilder output = new StringBuilder();
		boolean first = true;
		for (final Text text : content) {
			// Separator goes between elements only, never at the end.
			if (!first) {
				output.append(' ');
			}
			output.append(text.toString());
			first = false;
		}
		return output.toString();
	}

	/**
	 * Does the same as toString but also stores the begin and end index of
	 * the plain text representation via setBegin and setEnd, and passes the
	 * running index on to the indexedToString method of each contained text
	 * object.
	 *
	 * @param currentIdx the current index in the plain text representation.
	 * If you are calling this as a user you should insert 0 here.
	 * @return the plain text representation of this Paragraph, same as for the
	 * toString method.
	 */
	public String indexedToString(int currentIdx) {
		setBegin(currentIdx);
		final StringBuilder output = new StringBuilder();
		boolean first = true;
		for (final Text text : content) {
			if (!first) {
				// The separating space occupies one index position, too.
				output.append(' ');
				currentIdx++;
			}
			final String textStr = text.indexedToString(currentIdx);
			output.append(textStr);
			currentIdx += textStr.length();
			first = false;
		}
		// The end index is the position directly after the last character.
		setEnd(currentIdx);
		return output.toString();
	}

	/**
	 * Returns a XML representation of this paragraph by going through the
	 * underlying text objects and calling their respective toXML methods.
	 * The individual results are separated by line breaks.
	 *
	 * NOTE(review): the opening and closing literals below consist of
	 * whitespace only, so no enclosing element is emitted. An element tag
	 * may have been lost from these literals -- confirm against the upstream
	 * project before relying on this output.
	 *
	 * @return a string containing a XML representation of this paragraph.
	 */
	public String toXML() {
		final StringBuilder output = new StringBuilder();
		output.append("\t\t\t\n");
		boolean first = true;
		for (final Text text : content) {
			if (!first) {
				output.append("\n");
			}
			output.append(text.toXML());
			first = false;
		}
		output.append("\n\t\t\t");
		return output.toString();
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy