// All Downloads are FREE. Search and download functionalities are using the official Maven repository.

// de.citec.scie.pdf.structure.TextBlock Maven / Gradle / Ivy

/*
 * SCIE -- Spinal Cord Injury Information Extraction
 * Copyright (C) 2013, 2014
 * Raphael Dickfelder, Jan Göpfert, Benjamin Paaßen, Andreas Stöckel
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package de.citec.scie.pdf.structure;

import java.util.ArrayList;
import java.util.Objects;

/**
 * A syntactic block of text, for example a column on a page, a header or a
 * similar unit. Formally a TextBlock is a sequence of paragraphs; that
 * sequence may well consist of a single paragraph only.
 *
 * @author Benjamin Paassen - bpaassen(at)techfak.uni-bielefeld.de
 */
public class TextBlock extends LineSegment {

	/**
	 * The elements that make up this TextBlock, in reading order.
	 *
	 * NOTE(review): this is declared as a raw ArrayList, yet indexedToString
	 * and toXML below call methods on the elements that java.lang.Object does
	 * not declare. The element type parameter was presumably lost (the class
	 * description speaks of paragraphs) -- confirm against the upstream source
	 * and restore it.
	 */
	public final ArrayList content = new ArrayList<>();
	private double relativeFontSize;

	/**
	 * Creates a TextBlock with an empty content list.
	 */
	public TextBlock() {
	}

	/**
	 * Returns the font size of this block's content relative to the page-wide
	 * average.
	 *
	 * @return the relative font size last stored via
	 * {@link #setRelativeFontSize(double)}.
	 */
	public double getRelativeFontSize() {
		return relativeFontSize;
	}

	/**
	 * Stores the font size of this block's content relative to the page-wide
	 * average.
	 *
	 * @param relativeFontSize the font size of this block's content relative
	 * to the page-wide average.
	 */
	public void setRelativeFontSize(double relativeFontSize) {
		this.relativeFontSize = relativeFontSize;
	}

	/**
	 * Computes the hash code from the content list only; the relative font
	 * size does not take part, which keeps this consistent with
	 * {@link #equals(Object)}.
	 *
	 * @return the hash code of this TextBlock.
	 */
	@Override
	public int hashCode() {
		final int seed = 5;
		final int multiplier = 67;
		return multiplier * seed + Objects.hashCode(content);
	}

	/**
	 * Two TextBlocks are equal if they are of exactly the same class and their
	 * content lists are equal. The relative font size is not compared.
	 *
	 * @param obj the object to compare with; may be null.
	 * @return true if obj is a TextBlock of the same class with equal content.
	 */
	@Override
	public boolean equals(Object obj) {
		if (obj == null || getClass() != obj.getClass()) {
			return false;
		}
		final TextBlock that = (TextBlock) obj;
		return Objects.equals(content, that.content);
	}

	/**
	 * Renders the plain text of this block: every content element is written
	 * via its own toString method, prefixed by a tab, and consecutive elements
	 * are separated by an empty line.
	 *
	 * @return a string showing the plain text content of this TextBlock.
	 */
	@Override
	public String toString() {
		final StringBuilder text = new StringBuilder();
		for (int idx = 0; idx < content.size(); idx++) {
			if (idx > 0) {
				// Blank line between two consecutive elements, none after the last.
				text.append("\n\n");
			}
			text.append('\t').append(content.get(idx).toString());
		}
		return text.toString();
	}

	/**
	 * Produces the same text as toString, and additionally records where this
	 * block starts and ends within the overall plain text (via setBegin and
	 * setEnd). Each content element is handed the index at which its own text
	 * starts, so that it can do the same bookkeeping for itself.
	 *
	 * @param currentIdx the index in the plain text representation at which
	 * this block starts. If you are calling this as a user you should pass 0.
	 * @return the plain text representation of this TextBlock, same as for the
	 * toString method.
	 */
	public String indexedToString(int currentIdx) {
		setBegin(currentIdx);
		final StringBuilder text = new StringBuilder();
		// Position in the overall plain text that matches the end of 'text'.
		int cursor = currentIdx;
		for (int idx = 0; idx < content.size(); idx++) {
			if (idx > 0) {
				text.append("\n\n");
				cursor += 2;
			}
			// The leading tab belongs to this block, not to the element, so the
			// element starts one character further on.
			text.append('\t');
			cursor += 1;
			final String rendered = content.get(idx).indexedToString(cursor);
			text.append(rendered);
			cursor += rendered.length();
		}
		setEnd(cursor);
		return text.toString();
	}

	/**
	 * Returns an XML representation of this block by concatenating the toXML
	 * output of every content element, one per line, between a fixed prefix
	 * and suffix.
	 *
	 * NOTE(review): the previous documentation of this method said the
	 * relative font size is added as an XML attribute, but the prefix and
	 * suffix literals below contain only whitespace. The enclosing tag was
	 * presumably lost together with other angle-bracket content -- confirm
	 * against the upstream source before relying on this output.
	 *
	 * @return a string containing an XML representation of this block.
	 */
	public String toXML() {
		final StringBuilder xml = new StringBuilder("\t\t\n");
		for (int idx = 0; idx < content.size(); idx++) {
			if (idx > 0) {
				xml.append("\n");
			}
			xml.append(content.get(idx).toXML());
		}
		return xml.append("\n\t\t").toString();
	}
}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy