All Downloads are FREE. Search and download functionalities are using the official Maven repository.

de.citec.scie.pdf.structure.Page Maven / Gradle / Ivy

/*
 * SCIE -- Spinal Cord Injury Information Extraction
 * Copyright (C) 2013, 2014
 * Raphael Dickfelder, Jan Göpfert, Benjamin Paaßen, Andreas Stöckel
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package de.citec.scie.pdf.structure;

import java.util.ArrayList;
import java.util.Objects;

/**
 * This represents one Page of a document, consisting of a (syntactically
 * meaningful) sequence of TextBlock instances (e.g. columns in a two-column
 * formatted Text).
 *
 * @author Benjamin Paassen - bpaassen(at)techfak.uni-bielefeld.de
 */
public class Page extends LineSegment {

	/**
	 * Separator inserted between the text of two consecutive blocks in the
	 * plain text representations.
	 */
	private static final String BLOCK_SEPARATOR = "\n\n";

	/**
	 * The blocks of this page in reading order. The element type is
	 * declared explicitly because the methods below call
	 * {@code indexedToString} and {@code toXML} on the elements, which a raw
	 * list (elements typed as {@code Object}) does not allow.
	 */
	public final ArrayList<TextBlock> content = new ArrayList<>();
	private int pageNumber;

	/**
	 * Creates an empty page without any content blocks.
	 */
	public Page() {
	}

	/**
	 * Get the value of pageNumber
	 *
	 * @return the value of pageNumber
	 */
	public int getPageNumber() {
		return pageNumber;
	}

	/**
	 * Set the value of pageNumber
	 *
	 * @param pageNumber new value of pageNumber
	 */
	public void setPageNumber(int pageNumber) {
		this.pageNumber = pageNumber;
	}

	/**
	 * Computes the hash code from the content blocks only; the page number
	 * is not taken into account, consistent with {@link #equals(Object)}.
	 *
	 * @return the hash code of this page.
	 */
	@Override
	public int hashCode() {
		int hash = 7;
		hash = 53 * hash + Objects.hashCode(this.content);
		return hash;
	}

	/**
	 * Two pages are equal if they are of exactly the same class and their
	 * content lists are equal. The page number is not compared.
	 *
	 * @param obj the object to compare with.
	 * @return true if obj is a Page with equal content, false otherwise.
	 */
	@Override
	public boolean equals(Object obj) {
		if (this == obj) {
			return true;
		}
		if (obj == null || getClass() != obj.getClass()) {
			return false;
		}
		final Page other = (Page) obj;
		return Objects.equals(this.content, other.content);
	}

	/**
	 * Converts this object to a string by going through the underlying
	 * blocks and calling their respective toString methods. The block
	 * strings are separated by an empty line.
	 *
	 * @return a string showing the plain text content of this Page.
	 */
	@Override
	public String toString() {
		final StringBuilder output = new StringBuilder();
		boolean first = true;
		for (final TextBlock block : content) {
			if (!first) {
				output.append(BLOCK_SEPARATOR);
			}
			output.append(block.toString());
			first = false;
		}
		return output.toString();
	}

	/**
	 * Does the same as toString but also stores the begin and end index of
	 * this object's text representation in this object's attributes (via
	 * setBegin and setEnd) and passes the running index on to each block's
	 * indexedToString method.
	 *
	 * @param currentIdx the current index in the plain text representation.
	 * If you are calling this as a user you should insert 0 here.
	 * @return the plain text representation of this Page, same as for the
	 * toString method.
	 */
	public String indexedToString(int currentIdx) {
		setBegin(currentIdx);
		final StringBuilder output = new StringBuilder();
		boolean first = true;
		for (final TextBlock block : content) {
			if (!first) {
				// The separator is part of the plain text, so the running
				// index has to advance over it as well.
				output.append(BLOCK_SEPARATOR);
				currentIdx += BLOCK_SEPARATOR.length();
			}
			final String blockStr = block.indexedToString(currentIdx);
			output.append(blockStr);
			currentIdx += blockStr.length();
			first = false;
		}
		setEnd(currentIdx);
		return output.toString();
	}

	/**
	 * Returns a XML representation of this page by going through the
	 * underlying blocks and calling their respective toXML methods. The
	 * block representations are separated by a single line break.
	 *
	 * @return a string containing a XML representation of this Page.
	 */
	public String toXML() {
		final StringBuilder output = new StringBuilder();
		// NOTE(review): the opening and closing literals contain only
		// whitespace. If an enclosing page element is expected here, confirm
		// against the upstream source; the strings are left unchanged.
		output.append("\t\n");
		boolean first = true;
		for (final TextBlock block : content) {
			if (!first) {
				output.append("\n");
			}
			output.append(block.toXML());
			first = false;
		}
		output.append("\n\t");
		return output.toString();
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy