// de.citec.scie.pdf.structure.Page Maven / Gradle / Ivy
/*
* SCIE -- Spinal Cord Injury Information Extraction
* Copyright (C) 2013, 2014
* Raphael Dickfelder, Jan Göpfert, Benjamin Paaßen, Andreas Stöckel
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package de.citec.scie.pdf.structure;
import java.util.ArrayList;
import java.util.Objects;
/**
 * Represents one page of a document, consisting of a (syntactically
 * meaningful) sequence of {@link TextBlock} instances (e.g. the columns of a
 * text formatted in two columns).
 *
 * <p>Equality and hash code are derived from {@link #content} only: neither
 * the page number nor any state of the superclass is taken into account.
 *
 * @author Benjamin Paassen - bpaassen(at)techfak.uni-bielefeld.de
 */
public class Page extends LineSegment {

    /**
     * Separator written between two consecutive blocks in the plain text
     * representations. Index bookkeeping uses its length, so the two cannot
     * drift apart.
     */
    private static final String BLOCK_SEPARATOR = "\n\n";

    /**
     * The text blocks of this page, in the order in which they are rendered
     * by {@link #toString()}, {@link #indexedToString(int)} and
     * {@link #toXML()}.
     */
    public final ArrayList<TextBlock> content = new ArrayList<>();

    private int pageNumber;

    /**
     * Creates an empty page without any text blocks.
     */
    public Page() {
    }

    /**
     * Get the value of pageNumber
     *
     * @return the value of pageNumber
     */
    public int getPageNumber() {
        return pageNumber;
    }

    /**
     * Set the value of pageNumber
     *
     * @param pageNumber new value of pageNumber
     */
    public void setPageNumber(int pageNumber) {
        this.pageNumber = pageNumber;
    }

    /**
     * Computes the hash code from {@link #content} only, consistent with
     * {@link #equals(Object)}.
     *
     * @return the hash code of this page.
     */
    @Override
    public int hashCode() {
        int hash = 7;
        hash = 53 * hash + Objects.hashCode(this.content);
        return hash;
    }

    /**
     * Compares this page with another object. Two pages are equal if they are
     * of exactly the same class and their {@link #content} lists are equal;
     * the page number is not compared.
     *
     * @param obj the object to compare with, may be {@code null}.
     * @return {@code true} if {@code obj} is a page with equal content.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        final Page other = (Page) obj;
        return Objects.equals(this.content, other.content);
    }

    /**
     * Converts this object to a string by going through the underlying blocks
     * in order and calling their respective toString methods. Consecutive
     * blocks are separated by an empty line.
     *
     * @return a string showing the plain text content of this Page.
     */
    @Override
    public String toString() {
        final StringBuilder output = new StringBuilder();
        String separator = "";
        for (TextBlock block : content) {
            output.append(separator).append(block.toString());
            // Only the first block is written without a leading separator.
            separator = BLOCK_SEPARATOR;
        }
        return output.toString();
    }

    /**
     * Does the same as toString but also stores the begin and end index of the
     * text representation in this object's attributes (via setBegin and
     * setEnd) and passes the running index on to every block's own
     * indexedToString method.
     *
     * @param currentIdx the current index in the plain text representation.
     * If you are calling this as a user you should insert 0 here.
     * @return the plain text representation of this Page, same as for the
     * toString method.
     */
    public String indexedToString(int currentIdx) {
        setBegin(currentIdx);
        final StringBuilder output = new StringBuilder();
        for (TextBlock block : content) {
            if (output.length() > 0 || block != content.get(0)) {
                output.append(BLOCK_SEPARATOR);
            }
            // The absolute index of the next character is the start index
            // plus everything that has been written so far.
            output.append(block.indexedToString(currentIdx + output.length()));
        }
        setEnd(currentIdx + output.length());
        return output.toString();
    }

    /**
     * Returns a XML representation of this page by going through the
     * underlying blocks in order and calling their respective toXML methods.
     * The block representations are separated by line breaks.
     *
     * @return a string containing a XML representation of this Page.
     */
    public String toXML() {
        final StringBuilder output = new StringBuilder();
        // NOTE(review): the opening and closing literals below contain only
        // whitespace; an enclosing element tag appears to be missing here.
        // Confirm against the upstream source before relying on this output.
        output.append("\t\n");
        String separator = "";
        for (TextBlock block : content) {
            output.append(separator);
            output.append(block.toXML());
            separator = "\n";
        }
        output.append("\n\t ");
        return output.toString();
    }
}