// de.citec.scie.pdf.structure.TextBlock (Maven / Gradle / Ivy)
/*
* SCIE -- Spinal Cord Injury Information Extraction
* Copyright (C) 2013, 2014
* Raphael Dickfelder, Jan Göpfert, Benjamin Paaßen, Andreas Stöckel
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package de.citec.scie.pdf.structure;
import java.util.ArrayList;
import java.util.Objects;
/**
* This represents a syntactic block of Text, which can be a column on a page, a
* header or something similar. Formally it is defined as a sequence of
* paragraphs, where the number of paragraphs might very well be only one.
*
* @author Benjamin Paassen - bpaassen(at)techfak.uni-bielefeld.de
*/
public class TextBlock extends LineSegment {

    // NOTE(review): the list is declared without a type argument, yet
    // indexedToString(int) and toXML() are invoked on its elements further
    // down. The element type (presumably the paragraph class) appears to have
    // been lost from this declaration -- confirm against the upstream sources.
    /**
     * The actual content of this TextBlock.
     */
    public final ArrayList content = new ArrayList<>();

    /**
     * Font size of this TextBlock's content relative to the page-wide
     * average.
     */
    private double relativeFontSize;

    /**
     * Creates a TextBlock with empty content.
     */
    public TextBlock() {
    }

    /**
     * Returns the font size of this TextBlock's content relative to the
     * page-wide average.
     *
     * @return the relative font size stored in this TextBlock.
     */
    public double getRelativeFontSize() {
        return relativeFontSize;
    }

    /**
     * Stores the font size of this TextBlock's content relative to the
     * page-wide average.
     *
     * @param relativeFontSize the relative font size to store.
     */
    public void setRelativeFontSize(double relativeFontSize) {
        this.relativeFontSize = relativeFontSize;
    }

    /**
     * Computes the hash code from the content list only; the relative font
     * size is not taken into account.
     *
     * @return a hash code consistent with {@link #equals(Object)}.
     */
    @Override
    public int hashCode() {
        return 67 * 5 + Objects.hashCode(content);
    }

    /**
     * Two TextBlocks are equal if they are of exactly the same class and
     * their content lists are equal; the relative font size is not compared.
     *
     * @param obj the object to compare with, may be null.
     * @return true if obj is a TextBlock of the same class with equal content.
     */
    @Override
    public boolean equals(Object obj) {
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        return Objects.equals(content, ((TextBlock) obj).content);
    }

    /**
     * Renders this block as plain text: every content element is converted
     * via its toString method and prefixed with a tab, and consecutive
     * elements are separated by an empty line.
     *
     * @return a string showing the plain text content of this TextBlock.
     */
    @Override
    public String toString() {
        final StringBuilder text = new StringBuilder();
        for (int idx = 0; idx < content.size(); idx++) {
            if (idx > 0) {
                // blank line between two consecutive elements
                text.append("\n\n");
            }
            text.append('\t').append(content.get(idx).toString());
        }
        return text.toString();
    }

    /**
     * Produces the same text as toString, but additionally records where this
     * block starts and ends within the overall plain text via setBegin and
     * setEnd, and hands every content element the index at which its own
     * text starts.
     *
     * @param currentIdx the index in the overall plain text representation at
     * which this block starts. If you are calling this as a user you should
     * insert 0 here.
     * @return the plain text representation of this TextBlock, same as for
     * the toString method.
     */
    public String indexedToString(int currentIdx) {
        setBegin(currentIdx);
        final StringBuilder text = new StringBuilder();
        // position in the overall text that the next appended character gets
        int cursor = currentIdx;
        for (int idx = 0; idx < content.size(); idx++) {
            if (idx > 0) {
                text.append("\n\n");
                cursor += 2;
            }
            text.append('\t');
            cursor += 1;
            // the element's text starts right behind the tab
            final String rendered = content.get(idx).indexedToString(cursor);
            text.append(rendered);
            cursor += rendered.length();
        }
        setEnd(cursor);
        return text.toString();
    }

    /**
     * Concatenates the toXML output of all content elements, separated by
     * line breaks, between a leading and a trailing wrapper string.
     *
     * NOTE(review): both wrapper strings consist of whitespace only, although
     * earlier documentation of this method stated that the relative font size
     * is added as an XML attribute. The enclosing element markup looks like it
     * went missing from the literals -- confirm against the upstream sources.
     *
     * @return a string containing the XML representation of this block.
     */
    public String toXML() {
        final StringBuilder xml = new StringBuilder();
        xml.append("\t\t\n");
        for (int idx = 0; idx < content.size(); idx++) {
            if (idx > 0) {
                xml.append("\n");
            }
            xml.append(content.get(idx).toXML());
        }
        xml.append("\n\t\t ");
        return xml.toString();
    }
}