// de.citec.scie.pdf.TextBlockRankEstimator (Maven / Gradle / Ivy)
/*
* SCIE -- Spinal Cord Injury Information Extraction
* Copyright (C) 2013, 2014
* Raphael Dickfelder, Jan Göpfert, Benjamin Paaßen, Andreas Stöckel
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package de.citec.scie.pdf;
import de.citec.scie.pdf.structure.TextBlock;
import java.util.ArrayList;
/**
* This estimator has the purpose to determine if a TextBlock has a larger usual
* Font Size as the usual Font Size for the whole page, an equal or a smaller
* one. A larger one is interpreted as a header, a smaller one as a footnote, an
* equal one as body text.
*
* @author Benjamin Paassen - bpaassen(at)techfak.uni-bielefeld.de
*/
public class TextBlockRankEstimator {
private final Histogramm pageFontSizeHisto = new Histogramm<>();
private final ArrayList blocks = new ArrayList<>();
private final ArrayList> blockFontSizeHistos = new ArrayList<>();
public TextBlockRankEstimator() {
}
public void addBlock(final TextBlock block, final PreTextBlock preTextBlock) {
blocks.add(block);
final Histogramm blockFontSizeHisto = new Histogramm<>();
for (final PreTextLine line : preTextBlock.lines) {
blockFontSizeHisto.addAll(line.fontSizeHisto);
}
blockFontSizeHistos.add(blockFontSizeHisto);
pageFontSizeHisto.addAll(blockFontSizeHisto);
}
/**
* Returns the relativ font size of this block in relation to the whole
* page.
*
* @param block a textBlock
* @return the relativ font size of this block in relation to the whole
* page.
*/
public double getRelativeFontSize(final TextBlock block) {
final int blockIdx = blocks.indexOf(block);
if (blockIdx == -1) {
return -1;
}
final Histogramm blockFontSizeHisto = blockFontSizeHistos.get(blockIdx);
final double blockFontSize = blockFontSizeHisto.getMaxElement();
final double pageFontSize = pageFontSizeHisto.getMaxElement();
return blockFontSize / pageFontSize;
}
}