org.eadge.extractpdfexcel.data.ExtractedData Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of pdf-to-excel Show documentation
Show all versions of pdf-to-excel Show documentation
A Java library used to convert PDF to Excel.
The newest version!
package org.eadge.extractpdfexcel.data;
import org.eadge.extractpdfexcel.data.block.Block;
import org.eadge.extractpdfexcel.exception.NoPageAtIndexException;
import java.util.*;
/**
* Created by eadgyo on 12/07/16.
*
* Holds extracted data separated in pages.
*/
public class ExtractedData
{
/**
* ExtractedPage link to his pageIndex
*/
private Map extractedPages;
/**
* Name of the file.
*/
private String fileName = "";
public ExtractedData()
{
this.extractedPages = new HashMap();
}
public void setFileName(String fileName)
{
this.fileName = fileName;
}
public String getFileName()
{
return fileName;
}
/**
* Insert an extractedPage
*
* @param pageIndex index of concerned page
* @param extractedPage inserted extractedPage
*/
public void insertPage(int pageIndex, ExtractedPage extractedPage)
{
this.extractedPages.put(pageIndex, extractedPage);
}
/**
* Get all blocks in all page
*
* @return all blocks separated in page
*/
public Map getPages()
{
return extractedPages;
}
/**
* Get blocks in on page
*
* @param pageIndex index of concerned page
*
* @return all block in defined page, or null if page does not exist
*/
public Collection getBlocksInPage(int pageIndex)
{
return extractedPages.get(pageIndex).getBlocks();
}
/**
* Get page containing blocks, and her lengths, throw NoPageAtIndexException if page does not exist
*
* @param pageIndex index of concerned page
*
* @return page or null if it does not exist
*/
private ExtractedPage getPageWithException(int pageIndex)
{
ExtractedPage extractedPage = getExtractedPage(pageIndex);
try
{
if (extractedPage != null)
{
return extractedPage;
}
else
{
throw new NoPageAtIndexException(pageIndex);
}
}
catch (NoPageAtIndexException e)
{
e.printStackTrace();
}
return null;
}
/**
* Get extracted page using his page index
*
* @param pageIndex index of page
* @return page with extracted data
*/
public ExtractedPage getExtractedPage(int pageIndex)
{
return extractedPages.get(pageIndex);
}
public int numberOfPages()
{
return extractedPages.size();
}
public int numberOfBlocksInPage(int pageIndex)
{
ExtractedPage extractedPage = getPageWithException(pageIndex);
return (extractedPage != null) ? extractedPage.numberOfBlocks() : 0;
}
public float getPageWidth(int pageIndex)
{
ExtractedPage extractedPage = getPageWithException(pageIndex);
return (extractedPage != null) ? extractedPage.getWidth() : 0;
}
public float getPageHeight(int pageIndex)
{
ExtractedPage extractedPage = getPageWithException(pageIndex);
return (extractedPage != null) ? extractedPage.getHeight() : 0;
}
public Collection getPagesCollection()
{
return extractedPages.values();
}
public void cleanDuplicatedData()
{
for (ExtractedPage extractedPage : extractedPages.values())
{
extractedPage.cleanDuplicatedBlocks();
}
}
public void mergeBlocks(double mergeFactor)
{
for (ExtractedPage extractedPage : extractedPages.values())
{
extractedPage.mergeNearBlocks(mergeFactor);
}
}
}