
net.sf.mmm.content.parser.impl.poi.ContentParserXls Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of mmm-content-parser-impl-poi Show documentation
Show all versions of mmm-content-parser-impl-poi Show documentation
Content parsers for MS Office documents using Apache POI.
The newest version!
/* $Id: ContentParserXls.java 859 2010-11-18 00:21:03Z hohwille $
* Copyright (c) The m-m-m Team, Licensed under the Apache License, Version 2.0
* http://www.apache.org/licenses/LICENSE-2.0 */
package net.sf.mmm.content.parser.impl.poi;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import javax.inject.Named;
import javax.inject.Singleton;
import net.sf.mmm.content.parser.api.ContentParserOptions;
import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
import org.apache.poi.hssf.eventusermodel.HSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFRequest;
import org.apache.poi.hssf.record.BOFRecord;
import org.apache.poi.hssf.record.BoundSheetRecord;
import org.apache.poi.hssf.record.DrawingRecord;
import org.apache.poi.hssf.record.LabelSSTRecord;
import org.apache.poi.hssf.record.NumberRecord;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.record.SSTRecord;
import org.apache.poi.hssf.record.TextObjectRecord;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
/**
 * This is the implementation of the
 * {@link net.sf.mmm.content.parser.api.ContentParser} interface for binary
 * MS-Excel documents.
 * <p>
 * The workbook stream is processed with the event based HSSF API. The text of
 * the cells is collected by an {@link ExcelListener} and returned as a single
 * string.
 *
 * @author Joerg Hohwiller (hohwille at users.sourceforge.net)
 */
@Singleton
@Named
public class ContentParserXls extends AbstractContentParserPoi {

  /** The mimetype. */
  public static final String KEY_MIMETYPE = "application/vnd.ms-excel";

  /** The default extension. */
  public static final String KEY_EXTENSION = "xls";

  /**
   * The constructor.
   */
  public ContentParserXls() {

    super();
  }

  /**
   * {@inheritDoc}
   *
   * @return {@link #KEY_EXTENSION}.
   */
  public String getExtension() {

    return KEY_EXTENSION;
  }

  /**
   * {@inheritDoc}
   *
   * @return {@link #KEY_MIMETYPE}.
   */
  public String getMimetype() {

    return KEY_MIMETYPE;
  }

  /**
   * {@inheritDoc}
   *
   * @return the additional keys of this parser: the extension <code>xlt</code>
   *         and two alternative mimetypes.
   */
  @Override
  public String[] getAlternativeKeyArray() {

    return new String[] { "xlt", "application/excel", "application/msexcel" };
  }

  /**
   * {@inheritDoc}
   * <p>
   * Opens the workbook stream of the given filesystem, feeds all of its records
   * to an {@link ExcelListener} and returns the text the listener collected.
   * The stream is closed in any case.
   *
   * @param poiFs is the POI filesystem of the document to parse.
   * @param filesize is the size of the document (not used by this
   *        implementation).
   * @param options supply the maximum buffer size. Half of that value is used
   *        as limit for the number of characters to collect (presumably because
   *        a character occupies two bytes).
   * @return the extracted text.
   * @throws Exception if the workbook stream could not be opened or processed.
   */
  @Override
  protected String extractText(POIFSFileSystem poiFs, long filesize, ContentParserOptions options)
      throws Exception {

    int maxBufferSize = options.getMaximumBufferSize();
    int maxCharSize = maxBufferSize / 2;
    InputStream documentInputStream = poiFs.createDocumentInputStream(POIFS_EXCEL_DOC);
    // There seems to be no smart guess for the initial capacity of textBuffer:
    // the text length can have any ratio to documentInputStream.available().
    // The only possibility would be to create the string buffer in the listener
    // from the size of the SSTRecord. In this case stable code is better than
    // saving a tiny percent of performance...
    StringBuffer textBuffer = new StringBuffer(1024);
    try {
      HSSFRequest req = new HSSFRequest();
      req.addListenerForAllRecords(new ExcelListener(textBuffer, maxCharSize));
      HSSFEventFactory factory = new HSSFEventFactory();
      factory.processEvents(req, documentInputStream);
    } finally {
      documentInputStream.close();
    }
    return textBuffer.toString();
  }

  /**
   * This inner class acts as listener for HSSF events and appends the received
   * text to a string-buffer.
   * <p>
   * Every worksheet with a known name is introduced by a header line of the
   * form <code>== name ==</code>. The values of the cells are separated by a
   * single space and a line break is written whenever the row of a cell
   * differs from the row of the previous cell.
   */
  protected static class ExcelListener implements HSSFListener {

    /** the buffer where to append the text */
    private final StringBuffer buffer;

    /** the maximum capacity */
    private final int bufferLimit;

    /** list with the sheet names */
    private final List<String> sheetNames;

    /** current SST record (table with unique strings) */
    private SSTRecord sstrec;

    /** current row */
    private int row;

    /** current sheet */
    private int sheet;

    /**
     * The constructor.
     *
     * @param textBuffer is the buffer where to append the text to.
     * @param maximumBufferSize is the maximum allowed size of the
     *        <code>textBuffer</code>. The limit is checked before a record is
     *        processed, so the text of a single record may exceed it.
     */
    public ExcelListener(StringBuffer textBuffer, int maximumBufferSize) {

      super();
      this.buffer = textBuffer;
      this.bufferLimit = maximumBufferSize;
      this.sheetNames = new ArrayList<String>();
      this.sstrec = null;
      this.row = 0;
      this.sheet = 0;
    }

    /**
     * This method appends the given <code>text</code> followed by a separating
     * space to the buffer.
     *
     * @param text is the text to append.
     */
    private void append(String text) {

      this.buffer.append(text);
      this.buffer.append(' ');
    }

    /**
     * This method starts a new line in the buffer if the given
     * <code>cellRow</code> differs from the current row and remembers it as the
     * current row.
     *
     * @param cellRow is the row of the cell that is about to be appended.
     */
    private void startCell(int cellRow) {

      if (this.row != cellRow) {
        this.buffer.append('\n');
        this.row = cellRow;
      }
    }

    /**
     * {@inheritDoc}
     * <p>
     * Records are ignored as soon as the buffer has reached its limit.
     *
     * @param record is the record to process.
     */
    public void processRecord(Record record) {

      if (this.buffer.length() >= this.bufferLimit) {
        return;
      }
      switch (record.getSid()) {
        case BOFRecord.sid: {
          BOFRecord bof = (BOFRecord) record;
          if (bof.getType() == BOFRecord.TYPE_WORKSHEET) {
            // the header is only written if a name was collected for this
            // sheet index
            if (this.sheet < this.sheetNames.size()) {
              if (this.sheet > 0) {
                this.buffer.append("\n\n");
              }
              this.buffer.append("== ");
              this.buffer.append(this.sheetNames.get(this.sheet));
              this.buffer.append(" ==\n");
            }
            this.sheet++;
            this.row = 0;
          }
          break;
        }
        case BoundSheetRecord.sid: {
          BoundSheetRecord bsr = (BoundSheetRecord) record;
          this.sheetNames.add(bsr.getSheetname());
          break;
        }
        case NumberRecord.sid: {
          NumberRecord numrec = (NumberRecord) record;
          // numeric cells take part in the row tracking just like string
          // cells, otherwise a row starting with a number would be appended to
          // the line of the previous row
          startCell(numrec.getRow());
          append(Double.toString(numrec.getValue()));
          break;
        }
        case SSTRecord.sid:
          this.sstrec = (SSTRecord) record;
          break;
        case LabelSSTRecord.sid:
          // the cell only holds an index into the string table, so it can not
          // be resolved before that table has been received
          if (this.sstrec != null) {
            LabelSSTRecord lrec = (LabelSSTRecord) record;
            startCell(lrec.getRow());
            append(this.sstrec.getString(lrec.getSSTIndex()).getString());
          }
          break;
        case DrawingRecord.sid:
          // nothing to extract
          break;
        case TextObjectRecord.sid: {
          TextObjectRecord txo = (TextObjectRecord) record;
          append(txo.getStr().getString());
          break;
        }
        default :
          break;
      }
    }
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy