All downloads are free. Search and download functionality uses the official Maven repository.

metridoc.plugins.impl.iterators.XlsxIterator.groovy Maven / Gradle / Ivy

There is a newer version: 0.30
Show newest version
/*
 * Copyright 2010 Trustees of the University of Pennsylvania Licensed under the
 * Educational Community License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License. You may
 * obtain a copy of the License at
 *
 * http://www.osedu.org/licenses/ECL-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS IS"
 * BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */
package metridoc.plugins.impl.iterators

import groovy.util.logging.Slf4j
import javax.xml.stream.XMLInputFactory
import javax.xml.stream.XMLStreamReader
import metridoc.plugins.Plugin
import org.apache.poi.openxml4j.opc.OPCPackage
import org.apache.poi.xssf.eventusermodel.XSSFReader
import org.apache.poi.xssf.model.SharedStringsTable
import org.apache.poi.xssf.usermodel.XSSFRichTextString

/**
 * Row iterator over one sheet of an {@code .xlsx} workbook.
 *
 * The workbook is opened through Apache POI's {@link XSSFReader} and the sheet XML is walked with a
 * StAX {@link XMLStreamReader}; each call to {@link #doNext()} consumes one {@code <row>} element.
 * The sheet is selected by {@code sheetName} when it is set, otherwise by {@code sheetIndex}
 * (both are presumably inherited from {@link BaseExcelIterator} -- they are not declared here).
 *
 * NOTE(review): nothing in this class closes the {@link OPCPackage} or the sheet reader once
 * iteration is finished; confirm whether the superclass takes care of that.
 *
 * @author tbarker
 * @since 9/16/11
 */
@Slf4j
@Plugin(category = "grid", name = "xlsx")
class XlsxIterator extends BaseExcelIterator {

    /** Lazily created reader positioned inside the selected sheet's XML; see {@link #getReader()}. */
    XMLStreamReader reader

    /** Lazily loaded shared-strings table; see {@link #getStringLookup()}. */
    SharedStringsTable stringLookup
    /** Lazily created POI reader over the workbook package; see {@link #getXssfReader()}. */
    XSSFReader xssfReader

    /**
     * @return the cached {@link XSSFReader}, opening the package from {@code inputStream} on first use
     */
    XSSFReader getXssfReader() {
        if (xssfReader) {
            return xssfReader
        }

        OPCPackage pkg = OPCPackage.open(inputStream)
        xssfReader = new XSSFReader(pkg)
        return xssfReader
    }

    /**
     * @return the cached shared-strings table, loading it from the workbook on first use
     */
    SharedStringsTable getStringLookup() {
        if (stringLookup) {
            return stringLookup
        }

        stringLookup = getXssfReader().getSharedStringsTable()
        return stringLookup
    }

    /**
     * @return the cached reader over the selected sheet, created on first use from the relationship id
     * returned by {@link #getSheetReference()}
     */
    XMLStreamReader getReader() {
        if (reader) {
            return reader
        }

        def sheetReference = getSheetReference()
        def sheet = getXssfReader().getSheet(sheetReference)
        reader = newXmlInputFactory().createXMLStreamReader(sheet)
        return reader
    }

    /**
     *
     * @return a fresh workbook reader.  Calling this multiple times will return a different, fresh one starting at the
     * top of the workbook document
     */
    XMLStreamReader getWorkbookReader() {
        def workbook = getXssfReader().getWorkbookData()
        return newXmlInputFactory().createXMLStreamReader(workbook)
    }

    /**
     * Creates a StAX factory with DTD processing and external entity resolution switched off, so a
     * crafted workbook cannot make the parser read local files or remote resources (XXE).
     *
     * @return a new, hardened {@link XMLInputFactory}
     */
    private static XMLInputFactory newXmlInputFactory() {
        XMLInputFactory factory = XMLInputFactory.newInstance()
        factory.setProperty(XMLInputFactory.SUPPORT_DTD, Boolean.FALSE)
        factory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE)
        return factory
    }

    /**
     * Closes the given reader, logging (rather than propagating) any failure.
     *
     * @param xmlReader the reader to close
     */
    private static closeXmlStreamReader(XMLStreamReader xmlReader) {
        try {
            xmlReader.close()
        } catch (Exception e) {
            log.warn("An exception occurred closing the xml reader", e)
        }
    }

    /**
     * Advances to the next {@code <row>} element and reads its cells.
     *
     * @param reader the sheet reader
     * @return the cells of the next row (possibly empty), or null when the reader has no further events
     */
    private List getNextRow(XMLStreamReader reader) {
        getToNextRow(reader)

        if (reader.hasNext()) {
            return getRow(reader)
        }

        return null
    }

    /**
     * Moves the reader forward until it sits on a {@code <row>} start element or has no more events.
     *
     * @param reader the sheet reader
     */
    private static void getToNextRow(XMLStreamReader reader) {
        while (!atRowOrEnd(reader)) {
            reader.next()
        }
    }

    /**
     * @param reader the sheet reader
     * @return true when the reader is exhausted or currently positioned on a {@code <row>} start element
     */
    private static boolean atRowOrEnd(XMLStreamReader reader) {
        boolean atEnd = !reader.hasNext()
        boolean atRow = false
        if (reader.startElement) {
            atRow = reader.localName == "row"
        }
        return atEnd || atRow
    }

    /**
     * Expands a sparse row into a dense list, inserting null for every column that has no cell.
     * Cells are consumed in list order, so they are assumed to be sorted by column.
     *
     * @param row the cells read from one row; may be null or empty
     * @return one entry per column up to the right-most cell; an empty list for a null or empty row
     */
    List convertRowToList(List row) {
        def result = []

        // An empty row used to fail on cellIterator.next(); additionally the Groovy range 0..-1 is a
        // reverse range, not an empty one, so it must never be built for a zero width.
        if (!row) {
            return result
        }

        int width = getRowWidth(row)
        def cellIterator = row.iterator()
        def currentCell = cellIterator.next()

        for (int zeroBasedColumnIndex = 0; zeroBasedColumnIndex < width; zeroBasedColumnIndex++) {
            def cellIndex = convertColumnToNumber(currentCell.reference)

            // cell column numbers are compared as one-based values
            if ((zeroBasedColumnIndex + 1) == cellIndex) {
                result.add(currentCell.formattedValue)
                if (cellIterator.hasNext()) {
                    currentCell = cellIterator.next()
                }
            } else {
                result.add(null)
            }
        }

        return result
    }

    /**
     * Maps each column name to the formatted value of the cell in the matching position, or to null
     * when the row has no cell there.
     *
     * @param row the cells read from one row, assumed to be sorted by column; may be null or empty
     * @param columns the column names, in column order
     * @return a map with one entry per column name
     */
    private static Map convertRowToHash(List row, LinkedHashSet columns) {
        def result = [:]

        def rowIterator = (row ?: []).iterator()
        int columnIndex = 0
        // an empty row has no first cell; every column then maps to null instead of failing
        def currentCell = rowIterator.hasNext() ? rowIterator.next() : null

        columns.each {
            columnIndex++
            if (currentCell != null && columnIndex == convertColumnToNumber(currentCell.reference)) {
                result.put(it, currentCell.formattedValue)
                if (rowIterator.hasNext()) {
                    currentCell = rowIterator.next()
                }
            } else {
                result.put(it, null)
            }
        }

        return result
    }

    /**
     * @param reader a reader positioned on a start element
     * @return the element's attributes keyed by local name (namespace prefixes are dropped)
     */
    private static Map getAttributeMap(XMLStreamReader reader) {
        int attributeCount = reader.attributeCount
        def result = [:]
        for (int i = 0; i < attributeCount; i++) {
            result.put(reader.getAttributeLocalName(i), reader.getAttributeValue(i))
        }

        return result
    }

    /**
     * Scans the workbook document and hands the attributes of every {@code <sheet>} element to the
     * closure until it returns a non-empty value. The reader is always closed before returning.
     *
     * @param workbookReader a fresh reader over the workbook document
     * @param closure receives the attribute map of a sheet element and returns the sheet reference
     *                when that sheet is the wanted one, or null otherwise
     * @return the first non-empty closure result, or null when no sheet matched
     */
    private static String getSheetReference(XMLStreamReader workbookReader, Closure closure) {
        try {
            while (true) {
                if (workbookReader.startElement && workbookReader.localName == "sheet") {
                    String result = closure.call(getAttributeMap(workbookReader))
                    if (result) {
                        return result
                    }
                }

                if (!workbookReader.hasNext()) {
                    return null
                }
                workbookReader.next()
            }
        } finally {
            closeXmlStreamReader(workbookReader)
        }
    }

    /**
     * @param workbookReader a fresh reader over the workbook document
     * @param name the sheet name to look for
     * @return the {@code id} attribute of the sheet with that name, or null when there is none
     */
    private static String getSheetReferenceByName(XMLStreamReader workbookReader, String name) {
        return getSheetReference(workbookReader) {Map attributeMap ->
            def sheetName = attributeMap.name
            if (sheetName == name) {
                return attributeMap.id
            }
        }
    }

    /**
     * Finds a sheet by its zero-based position among the {@code <sheet>} elements of the workbook.
     *
     * The position is counted here rather than derived from the {@code sheetId} attribute:
     * {@code sheetId} is only a unique identifier and stops matching the sheet order as soon as
     * sheets are deleted or reordered.
     *
     * @param workbookReader a fresh reader over the workbook document
     * @param index zero-based sheet position
     * @return the {@code id} attribute of the sheet at that position, or null when out of range
     */
    private static String getSheetReferenceByIndex(XMLStreamReader workbookReader, int index) {
        int position = 0
        return getSheetReference(workbookReader) {Map attributeMap ->
            boolean wanted = (position == index)
            position++
            return wanted ? attributeMap.id : null
        }
    }

    /**
     * Stores the formatted value of a cell (or null for a missing cell) under the column name found
     * at the given position of {@code columns}.
     *
     * @param cell the cell, may be null
     * @param columnIndex zero-based position within {@code columns}
     * @param result the map to add the entry to
     */
    private void addCellToHash(XlsxCell cell, int columnIndex, Map result) {

        def value = null

        if (cell != null) {
            value = cell.formattedValue
        }

        def columnsAsList = columns as List
        result.put(columnsAsList[columnIndex], value)
    }

    /**
     * @param row the cells of one row
     * @return the largest column number among the cells, or 0 for an empty row
     */
    private static int getRowWidth(List row) {
        int result = 0

        row.each {XlsxCell cell ->
            int columnNumber = convertColumnToNumber(cell.reference)
            if (columnNumber > result) {
                result = columnNumber
            }
        }

        return result
    }

    /**
     * Reads the cells of the row the reader is positioned on and leaves the reader just past the
     * row's end element. Only cells that carry a {@code <v>} element are returned.
     *
     * NOTE(review): XlsxCell stores its value as a double, so a {@code <v>} holding text cannot be
     * represented and is rejected.
     *
     * @param reader the sheet reader, positioned on a {@code <row>} start element
     * @return the value-bearing cells of the row, in document order
     * @throws NumberFormatException when a {@code <v>} element does not contain a number
     */
    private List getRow(XMLStreamReader reader) {
        boolean gettingCells = true
        def result = []

        def cell
        while (gettingCells) {
            if (reader.startElement) {
                def name = reader.localName

                switch (name) {
                    case "c":
                        cell = new XlsxCell(stringLookup: getStringLookup())
                        def attributes = getAttributeMap(reader)
                        cell.reference = attributes.r
                        cell.type = attributes.t
                        break
                    case "v":
                        // getElementText leaves the reader on the </v> end element
                        String text = reader.getElementText()
                        try {
                            cell.value = Double.valueOf(text)
                        } catch (NumberFormatException e) {
                            // same exception type as before, but say which cell could not be read
                            def failure = new NumberFormatException(
                                    "Cell ${cell.reference} of type '${cell.type}' holds the non-numeric value '${text}'".toString())
                            failure.initCause(e)
                            throw failure
                        }
                        result.add(cell)
                        break
                }
            }

            if (reader.endElement) {
                def name = reader.localName
                if (name == "row") {
                    gettingCells = false
                }
            }
            reader.next()
        }

        return result
    }

    /**
     * @return the reference of the sheet to read: looked up by {@code sheetName} when it is set,
     * otherwise by {@code sheetIndex}; null when no sheet matches
     */
    String getSheetReference() {
        if (sheetName) {
            return getSheetReferenceByName(getWorkbookReader(), sheetName)
        }

        return getSheetReferenceByIndex(getWorkbookReader(), sheetIndex)
    }

    /**
     * Reads the next row of the sheet.
     *
     * NOTE(review): a row without any value-bearing cell is an empty list, which Groovy treats as
     * false, so null is returned for it too -- confirm how the superclass interprets null.
     *
     * @return the row as a dense list of formatted values, or null
     */
    @Override
    List doNext() {
        def row = getNextRow(getReader())

        row ? convertRowToList(row) : null
    }

    /**
     * Derives column names from a row: the formatted cell value (as a string) where a cell exists,
     * otherwise the one-based column number as a string.
     *
     * @param row the cells of the row to take names from; may be null or empty
     * @return the column names in column order; empty for a null or empty row
     */
    private static LinkedHashSet getColumnsFromRowValues(List row) {
        LinkedHashSet result = new LinkedHashSet()

        // guard first: the Groovy range 1..0 is a reverse range and would invent columns "1" and "0"
        if (!row) {
            return result
        }

        int width = getRowWidth(row)
        Map rowMap = [:]

        row.each {cell ->
            rowMap.put(cell.columnIndex, cell)
        }

        for (int column = 1; column <= width; column++) {
            if (rowMap.containsKey(column)) {
                result.add(String.valueOf(rowMap.get(column).formattedValue))
            } else {
                result.add(String.valueOf(column))
            }
        }

        return result
    }

    /**
     * Creates a new iterator over the given stream, carrying over this instance's {@code parameters}
     * (when present) as constructor properties.
     *
     * @param inputStream the workbook content
     * @return a new {@link XlsxIterator}
     */
    @Override
    Iterator doCreate(InputStream inputStream) {
        def args = [inputStream: inputStream]

        if (parameters) {
            args.putAll(parameters)
        }

        return new XlsxIterator(args)
    }
}

/**
 * A single spreadsheet cell as read from the sheet XML: its reference (the {@code r} attribute),
 * its type (the {@code t} attribute) and the numeric content of its {@code <v>} element.
 */
class XlsxCell {
    /** Cell reference as found in the sheet XML. */
    String reference
    /** Cell type attribute; "s" marks a shared-string cell. */
    String type
    /** Numeric cell content; for shared-string cells this is the index into the string table. */
    double value
    /** Table used to resolve shared-string cells. */
    SharedStringsTable stringLookup

    /**
     * @return the text of the referenced shared string when the type is "s", otherwise the numeric value
     */
    def getFormattedValue() {
        if (type != "s") {
            return value
        }

        // the stored double is really a table index for shared strings
        int sharedStringIndex = (int) value
        def sharedString = stringLookup.getEntryAt(sharedStringIndex)
        return new XSSFRichTextString(sharedString).toString()
    }

    /**
     * @return the column number for this cell's reference, as computed by
     * {@code XlsxIterator.convertColumnToNumber}
     */
    int getColumnIndex() {
        return XlsxIterator.convertColumnToNumber(reference)
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy