All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.tika.parser.dbf.DBFCell Maven / Gradle / Ivy

There is a newer version: 2024.11.18751.20241128T090041Z-241100
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.parser.dbf;

import org.apache.commons.io.IOUtils;
import org.apache.tika.io.EndianUtils;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Calendar;
import java.util.GregorianCalendar;
import java.util.Locale;
import java.util.TimeZone;

/**
 * A single, reusable cell buffer for one column of a DBF record.
 *
 * <p>The cell owns a fixed-size byte buffer (sized from the column's field length),
 * is filled via {@link #read(InputStream)}, and exposes the bytes of the most recent
 * read either raw ({@link #getBytes()}) or rendered as text ({@link #getString(Charset)}).
 */
class DBFCell {

    private final DBFColumnHeader.ColType colType;
    //fixed-size backing buffer; only the first bytesReadLast bytes are valid
    private final byte[] bytes;
    private final int decimalCount;
    //number of bytes stored in the buffer by the most recent read()
    int bytesReadLast = 0;

    /**
     * @param colType      column type that decides how the bytes are rendered
     * @param fieldLength  size of the backing buffer in bytes
     * @param decimalCount decimal count of the column (stored; only reported by toString)
     */
    DBFCell(DBFColumnHeader.ColType colType, int fieldLength, int decimalCount) {
        this.colType = colType;
        this.decimalCount = decimalCount;
        //field length is limit-checked in DBFFileHeader
        this.bytes = new byte[fieldLength];
    }

    /**
     * Renders the bytes of the last read as a string according to the column type.
     *
     * @param charset charset used to decode character (C) columns; every other
     *                textual column is decoded as US-ASCII
     * @return trimmed text for textual columns, a formatted date for D columns,
     *         a formatted timestamp for T columns; may be empty
     */
    String getString(Charset charset) {
        switch (colType) {
            case C:
                return new String(getBytes(), charset).trim();
            case D:
                return getFormattedDate();
            case T:
                return getFormattedDateTime();
            case N:
            case L:
            default:
                //TODO: find examples of other cell types for testing
                return new String(getBytes(), StandardCharsets.US_ASCII).trim();
        }
    }

    /**
     * Fills this cell's buffer from the stream and records how many bytes arrived.
     *
     * @param is stream positioned at the start of this cell's data
     * @return whether any content was read
     * @throws IOException on a read failure, or, when {@code DBFReader.STRICT} is set,
     *                     if fewer bytes than the buffer length were read
     */
    boolean read(InputStream is) throws IOException {
        bytesReadLast = IOUtils.read(is, bytes);
        if (DBFReader.STRICT && bytesReadLast != bytes.length) {
            throw new IOException("Truncated record, only read "+bytesReadLast+
                    " bytes, but should have read: "+bytes.length);
        }
        return bytesReadLast > 0;
    }

    /**
     *
     * @return copy of bytes that were read on the last read
     */
    byte[] getBytes() {
        //bytesReadLast is effectively limit checked by DBFFileHeader
        return Arrays.copyOf(bytes, bytesReadLast);
    }

    DBFColumnHeader.ColType getColType() {
        return colType;
    }

    /**
     * Debug representation. Note that this prints the whole backing buffer,
     * including any bytes beyond what the last read stored.
     */
    @Override
    public String toString() {
        return "DBFCell{" +
                "colType=" + colType +
                ", bytes=" + Arrays.toString(bytes) +
                ", decimalCount=" + decimalCount +
                '}';
    }

    /**
     * @return an independent cell with the same type, buffer size, decimal count,
     *         and a copy of the bytes stored by the last read
     */
    DBFCell deepCopy() {
        DBFCell cell = new DBFCell(colType, bytes.length, decimalCount);
        cell.bytesReadLast = this.bytesReadLast;
        System.arraycopy(this.bytes, 0, cell.bytes, 0, bytesReadLast);
        return cell;
    }

    /**
     * Reads the first eight bytes as ASCII {@code yyyyMMdd} and reorders them.
     *
     * @return {@code MM/dd/yyyy}, or the empty string if fewer than eight bytes
     *         were read or any of the three parts is not parseable as an integer
     */
    private String getFormattedDate() {
        byte[] dateBytes = getBytes();
        if (dateBytes.length < 8) {
            return "";
        }
        String year = new String(dateBytes, 0, 4, StandardCharsets.US_ASCII);
        String month = new String(dateBytes, 4, 2, StandardCharsets.US_ASCII);
        String day = new String(dateBytes, 6, 2, StandardCharsets.US_ASCII);
        //test to see that these values make any sense
        for (String s : new String[]{year, month, day}) {
            try {
                Integer.parseInt(s);
            } catch (NumberFormatException e) {
                return "";
            }
        }
        return String.format(Locale.ROOT,
                "%s/%s/%s", month, day, year);
    }

    /**
     * Reads two little-endian ints from the last-read bytes and formats the first
     * one, taken as a day offset from the base date, as a UTC timestamp.
     *
     * <p>The second int is read (so a short cell is detected) but is not applied to
     * the result; NOTE(review): it is presumably the time-of-day component -- see
     * the TODOs below before wiring it in.
     *
     * @return timestamp as {@code yyyy-MM-dd'T'HH:mm:ss'Z'} in UTC, or the empty
     *         string if the cell does not hold enough bytes
     */
    public String getFormattedDateTime() {
        //sometimes 12/31/1899 instead of 01/01/4713 BC.
        //http://stackoverflow.com/questions/20026154/convert-dbase-timestamp
        //TODO: add heuristic for deciding;
        //TODO: find example of file with time != 0
        TimeZone utc = TimeZone.getTimeZone("UTC");
        Calendar baseCalendar = GregorianCalendar.getInstance(utc, Locale.ROOT);
//        baseCalendar.set(1899, 11, 31, 0, 0, 0);
        baseCalendar.set(-4712, 0, 1, 0, 0, 0);
        //getInstance() starts at "now" and set(...) above leaves the millis untouched
        baseCalendar.set(Calendar.MILLISECOND, 0);
        try (InputStream is = new ByteArrayInputStream(getBytes())) {
            int date = EndianUtils.readIntLE(is);
            //read to verify the cell holds all eight bytes; value is not applied (see TODO)
            int time = EndianUtils.readIntLE(is);
            baseCalendar.add(Calendar.DATE, date);
            DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.ROOT);
            //the pattern appends a literal 'Z', so the formatter must render in UTC
            //rather than in the JVM's default time zone
            df.setTimeZone(utc);
            return df.format(baseCalendar.getTime());
        } catch (IOException|EndianUtils.BufferUnderrunException ignored) {
            //best effort: a short or unreadable cell yields an empty value
        }
        return "";
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy