All downloads are free. Search and download functionality uses the official Maven repository.

org.intermine.sql.writebatch.PostgresDataOutputStream Maven / Gradle / Ivy

package org.intermine.sql.writebatch;

/*
 * Copyright (C) 2002-2022 FlyMine
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  See the LICENSE file for more
 * information or http://www.gnu.org/copyleft/lesser.html.
 *
 */

import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.Collection;
import java.util.Collections;

import org.intermine.model.StringConstructor;
import org.intermine.util.SensibleByteArrayOutputStream;

/**
 * A subclass of DataOutputStream that has extra methods useful for reducing the memory
 * footprint of the Postgres database write operations.
 *
 * <p>The {@code writeLargeUTF} methods emit a four-byte length prefix followed by the text
 * encoded as standard UTF-8. The length is computed without materialising the encoded
 * bytes first, so that the underlying buffer can be sized before anything is written.
 *
 * @author Matthew Wakeling
 */
public class PostgresDataOutputStream extends DataOutputStream
{
    /**
     * @see DataOutputStream
     *
     * @param out the OutputStream to write to
     */
    public PostgresDataOutputStream(OutputStream out) {
        super(out);
    }

    /**
     * Writes the length in bytes of the given String as a four-byte integer, followed by the
     * String itself in standard UTF-8 format.
     *
     * @param str the String
     * @return the number of bytes written to the OutputStream
     * @throws IOException if there is an error writing to the underlying OutputStream
     */
    public int writeLargeUTF(String str) throws IOException {
        return writeLargeUTF(Collections.singletonList(str));
    }

    /**
     * Writes the length in bytes of the given StringConstructor as a four-byte integer, followed
     * by its contents in standard UTF-8 format.
     *
     * @param str the StringConstructor
     * @return the number of bytes written to the OutputStream
     * @throws IOException if there is an error writing to the underlying OutputStream
     */
    public int writeLargeUTF(StringConstructor str) throws IOException {
        return writeLargeUTF(str.getStrings());
    }

    /**
     * Writes the combined length in bytes of the given Strings as a single four-byte integer,
     * followed by each String in turn in standard UTF-8 format.
     *
     * @param strs the Collection of Strings
     * @return the number of bytes written to the OutputStream (the encoded length plus four)
     * @throws IOException if there is an error writing to the underlying OutputStream, or if the
     * encoded data is too long to be described by a four-byte length
     */
    protected int writeLargeUTF(Collection<String> strs) throws IOException {
        // Accumulate in a long so that very large inputs are detected rather than silently
        // wrapping around into a bogus (possibly negative) length prefix.
        long total = 0;
        for (String str : strs) {
            total += utf8Length(str);
        }
        // The return value is the length plus the four prefix bytes, so that must fit too.
        if (total > Integer.MAX_VALUE - 4) {
            throw new IOException("Encoded string too long: " + total + " bytes");
        }
        int utflen = (int) total;

        // Grow the buffer once up front instead of repeatedly while writing.
        if (out instanceof SensibleByteArrayOutputStream) {
            SensibleByteArrayOutputStream buffer = (SensibleByteArrayOutputStream) out;
            buffer.assureSize(buffer.size() + 4 + utflen);
        }

        writeInt(utflen);

        // Standard UTF-8 is written rather than the modified UTF-8 of DataOutputStream.writeUTF,
        // because the two are incompatible.
        // Ref  https://github.com/intermine/intermine/issues/1532
        // The charset is named explicitly so the bytes written always agree with the length
        // computed above, whatever the platform default charset is.
        for (String str : strs) {
            write(str.getBytes(StandardCharsets.UTF_8));
        }
        return utflen + 4;
    }

    /**
     * Computes the number of bytes that {@code str.getBytes(StandardCharsets.UTF_8)} produces,
     * without allocating the encoded form.
     *
     * @param str the String to measure
     * @return the length in bytes of the standard UTF-8 encoding of the String
     */
    private static long utf8Length(String str) {
        int strlen = str.length();
        long len = 0;
        for (int i = 0; i < strlen; i++) {
            char c = str.charAt(i);
            if (c < 0x0080) {
                // Includes U+0000, which standard UTF-8 encodes as a single byte.
                len++;
            } else if (c < 0x0800) {
                len += 2;
            } else if (Character.isHighSurrogate(c) && (i + 1 < strlen)
                    && Character.isLowSurrogate(str.charAt(i + 1))) {
                // A valid surrogate pair is one supplementary code point: four bytes.
                len += 4;
                i++;
            } else if (Character.isSurrogate(c)) {
                // An unpaired surrogate cannot be encoded; getBytes substitutes a single '?'.
                len++;
            } else {
                len += 3;
            }
        }
        return len;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy