org.apache.commons.compress.harmony.unpack200.Segment Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of commons-compress Show documentation
Apache Commons Compress defines an API for working with compression and archive formats. These include bzip2, gzip, pack200, LZMA, XZ, Snappy, traditional Unix Compress, DEFLATE, DEFLATE64, LZ4, Brotli, Zstandard and ar, cpio, jar, tar, zip, dump, 7z, arj.
There is a newer version: 1.27.1
Show newest version
/*
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.apache.commons.compress.harmony.unpack200;

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.TimeZone;
import java.util.jar.JarEntry;
import java.util.jar.JarOutputStream;
import java.util.zip.CRC32;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipEntry;

import org.apache.commons.compress.harmony.pack200.Codec;
import org.apache.commons.compress.harmony.pack200.Pack200Exception;
import org.apache.commons.compress.harmony.unpack200.bytecode.Attribute;
import org.apache.commons.compress.harmony.unpack200.bytecode.CPClass;
import org.apache.commons.compress.harmony.unpack200.bytecode.CPField;
import org.apache.commons.compress.harmony.unpack200.bytecode.CPMethod;
import org.apache.commons.compress.harmony.unpack200.bytecode.CPUTF8;
import org.apache.commons.compress.harmony.unpack200.bytecode.ClassConstantPool;
import org.apache.commons.compress.harmony.unpack200.bytecode.ClassFile;
import org.apache.commons.compress.harmony.unpack200.bytecode.ClassFileEntry;
import org.apache.commons.compress.harmony.unpack200.bytecode.InnerClassesAttribute;
import org.apache.commons.compress.harmony.unpack200.bytecode.SourceFileAttribute;
import org.apache.commons.io.input.BoundedInputStream;

/**
 * A Pack200 archive consists of one or more segments. Each segment is stand-alone, in the sense that every segment has the magic number header; thus, every
 * segment is also a valid archive. However, it is possible to combine (non-GZipped) archives into a single large archive by concatenation alone. Thus, all the
 * hard work in unpacking an archive falls to understanding a segment.
 *
 * The first component of a segment is the header; this contains (amongst other things) the expected counts of constant pool entries, which in turn defines how
 * many values need to be read from the stream. Because values are variable width (see {@link Codec}), it is not possible to calculate the start of the next
 * segment, although one of the header values does hint at the size of the segment if non-zero, which can be used for buffering purposes.
 *
 * Note that this does not perform any buffering of the input stream; each value will be read on a byte-by-byte basis. It does not perform GZip decompression
 * automatically; both of these are expected to be done by the caller if the stream has the magic header for GZip streams ({@link GZIPInputStream#GZIP_MAGIC}).
 * In any case, if GZip decompression is being performed the input stream will be buffered at a higher level, and thus this can read on a byte-oriented basis.
 */
public class Segment {

    /** Log level that lets every message through, including the ones logged at verbose level. */
    public static final int LOG_LEVEL_VERBOSE = 2;

    /** Intermediate log level between {@link #LOG_LEVEL_QUIET} and {@link #LOG_LEVEL_VERBOSE}. */
    public static final int LOG_LEVEL_STANDARD = 1;

    /** Lowest log level; its value (0) is also the value of {@code logLevel} when {@link #setLogLevel(int)} has not been called. */
    public static final int LOG_LEVEL_QUIET = 0;

    /** Segment header; created and read at the start of {@code unpackRead}. */
    private SegmentHeader header;

    /** Constant pool bands; created in {@code readSegment}. */
    private CpBands cpBands;

    /** Attribute definition bands; created in {@code readSegment}. */
    private AttrDefinitionBands attrDefinitionBands;

    /** Inner class bands; created in {@code readSegment}. */
    private IcBands icBands;

    /** Class bands; created in {@code readSegment}. */
    private ClassBands classBands;

    /** Bytecode bands; created in {@code readSegment}. */
    private BcBands bcBands;

    /** File bands; created in {@code readSegment}. */
    private FileBands fileBands;

    /** Set to {@code true} by {@link #overrideDeflateHint(boolean)}; when set, {@code deflateHint} is applied to every file. */
    private boolean overrideDeflateHint;

    /** The deflate setting used for every file when {@code overrideDeflateHint} is {@code true}. */
    private boolean deflateHint;

    /** When {@code true} and the header reports a non-zero archive size, {@code unpackRead} buffers the rest of the segment in memory. */
    private boolean doPreRead;

    /** Threshold compared against the level passed to {@link #log(int, String)}. */
    private int logLevel;

    /** Destination of log messages; {@code null} until {@link #setLogStream(OutputStream)} is called. */
    private PrintWriter logStream;

    /** Serialized class files, indexed by class number; filled by {@code parseSegment}. */
    private byte[][] classFilesContents;

    /** Per-file flag (indexed by file number) telling {@code writeJar} whether the entry is deflated or stored. */
    private boolean[] fileDeflate;

    /** Per-file flag (indexed by file number) telling whether the file is a class file built from the bands. */
    private boolean[] fileIsClass;

    /** In-memory copy of the segment body made by {@code unpackRead} when pre-reading; consumed by {@code unpackProcess}. */
    private InputStream internalBuffer;

    /**
     * Builds the {@link ClassFile} for one class of this segment from the unpacked bands.
     * <p>
     * The class version, attributes, this/super class, interfaces, fields and methods are added to the class file's constant pool, an {@code InnerClasses}
     * attribute is appended when one is required, and the pool is then resolved so that the final constant pool indices can be stored in the class file.
     * </p>
     *
     * @param classNum index of the class within this segment's class bands
     * @return the populated class file
     */
    private ClassFile buildClassFile(final int classNum) {
        final ClassFile classFile = new ClassFile();
        final int[] major = classBands.getClassVersionMajor();
        final int[] minor = classBands.getClassVersionMinor();
        if (major != null) {
            classFile.major = major[classNum];
            classFile.minor = minor[classNum];
        } else {
            // No per-class versions were transmitted: fall back to the segment defaults.
            classFile.major = header.getDefaultClassMajorVersion();
            classFile.minor = header.getDefaultClassMinorVersion();
        }
        // build constant pool
        final ClassConstantPool cp = classFile.pool;
        final int fullNameIndexInCpClass = classBands.getClassThisInts()[classNum];
        final String fullName = cpBands.getCpClass()[fullNameIndexInCpClass];

        // Look for a SourceFile attribute among the class attributes; if several are present the last one wins.
        final List<Attribute> classAttributes = classBands.getClassAttributes()[classNum];
        SourceFileAttribute sourceFileAttribute = null;
        for (final Attribute classAttribute : classAttributes) {
            if (classAttribute.isSourceFileAttribute()) {
                sourceFileAttribute = (SourceFileAttribute) classAttribute;
            }
        }
        if (sourceFileAttribute == null) {
            // If we don't have a source file attribute yet, we need to infer it from the class name,
            // but only when the class flags say that a SourceFile attribute is present.
            final AttributeLayout sourceFileLayout = attrDefinitionBands.getAttributeDefinitionMap().getAttributeLayout(AttributeLayout.ATTRIBUTE_SOURCE_FILE,
                    AttributeLayout.CONTEXT_CLASS);
            if (sourceFileLayout.matches(classBands.getRawClassFlags()[classNum])) {
                sourceFileAttribute = new SourceFileAttribute(cpBands.cpUTF8Value(inferSourceFileName(fullName), false));
            }
        }

        // Collect the attributes to be written: the SourceFile attribute first (if any), followed by
        // every other class attribute in its original order. Each one is registered with the pool.
        final List<Attribute> attributes = new ArrayList<>(classAttributes.size() + 2);
        if (sourceFileAttribute != null) {
            attributes.add((Attribute) cp.add(sourceFileAttribute));
        }
        for (final Attribute attribute : classAttributes) {
            if (!attribute.isSourceFileAttribute()) {
                cp.add(attribute);
                attributes.add(attribute);
            }
        }

        // this/superclass
        final ClassFileEntry cfThis = cp.add(cpBands.cpClassValue(fullNameIndexInCpClass));
        final ClassFileEntry cfSuper = cp.add(cpBands.cpClassValue(classBands.getClassSuperInts()[classNum]));
        // add interfaces
        final int[] interfaceInts = classBands.getClassInterfacesInts()[classNum];
        final ClassFileEntry[] cfInterfaces = new ClassFileEntry[interfaceInts.length];
        for (int index = 0; index < cfInterfaces.length; index++) {
            cfInterfaces[index] = cp.add(cpBands.cpClassValue(interfaceInts[index]));
        }
        // add fields; fieldDescr and fieldFlags are used to create them
        final ClassFileEntry[] cfFields = new ClassFileEntry[classBands.getClassFieldCount()[classNum]];
        for (int index = 0; index < cfFields.length; index++) {
            final int descriptorIndex = classBands.getFieldDescrInts()[classNum][index];
            final int nameIndex = cpBands.getCpDescriptorNameInts()[descriptorIndex];
            final int typeIndex = cpBands.getCpDescriptorTypeInts()[descriptorIndex];
            final CPUTF8 name = cpBands.cpUTF8Value(nameIndex);
            final CPUTF8 descriptor = cpBands.cpSignatureValue(typeIndex);
            cfFields[index] = cp
                    .add(new CPField(name, descriptor, classBands.getFieldFlags()[classNum][index], classBands.getFieldAttributes()[classNum][index]));
        }
        // add methods; methodDescr and methodFlags are used to create them
        final ClassFileEntry[] cfMethods = new ClassFileEntry[classBands.getClassMethodCount()[classNum]];
        for (int index = 0; index < cfMethods.length; index++) {
            final int descriptorIndex = classBands.getMethodDescrInts()[classNum][index];
            final int nameIndex = cpBands.getCpDescriptorNameInts()[descriptorIndex];
            final int typeIndex = cpBands.getCpDescriptorTypeInts()[descriptorIndex];
            final CPUTF8 name = cpBands.cpUTF8Value(nameIndex);
            final CPUTF8 descriptor = cpBands.cpSignatureValue(typeIndex);
            cfMethods[index] = cp
                    .add(new CPMethod(name, descriptor, classBands.getMethodFlags()[classNum][index], classBands.getMethodAttributes()[classNum][index]));
        }
        cp.addNestedEntries();

        // add inner class attribute (if required)
        boolean addInnerClassesAttr = false;
        final IcTuple[] icLocal = getClassBands().getIcLocal()[classNum];
        final boolean icLocalSent = icLocal != null;
        final InnerClassesAttribute innerClassesAttribute = new InnerClassesAttribute("InnerClasses");
        final IcTuple[] icRelevant = getIcBands().getRelevantIcTuples(fullName, cp);
        final List<IcTuple> icStoredTuples = computeIcStored(icLocal, icRelevant);
        for (final IcTuple icStored : icStoredTuples) {
            final int innerClassIndex = icStored.thisClassIndex();
            final int outerClassIndex = icStored.outerClassIndex();
            final int simpleClassNameIndex = icStored.simpleClassNameIndex();

            final String innerClassString = icStored.thisClassString();
            final String outerClassString = icStored.outerClassString();
            final String simpleClassName = icStored.simpleClassName();

            CPUTF8 innerName = null;
            CPClass outerClass = null;

            // An index of -1 means the tuple carries no constant pool index, so the string form is used instead.
            final CPClass innerClass = innerClassIndex != -1 ? cpBands.cpClassValue(innerClassIndex) : cpBands.cpClassValue(innerClassString);
            if (!icStored.isAnonymous()) {
                innerName = simpleClassNameIndex != -1 ? cpBands.cpUTF8Value(simpleClassNameIndex) : cpBands.cpUTF8Value(simpleClassName);
            }

            if (icStored.isMember()) {
                outerClass = outerClassIndex != -1 ? cpBands.cpClassValue(outerClassIndex) : cpBands.cpClassValue(outerClassString);
            }
            final int flags = icStored.F;
            innerClassesAttribute.addInnerClassesEntry(innerClass, outerClass, innerName, flags);
            addInnerClassesAttr = true;
        }
        // If ic_local is sent, and it's empty, don't add
        // the inner classes attribute.
        if (icLocalSent && icLocal.length == 0) {
            addInnerClassesAttr = false;
        }

        // If ic_local is not sent and ic_relevant is empty,
        // don't add the inner class attribute.
        if (!icLocalSent && icRelevant.length == 0) {
            addInnerClassesAttr = false;
        }

        if (addInnerClassesAttr) {
            // The InnerClasses attribute goes after the existing class attributes.
            attributes.add(innerClassesAttribute);
            cp.addWithNestedEntries(innerClassesAttribute);
        }
        classFile.attributes = attributes.toArray(new Attribute[0]);

        // sort CP according to cp_All
        cp.resolve(this);
        // NOTE the indexOf is only valid after the cp.resolve()
        // build up remainder of file
        classFile.accessFlags = (int) classBands.getClassFlags()[classNum];
        classFile.thisClass = cp.indexOf(cfThis);
        classFile.superClass = cp.indexOf(cfSuper);
        // TODO placate format of file for writing purposes
        classFile.interfaces = new int[cfInterfaces.length];
        for (int index = 0; index < cfInterfaces.length; index++) {
            classFile.interfaces[index] = cp.indexOf(cfInterfaces[index]);
        }
        classFile.fields = cfFields;
        classFile.methods = cfMethods;
        return classFile;
    }

    /**
     * Infers the source file name of a class from its internal name: the package prefix is dropped, the simple name is cut at its first character that is
     * less than or equal to {@code '$'} (if any), and {@code ".java"} is appended.
     *
     * @param fullName internal class name, with {@code '/'} as package separator
     * @return the inferred source file name
     */
    private static String inferSourceFileName(final String fullName) {
        // lastIndexOf returns -1 when there is no package, in which case the simple name starts at 0.
        final int simpleNameStart = fullName.lastIndexOf('/') + 1;
        int simpleNameEnd = fullName.length();
        for (int index = simpleNameStart; index < fullName.length(); index++) {
            if (fullName.charAt(index) <= '$') {
                // Stop at the FIRST separator so that Outer$A$B maps to Outer.java, not Outer$A.java.
                simpleNameEnd = index;
                break;
            }
        }
        return fullName.substring(simpleNameStart, simpleNameEnd) + ".java";
    }

    /**
     * Given an ic_local and an ic_relevant, use them to calculate what should be added as ic_stored.
     * <p>
     * The result is the symmetric difference (XOR) of the two inputs: tuples of ic_local come first, followed by the tuples of ic_relevant that are not
     * already present; any ic_relevant tuple that was already present is removed from the result again.
     * </p>
     *
     * @param icLocal    IcTuple[] array of local transmitted tuples, may be {@code null}
     * @param icRelevant IcTuple[] array of relevant tuples
     * @return List of tuples to be stored. If ic_local is null or empty, the values returned may not be correct. The caller will have to determine if this is
     *         the case.
     */
    private List<IcTuple> computeIcStored(final IcTuple[] icLocal, final IcTuple[] icRelevant) {
        final List<IcTuple> result = new ArrayList<>(icRelevant.length);
        final List<IcTuple> duplicates = new ArrayList<>(icRelevant.length);
        // Tracks membership so that each add is a constant-time check instead of a list scan.
        final Set<IcTuple> isInResult = new HashSet<>(icRelevant.length);

        // need to compute:
        // result = ic_local XOR ic_relevant

        // add ic_local
        if (icLocal != null) {
            for (final IcTuple element : icLocal) {
                if (isInResult.add(element)) {
                    result.add(element);
                }
            }
        }

        // add ic_relevant
        for (final IcTuple element : icRelevant) {
            if (isInResult.add(element)) {
                result.add(element);
            } else {
                duplicates.add(element);
            }
        }

        // eliminate "duplicates"
        duplicates.forEach(result::remove);

        return result;
    }

    /**
     * Gets the attribute definition bands of this segment.
     *
     * @return the attribute definition bands, or {@code null} if they have not been created yet
     */
    protected AttrDefinitionBands getAttrDefinitionBands() {
        return this.attrDefinitionBands;
    }

    /**
     * Gets the class bands of this segment.
     *
     * @return the class bands, or {@code null} if they have not been created yet
     */
    protected ClassBands getClassBands() {
        return this.classBands;
    }

    /**
     * Gets the segment constant pool, as provided by the constant pool bands.
     *
     * @return the constant pool of this segment
     */
    public SegmentConstantPool getConstantPool() {
        final CpBands bands = cpBands;
        return bands.getConstantPool();
    }

    /**
     * Gets the constant pool bands of this segment.
     *
     * @return the constant pool bands, or {@code null} if they have not been created yet
     */
    protected CpBands getCpBands() {
        return this.cpBands;
    }

    /**
     * Gets the inner class bands of this segment.
     *
     * @return the inner class bands, or {@code null} if they have not been created yet
     */
    protected IcBands getIcBands() {
        return this.icBands;
    }

    /**
     * Gets the header of this segment.
     *
     * @return the segment header, or {@code null} if it has not been created yet
     */
    public SegmentHeader getSegmentHeader() {
        return this.header;
    }

    /**
     * Writes a message to the log stream if the configured log level is at least the level of the message.
     * <p>
     * Nothing is written when no log stream has been set, so raising the log level without calling {@link #setLogStream(OutputStream)} is harmless.
     * </p>
     *
     * @param logLevel the level of the message, one of the {@code LOG_LEVEL_*} constants
     * @param message  the message to log
     */
    public void log(final int logLevel, final String message) {
        // logStream stays null until setLogStream is called; unpackWrite guards against that as well.
        if (logStream != null && this.logLevel >= logLevel) {
            logStream.println(message);
        }
    }

    /**
     * Forces every file of this segment to use the given deflate setting instead of the hints carried by the archive.
     *
     * @param deflateHint the deflate setting to apply to all files
     */
    public void overrideDeflateHint(final boolean deflateHint) {
        this.deflateHint = deflateHint;
        this.overrideDeflateHint = true;
    }

    /**
     * Unpacks all band sets that were previously read and turns them into per-file state: for every file it records whether the entry is to be deflated and
     * whether it is a class file, and for class files it builds and serializes the class into {@code classFilesContents}.
     *
     * @throws IOException      if a problem occurs during reading from the underlying stream
     * @throws Pack200Exception if a problem occurs with an unexpected value or unsupported codec
     */
    private void parseSegment() throws IOException, Pack200Exception {
        header.unpack();
        cpBands.unpack();
        attrDefinitionBands.unpack();
        icBands.unpack();
        classBands.unpack();
        bcBands.unpack();
        fileBands.unpack();

        int classIndex = 0;
        final int fileCount = header.getNumberOfFiles();
        final String[] names = fileBands.getFileName();
        final int[] optionBits = fileBands.getFileOptions();
        final SegmentOptions segmentOptions = header.getOptions();

        classFilesContents = new byte[fileCount][];
        fileDeflate = new boolean[fileCount];
        fileIsClass = new boolean[fileCount];

        // One buffer is reused for every class; it is reset after each class has been copied out.
        final ByteArrayOutputStream classBytes = new ByteArrayOutputStream();
        final DataOutputStream classOut = new DataOutputStream(classBytes);

        for (int fileIndex = 0; fileIndex < fileCount; fileIndex++) {
            final String transmittedName = names[fileIndex];
            final boolean unnamed = transmittedName == null || transmittedName.isEmpty();
            // A file is a class either because option bit 2 says so or because it carries no name.
            final boolean classEntry = (optionBits[fileIndex] & 2) == 2 || unnamed;
            if (unnamed) {
                // Unnamed files take the name of the next class, plus the class file extension.
                names[fileIndex] = cpBands.getCpClass()[classBands.getClassThisInts()[classIndex]] + ".class";
            }

            // Option bit 1 or the segment-wide option requests deflation, unless an override is in force.
            fileDeflate[fileIndex] = overrideDeflateHint ? deflateHint : ((optionBits[fileIndex] & 1) == 1 || segmentOptions.shouldDeflate());
            fileIsClass[fileIndex] = classEntry;

            if (!classEntry) {
                continue;
            }
            buildClassFile(classIndex).write(classOut);
            classOut.flush();

            classFilesContents[classIndex] = classBytes.toByteArray();
            classBytes.reset();
            classIndex++;
        }
    }

    /**
     * Reads the band sets of this segment from the stream, in the order constant pool, attribute definitions, inner classes, classes, bytecode and files,
     * and finally lets the file bands process the file bits. The segment header is not read here; {@code unpackRead} reads it beforehand.
     *
     * @param in the input stream to read from
     * @throws IOException      if a problem occurs during reading from the underlying stream
     * @throws Pack200Exception if a problem occurs with an unexpected value or unsupported codec
     */
    private void readSegment(final InputStream in) throws IOException, Pack200Exception {
        log(LOG_LEVEL_VERBOSE, "-------");
        // NOTE(review): each band set is constructed with this segment only after the previous one has been
        // stored in its field; presumably later constructors look up earlier bands through the segment's
        // getters, so keep this order when editing.
        cpBands = new CpBands(this);
        cpBands.read(in);
        attrDefinitionBands = new AttrDefinitionBands(this);
        attrDefinitionBands.read(in);
        icBands = new IcBands(this);
        icBands.read(in);
        classBands = new ClassBands(this);
        classBands.read(in);
        bcBands = new BcBands(this);
        bcBands.read(in);
        fileBands = new FileBands(this);
        fileBands.read(in);

        fileBands.processFileBits();
    }

    /**
     * Sets the threshold used by {@link #log(int, String)}: messages whose level is greater than this value are dropped.
     *
     * @param logLevel the new log level, one of the {@code LOG_LEVEL_*} constants
     */
    public void setLogLevel(final int logLevel) {
        this.logLevel = logLevel;
    }

    /**
     * Sets the destination of log messages. The stream is wrapped in a non-auto-flushing {@link PrintWriter} that encodes with the platform default charset.
     *
     * @param logStream the stream to log to
     */
    public void setLogStream(final OutputStream logStream) {
        final Charset platformCharset = Charset.defaultCharset();
        final OutputStreamWriter encoder = new OutputStreamWriter(logStream, platformCharset);
        this.logStream = new PrintWriter(encoder, false);
    }

    /**
     * Sets whether {@code unpackRead} should buffer the rest of the segment in memory (when the header reports a non-zero archive size) instead of reading
     * the bands directly from the stream.
     *
     * @param value {@code true} to enable pre-reading
     */
    public void setPreRead(final boolean value) {
        this.doPreRead = value;
    }

    /**
     * Unpacks a packed stream (either .pack or .pack.gz) into a corresponding JarOutputStream by running the three stages read, process and write in
     * sequence.
     *
     * @param inputStream  a packed input stream, preferably a {@link BoundedInputStream}.
     * @param out output stream.
     * @throws Pack200Exception if there is a problem unpacking
     * @throws IOException      if there is a problem with I/O during unpacking
     */
    public void unpack(final InputStream inputStream, final JarOutputStream out) throws IOException, Pack200Exception {
        // Stage 1: read the header and either the bands or a buffered copy of the segment body.
        unpackRead(inputStream);
        // Stage 2: read the bands from the buffer if stage 1 only buffered them, then build the class files.
        unpackProcess();
        // Stage 3: write the entries to the jar and close the log stream, if any.
        unpackWrite(out);
    }

    /**
     * Processing stage of unpacking: reads the bands from the in-memory buffer if {@code unpackRead} created one, then parses the segment.
     *
     * @throws IOException      if a problem occurs during reading from the buffer
     * @throws Pack200Exception if a problem occurs with an unexpected value or unsupported codec
     */
    void unpackProcess() throws IOException, Pack200Exception {
        final InputStream buffered = internalBuffer;
        if (buffered != null) {
            readSegment(buffered);
        }
        parseSegment();
    }

    /**
     * Reading stage of unpacking (package-private accessor for the unpacking stages).
     * <p>
     * Reads the segment header and then either reads the bands directly from the stream or, when pre-reading is enabled and the header reports a non-zero
     * archive size, copies the rest of the segment into an in-memory buffer that {@code unpackProcess} consumes later.
     * </p>
     *
     * @param inputStream the packed input stream
     * @throws IOException      if a problem occurs during reading from the underlying stream
     * @throws Pack200Exception if the header reports a segment size that cannot be buffered
     */
    void unpackRead(final InputStream inputStream) throws IOException, Pack200Exception {
        @SuppressWarnings("resource")
        final InputStream in = Pack200UnpackerAdapter.newBoundedInputStream(inputStream);

        header = new SegmentHeader(this);
        header.read(in);

        if (doPreRead && header.getArchiveSize() != 0) {
            // Compute in long arithmetic so that an oversized or inconsistent header value is detected
            // instead of wrapping around into a bogus (possibly negative) array length.
            final long remaining = header.getArchiveSize() - header.getArchiveSizeOffset();
            if (remaining < 0 || remaining > Integer.MAX_VALUE) {
                throw new Pack200Exception("Invalid segment size: " + remaining);
            }
            final byte[] data = new byte[(int) remaining];
            // A single read may return fewer bytes than requested, so keep reading until the buffer is
            // full or the stream ends.
            int filled = 0;
            while (filled < data.length) {
                final int count = in.read(data, filled, data.length - filled);
                if (count < 0) {
                    break;
                }
                filled += count;
            }
            // Expose only the bytes actually read, so a truncated stream surfaces as end-of-stream
            // to the band readers rather than as zero padding.
            internalBuffer = new BufferedInputStream(new ByteArrayInputStream(data, 0, filled));
        } else {
            readSegment(in);
        }
    }

    /**
     * Writing stage of unpacking: writes the segment's files to the jar and then closes the log stream, if one was set.
     *
     * @param out the JarOutputStream to write data to
     * @throws IOException if an error occurs while writing the jar
     */
    void unpackWrite(final JarOutputStream out) throws IOException {
        writeJar(out);
        final PrintWriter log = logStream;
        if (log == null) {
            return;
        }
        log.close();
    }

    /**
     * Writes every file of this segment as an entry of the given jar stream. Class files are taken from the contents built while parsing; all other files are
     * copied from the file bits. The output stream should be pre-buffered for efficiency and is not closed here, in case there are more entries (e.g. further
     * segments) to be written.
     *
     * @param out the JarOutputStream to write data to
     * @throws IOException if an error occurs while reading or writing to the streams
     */
    public void writeJar(final JarOutputStream out) throws IOException {
        final String[] names = fileBands.getFileName();
        final int[] modtimeDeltas = fileBands.getFileModtime();
        final long[] sizes = fileBands.getFileSize();
        final byte[][] bits = fileBands.getFileBits();

        // Class files are stored separately and consumed in order, hence the second counter.
        int classIndex = 0;
        final int fileCount = header.getNumberOfFiles();
        final long archiveModtime = header.getArchiveModtime();

        for (int fileIndex = 0; fileIndex < fileCount; fileIndex++) {
            final String entryName = names[fileIndex];
            // Pack200 stores modification times in seconds from the epoch, whereas JarEntry expects
            // milliseconds. Both operands are below 2^32, so the long arithmetic cannot overflow.
            final long modtimeMillis = 1000 * (archiveModtime + modtimeDeltas[fileIndex]);
            final boolean deflate = fileDeflate[fileIndex];

            final JarEntry entry = new JarEntry(entryName);
            if (!deflate) {
                // Stored entries need their size and CRC before they are added to the stream.
                entry.setMethod(ZipEntry.STORED);
                final CRC32 checksum = new CRC32();
                if (fileIsClass[fileIndex]) {
                    checksum.update(classFilesContents[classIndex]);
                    entry.setSize(classFilesContents[classIndex].length);
                } else {
                    checksum.update(bits[fileIndex]);
                    entry.setSize(sizes[fileIndex]);
                }
                entry.setCrc(checksum.getValue());
            } else {
                entry.setMethod(ZipEntry.DEFLATED);
            }
            // On Windows at least, need to correct for timezone
            entry.setTime(modtimeMillis - TimeZone.getDefault().getRawOffset());
            out.putNextEntry(entry);

            // write to output stream
            final byte[] payload;
            if (fileIsClass[fileIndex]) {
                payload = classFilesContents[classIndex++];
                entry.setSize(payload.length);
            } else {
                payload = bits[fileIndex];
                entry.setSize(sizes[fileIndex]);
            }
            out.write(payload);
        }
    }

}