All Downloads are FREE. Search and download functionality uses the official Maven repository.

org.datavec.api.split.TransformSplit Maven / Gradle / Ivy

There is a newer version: 1.0.0-M2.1
Show newest version
/*
 *  ******************************************************************************
 *  *
 *  *
 *  * This program and the accompanying materials are made available under the
 *  * terms of the Apache License, Version 2.0 which is available at
 *  * https://www.apache.org/licenses/LICENSE-2.0.
 *  *
 *  *  See the NOTICE file distributed with this work for additional
 *  *  information regarding copyright ownership.
 *  * Unless required by applicable law or agreed to in writing, software
 *  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 *  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 *  * License for the specific language governing permissions and limitations
 *  * under the License.
 *  *
 *  * SPDX-License-Identifier: Apache-2.0
 *  *****************************************************************************
 */

package org.datavec.api.split;

import lombok.NonNull;
import org.nd4j.common.collection.CompactHeapStringList;

import java.io.*;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Iterator;

/**
 * An input split that wraps another split and exposes that split's locations after passing
 * each one through a {@link URITransform}.
 *
 * <p>The transformed URIs are computed once, in the constructor, and stored as strings.
 * Write/read stream handling and split-location updates are delegated to the wrapped split.
 */
public class TransformSplit extends BaseInputSplit {
    private final BaseInputSplit sourceSplit;
    private final URITransform transform;

    /**
     * Apply a given transformation to the raw URI objects
     *
     * @param sourceSplit the split with URIs to transform; must not be null
     * @param transform transform operation that returns a new URI based on an input URI; must not be null
     * @throws URISyntaxException thrown if the transformed URI is malformed
     */
    public TransformSplit(@NonNull BaseInputSplit sourceSplit, @NonNull URITransform transform)
            throws URISyntaxException {
        this.sourceSplit = sourceSplit;
        this.transform = transform;
        initialize();
    }

    /**
     * Static factory method, replace the string version of the URI with a simple search-replace pair.
     * Every literal occurrence of {@code search} in the URI string is replaced (see
     * {@link String#replace(CharSequence, CharSequence)}); no regular expressions are involved.
     *
     * @param sourceSplit the split with URIs to transform; must not be null
     * @param search the string to search; must not be null
     * @param replace the string to replace with; must not be null
     * @return a new split whose locations are the search-replaced URIs of {@code sourceSplit}
     * @throws URISyntaxException thrown if the transformed URI is malformed
     */
    public static TransformSplit ofSearchReplace(@NonNull BaseInputSplit sourceSplit, @NonNull final String search,
                                                 @NonNull final String replace) throws URISyntaxException {
        return new TransformSplit(sourceSplit, new URITransform() {
            @Override
            public URI apply(URI uri) throws URISyntaxException {
                return new URI(uri.toString().replace(search, replace));
            }
        });
    }

    /**
     * Copies the length of the source split and builds the list of transformed URI strings.
     *
     * <p>{@code length} and {@code uriStrings} are not declared in this class; presumably they are
     * fields inherited from {@link BaseInputSplit} - verify against the base class.
     *
     * @throws URISyntaxException if the transform produces a malformed URI
     */
    private void initialize() throws URISyntaxException {
        length = sourceSplit.length();
        uriStrings = new CompactHeapStringList();
        // Typed iterator: a raw Iterator would yield Object, which cannot be assigned to URI.
        Iterator<URI> iter = sourceSplit.locationsIterator();
        while (iter.hasNext()) {
            URI transformed = transform.apply(iter.next());
            uriStrings.add(transformed.toString());
        }
    }


    /**
     * Delegates to the wrapped split. Note that this method does not re-run the transform, so the
     * URI strings collected at construction time are left untouched by this call.
     *
     * @param reset passed through unchanged to the wrapped split
     */
    @Override
    public void updateSplitLocations(boolean reset) {
        sourceSplit.updateSplitLocations(reset);
    }

    /**
     * @return whatever the wrapped split reports for {@code needsBootstrapForWrite()}
     */
    @Override
    public boolean needsBootstrapForWrite() {
        return sourceSplit.needsBootstrapForWrite();
    }

    /**
     * Delegates write bootstrapping to the wrapped split.
     */
    @Override
    public void bootStrapForWrite() {
        sourceSplit.bootStrapForWrite();
    }

    /**
     * Opens an output stream via the wrapped split. The location is forwarded as given; no
     * transform is applied to it here.
     *
     * @param location the location to open, passed unchanged to the wrapped split
     * @return the stream returned by the wrapped split
     * @throws Exception if the wrapped split fails to open the stream
     */
    @Override
    public OutputStream openOutputStreamFor(String location) throws Exception {
        return sourceSplit.openOutputStreamFor(location);
    }

    /**
     * Opens an input stream via the wrapped split. The location is forwarded as given; no
     * transform is applied to it here.
     *
     * @param location the location to open, passed unchanged to the wrapped split
     * @return the stream returned by the wrapped split
     * @throws Exception if the wrapped split fails to open the stream
     */
    @Override
    public InputStream openInputStreamFor(String location) throws Exception {
        return sourceSplit.openInputStreamFor(location);
    }

    /**
     * Does nothing; see the inline note.
     */
    @Override
    public void reset() {
        //No op: BaseInputSplit doesn't support randomization directly, and TransformSplit doesn't either
    }

    /**
     * @return always {@code true}; {@link #reset()} is a no-op, so calling it is always permitted
     */
    @Override
    public boolean resetSupported() {
        return true;
    }

    /**
     * Maps one URI to another. Used by {@link TransformSplit} to rewrite each location of the
     * wrapped split.
     */
    public interface URITransform {
        /**
         * @param uri the source URI
         * @return the transformed URI
         * @throws URISyntaxException if the transformed URI is malformed
         */
        URI apply(URI uri) throws URISyntaxException;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy