All downloads are free. Search and download functionality uses the official Maven repository.

org.datavec.api.split.TransformSplit Maven / Gradle / Ivy

There is a newer version: 1.0.0-M2.1
Show newest version
package org.datavec.api.split;

import lombok.NonNull;
import org.nd4j.linalg.collection.CompactHeapStringList;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Iterator;

/**
 * InputSplit implementation that maps the URIs of a given BaseInputSplit to new URIs. Useful when features and labels
 * are in different files sharing a common naming scheme, and the name of the output file can be determined given the
 * name of the input file.
 *
 * <p>The transformation is applied eagerly: every URI of the source split is transformed once, inside the
 * constructor, and the resulting URI strings are stored in this split.
 *
 * @author Ede Meijer
 */
public class TransformSplit extends BaseInputSplit {
    private final BaseInputSplit sourceSplit;
    private final URITransform transform;

    /**
     * Apply a given transformation to the raw URI objects
     *
     * @param sourceSplit the split with URIs to transform
     * @param transform transform operation that returns a new URI based on an input URI
     * @throws URISyntaxException thrown if the transformed URI is malformed
     */
    public TransformSplit(@NonNull BaseInputSplit sourceSplit, @NonNull URITransform transform)
                    throws URISyntaxException {
        this.sourceSplit = sourceSplit;
        this.transform = transform;
        initialize();
    }

    /**
     * Static factory method, replace the string version of the URI with a simple search-replace pair.
     * Every literal occurrence of {@code search} in the URI string is replaced (no regular expressions).
     *
     * @param sourceSplit the split with URIs to transform
     * @param search the string to search
     * @param replace the string to replace with
     * @return a new TransformSplit whose URIs are the source URIs with {@code search} replaced by {@code replace}
     * @throws URISyntaxException thrown if the transformed URI is malformed
     */
    public static TransformSplit ofSearchReplace(@NonNull BaseInputSplit sourceSplit, @NonNull final String search,
                    @NonNull final String replace) throws URISyntaxException {
        return new TransformSplit(sourceSplit, new URITransform() {
            @Override
            public URI apply(URI uri) throws URISyntaxException {
                return new URI(uri.toString().replace(search, replace));
            }
        });
    }

    /**
     * Copies the length of the source split and fills {@code uriStrings} with the transformed form of every
     * URI produced by the source split's locations iterator, in iteration order.
     *
     * @throws URISyntaxException if the transform rejects or produces a malformed URI
     */
    private void initialize() throws URISyntaxException {
        // length and uriStrings are not declared in this class; presumably inherited from BaseInputSplit.
        length = sourceSplit.length();
        uriStrings = new CompactHeapStringList();
        // The iterator must be typed: with a raw Iterator, next() yields Object and cannot be assigned to URI.
        Iterator<URI> iter = sourceSplit.locationsIterator();
        while (iter.hasNext()) {
            URI uri = transform.apply(iter.next());
            uriStrings.add(uri.toString());
        }
    }

    /**
     * No-op: this implementation does not write any state to the given output.
     *
     * @param out unused
     */
    @Override
    public void write(DataOutput out) throws IOException {

    }

    /**
     * No-op: this implementation does not read any state from the given input.
     *
     * @param in unused
     */
    @Override
    public void readFields(DataInput in) throws IOException {

    }

    /**
     * No-op; the transformed URIs computed at construction time are left untouched.
     */
    @Override
    public void reset() {
        //No op: BaseInputSplit doesn't support randomization directly, and TransformSplit doesn't either
    }

    /**
     * Operation that maps one URI of the source split to the URI exposed by the TransformSplit.
     */
    public interface URITransform {
        /**
         * @param uri a URI taken from the source split
         * @return the URI to expose in its place
         * @throws URISyntaxException if the resulting URI is malformed
         */
        URI apply(URI uri) throws URISyntaxException;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy