All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.datavec.api.split.NumberedFileInputSplit Maven / Gradle / Ivy

There is a newer version: 1.0.0-M2.1
Show newest version
/*
 *  ******************************************************************************
 *  *
 *  *
 *  * This program and the accompanying materials are made available under the
 *  * terms of the Apache License, Version 2.0 which is available at
 *  * https://www.apache.org/licenses/LICENSE-2.0.
 *  *
 *  *  See the NOTICE file distributed with this work for additional
 *  *  information regarding copyright ownership.
 *  * Unless required by applicable law or agreed to in writing, software
 *  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 *  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 *  * License for the specific language governing permissions and limitations
 *  * under the License.
 *  *
 *  * SPDX-License-Identifier: Apache-2.0
 *  *****************************************************************************
 */

package org.datavec.api.split;

import lombok.extern.slf4j.Slf4j;
import org.datavec.api.util.files.UriFromPathIterator;

import java.io.*;
import java.net.URI;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

@Slf4j
public class NumberedFileInputSplit implements InputSplit {
    private final String baseString;
    private final int minIdx;
    private final int maxIdx;

    private static final Pattern p = Pattern.compile("\\%(0\\d)?d");

    /**
     * @param baseString String that defines file format. Must contain "%d", which will be replaced with
     *                   the index of the file, possibly zero-padded to x digits if the pattern is in the form %0xd.
     * @param minIdxInclusive Minimum index/number (starting number in sequence of files, inclusive)
     * @param maxIdxInclusive Maximum index/number (last number in sequence of files, inclusive)
     *                        @see {NumberedFileInputSplitTest}
     */
    public NumberedFileInputSplit(String baseString, int minIdxInclusive, int maxIdxInclusive) {
        Matcher m = p.matcher(baseString);
        if (baseString == null || !m.find()) {
            throw new IllegalArgumentException("Base String must match this regular expression: " + p.toString());
        }
        this.baseString = baseString;
        this.minIdx = minIdxInclusive;
        this.maxIdx = maxIdxInclusive;
    }

    @Override
    public boolean canWriteToLocation(URI location) {
        return location.isAbsolute();
    }

    @Override
    public String addNewLocation() {
        return null;
    }

    @Override
    public String addNewLocation(String location) {
        return null;
    }

    @Override
    public void updateSplitLocations(boolean reset) {
        //no-op (locations() is dynamic)
    }

    @Override
    public boolean needsBootstrapForWrite() {
        return locations() == null ||
                locations().length < 1
                || locations().length == 1 && !locations()[0].isAbsolute();
    }

    @Override
    public void bootStrapForWrite() {
        if(locations().length == 1 && !locations()[0].isAbsolute()) {
            File parentDir = new File(locations()[0]);
            File writeFile = new File(parentDir,"write-file");
            try {
                writeFile.createNewFile();
            } catch (IOException e) {
                log.error("",e);
            }


        }
    }

    @Override
    public OutputStream openOutputStreamFor(String location) throws Exception {
        FileOutputStream ret = location.startsWith("file:") ? new FileOutputStream(new File(URI.create(location))):
                new FileOutputStream(new File(location));
        return ret;
    }

    @Override
    public InputStream openInputStreamFor(String location) throws Exception {
        FileInputStream fileInputStream = new FileInputStream(location);
        return fileInputStream;
    }

    @Override
    public long length() {
        return maxIdx - minIdx + 1;
    }

    @Override
    public URI[] locations() {
        URI[] uris = new URI[(int) length()];
        int x = 0;
        if(baseString.matches(".{2,}:/.*")){
            //URI (has scheme)
            for (int i = minIdx; i <= maxIdx; i++) {
                uris[x++] = URI.create(String.format(baseString, i));
            }
        } else {
            //File, no scheme
            for (int i = minIdx; i <= maxIdx; i++) {
                uris[x++] = new File(String.format(baseString, i)).toURI();
            }
        }
        return uris;
    }

    @Override
    public Iterator locationsIterator() {
        return new UriFromPathIterator(locationsPathIterator());
    }

    @Override
    public Iterator locationsPathIterator() {
        return new NumberedFileIterator();
    }

    @Override
    public void reset() {
        //No op
    }

    @Override
    public boolean resetSupported() {
        return true;
    }


    private class NumberedFileIterator implements Iterator {

        private int currIdx;

        private NumberedFileIterator() {
            currIdx = minIdx;
        }

        @Override
        public boolean hasNext() {
            return currIdx <= maxIdx;
        }

        @Override
        public String next() {
            if (!hasNext()) {
                throw new NoSuchElementException();
            }
            return String.format(baseString, currIdx++);
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy