All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.deeplearning4j.spark.data.PathToMultiDataSetFunction Maven / Gradle / Ivy

The newest version!
/*
 *  ******************************************************************************
 *  *
 *  *
 *  * This program and the accompanying materials are made available under the
 *  * terms of the Apache License, Version 2.0 which is available at
 *  * https://www.apache.org/licenses/LICENSE-2.0.
 *  *
 *  *  See the NOTICE file distributed with this work for additional
 *  *  information regarding copyright ownership.
 *  * Unless required by applicable law or agreed to in writing, software
 *  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 *  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 *  * License for the specific language governing permissions and limitations
 *  * under the License.
 *  *
 *  * SPDX-License-Identifier: Apache-2.0
 *  *****************************************************************************
 */

package org.deeplearning4j.spark.data;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.broadcast.Broadcast;
import org.datavec.spark.util.DefaultHadoopConfig;
import org.datavec.spark.util.SerializableHadoopConfig;
import org.nd4j.linalg.dataset.api.MultiDataSet;

import java.io.IOException;
import java.net.URI;

/**
 * Spark {@link Function} that maps a path string to the {@link MultiDataSet} stored at that path.
 *
 * <p>The file is opened through the Hadoop {@link FileSystem} API and read with
 * {@link MultiDataSet#load(java.io.InputStream)}. The Hadoop {@link Configuration} comes from the
 * broadcast passed at construction time, or from {@link DefaultHadoopConfig#get()} when no
 * broadcast was supplied.
 *
 * <p>NOTE(review): the {@link FileSystem} is resolved from the first path seen by this instance
 * and reused for every later call. This presumably assumes all paths handled by one instance
 * live on the same file system - confirm against callers.
 */
public class PathToMultiDataSetFunction implements Function<String, MultiDataSet> {
    /** Buffer size, in bytes, used when opening each file (4 MB). */
    public static final int BUFFER_SIZE = 4194304; //4 MB

    // Transient: not serialized with the function, so it is (re)created lazily in call().
    private transient FileSystem fileSystem;
    // May be null, in which case DefaultHadoopConfig.get() is used instead.
    private final Broadcast<SerializableHadoopConfig> conf;

    /**
     * Creates a function that uses the default Hadoop configuration.
     */
    public PathToMultiDataSetFunction(){
        this(null);
    }

    /**
     * Creates a function that uses the given broadcast Hadoop configuration.
     *
     * @param configuration broadcast Hadoop configuration; may be {@code null} to fall back to
     *                      {@link DefaultHadoopConfig#get()}
     */
    public PathToMultiDataSetFunction(Broadcast<SerializableHadoopConfig> configuration){
        this.conf = configuration;
    }

    /**
     * Loads the {@link MultiDataSet} stored at the given path.
     *
     * @param path location of the serialized MultiDataSet; must be parseable as a {@link URI}
     * @return the loaded MultiDataSet
     * @throws RuntimeException if the file system cannot be obtained or the file cannot be read;
     *                          the underlying exception is preserved as the cause
     */
    @Override
    public MultiDataSet call(String path) throws Exception {
        if (fileSystem == null) {
            fileSystem = resolveFileSystem(path);
        }

        MultiDataSet ds = new org.nd4j.linalg.dataset.MultiDataSet();
        try (FSDataInputStream inputStream = fileSystem.open(new Path(path), BUFFER_SIZE)) {
            ds.load(inputStream);
        } catch (IOException e) {
            throw new RuntimeException("Error loading MultiDataSet from path: " + path, e);
        }

        return ds;
    }

    /** Obtains the Hadoop file system for {@code path}, using the broadcast or default configuration. */
    private FileSystem resolveFileSystem(String path) {
        try {
            Configuration c = conf == null ? DefaultHadoopConfig.get() : conf.getValue().getConfiguration();
            return FileSystem.get(new URI(path), c);
        } catch (Exception e) {
            throw new RuntimeException("Error obtaining FileSystem for path: " + path, e);
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy