All downloads are FREE. Search and download functionality uses the official Maven repository.

ai.databand.parameters.DatasetOperationPreview Maven / Gradle / Ivy

There is a newer version: 1.0.26.1
Show newest version
package ai.databand.parameters;

import ai.databand.schema.DatasetOperationSchema;
import ai.databand.schema.Pair;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static ai.databand.DbndPropertyNames.DBND_INTERNAL_ALIAS;

public class DatasetOperationPreview extends DatasetPreview {

    @Override
    public Object schema(Dataset input) {
        Dataset schemaAlias = input.alias(String.format("%s_%s",DBND_INTERNAL_ALIAS,"SCHEMA"));
        return extractSchema(schemaAlias.schema(), schemaAlias.count()).left();
    }

    public Pair> extractSchema(StructType schema, long rows) {
        try {
            List columns = new ArrayList<>(schema.fields().length);
            Map dtypes = new HashMap<>(schema.fields().length);
            for (StructField field : schema.fields()) {
                columns.add(field.name());
                dtypes.put(field.name(), field.dataType().typeName());
            }
            List shape = Arrays.asList(rows, (long) columns.size());
            try {
                return new Pair<>(new ObjectMapper().writeValueAsString(new DatasetOperationSchema(columns, dtypes, shape)), shape);
            } catch (JsonProcessingException e) {
                return new Pair<>("", shape);
            }
        } catch (Exception e) {
            return new Pair<>("", Collections.emptyList());
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy