// ai.databand.parameters.DatasetOperationPreview (Maven / Gradle / Ivy source listing)
package ai.databand.parameters;
import ai.databand.schema.DatasetOperationSchema;
import ai.databand.schema.Pair;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import static ai.databand.DbndPropertyNames.DBND_INTERNAL_ALIAS;
public class DatasetOperationPreview extends DatasetPreview {
@Override
public Object schema(Dataset input) {
Dataset> schemaAlias = input.alias(String.format("%s_%s",DBND_INTERNAL_ALIAS,"SCHEMA"));
return extractSchema(schemaAlias.schema(), schemaAlias.count()).left();
}
public Pair> extractSchema(StructType schema, long rows) {
try {
List columns = new ArrayList<>(schema.fields().length);
Map dtypes = new HashMap<>(schema.fields().length);
for (StructField field : schema.fields()) {
columns.add(field.name());
dtypes.put(field.name(), field.dataType().typeName());
}
List shape = Arrays.asList(rows, (long) columns.size());
try {
return new Pair<>(new ObjectMapper().writeValueAsString(new DatasetOperationSchema(columns, dtypes, shape)), shape);
} catch (JsonProcessingException e) {
return new Pair<>("", shape);
}
} catch (Exception e) {
return new Pair<>("", Collections.emptyList());
}
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy