org.finos.tracdap.test.data.SampleData Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of tracdap-lib-test Show documentation
Show all versions of tracdap-lib-test Show documentation
TRAC D.A.P. test library; pulls in everything needed to run tests across the TRAC platform services.
/*
* Copyright 2022 Accenture Global Solutions Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.finos.tracdap.test.data;
import org.apache.arrow.vector.types.pojo.Schema;
import org.finos.tracdap.common.data.ArrowSchema;
import org.finos.tracdap.common.exception.ETracInternal;
import org.finos.tracdap.common.exception.EUnexpected;
import org.finos.tracdap.metadata.*;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.vector.*;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.nio.charset.StandardCharsets;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.util.*;
import java.util.function.BiConsumer;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
public class SampleData {
/** Resource path of the basic CSV sample data (presumably resolved on the test classpath - confirm against callers). */
public static final String BASIC_CSV_DATA_RESOURCE = "/sample_data/csv_basic.csv";
/** Resource path of the second version of the basic CSV sample data (presumably pairs with BASIC_TABLE_SCHEMA_V2 - confirm). */
public static final String BASIC_CSV_DATA_RESOURCE_V2 = "/sample_data/csv_basic_v2.csv";
/** Resource path of the basic sample data in JSON form. */
public static final String BASIC_JSON_DATA_RESOURCE = "/sample_data/json_basic.json";
/** Resource path of the alternate CSV sample data (presumably pairs with ALT_TABLE_SCHEMA - confirm). */
public static final String ALT_CSV_DATA_RESOURCE = "/sample_data/csv_alt.csv";
/**
 * Table schema with seven fields, one for each of the basic types
 * BOOLEAN, INTEGER, FLOAT, DECIMAL, STRING, DATE and DATETIME.
 *
 * Fields are added in field-order sequence (0 to 6) and each field is
 * named after its type, e.g. "boolean_field".
 */
public static final SchemaDefinition BASIC_TABLE_SCHEMA = SchemaDefinition.newBuilder()
        .setSchemaType(SchemaType.TABLE)
        .setTable(TableSchema.newBuilder()
                .addFields(FieldSchema.newBuilder()
                        .setFieldName("boolean_field")
                        .setFieldType(BasicType.BOOLEAN)
                        .setFieldOrder(0)
                        .build())
                .addFields(FieldSchema.newBuilder()
                        .setFieldName("integer_field")
                        .setFieldType(BasicType.INTEGER)
                        .setFieldOrder(1)
                        .build())
                .addFields(FieldSchema.newBuilder()
                        .setFieldName("float_field")
                        .setFieldType(BasicType.FLOAT)
                        .setFieldOrder(2)
                        .build())
                .addFields(FieldSchema.newBuilder()
                        .setFieldName("decimal_field")
                        .setFieldType(BasicType.DECIMAL)
                        .setFieldOrder(3)
                        .build())
                .addFields(FieldSchema.newBuilder()
                        .setFieldName("string_field")
                        .setFieldType(BasicType.STRING)
                        .setFieldOrder(4)
                        .build())
                .addFields(FieldSchema.newBuilder()
                        .setFieldName("date_field")
                        .setFieldType(BasicType.DATE)
                        .setFieldOrder(5)
                        .build())
                .addFields(FieldSchema.newBuilder()
                        .setFieldName("datetime_field")
                        .setFieldType(BasicType.DATETIME)
                        .setFieldOrder(6)
                        .build())
                .build())
        .build();
/**
 * Copy of {@link #BASIC_TABLE_SCHEMA} with one additional STRING field,
 * "extra_string_field", appended at field order 7.
 */
public static final SchemaDefinition BASIC_TABLE_SCHEMA_V2 = BASIC_TABLE_SCHEMA.toBuilder()
        .setTable(BASIC_TABLE_SCHEMA.getTable().toBuilder()
                .addFields(FieldSchema.newBuilder()
                        .setFieldName("extra_string_field")
                        .setFieldType(BasicType.STRING)
                        .setFieldOrder(7)
                        .build())
                .build())
        .build();
/**
 * Alternate table schema with five fields: a STRING field, a STRING field
 * flagged as categorical, two FLOAT value fields and a BOOLEAN flag.
 *
 * Fields are added in field-order sequence (0 to 4).
 */
public static final SchemaDefinition ALT_TABLE_SCHEMA = SchemaDefinition.newBuilder()
        .setSchemaType(SchemaType.TABLE)
        .setTable(TableSchema.newBuilder()
                .addFields(FieldSchema.newBuilder()
                        .setFieldName("alt_string_field")
                        .setFieldType(BasicType.STRING)
                        .setFieldOrder(0)
                        .build())
                .addFields(FieldSchema.newBuilder()
                        .setFieldName("alt_categorical_field")
                        .setFieldType(BasicType.STRING)
                        .setCategorical(true)
                        .setFieldOrder(1)
                        .build())
                .addFields(FieldSchema.newBuilder()
                        .setFieldName("alt_value_field")
                        .setFieldType(BasicType.FLOAT)
                        .setFieldOrder(2)
                        .build())
                .addFields(FieldSchema.newBuilder()
                        .setFieldName("alt_value_2_field")
                        .setFieldType(BasicType.FLOAT)
                        .setFieldOrder(3)
                        .build())
                .addFields(FieldSchema.newBuilder()
                        .setFieldName("alt_flag")
                        .setFieldType(BasicType.BOOLEAN)
                        .setFieldOrder(4)
                        .build())
                .build())
        .build();
/**
 * Copy of {@link #ALT_TABLE_SCHEMA} with one additional BOOLEAN field,
 * "alt_extra_flag", appended at field order 5.
 */
public static final SchemaDefinition ALT_TABLE_SCHEMA_V2 = ALT_TABLE_SCHEMA.toBuilder()
        .setTable(ALT_TABLE_SCHEMA.getTable().toBuilder()
                .addFields(FieldSchema.newBuilder()
                        .setFieldName("alt_extra_flag")
                        .setFieldType(BasicType.BOOLEAN)
                        .setFieldOrder(5)
                        .build())
                .build())
        .build();
/**
 * Sample flow made of two input nodes, three model nodes and one output node.
 *
 * Wiring, as declared by the edges below:
 *   basic_data_input -> model_1 -> (enriched_basic_data) -> model_3
 *   alt_data_input   -> model_2 -> (enriched_alt_data)   -> model_3
 *   model_3 -> (sample_output_data) -> sample_output_data
 *
 * Edges that start or end on an input / output node name only the node;
 * edges attached to a model node also name the socket on that model.
 */
public static final FlowDefinition SAMPLE_FLOW = FlowDefinition.newBuilder()

        // Input nodes
        .putNodes("basic_data_input", FlowNode.newBuilder()
                .setNodeType(FlowNodeType.INPUT_NODE)
                .build())
        .putNodes("alt_data_input", FlowNode.newBuilder()
                .setNodeType(FlowNodeType.INPUT_NODE)
                .build())

        // Model nodes, each declaring the sockets it consumes and produces
        .putNodes("model_1", FlowNode.newBuilder()
                .setNodeType(FlowNodeType.MODEL_NODE)
                .addInputs("basic_data_input")
                .addOutputs("enriched_basic_data")
                .build())
        .putNodes("model_2", FlowNode.newBuilder()
                .setNodeType(FlowNodeType.MODEL_NODE)
                .addInputs("alt_data_input")
                .addOutputs("enriched_alt_data")
                .build())
        .putNodes("model_3", FlowNode.newBuilder()
                .setNodeType(FlowNodeType.MODEL_NODE)
                .addInputs("enriched_basic_data")
                .addInputs("enriched_alt_data")
                .addOutputs("sample_output_data")
                .build())

        // Output node
        .putNodes("sample_output_data", FlowNode.newBuilder()
                .setNodeType(FlowNodeType.OUTPUT_NODE)
                .build())

        // Edges from the input nodes into the first two models
        .addEdges(FlowEdge.newBuilder()
                .setSource(FlowSocket.newBuilder()
                        .setNode("basic_data_input")
                        .build())
                .setTarget(FlowSocket.newBuilder()
                        .setNode("model_1")
                        .setSocket("basic_data_input")
                        .build())
                .build())
        .addEdges(FlowEdge.newBuilder()
                .setSource(FlowSocket.newBuilder()
                        .setNode("alt_data_input")
                        .build())
                .setTarget(FlowSocket.newBuilder()
                        .setNode("model_2")
                        .setSocket("alt_data_input")
                        .build())
                .build())

        // Edges from the first two models into model_3
        .addEdges(FlowEdge.newBuilder()
                .setSource(FlowSocket.newBuilder()
                        .setNode("model_1")
                        .setSocket("enriched_basic_data")
                        .build())
                .setTarget(FlowSocket.newBuilder()
                        .setNode("model_3")
                        .setSocket("enriched_basic_data")
                        .build())
                .build())
        .addEdges(FlowEdge.newBuilder()
                .setSource(FlowSocket.newBuilder()
                        .setNode("model_2")
                        .setSocket("enriched_alt_data")
                        .build())
                .setTarget(FlowSocket.newBuilder()
                        .setNode("model_3")
                        .setSocket("enriched_alt_data")
                        .build())
                .build())

        // Edge from model_3 to the output node
        .addEdges(FlowEdge.newBuilder()
                .setSource(FlowSocket.newBuilder()
                        .setNode("model_3")
                        .setSocket("sample_output_data")
                        .build())
                .setTarget(FlowSocket.newBuilder()
                        .setNode("sample_output_data")
                        .build())
                .build())

        .build();
public static VectorSchemaRoot generateBasicData(BufferAllocator arrowAllocator) {
var javaData = new HashMap>();
for (var field : BASIC_TABLE_SCHEMA.getTable().getFieldsList()) {
var javaValues = generateJavaValues(field.getFieldType(), 10);
javaData.put(field.getFieldName(), javaValues);
}
return convertData(BASIC_TABLE_SCHEMA, javaData, 10, arrowAllocator);
}
public static List