/*
 * All downloads are free. Search and download functionality uses the official Maven repository.
 *
 * dido.operators.Flatten (Maven / Gradle / Ivy) - the newest version.
 */
package dido.operators;

import dido.data.*;

import java.lang.reflect.Array;
import java.util.*;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;

public class Flatten {

    public static  List> flatten(F field, IndexedData data) {
        return new DynamicFlatten<>(extractorForFieldOrIndex(field, 0)).apply(data);
    }

    public static  List> flattenAt(int index, IndexedData data) {
        return new DynamicFlatten<>(extractorForFieldOrIndex((F) null, 0)).apply(data);
    }

    public static  Function, List>> field(F field) {

        return new DynamicFlatten<>(extractorForFieldOrIndex(field, 0));
    }

    public static  Function, List>> fields(F... fields) {

        Collection> extractors = Arrays.stream(fields)
                .map(f -> extractorForFieldOrIndex(f, 0))
                .collect(Collectors.toList());

        return new DynamicIterableFlatten<>(extractors);
    }

    public static  Function, List>> indices(int... indices) {

        Collection> extractors = Arrays.stream(indices)
                .mapToObj(i -> (Extractor) extractorForFieldOrIndex(null, i))
                .collect(Collectors.toList());

        return new DynamicIterableFlatten<>(extractors);
    }

    public static  Function, List>> fieldOfSchema(F field,
                                                                                   DataSchema schema) {
        return fieldOrIndexOfSchema(field, 0, schema);
    }

    public static  Function, List>> fieldOrIndexOfSchema(F field,
                                                                                          int index,
                                                                                          DataSchema schema) {

        Extractor extractor = extractorForFieldOrIndex(field, index);

        return extractorOfSchema(extractor, schema);
    }

    static  Function, List>> extractorOfSchema(Extractor extractor,
                                                                                          DataSchema schema) {

        DataSchema nestedSchema = Objects.requireNonNull(extractor.getSchema(schema),
                "No Nested Schema for " + extractor );

        Concatenator concatenator = extractor.bodgeFields(Concatenator.withSettings())
                .makeFromSchemas(schema, nestedSchema);

        return new KnownRepeatingFlatten<>(concatenator, extractor);
    }


    static class KnownRepeatingFlatten implements Function, List>> {

        private final Concatenator concatenator;

        private final Extractor extractor;

        KnownRepeatingFlatten(Concatenator concatenator, Extractor extractor) {
            this.concatenator = concatenator;
            this.extractor = extractor;
        }

        @Override
        public List> apply(IndexedData data) {

            @SuppressWarnings("unchecked")
            RepeatingData nested = (RepeatingData) extractor.extract(data);

            List> flattened = new ArrayList<>(nested.size());
            for (IndexedData element : nested) {
                flattened.add(concatenator.concat(data, element));
            }

            return flattened;
        }
    }

    static  Function, List>> strategyFlatten(DataSchema schema,
                                                                              Collection> extractors) {

        Map> extractorMap = new HashMap<>();
        SchemaBuilder schemaBuilder = SchemaBuilder.impliedType();

        for (int index = schema.firstIndex(); index > 0; index = schema.nextIndex(index)) {

            Extractor extractor = null;

            for (Extractor e : extractors) {

                if (e.isForIndexInSchema(index, schema)) {
                    extractor = e;
                    break;
                }
            }

            SchemaField existingSchemaField = schema.getSchemaFieldAt(index);
            if (extractor == null) {

                schemaBuilder.addSchemaField(existingSchemaField);
            }
            else {

                extractorMap.put(index, extractor);

                Class type = schema.getTypeAt(index);
                Class newType;
                if (type.isArray()) {
                    newType = Primitives.wrap(type.getComponentType());
                }
                else {
                    newType = Object.class;
                }
                schemaBuilder.addSchemaField(
                        SchemaField.of(1, newType)
                                .mapTo(existingSchemaField.getIndex(), existingSchemaField.getField()));
            }
        }

        return new KnownIterableFlatten<>(extractorMap, schema, schemaBuilder.build());
    }


    static class KnownIterableFlatten implements Function, List>> {

        private final Map> extractors;

        private final DataSchema schema;

        private final DataSchema newSchema;

        KnownIterableFlatten(Map> extractors, DataSchema schema, DataSchema newSchema) {
            this.extractors = extractors;
            this.schema = schema;
            this.newSchema = newSchema;
        }

        @Override
        public List> apply(IndexedData data) {

            int maxSize = 1;

            Map> lists = new HashMap<>(extractors.size());

            for (Map.Entry> entry : extractors.entrySet()) {

                int index = entry.getKey();
                Extractor extractor = entry.getValue();

                Object value = extractor.extract(data);
                if (value == null) {
                    lists.put(index, Collections.emptyList());
                    continue;
                }

                List list;
                Class type = extractor.getType(schema);
                if (type.isArray()) {
                    Class component = type.getComponentType();
                    if (component.isPrimitive()) {
                        if (component == int.class) {
                            int[] ia = (int[]) value;
                            list = Arrays.stream(ia).mapToObj(Integer::valueOf).collect(Collectors.toList());
                        }
                        else if (component == double.class) {
                            double[] da = (double[]) value;
                            list = Arrays.stream(da).mapToObj(Double::valueOf).collect(Collectors.toList());

                        }
                        else if (component == long.class) {
                            long[] la = (long[]) value;
                            list = Arrays.stream(la).mapToObj(Long::valueOf).collect(Collectors.toList());

                        }
                        else {
                            throw new IllegalArgumentException("No implemented " + type);
                        }
                    }
                    else {
                        list = Arrays.asList((Object[]) value);
                    }
                }
                else {
                    list = StreamSupport.stream(((Iterable) value).spliterator(), false)
                            .collect(Collectors.toList());
                }
                maxSize = Math.max(maxSize, list.size());
                lists.put(index, list);
            }

            List> flattened = new ArrayList<>(maxSize);

            for (int l = 0; l < maxSize; ++l) {

                ArrayData.Builder arrayData = ArrayData.builderForSchema(newSchema);

                for (int i = schema.firstIndex(); i > 0; i = schema.nextIndex(i)) {

                    List list = lists.get(i);
                    if (list == null) {
                        arrayData.setAt(i, data.getAt(i));
                    } else {
                        if (l < list.size()) {
                            arrayData.setAt(i, list.get(l));
                        }
                    }
                }

                flattened.add(arrayData.build());
            }

            return flattened;
        }
    }

    /**
     * Compares previous schemas so we can maybe shortcut.
     *
     * @param  Field Type.
     */
    public static class DynamicIterableFlatten implements Function, List>> {

        private final Collection> extractors;

        private Function, List>> last;

        private DataSchema previous;

        public DynamicIterableFlatten(Collection> extractors) {
            this.extractors = extractors;
        }


        @Override
        public List> apply(IndexedData indexedData) {

            if (last == null || !indexedData.getSchema().equals(previous)) {
                previous = indexedData.getSchema();
                last = strategyFlatten(previous, extractors);
            }

            return last.apply(indexedData);
        }
    }

    /**
     * Compares previous schemas so we can maybe shortcut.
     *
     * @param  Field Type.
     */
    public static class DynamicFlatten implements Function, List>> {

        private final Extractor extractor;

        private Function, List>> last;

        private DataSchema previous;

        public DynamicFlatten(Extractor extractor) {
            this.extractor = extractor;
        }

        @Override
        public List> apply(IndexedData indexedData) {

            if (last == null || !indexedData.getSchema().equals(previous)) {
                previous = indexedData.getSchema();
                last = extractorOfSchema(extractor, previous);
            }

            return last.apply(indexedData);
        }
    }

    interface Extractor {

        Object extract(IndexedData data);

        Class getType(DataSchema schema);

         DataSchema getSchema(DataSchema schema);

        Concatenator.Settings bodgeFields(Concatenator.Settings settings);

        boolean isForIndexInSchema(int index, DataSchema schema);

    }

    static  Extractor extractorForFieldOrIndex(F field, int index) {

        if (field == null && index == 0) {
            throw new IllegalStateException("Field Or Index must be provided");
        }

        if (field == null) {
            return new IndexExtractor<>(index);
        }
        else {
            return new FieldExtractor<>(field);
        }
    }

    static class FieldExtractor implements Extractor {

        private final F field;

        FieldExtractor(F field) {
            this.field = field;
        }

        @Override
        public Object extract(IndexedData data) {
            return GenericData.from(data).get(field);
        }

        @Override
        public Class getType(DataSchema schema) {
            return schema.getType(field);
        }

        @Override
        public  DataSchema getSchema(DataSchema schema) {
            return schema.getSchema(field);
        }

        @Override
        public Concatenator.Settings bodgeFields(Concatenator.Settings settings) {
            return settings.excludeFields(field);
        }

        @Override
        public boolean isForIndexInSchema(int index, DataSchema schema) {
            return schema.getIndex(field) == index;
        }

        @Override
        public String toString() {
            return "Field=" + field;
        }
    }

    static class IndexExtractor implements Extractor {

        private final int index;

        IndexExtractor(int index) {
            this.index = index;

        }

        @Override
        public Object extract(IndexedData data) {
            return data.getAt(index);
        }

        @Override
        public Class getType(DataSchema schema) {
            return schema.getTypeAt(index);
        }

        @Override
        public  DataSchema getSchema(DataSchema schema) {
            return schema.getSchemaAt(index);
        }

        @Override
        public Concatenator.Settings bodgeFields(Concatenator.Settings settings) {
            return settings;
        }

        @Override
        public boolean isForIndexInSchema(int index, DataSchema schema) {
            return index == this.index;
        }

        @Override
        public String toString() {
            return "Index=" + index;
        }
    }
}