All downloads are free. Search and download functionality uses the official Maven repository.

com.jillesvangurp.iterables.MergingCsvIterable Maven / Gradle / Ivy

Go to download

Miscellaneous classes to make processing UTF-8 content with iterators a bit less painful.

The newest version!
package com.jillesvangurp.iterables;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/**
 * Merge two sorted csv files on primary and foreign key fields and iterate over the groups of records.
 */
public class MergingCsvIterable implements Iterable>>{

    private final CSVLineIterable primary;
    private final CSVLineIterable secondary;
    private final int primaryKeyIndex;
    private final int foreignKeyIndex;

    /**
     * @param primary CSVLineIterable for the sorted (on the primary key) csv with the primary key.
     * @param secondary CSVLineIterable for the csv with the foreign key, sorted on this key.
     * @param primaryKyIndex column that contains the primary key
     * @param foreignKeyIndex colum that contains the foreign key
     */
    public MergingCsvIterable(CSVLineIterable primary, CSVLineIterable secondary, int primaryKyIndex, int foreignKeyIndex) {
        this.primary = primary;
        this.secondary = secondary;
        primaryKeyIndex = primaryKyIndex;
        this.foreignKeyIndex = foreignKeyIndex;
    }

    @Override
    public Iterator>> iterator() {
        final Iterator> primaryIterator = primary.iterator();
        final PeekableIterator> secondaryIterator = new PeekableIterator<>(secondary.iterator());

        return new Iterator>>() {

            @Override
            public boolean hasNext() {
                return primaryIterator.hasNext();
            }

            @Override
            public List> next() {
                List> records = new ArrayList<>();
                List primaryRecord = primaryIterator.next();
                String pk = primaryRecord.get(primaryKeyIndex);
                records.add(primaryRecord);
                List secondaryRecord;
                while(secondaryIterator.hasNext()) {
                    secondaryRecord = secondaryIterator.peek();
                    String fk = secondaryRecord.get(foreignKeyIndex);

//                    int compareTo = fk.compareTo(pk);
                    // assume the ids are integers
                    // TODO make comparator configurable
                    int compareTo = Integer.valueOf(fk) - Integer.valueOf(pk);
                    if(compareTo == 0) {
                        records.add(secondaryRecord);
                    } else if(compareTo > 0) {
                        // don't move secondary iterator forward to ensure we see the secondary record again for the next primary record.
                        break;
                    }
                    // we've peeked, now move it forward
                    secondaryIterator.next();
                }

                return records;
            }

            @Override
            public void remove() {
                primaryIterator.remove();
            }
        };
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy