All downloads are free. Search and download functionality uses the official Maven repository.

com.expleague.ml.data.tools.PoolBuilder Maven / Gradle / Ivy

package com.expleague.ml.data.tools;

import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Stream;


import com.expleague.commons.seq.ArraySeq;
import com.expleague.commons.func.Factory;
import com.expleague.commons.seq.Seq;
import com.expleague.commons.util.Pair;
import com.expleague.ml.meta.DSItem;
import com.expleague.ml.meta.impl.JsonDataSetMeta;
import com.expleague.ml.meta.impl.JsonFeatureMeta;
import com.expleague.ml.meta.impl.JsonTargetMeta;

/**
 * User: solar
 * Date: 07.07.14
 * Time: 12:55
 */
@SuppressWarnings("unchecked")
public class PoolBuilder implements Factory> {
  private JsonDataSetMeta meta;
  private List items = new ArrayList<>();
  private List>> features = new ArrayList<>();
  private List>> targets = new ArrayList<>();

  @Override
  public Pool create() {
    return create((Class)meta.type());
  }

  public  Pool create(final Class clazz) {
    final Pool result = new Pool<>(
        meta,
        new ArraySeq<>(items.toArray((Item[])Array.newInstance(items.get(0).getClass(), items.size()))),
        features.toArray((Pair>[]) new Pair[features.size()]),
        targets.toArray((Pair>[]) new Pair[targets.size()]));
    { // verifying lines
      for (final Pair> entry : features) {
        entry.getFirst().owner = result;
        if (entry.second.length() != items.size())
          throw new RuntimeException(
              "Feature " + entry.first.toString() + " has " + entry.second.length() + " entries " + " expected " + items.size());
      }
    }
    { // checking targets
      for (final Pair> entry : targets) {
        entry.getFirst().owner = result;
        if (entry.second.length() != items.size())
          throw new RuntimeException(
              "Target has " + entry.second.length() + " entries " + " expected " + items.size());
      }
    }

    final Set itemIds = new HashSet<>();
    for (final Item item : (List)items) {
      if (itemIds.contains(item.id()))
        throw new RuntimeException(
            "Contain duplicates! Id = " + item.id()
        );
      itemIds.add(toString());
    }
    meta = null;
    items = new ArrayList<>();
    features = new ArrayList<>();
    targets = new ArrayList<>();
    return result;
  }

  public void setMeta(final JsonDataSetMeta meta) {
    this.meta = meta;
  }

  public void addItem(final DSItem read) {
    items.add(read);
  }

  public void newFeature(final JsonFeatureMeta meta, final Seq values) {
    meta.associated = this.meta.id();
    features.add(Pair.>create(meta, values));
  }

  public void newTarget(final JsonTargetMeta meta, final Seq target) {
    meta.associated = this.meta.id();
    this.targets.add(Pair.>create(meta, target));
  }

  public  Stream items() {
    return (Stream) items.stream();
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy