io.nosqlbench.virtdata.library.basics.shared.distributions.DelimFrequencySamplerAutoDocsInfo Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of virtdata-lib-basics Show documentation
There is a newer version: 5.17.0
// This file is auto-generated.
package io.nosqlbench.virtdata.library.basics.shared.distributions;

import io.nosqlbench.nb.annotations.Service;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.processors.DocCtorData;
import io.nosqlbench.virtdata.api.processors.DocForFuncCtor;
import io.nosqlbench.virtdata.api.processors.DocFuncData;
import java.lang.String;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;

@Service(
    value = DocFuncData.class,
    selector = "io.nosqlbench.virtdata.library.basics.shared.distributions.DelimFrequencySampler"
)
public class DelimFrequencySamplerAutoDocsInfo implements DocFuncData {
  public String getClassName() {
    return "DelimFrequencySampler";
  }

  public String getPackageName() {
    return "io.nosqlbench.virtdata.library.basics.shared.distributions";
  }

  public String getClassJavadoc() {
    return "Takes a CSV with sample data and generates random values based on the\n"
            + "relative frequencies of the values in the file.\n"
            + "The CSV file must have headers which can\n"
            + "be used to find the named columns.\n"
            + "\n"
            + "I.E. take the following imaginary `animals.csv` file:\n"
            + "animal,count,country\n"
            + "puppy,1,usa\n"
            + "puppy,2,colombia\n"
            + "puppy,3,senegal\n"
            + "kitten,2,colombia\n"
            + "\n"
            + "`CSVFrequencySampler('animals.csv', animal)` will return `puppy` or `kitten` randomly. `puppy` will be 3x more frequent than `kitten`.\n"
            + "\n"
            + "`CSVFrequencySampler('animals.csv', country)` will return `usa`, `colombia`, or `senegal` randomly. `colombia` will be 2x more frequent than `usa` or `senegal`.\n"
            + "\n"
            + "Use this function to infer frequencies of categorical values from CSVs.\n";
  }

  public String getInType() {
    return "long";
  }

  public String getOutType() {
    return "java.lang.String";
  }

  public Category[] getCategories() {
    return new Category[] { Category.general };
  }

  public List getCtors() {
    return new ArrayList() {{
          add(new DocForFuncCtor("DelimFrequencySampler", "Create a sampler of strings from the given delimited file. The delimited file must have plain headers\n"
              + "as its first line.\n"
              + "@param filename The name of the file to be read into the sampler buffer\n"
              + "@param columnName The name of the column to be sampled\n"
              + "@param delimiter delimmiter\n", 
            new LinkedHashMap() {{
              put("filename","java.lang.String");
              put("columnName","java.lang.String");
              put("delimiter","char");
            }},
            new ArrayList>() {{
              add(new ArrayList() {{
                add("DelimFrequencySampler('values.csv','modelno', '|')");
                add("Read values.csv, count the frequency of values in 'modelno' column, and sample from this column proportionally");
              }});
            }}
          ));
        }};
  }
}