// This file is auto-generated.
package io.nosqlbench.virtdata.library.basics.shared.distributions;
import io.nosqlbench.nb.annotations.Service;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.processors.DocCtorData;
import io.nosqlbench.virtdata.api.processors.DocForFuncCtor;
import io.nosqlbench.virtdata.api.processors.DocFuncData;
import java.lang.String;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
@Service(
value = DocFuncData.class,
selector = "io.nosqlbench.virtdata.library.basics.shared.distributions.CSVSampler"
)
public class CSVSamplerAutoDocsInfo implements DocFuncData {
public String getClassName() {
return "CSVSampler";
}
public String getPackageName() {
return "io.nosqlbench.virtdata.library.basics.shared.distributions";
}
public String getClassJavadoc() {
return "\n"
+ "This function is a toolkit version of the {@link WeightedStringsFromCSV} function.\n"
+ "It is more capable and should be the preferred function for alias sampling over any CSV data.\n"
+ "This sampler uses a named column in the CSV data as the value. This is also referred to as the\n"
+ "labelColumn. The frequency of this label depends on the weight assigned to it in another named\n"
+ "CSV column, known as the weightColumn.\n"
+ "\n"
+ "Combining duplicate labels
\n"
+ "When you have CSV data which is not organized around the specific identifier that you want to sample by,\n"
+ "you can use some combining functions to tabulate these prior to sampling. In that case, you can use\n"
+ "any of \"sum\", \"avg\", \"count\", \"min\", or \"max\" as the reducing function on the value in the weight column.\n"
+ "If none are specified, then \"sum\" is used by default. All modes except \"count\" and \"name\" require a valid weight\n"
+ "column to be specified.\n"
+ "\n"
+ "\n"
+ " - sum, avg, min, max - takes the given stat for the weight of each distinct label
\n"
+ " - count - takes the number of occurrences of a given label as the weight
\n"
+ " - name - sets the weight of all distinct labels to 1.0d
\n"
+ "
\n"
+ "\n"
+ "Map vs Hash mode
\n"
+ "As with some of the other statistical functions, you can use this one to pick through the sample values\n"
+ "by using the map mode. This is distinct from the default hash mode. When map mode is used,\n"
+ "the values will appear monotonically as you scan through the unit interval of all long values.\n"
+ "Specifically, 0L represents 0.0d in the unit interval on input, and Long.MAX_VALUE represents\n"
+ "1.0 on the unit interval.) This mode is only recommended for advanced scenarios and should otherwise be\n"
+ "avoided. You will know if you need this mode.\n"
+ "\n";
}
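// The binding recipes below illustrate the behavior documented above. The first line
// is the example from the constructor docs in this file; the other column and file
// names ('state', 'population', 'state_population') are hypothetical stand-ins.
//
//   CSVSampler('USPS','n/a','name','census_state_abbrev')      // 'name' mode: every distinct USPS label weighted 1.0d
//   CSVSampler('state','population','sum','state_population')  // 'sum' mode: total the population weight per state label (default hash sampling)
//   CSVSampler('state','population','map','state_population')  // same data, scanned monotonically over the unit interval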
public String getInType() {
return "long";
}
public String getOutType() {
return "java.lang.String";
}
public Category[] getCategories() {
return new Category[] { Category.general };
}
public List<DocCtorData> getCtors() {
return new ArrayList<DocCtorData>() {{
add(new DocForFuncCtor("CSVSampler", "Build an efficient O(1) sampler for the given column values with respect to the weights,\n"
+ "combining equal values by summing the weights.\n"
+ "\n"
+ "@param labelColumn The CSV column name containing the value\n"
+ "@param weightColumn The CSV column name containing a double weight\n"
+ "@param data Sampling modes or file names. Any of map, hash, sum, avg, count are taken\n"
+ " as configuration modes, and all others are taken as CSV filenames.\n",
new LinkedHashMap<String, String>() {{
put("labelColumn","java.lang.String");
put("weightColumn","java.lang.String");
put("data","java.lang.String[]...");
}},
new ArrayList<List<String>>() {{
add(new ArrayList<String>() {{
add("CSVSampler('USPS','n/a','name','census_state_abbrev')");
add("");
}});
}}
));
}};
}
}
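// A minimal concept sketch of the sampling scheme documented above, not the actual
// CSVSampler implementation: duplicate labels are reduced with a combining function
// (here "sum"), and a long input is then mapped onto the unit interval to pick a label
// by cumulative weight. A linear cumulative-weight scan stands in for the O(1) alias
// table, CSV parsing is omitted, and all data in main() is hypothetical.
class CSVSamplerConceptSketch {
    private final String[] labels;
    private final double[] cumulativeWeights; // cumulative label weights, normalized to 1.0
    CSVSamplerConceptSketch(LinkedHashMap<String, Double> weightsByLabel) {
        labels = weightsByLabel.keySet().toArray(new String[0]);
        cumulativeWeights = new double[labels.length];
        double total = 0.0d;
        for (double weight : weightsByLabel.values()) {
            total += weight;
        }
        double running = 0.0d;
        for (int i = 0; i < labels.length; i++) {
            running += weightsByLabel.get(labels[i]) / total;
            cumulativeWeights[i] = running;
        }
    }
    String apply(long value) {
        // map-style interpolation: 0L maps to 0.0d and Long.MAX_VALUE maps to 1.0d
        double unit = (double) (value & Long.MAX_VALUE) / (double) Long.MAX_VALUE;
        for (int i = 0; i < cumulativeWeights.length; i++) {
            if (unit <= cumulativeWeights[i]) {
                return labels[i];
            }
        }
        return labels[labels.length - 1];
    }
    public static void main(String[] args) {
        // "sum" reduction over duplicate labels from hypothetical (label, weight) rows
        String[][] rows = { {"CA", "20.0"}, {"TX", "15.0"}, {"CA", "19.0"}, {"NY", "19.0"} };
        LinkedHashMap<String, Double> weightsByLabel = new LinkedHashMap<>();
        for (String[] row : rows) {
            weightsByLabel.merge(row[0], Double.parseDouble(row[1]), Double::sum);
        }
        CSVSamplerConceptSketch sampler = new CSVSamplerConceptSketch(weightsByLabel);
        System.out.println(sampler.apply(Long.MAX_VALUE / 2)); // label covering the middle of the unit interval
    }
}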