All downloads are free. Search and download functionality uses the official Maven repository.

io.nosqlbench.virtdata.library.basics.shared.distributions.CSVSamplerAutoDocsInfo Maven / Gradle / Ivy

The newest version!
// This file is auto-generated.
package io.nosqlbench.virtdata.library.basics.shared.distributions;

import io.nosqlbench.nb.annotations.Service;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.processors.DocCtorData;
import io.nosqlbench.virtdata.api.processors.DocForFuncCtor;
import io.nosqlbench.virtdata.api.processors.DocFuncData;
import java.lang.String;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;

@Service(
    value = DocFuncData.class,
    selector = "io.nosqlbench.virtdata.library.basics.shared.distributions.CSVSampler"
)
public class CSVSamplerAutoDocsInfo implements DocFuncData {
  public String getClassName() {
    return "CSVSampler";
  }

  public String getPackageName() {
    return "io.nosqlbench.virtdata.library.basics.shared.distributions";
  }

  public String getClassJavadoc() {
    return "\n"
            + "This function is a toolkit version of the {@link WeightedStringsFromCSV} function.\n"
            + "It is more capable and should be the preferred function for alias sampling over any CSV data.\n"
            + "This sampler uses a named column in the CSV data as the value. This is also referred to as the\n"
            + "labelColumn. The frequency of this label depends on the weight assigned to it in another named\n"
            + "CSV column, known as the weightColumn.\n"
            + "\n"
            + "

Combining duplicate labels

\n" + "When you have CSV data which is not organized around the specific identifier that you want to sample by,\n" + "you can use some combining functions to tabulate these prior to sampling. In that case, you can use\n" + "any of \"sum\", \"avg\", \"count\", \"min\", or \"max\" as the reducing function on the value in the weight column.\n" + "If none are specified, then \"sum\" is used by default. All modes except \"count\" and \"name\" require a valid weight\n" + "column to be specified.\n" + "\n" + "
    \n" + "
  • sum, avg, min, max - takes the given stat for the weight of each distinct label
  • \n" + "
  • count - takes the number of occurrences of a given label as the weight
  • \n" + "
  • name - sets the weight of all distinct labels to 1.0d
  • \n" + "
\n" + "\n" + "

Map vs Hash mode

\n" + "As with some of the other statistical functions, you can use this one to pick through the sample values\n" + "by using the map mode. This is distinct from the default hash mode. When map mode is used,\n" + "the values will appear monotonically as you scan through the unit interval of all long values.\n" + "Specifically, 0L represents 0.0d in the unit interval on input, and Long.MAX_VALUE represents\n" + "1.0 on the unit interval.) This mode is only recommended for advanced scenarios and should otherwise be\n" + "avoided. You will know if you need this mode.\n" + "\n"; } public String getInType() { return "long"; } public String getOutType() { return "java.lang.String"; } public Category[] getCategories() { return new Category[] { Category.general }; } public List getCtors() { return new ArrayList() {{ add(new DocForFuncCtor("CSVSampler", "Build an efficient O(1) sampler for the given column values with respect to the weights,\n" + "combining equal values by summing the weights.\n" + "\n" + "@param labelColumn The CSV column name containing the value\n" + "@param weightColumn The CSV column name containing a double weight\n" + "@param data Sampling modes or file names. Any of map, hash, sum, avg, count are taken\n" + " as configuration modes, and all others are taken as CSV filenames.\n", new LinkedHashMap() {{ put("labelColumn","java.lang.String"); put("weightColumn","java.lang.String"); put("data","java.lang.String[]..."); }}, new ArrayList>() {{ add(new ArrayList() {{ add("CSVSampler('USPS','n/a','name','census_state_abbrev')"); add(""); }}); }} )); }}; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy