// This file is auto-generated.
package io.nosqlbench.virtdata.library.basics.shared.distributions;
import io.nosqlbench.nb.annotations.Service;
import io.nosqlbench.virtdata.api.annotations.Category;
import io.nosqlbench.virtdata.api.processors.DocCtorData;
import io.nosqlbench.virtdata.api.processors.DocForFuncCtor;
import io.nosqlbench.virtdata.api.processors.DocFuncData;
import java.lang.String;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
@Service(
value = DocFuncData.class,
selector = "io.nosqlbench.virtdata.library.basics.shared.distributions.CSVSampler"
)
public class CSVSamplerAutoDocsInfo implements DocFuncData {
public String getClassName() {
return "CSVSampler";
}
public String getPackageName() {
return "io.nosqlbench.virtdata.library.basics.shared.distributions";
}
public String getClassJavadoc() {
return "\n"
+ "This function is a toolkit version of the {@link WeightedStringsFromCSV} function.\n"
+ "It is more capable and should be the preferred function for alias sampling over any CSV data.\n"
+ "This sampler uses a named column in the CSV data as the value. This is also referred to as the\n"
+ "labelColumn. The frequency of this label depends on the weight assigned to it in another named\n"
+ "CSV column, known as the weightColumn.\n"
+ "\n"
+ "Combining duplicate labels
\n"
+ "When you have CSV data which is not organized around the specific identifier that you want to sample by,\n"
+ "you can use some combining functions to tabulate these prior to sampling. In that case, you can use\n"
+ "any of \"sum\", \"avg\", \"count\", \"min\", or \"max\" as the reducing function on the value in the weight column.\n"
+ "If none are specified, then \"sum\" is used by default. All modes except \"count\" and \"name\" require a valid weight\n"
+ "column to be specified.\n"
+ "\n"
+ "\n"
+ " - sum, avg, min, max - takes the given stat for the weight of each distinct label
\n"
+ " - count - takes the number of occurrences of a given label as the weight
\n"
+ " - name - sets the weight of all distinct labels to 1.0d
\n"
+ "
\n"
+ "\n"
+ "Map vs Hash mode
\n"
+ "As with some of the other statistical functions, you can use this one to pick through the sample values\n"
+ "by using the map mode. This is distinct from the default hash mode. When map mode is used,\n"
+ "the values will appear monotonically as you scan through the unit interval of all long values.\n"
+ "Specifically, 0L represents 0.0d in the unit interval on input, and Long.MAX_VALUE represents\n"
+ "1.0 on the unit interval.) This mode is only recommended for advanced scenarios and should otherwise be\n"
+ "avoided. You will know if you need this mode.\n"
+ "\n";
}
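// The binding recipes below illustrate the behavior documented above. The first line
// is the example from the constructor docs in this file; the other column and file
// names ('state', 'population', 'state_population') are hypothetical stand-ins.
//
//   CSVSampler('USPS','n/a','name','census_state_abbrev')      // 'name' mode: every distinct USPS label weighted 1.0d
//   CSVSampler('state','population','sum','state_population')  // 'sum' mode: total the population weight per state label (default hash sampling)
//   CSVSampler('state','population','map','state_population')  // same data, scanned monotonically over the unit interval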
public String getInType() {
return "long";
}
public String getOutType() {
return "java.lang.String";
}
public Category[] getCategories() {
return new Category[] { Category.general };
}
public List<DocCtorData> getCtors() {
return new ArrayList<DocCtorData>() {{
add(new DocForFuncCtor("CSVSampler", "Build an efficient O(1) sampler for the given column values with respect to the weights,\n"
+ "combining equal values by summing the weights.\n"
+ "\n"
+ "@param labelColumn The CSV column name containing the value\n"
+ "@param weightColumn The CSV column name containing a double weight\n"
+ "@param data Sampling modes or file names. Any of map, hash, sum, avg, count are taken\n"
+ " as configuration modes, and all others are taken as CSV filenames.\n",
new LinkedHashMap<String, String>() {{
put("labelColumn","java.lang.String");
put("weightColumn","java.lang.String");
put("data","java.lang.String[]...");
}},
new ArrayList<List<String>>() {{
add(new ArrayList<String>() {{
add("CSVSampler('USPS','n/a','name','census_state_abbrev')");
add("");
}});
}}
));
}};
}
}
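// A minimal concept sketch of the sampling scheme documented above, not the actual
// CSVSampler implementation: duplicate labels are reduced with a combining function
// (here "sum"), and a long input is then mapped onto the unit interval to pick a label
// by cumulative weight. A linear cumulative-weight scan stands in for the O(1) alias
// table, CSV parsing is omitted, and all data in main() is hypothetical.
class CSVSamplerConceptSketch {
    private final String[] labels;
    private final double[] cumulativeWeights; // cumulative label weights, normalized to 1.0
    CSVSamplerConceptSketch(LinkedHashMap<String, Double> weightsByLabel) {
        labels = weightsByLabel.keySet().toArray(new String[0]);
        cumulativeWeights = new double[labels.length];
        double total = 0.0d;
        for (double weight : weightsByLabel.values()) {
            total += weight;
        }
        double running = 0.0d;
        for (int i = 0; i < labels.length; i++) {
            running += weightsByLabel.get(labels[i]) / total;
            cumulativeWeights[i] = running;
        }
    }
    String apply(long value) {
        // map-style interpolation: 0L maps to 0.0d and Long.MAX_VALUE maps to 1.0d
        double unit = (double) (value & Long.MAX_VALUE) / (double) Long.MAX_VALUE;
        for (int i = 0; i < cumulativeWeights.length; i++) {
            if (unit <= cumulativeWeights[i]) {
                return labels[i];
            }
        }
        return labels[labels.length - 1];
    }
    public static void main(String[] args) {
        // "sum" reduction over duplicate labels from hypothetical (label, weight) rows
        String[][] rows = { {"CA", "20.0"}, {"TX", "15.0"}, {"CA", "19.0"}, {"NY", "19.0"} };
        LinkedHashMap<String, Double> weightsByLabel = new LinkedHashMap<>();
        for (String[] row : rows) {
            weightsByLabel.merge(row[0], Double.parseDouble(row[1]), Double::sum);
        }
        CSVSamplerConceptSketch sampler = new CSVSamplerConceptSketch(weightsByLabel);
        System.out.println(sampler.apply(Long.MAX_VALUE / 2)); // label covering the middle of the unit interval
    }
}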