// org.apache.lucene.benchmark.stats.TestData (Maven / Gradle / Ivy)
// Go to download
// Show more of this group. Show more artifacts with this name.
// Show all versions of lucene-benchmark. Show documentation.
// Apache Lucene (module: benchmark)
package org.apache.lucene.benchmark.stats;
/**
* Copyright 2005 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.File;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Vector;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.benchmark.Constants;
import org.apache.lucene.store.Directory;
/**
 * This class holds together all parameters related to a test. A single test is
 * performed several times, and all results are averaged.
 * <p>
 * Besides the index-building parameters (source directory, Lucene directory,
 * analyzer, maxBufferedDocs, mergeFactor, compound/optimize flags) an instance
 * collects the {@link TestRunData} of every run and can render them as a
 * fixed-width text report via {@link #showRunData(String)}.
 * <p>
 * NOTE: the report helpers share static {@link NumberFormat} instances, which
 * the JDK documents as not synchronized; callers that format reports from
 * several threads need external synchronization.
 */
public class TestData
{
  /** maxBufferedDocs values combined by {@link #getAll(File[], Analyzer[])}. */
  public static int[] MAX_BUFFERED_DOCS_COUNTS = new int[]{10, 20, 50, 100, 200, 500};
  /** mergeFactor values combined by {@link #getAll(File[], Analyzer[])}. */
  public static int[] MERGEFACTOR_COUNTS = new int[]{10, 20, 50, 100, 200, 500};

  /**
   * ID of this test data.
   */
  private String id;
  /**
   * Heap size; initialised from {@code Runtime.maxMemory()} in the constructor.
   */
  private long heap;
  /**
   * List of results ({@link TestRunData}) for each test run with these parameters.
   */
  private Vector runData = new Vector();
  private int maxBufferedDocs, mergeFactor;
  /**
   * Directory containing source files.
   */
  private File source;
  /**
   * Lucene Directory implementation for creating an index.
   */
  private Directory directory;
  /**
   * Analyzer to use when adding documents.
   */
  private Analyzer analyzer;
  /**
   * If true, use compound file format.
   */
  private boolean compound;
  /**
   * If true, optimize index when finished adding documents.
   */
  private boolean optimize;
  /**
   * Data for search benchmarks.
   */
  private QueryData[] queries;

  /**
   * Creates an empty test description whose heap size is the JVM's maximum memory.
   */
  public TestData()
  {
    heap = Runtime.getRuntime().maxMemory();
  }

  /** Accumulator for elapsed time (ms), number of runs and number of records of one label. */
  private static class DCounter
  {
    double total;
    int count, recordCount;
  }

  /** Accumulator for a sum of long samples and the number of samples. */
  private static class LCounter
  {
    long total;
    int count;
  }

  /**
   * Per-task accumulator: the D* fields aggregate the timing data, the
   * L*0 fields the free-memory samples and the L*1 fields the total-memory samples.
   */
  private static class LDCounter
  {
    double Dtotal;
    int Dcount, DrecordCount;
    long Ltotal0;
    int Lcount0;
    long Ltotal1;
    int Lcount1;
  }

  // Column headers of the report; the length of each header is the width of its column.
  static final String ID = "# testData id ";
  static final String OP = "operation ";
  static final String RUNCNT = " runCnt";
  static final String RECCNT = " recCnt";
  static final String RECSEC = " rec/s";
  static final String FREEMEM = " avgFreeMem";
  static final String TOTMEM = " avgTotalMem";
  static final String COLS[] = {
      ID,
      OP,
      RUNCNT,
      RECCNT,
      RECSEC,
      FREEMEM,
      TOTMEM
  };

  /**
   * Get a textual summary of the benchmark results, average from all test runs.
   * <p>
   * The report consists of a short legend, one table with a row per label
   * (run count, average record count, records per second and, when memory
   * usage was recorded, average free/total memory) and a second table with
   * the same figures summed per task, where the task is the part of the label
   * after its last '-'.
   *
   * @param prefix text placed in the first column of every per-label row
   * @return the report, or <code>"# [NO RUN DATA]"</code> if no run data is available
   */
  public String showRunData(String prefix)
  {
    // A null list (possible via setRunData(null)) is treated like an empty one.
    if (runData == null || runData.size() == 0)
    {
      return "# [NO RUN DATA]";
    }
    HashMap resByTask = new HashMap();
    StringBuffer sb = new StringBuffer();
    String lineSep = System.getProperty("line.separator");
    sb.append("warm = Warm Index Reader").append(lineSep)
        .append("srch = Search Index").append(lineSep)
        .append("trav = Traverse Hits list, optionally retrieving document").append(lineSep)
        .append(lineSep);
    appendColumnHeaders(sb);

    // Pass 1: accumulate memory usage and timing per label over all runs.
    LinkedHashMap mapMem = new LinkedHashMap();
    LinkedHashMap mapSpeed = new LinkedHashMap();
    for (int i = 0; i < runData.size(); i++)
    {
      TestRunData trd = (TestRunData) runData.get(i);
      Collection labels = trd.getLabels();
      Iterator labelIt = labels.iterator();
      while (labelIt.hasNext())
      {
        String label = (String) labelIt.next();
        MemUsage mem = trd.getMemUsage(label);
        if (mem != null)
        {
          TestData.LCounter[] tm = (TestData.LCounter[]) mapMem.get(label);
          if (tm == null)
          {
            // index 0: free memory, index 1: total memory
            tm = new TestData.LCounter[2];
            tm[0] = new TestData.LCounter();
            tm[1] = new TestData.LCounter();
            mapMem.put(label, tm);
          }
          tm[0].total += mem.avgFree;
          tm[0].count++;
          tm[1].total += mem.avgTotal;
          tm[1].count++;
        }
        TimeData td = trd.getTotals(label);
        if (td != null)
        {
          TestData.DCounter dc = (TestData.DCounter) mapSpeed.get(label);
          if (dc == null)
          {
            dc = new TestData.DCounter();
            mapSpeed.put(label, dc);
          }
          dc.count++;
          dc.total += (td.count > 0 && td.elapsed <= 0 ? 1 : td.elapsed); // assume at least 1ms for any countable op
          dc.recordCount += td.count;
        }
      }
    }

    // Pass 2: render the speed columns per label and sum the timing per task.
    LinkedHashMap res = new LinkedHashMap();
    Iterator it = mapSpeed.keySet().iterator();
    while (it.hasNext())
    {
      String label = (String) it.next();
      TestData.DCounter dc = (TestData.DCounter) mapSpeed.get(label);
      res.put(label,
          format(dc.count, RUNCNT) +
          format(dc.count > 0 ? dc.recordCount / dc.count : 0, RECCNT) +
          format(1, (float) (dc.recordCount * 1000.0 / (dc.total > 0 ? dc.total : 1.0)), RECSEC)
      );
      // also sum by task
      String task = label.substring(label.lastIndexOf("-") + 1);
      LDCounter ldc = (LDCounter) resByTask.get(task);
      if (ldc == null)
      {
        ldc = new LDCounter();
        resByTask.put(task, ldc);
      }
      ldc.Dcount += dc.count;
      ldc.DrecordCount += dc.recordCount;
      ldc.Dtotal += (dc.count > 0 && dc.total <= 0 ? 1 : dc.total); // assume at least 1ms for any countable op
    }

    // Pass 3: append the memory columns per label and sum the memory per task.
    it = mapMem.keySet().iterator();
    while (it.hasNext())
    {
      String label = (String) it.next();
      TestData.LCounter[] lc = (TestData.LCounter[]) mapMem.get(label);
      String speed = (String) res.get(label);
      boolean makeSpeed = false;
      if (speed == null)
      {
        // memory was recorded for this label but no timing: emit zeroed speed columns
        makeSpeed = true;
        speed =
            format(lc[0].count, RUNCNT) +
            format(0, RECCNT) +
            format(0, (float) 0.0, RECSEC);
      }
      res.put(label, speed +
          format(0, averageOrZero(lc[0].total, lc[0].count), FREEMEM) +
          format(0, averageOrZero(lc[1].total, lc[1].count), TOTMEM));
      // also sum by task
      String task = label.substring(label.lastIndexOf("-") + 1);
      LDCounter ldc = (LDCounter) resByTask.get(task);
      if (ldc == null)
      {
        ldc = new LDCounter();
        resByTask.put(task, ldc);
        makeSpeed = true;
      }
      if (makeSpeed)
      {
        // no timing contributed a run count for this label, so use the memory sample count
        ldc.Dcount += lc[0].count;
      }
      ldc.Lcount0 += lc[0].count;
      ldc.Lcount1 += lc[1].count;
      ldc.Ltotal0 += lc[0].total;
      ldc.Ltotal1 += lc[1].total;
    }

    // Table 1: one row per label.
    it = res.keySet().iterator();
    while (it.hasNext())
    {
      String label = (String) it.next();
      sb.append(format(prefix, ID));
      sb.append(format(label, OP));
      sb.append(res.get(label)).append("\n");
    }

    // Table 2: show results by task (srch, optimize, etc.)
    sb.append("\n");
    appendColumnHeaders(sb);
    it = resByTask.keySet().iterator();
    while (it.hasNext())
    {
      String task = (String) it.next();
      LDCounter ldc = (LDCounter) resByTask.get(task);
      sb.append(format(" ", ID));
      sb.append(format(task, OP));
      sb.append(format(ldc.Dcount, RUNCNT));
      sb.append(format(ldc.Dcount > 0 ? ldc.DrecordCount / ldc.Dcount : 0, RECCNT));
      sb.append(format(1, (float) (ldc.DrecordCount * 1000.0 / (ldc.Dtotal > 0 ? ldc.Dtotal : 1.0)), RECSEC));
      // A task may have timing data only; its memory counts are then 0 and must not be divided by.
      sb.append(format(0, averageOrZero(ldc.Ltotal0, ldc.Lcount0), FREEMEM));
      sb.append(format(0, averageOrZero(ldc.Ltotal1, ldc.Lcount1), TOTMEM));
      sb.append("\n");
    }
    return sb.toString();
  }

  /** Appends all column headers followed by a newline. */
  private static void appendColumnHeaders(StringBuffer sb)
  {
    for (int i = 0; i < COLS.length; i++)
    {
      sb.append(COLS[i]);
    }
    sb.append("\n");
  }

  /** Returns the integer average total/count, or 0 when there are no samples. */
  private static long averageOrZero(long total, int count)
  {
    return count > 0 ? total / count : 0;
  }

  // index 0: no fraction digits, index 1: exactly one fraction digit
  private static final NumberFormat numFormat [] = { NumberFormat.getInstance(), NumberFormat.getInstance()};
  static {
    numFormat[0].setMaximumFractionDigits(0);
    numFormat[0].setMinimumFractionDigits(0);
    numFormat[1].setMaximumFractionDigits(1);
    numFormat[1].setMinimumFractionDigits(1);
  }

  /**
   * Formats a number right-aligned in a column, padding from the left.
   * A value wider than the column is returned in full rather than losing
   * its leading digits.
   *
   * @param numFracDigits number of fraction digits; must be 0 or 1
   * @param f value to format
   * @param col column header; its length is the column width
   */
  static String format(int numFracDigits, float f, String col) {
    return padLeft(numFormat[numFracDigits].format(f), col.length());
  }

  /**
   * Formats an integer right-aligned in a column, padding from the left.
   * A value wider than the column is returned in full.
   *
   * @param n value to format
   * @param col column header; its length is the column width
   */
  static String format(int n, String col) {
    return padLeft(String.valueOf(n), col.length());
  }

  /**
   * Formats a string left-aligned in a column, padding from the right and
   * truncating text that is longer than the column.
   *
   * @param s text to format; <code>null</code> is rendered as "null"
   * @param col column header; its length is the column width
   */
  static String format(String s, String col) {
    return padRight(String.valueOf(s), col.length());
  }

  /** Left-pads text with spaces up to width; longer text is returned unchanged. */
  private static String padLeft(String text, int width)
  {
    if (text.length() >= width)
    {
      return text;
    }
    StringBuffer buf = new StringBuffer(width);
    for (int i = text.length(); i < width; i++)
    {
      buf.append(' ');
    }
    return buf.append(text).toString();
  }

  /** Right-pads text with spaces up to width; longer text is cut to width. */
  private static String padRight(String text, int width)
  {
    if (text.length() >= width)
    {
      return text.substring(0, width);
    }
    StringBuffer buf = new StringBuffer(width);
    buf.append(text);
    for (int i = text.length(); i < width; i++)
    {
      buf.append(' ');
    }
    return buf.toString();
  }

  /**
   * Prepare a list of benchmark data, using all possible combinations of
   * benchmark parameters.
   *
   * @param sources list of directories containing different source document
   * collections
   * @param analyzers array of analyzers to use.
   * @return one {@link TestData} per combination of analyzer, source,
   * maxBufferedDocs, mergeFactor, compound flag and optimize flag
   */
  public static TestData[] getAll(File[] sources, Analyzer[] analyzers)
  {
    List res = new ArrayList(50);
    // template instance: mutated for every combination, then copied into the result
    TestData ref = new TestData();
    for (int q = 0; q < analyzers.length; q++)
    {
      for (int m = 0; m < sources.length; m++)
      {
        for (int i = 0; i < MAX_BUFFERED_DOCS_COUNTS.length; i++)
        {
          for (int k = 0; k < MERGEFACTOR_COUNTS.length; k++)
          {
            for (int n = 0; n < Constants.BOOLEANS.length; n++)
            {
              for (int p = 0; p < Constants.BOOLEANS.length; p++)
              {
                ref.id = "td-" + q + m + i + k + n + p;
                ref.source = sources[m];
                ref.analyzer = analyzers[q];
                ref.maxBufferedDocs = MAX_BUFFERED_DOCS_COUNTS[i];
                ref.mergeFactor = MERGEFACTOR_COUNTS[k];
                ref.compound = Constants.BOOLEANS[n].booleanValue();
                ref.optimize = Constants.BOOLEANS[p].booleanValue();
                res.add(ref.clone());
              }
            }
          }
        }
      }
    }
    return (TestData[]) res.toArray(new TestData[0]);
  }

  /**
   * Similar to {@link #getAll(java.io.File[], org.apache.lucene.analysis.Analyzer[])} but only uses
   * maxBufferedDocs of 10 and 100 and same for mergeFactor, thus reducing the number of permutations significantly.
   * It also only uses compound file and optimize is always true.
   *
   * @param sources list of directories containing different source document collections
   * @param analyzers array of analyzers to use
   * @return An Array of {@link TestData}; for every analyzer/source pair the
   * entries are ordered (10,10), (10,100), (100,10), (100,100) as
   * (maxBufferedDocs, mergeFactor)
   */
  public static TestData[] getTestDataMinMaxMergeAndMaxBuffered(File[] sources, Analyzer[] analyzers)
  {
    final int[] minMax = {10, 100};
    List res = new ArrayList(50);
    // template instance: mutated for every combination, then copied into the result
    TestData ref = new TestData();
    for (int q = 0; q < analyzers.length; q++)
    {
      for (int m = 0; m < sources.length; m++)
      {
        for (int b = 0; b < minMax.length; b++)
        {
          for (int f = 0; f < minMax.length; f++)
          {
            ref.id = "td-" + q + m + "_" + minMax[b] + "_" + minMax[f];
            ref.source = sources[m];
            ref.analyzer = analyzers[q];
            ref.maxBufferedDocs = minMax[b];
            ref.mergeFactor = minMax[f];
            ref.compound = true;
            ref.optimize = true;
            res.add(ref.clone());
          }
        }
      }
    }
    return (TestData[]) res.toArray(new TestData[0]);
  }

  /**
   * Creates a copy of the test parameters. The copy starts with its own empty
   * run data list; the queries are not copied either.
   * NOTE(review): this does not go through <code>Object.clone()</code>, so the
   * result is always a plain TestData.
   */
  protected Object clone()
  {
    TestData cl = new TestData();
    cl.id = id;
    cl.compound = compound;
    cl.heap = heap;
    cl.mergeFactor = mergeFactor;
    cl.maxBufferedDocs = maxBufferedDocs;
    cl.optimize = optimize;
    cl.source = source;
    cl.directory = directory;
    cl.analyzer = analyzer;
    // don't clone runData
    return cl;
  }

  /**
   * Returns a multi-line description of the test parameters, each line
   * starting with '#', stamped with the current date and followed by the
   * queries when they are set.
   */
  public String toString()
  {
    StringBuffer res = new StringBuffer();
    res.append("#-- ID: ").append(id).append(", ").append(new Date()).append(", heap=").append(heap).append(" --\n");
    res.append("# source=").append(source).append(", directory=").append(directory).append("\n");
    res.append("# maxBufferedDocs=").append(maxBufferedDocs).append(", mergeFactor=").append(mergeFactor);
    res.append(", compound=").append(compound).append(", optimize=").append(optimize).append("\n");
    if (queries != null)
    {
      res.append(QueryData.getLabels()).append("\n");
      for (int i = 0; i < queries.length; i++)
      {
        res.append("# ").append(queries[i].toString()).append("\n");
      }
    }
    return res.toString();
  }

  /** @return the analyzer to use when adding documents */
  public Analyzer getAnalyzer()
  {
    return analyzer;
  }

  /** @param analyzer the analyzer to use when adding documents */
  public void setAnalyzer(Analyzer analyzer)
  {
    this.analyzer = analyzer;
  }

  /** @return true if the compound file format is used */
  public boolean isCompound()
  {
    return compound;
  }

  /** @param compound true to use the compound file format */
  public void setCompound(boolean compound)
  {
    this.compound = compound;
  }

  /** @return the Lucene Directory implementation for creating an index */
  public Directory getDirectory()
  {
    return directory;
  }

  /** @param directory the Lucene Directory implementation for creating an index */
  public void setDirectory(Directory directory)
  {
    this.directory = directory;
  }

  /** @return the heap size */
  public long getHeap()
  {
    return heap;
  }

  /** @param heap the heap size */
  public void setHeap(long heap)
  {
    this.heap = heap;
  }

  /** @return the ID of this test data */
  public String getId()
  {
    return id;
  }

  /** @param id the ID of this test data */
  public void setId(String id)
  {
    this.id = id;
  }

  /** @return the maxBufferedDocs setting */
  public int getMaxBufferedDocs()
  {
    return maxBufferedDocs;
  }

  /** @param maxBufferedDocs the maxBufferedDocs setting */
  public void setMaxBufferedDocs(int maxBufferedDocs)
  {
    this.maxBufferedDocs = maxBufferedDocs;
  }

  /** @return the mergeFactor setting */
  public int getMergeFactor()
  {
    return mergeFactor;
  }

  /** @param mergeFactor the mergeFactor setting */
  public void setMergeFactor(int mergeFactor)
  {
    this.mergeFactor = mergeFactor;
  }

  /** @return true if the index is optimized when finished adding documents */
  public boolean isOptimize()
  {
    return optimize;
  }

  /** @param optimize true to optimize the index when finished adding documents */
  public void setOptimize(boolean optimize)
  {
    this.optimize = optimize;
  }

  /** @return the data for search benchmarks, may be null */
  public QueryData[] getQueries()
  {
    return queries;
  }

  /** @param queries the data for search benchmarks */
  public void setQueries(QueryData[] queries)
  {
    this.queries = queries;
  }

  /** @return the live list of {@link TestRunData} collected for this test (not a copy) */
  public Vector getRunData()
  {
    return runData;
  }

  /** @param runData the list of {@link TestRunData} to report on */
  public void setRunData(Vector runData)
  {
    this.runData = runData;
  }

  /** @return the directory containing source files */
  public File getSource()
  {
    return source;
  }

  /** @param source the directory containing source files */
  public void setSource(File source)
  {
    this.source = source;
  }
}