com.bigdata.counters.query.CounterSetQuery Maven / Gradle / Ivy
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
[email protected]
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
* Created on Apr 6, 2009
package com.bigdata.counters.query;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileFilter;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Writer;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.regex.Pattern;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.log4j.Logger;
import org.xml.sax.SAXException;
import com.bigdata.Banner;
import com.bigdata.counters.CounterSet;
import com.bigdata.counters.IHostCounters;
import com.bigdata.counters.IRequiredHostCounters;
import com.bigdata.counters.PeriodEnum;
import com.bigdata.counters.XMLUtility;
import com.bigdata.counters.httpd.DummyEventReportingService;
import com.bigdata.counters.render.IRenderer;
import com.bigdata.counters.render.RendererFactory;
import com.bigdata.service.Event;
import com.bigdata.util.httpd.NanoHTTPD;
* Utility to extract a batch of performance counters from a collection of
* logged XML counter set files. This utility accepts file(s) giving the URLs
* which would be used to demand the corresponding performance counters against
* the live bigdata federation. The URLs listed in that file are parsed. The
* host and port information are ignored, but the URL query parameters are
* extracted and used to configure a set of {@link URLQueryModel}s.
* A single pass is made through the specified XML counter set files. Each file
* is read into memory by itself, and each query implied by a listed URL is run
* against the in-memory {@link CounterSet} hierarchy. The results are collected
* in independent {@link CounterSet} provisioned for the specified reporting
* units, etc. Once the last XML counter set file has been processed, the
* various reports requested by the listed URLs are generated.
* For each generated report, the name of the file on which the report will be
* written is taken from the name of the counter whose value was extracted for
* that report. This filename may be overridden by including the URL query
* parameter {@value URLQueryModel#FILE}, which specifies the file on which to
* write the report for that query.
* @author Bryan Thompson
* @version $Id$
* @see IHostCounters
* @see IRequiredHostCounters
* @todo When rendering HTML output using flot, the flot resources need to be
* available in order to view the graphs. They should be written once into
* the output directory and the links in the (X)HTML output should resolve
* them there.
* @todo Permit nsamples to be specified in units of minutes, hours, days. E.g.,
* 3d would be 3 days.
* @todo Performance for long runs could be improved if we use more efficient
* classes for mutable strings in {@link XMLUtility} and perhaps
* {@link CounterSet}. E.g., mg4j mutable string or
public class CounterSetQuery {
/** Logger used by the static methods of this utility. */
private static final Logger log = Logger.getLogger(CounterSetQuery.class);
* Reads a list of {@link URL}s from a file. Blank lines and comment lines
* are ignored.
* @param file
* A file containing URLs, blank lines, and comment lines (which
* start with '#').
* @return A list of the URLs read from the file.
* @throws IOException
private static Collection readURLsFromFile(final File file) throws IOException {
if(log.isInfoEnabled())"Reading queries: "+file);
final List tmp = new LinkedList();
final BufferedReader r = new BufferedReader(new FileReader(file));
try {
String s;
while ((s = r.readLine()) != null) {
s = s.trim();
if (s.isEmpty())
if (s.startsWith("#"))
tmp.add(new URL(s));
} finally {
return tmp;
* Reads URLs from a file or all files (recursively) in a directory.
* @param file
* The file or directory.
* @return The URLs read from the file(s).
* @throws IOException
static private Collection readURLs(final File file) throws IOException {
* note: duplicates are not filtered out but this preserves the
* evaluation order.
final Collection urls = new LinkedList();
if (file.isDirectory()) {
final File[] files = file.listFiles();
for(File f : files) {
if (f.isHidden())
} else {
return urls;
private static void readFiles(final Collection counterSetFiles,
final CounterSet counterSet, final int nsamples,
final PeriodEnum period, final Pattern regex) throws IOException,
SAXException, ParserConfigurationException, InterruptedException,
ExecutionException {
// flatten directories in the list of files.
final Collection flatFileList = QueryUtil.collectFiles(counterSetFiles,
new FileFilter() {
public boolean accept(File pathname) {
return !pathname.isHidden()
&& pathname.getName().endsWith(".xml");
if (log.isInfoEnabled())"Reading performance counters from "
+ flatFileList.size() + " sources.");
// read the files
if (false/* sequential */) {
// process the files one at a time.
readFilesSequential(flatFileList, counterSet, nsamples, period,
} else {
// process the files in parallel.
readFilesParallel(flatFileList, counterSet, nsamples, period,
private static void readFilesSequential(
final Collection counterSetFiles,
final CounterSet counterSet, final int nsamples,
final PeriodEnum period, final Pattern regex) throws IOException,
SAXException, ParserConfigurationException {
for (File file : counterSetFiles) {
if(log.isInfoEnabled())"Reading file: " + file);
QueryUtil.readCountersFromFile(file, counterSet, regex, nsamples,
private static void readFilesParallel(
final Collection counterSetFiles,
final CounterSet counterSet, final int nsamples,
final PeriodEnum period, final Pattern regex) throws IOException,
SAXException, ParserConfigurationException, InterruptedException, ExecutionException {
final int nfiles = counterSetFiles.size();
final List> tasks = new ArrayList>(nfiles);
for (File file : counterSetFiles) {
tasks.add(new QueryUtil.ReadCounterSetXMLFileTask(file, counterSet,
nsamples, period, regex));
final ExecutorService service = Executors.newFixedThreadPool(nfiles);
final List> futures;
try {
// run all tasks.
futures = service.invokeAll(tasks);
} finally {
int i = 0;
int nerrors = 0;
for(Future future : futures) {
// look for errors in the tasks.
try {
} catch(ExecutionException ex) {
if(ex.getCause() instanceof SAXException) {
* Sometimes you can get a partial XML file if the LBS was
* in the process of generating the file when it was copied.
* This shows up as a SAXException. Rather than dying, this
* just logs a warning and continues.
log.warn("Could not parse file (ignored): " + tasks.get(i), ex);
if (nerrors != 0)
log.error("There were " + nerrors + " errors.");
* Utility for extracting data from performance counter dumps and running
* various kinds of reports on those data.
* Usage:
* - -outputDir
* - The output directory (default is the current working directory).
* - -mimeType
* - The default MIME type for the rendered reports. The default is
* {@link NanoHTTPD#MIME_TEXT_PLAIN}, but can be overridden on a query by query basis
* using {@link URLQueryModel#MIMETYPE}.
* - -nsamples
* - Override for the default #of history samples to be retained. It is an
* error if there are more distinct samples in the processed XML counter set
* files (that is, if the #of time periods sampled exceeds this many
* samples). If there are fewer, then some internal arrays will be
* dimensioned larger than is otherwise necessary.
* - -events <file>
* - A file containing tab-delimited {@link Event}s. The {@link Event}s
* are not required for simple performance counter views.
* - -queries <file>
* - A file, or directory of files, containing a list of URLs, each of
* which is interpreted as a {@link URLQueryModel}.
* - <file>(s)
* - One or more XML counter set files or directories containing such
* files. All such files will be processed before the reports are generated.
* @param args
* Command line arguments.
* @throws IOException
* @throws ParserConfigurationException
* @throws SAXException
* @throws ExecutionException
* @throws InterruptedException
// Command line entry point: parses the options and file arguments, loads the
// performance counters from the XML counter set files, and then evaluates
// each query, writing each report onto a file.
//
// NOTE(review): this method looks like it was damaged by a text extraction
// step: comment delimiters, the call preceding several log message strings,
// generic type arguments, some statements and the closing braces are missing.
// Restore it from version control before building; the notes below mark the
// gaps and are assumptions to be confirmed.
public static void main(final String[] args) throws IOException,
SAXException, ParserConfigurationException, InterruptedException,
ExecutionException {
if (args.length == 0) {
System.err.println("See javadoc for usage.");
// NOTE(review): presumably the program exits here; the statement and the
// closing brace are missing - confirm.
* The events read from the file(s).
final DummyEventReportingService service = new DummyEventReportingService();
// The default output format (text, html, etc.)
String defaultMimeType = NanoHTTPD.MIME_TEXT_PLAIN;
* The #of slots to allocate (one slot per period of data to be read).
* Note: The default is 7 days of data if period is minutes.
int nsamples = 60 * 24 * 7;
// The output directory defaults to the current working directory.
File outputDir = new File(".");
// the set of queries to be processed.
final List queries = new LinkedList();
// the set of counter set XML files to be processed.
final List counterSetFiles = new LinkedList();
// Arguments starting with '-' are options; anything else is treated as the
// name of a counter set file or directory. Options which take a value
// consume the next argument (args[++i]) without a bounds check.
for (int i = 0; i < args.length; i++) {
final String arg = args[i];
if (arg.startsWith("-")) {
if (arg.equals("-outputDir")) {
outputDir = new File(args[++i]);
if (log.isInfoEnabled()) {"outputDir: " + outputDir);
if(!outputDir.exists()) {
// NOTE(review): body missing; presumably creates the output directory -
// confirm.
} else if (arg.equals("-mimeType")) {
defaultMimeType = args[++i];
} else if (arg.equals("-nsamples")) {
nsamples = Integer.valueOf(args[++i]);
// NOTE(review): the message below says "nslots" but the option is
// "-nsamples".
if (nsamples <= 0)
throw new IllegalArgumentException(
"nslots must be positive.");
} else if (arg.equals("-events")) {
// @todo read list of event files once all args are parsed.
QueryUtil.readEvents(service, new File(args[++i]));
} else if (arg.equals("-queries")) {
final File file = new File(args[++i]);
final Collection urls = readURLs(file);
for (URL url : urls) {
// NOTE(review): loop body missing; presumably adds a URLQueryModel for
// the URL to [queries] - confirm.
} else {
System.err.println("Unknown option: " + arg);
} else {
final File file = new File(arg);
if (!file.exists())
throw new FileNotFoundException(file.toString());
// NOTE(review): presumably the file is then added to [counterSetFiles];
// the statement is missing - confirm.
// At least one query and one counter set file are required.
if (queries.isEmpty()) {
throw new RuntimeException("No queries were specified.");
if (counterSetFiles.isEmpty()) {
throw new RuntimeException("No counter set files were specified.");
* Compute a regular expression which will match anything which would
* have been matched by the individual URLs. E.g., the OR of the
* individual regular expressions entailed by each URL when interpreted
* as a query.
final Pattern regex;
final List tmp = new LinkedList();
for (URLQueryModel model : queries) {
if (model.pattern != null) {
// NOTE(review): body missing; presumably collects model.pattern into
// [tmp] - confirm.
regex = QueryUtil.getPattern(tmp);
* Read counters accepted by the optional filter into the counter set to
* be served.
* @todo this does not support reading at different periods for each
* query.
// The performance counters read from the file(s).
final CounterSet counterSet = new CounterSet();
// NOTE(review): the call below is truncated; presumably the last argument
// is [regex] - confirm.
readFiles(counterSetFiles, counterSet, nsamples, PeriodEnum.Minutes,
* Run each query in turn against the filtered pre-loaded counter set.
if (log.isInfoEnabled())"Evaluating " + queries.size() + " queries.");
for (URLQueryModel model : queries) {
try {
final IRenderer renderer = RendererFactory.get(model,
new CounterSetSelector(counterSet), defaultMimeType);
* Render on a file. The file can be specified by a URL query
* parameter.
* @todo Use the munged counter path / counter name (when one
* can be identified) as the default filename.
File file;
// No file was given for the query: use a new temporary file in the output
// directory. Otherwise a relative file is resolved against the output
// directory and an absolute file is used as given.
if (model.file == null) {
file = File.createTempFile("query", ".out", outputDir);
} else {
if (!model.file.isAbsolute()) {
file = new File(outputDir, model.file.toString());
} else {
file = model.file;
if (file.getParentFile() != null
&& !file.getParentFile().exists()) {
if (log.isInfoEnabled()) {"Creating directory: " + file.getParentFile());
// make sure the parent directory exists.
// NOTE(review): the statement which creates the parent directory is
// missing - confirm.
if (log.isInfoEnabled()) {"Writing file: " + file + " for query: "
+ model.uri);
// Opens the report file for writing, replacing any existing content.
final Writer w = new BufferedWriter(
new FileWriter(file, false/* append */));
try {
// NOTE(review): try/finally bodies missing; presumably the renderer writes
// onto [w] and the writer is closed in the finally clause - confirm.
} finally {
} catch (Throwable t) {
// A failure for one query is logged.
// NOTE(review): presumably the loop then continues with the next query -
// confirm.
log.error("Could not run query: " + model.uri, t);