/*

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     [email protected]

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

*/
/*
 * Created on Apr 6, 2009
 */

package com.bigdata.counters.query;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileFilter;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Writer;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.regex.Pattern;

import javax.xml.parsers.ParserConfigurationException;

import org.apache.log4j.Logger;
import org.xml.sax.SAXException;

import com.bigdata.Banner;
import com.bigdata.counters.CounterSet;
import com.bigdata.counters.IHostCounters;
import com.bigdata.counters.IRequiredHostCounters;
import com.bigdata.counters.PeriodEnum;
import com.bigdata.counters.XMLUtility;
import com.bigdata.counters.httpd.DummyEventReportingService;
import com.bigdata.counters.render.IRenderer;
import com.bigdata.counters.render.RendererFactory;
import com.bigdata.service.Event;
import com.bigdata.util.httpd.NanoHTTPD;

/**
 * Utility to extract a batch of performance counters from a collection of
 * logged XML counter set files. This utility accepts file(s) giving the URLs
 * which would be used to demand the corresponding performance counters against
 * the live bigdata federation. The URLs listed in that file are parsed. The
 * host and port information are ignored, but the URL query parameters are
 * extracted and used to configure a set of {@link URLQueryModel}s.
 * <p>
 * A single pass is made through the specified XML counter set files. Each
 * file is read into memory by itself, and each query implied by a listed URL
 * is run against the in-memory {@link CounterSet} hierarchy. The results are
 * collected in independent {@link CounterSet}s provisioned for the specified
 * reporting units, etc. Once the last XML counter set file has been
 * processed, the various reports requested by the listed URLs are generated.
 * <p>
 * For each generated report, the name of the file on which the report will
 * be written is taken from the name of the counter whose value was extracted
 * for that report. This filename may be overridden by including the URL query
 * parameter {@value URLQueryModel#FILE}, which specifies the file on which to
 * write the report for that query.
 * 
 * @author Bryan Thompson
 * @version $Id$
 * 
 * @see IHostCounters
 * @see IRequiredHostCounters
 * 
 * @todo When rendering HTML output using flot, the flot resources need to be
 *       available in order to view the graphs. They should be written once
 *       into the output directory and the links in the (X)HTML output should
 *       resolve them there.
 * 
 * @todo Permit nsamples to be specified in units of minutes, hours, or days.
 *       E.g., 3d would be 3 days.
 * 
 * @todo Performance for long runs could be improved if we use more efficient
 *       classes for mutable strings in {@link XMLUtility} and perhaps
 *       {@link CounterSet}, e.g., the mg4j mutable string or
 *       http://javolution.org/.
 */
public class CounterSetQuery {

    static private final Logger log = Logger.getLogger(CounterSetQuery.class);

    /**
     * Reads a list of {@link URL}s from a file. Blank lines and comment lines
     * are ignored.
     * 
     * @param file
     *            A file containing URLs, blank lines, and comment lines
     *            (which start with '#').
     * 
     * @return A list of the URLs read from the file.
     * 
     * @throws IOException
     */
    private static Collection<URL> readURLsFromFile(final File file)
            throws IOException {

        if (log.isInfoEnabled())
            log.info("Reading queries: " + file);

        final List<URL> tmp = new LinkedList<URL>();

        final BufferedReader r = new BufferedReader(new FileReader(file));

        try {

            String s;
            while ((s = r.readLine()) != null) {

                s = s.trim();

                if (s.isEmpty()) // skip blank lines.
                    continue;

                if (s.startsWith("#")) // skip comment lines.
                    continue;

                tmp.add(new URL(s));

            }

        } finally {

            r.close();

        }

        return tmp;

    }

    /**
     * Reads URLs from a file or all files (recursively) in a directory.
     * 
     * @param file
     *            The file or directory.
     * 
     * @return The URLs read from the file(s).
     * 
     * @throws IOException
     */
    static private Collection<URL> readURLs(final File file)
            throws IOException {

        /*
         * Note: Duplicates are not filtered out, but this preserves the
         * evaluation order.
         */
        final Collection<URL> urls = new LinkedList<URL>();

        if (file.isDirectory()) {

            final File[] files = file.listFiles();

            for (File f : files) {

                if (f.isHidden())
                    continue;

                urls.addAll(readURLsFromFile(f));

            }

        } else {

            urls.addAll(readURLsFromFile(file));

        }

        return urls;

    }

    private static void readFiles(final Collection<File> counterSetFiles,
            final CounterSet counterSet, final int nsamples,
            final PeriodEnum period, final Pattern regex) throws IOException,
            SAXException, ParserConfigurationException, InterruptedException,
            ExecutionException {

        // Flatten directories in the list of files.
        final Collection<File> flatFileList = QueryUtil.collectFiles(
                counterSetFiles, new FileFilter() {

                    public boolean accept(File pathname) {

                        return !pathname.isHidden()
                                && pathname.getName().endsWith(".xml");

                    }

                });

        if (log.isInfoEnabled())
            log.info("Reading performance counters from "
                    + flatFileList.size() + " sources.");

        // Read the files.
        if (false/* sequential */) {

            // Process the files one at a time.
            readFilesSequential(flatFileList, counterSet, nsamples, period,
                    regex);

        } else {

            // Process the files in parallel.
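            // Note: readFilesParallel allocates one worker thread per source
            // file, so all of the files are read concurrently.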
            readFilesParallel(flatFileList, counterSet, nsamples, period,
                    regex);

        }

    }

    private static void readFilesSequential(
            final Collection<File> counterSetFiles,
            final CounterSet counterSet, final int nsamples,
            final PeriodEnum period, final Pattern regex) throws IOException,
            SAXException, ParserConfigurationException {

        for (File file : counterSetFiles) {

            if (log.isInfoEnabled())
                log.info("Reading file: " + file);

            QueryUtil.readCountersFromFile(file, counterSet, regex, nsamples,
                    period);

        }

    }

    private static void readFilesParallel(
            final Collection<File> counterSetFiles,
            final CounterSet counterSet, final int nsamples,
            final PeriodEnum period, final Pattern regex) throws IOException,
            SAXException, ParserConfigurationException, InterruptedException,
            ExecutionException {

        final int nfiles = counterSetFiles.size();

        final List<Callable<Void>> tasks = new ArrayList<Callable<Void>>(
                nfiles);

        for (File file : counterSetFiles) {

            tasks.add(new QueryUtil.ReadCounterSetXMLFileTask(file,
                    counterSet, nsamples, period, regex));

        }

        final ExecutorService service = Executors.newFixedThreadPool(nfiles);

        final List<Future<Void>> futures;
        try {

            // Run all tasks.
            futures = service.invokeAll(tasks);

        } finally {

            service.shutdownNow();

        }

        // Look for errors in the tasks.
        int nerrors = 0;

        for (int i = 0; i < futures.size(); i++) {

            try {

                futures.get(i).get();

            } catch (ExecutionException ex) {

                if (ex.getCause() instanceof SAXException) {

                    /*
                     * Sometimes you can get a partial XML file if the LBS was
                     * in the process of generating the file when it was
                     * copied. This shows up as a SAXException. Rather than
                     * dying, this just logs a warning and continues.
                     */
                    log.warn("Could not parse file (ignored): "
                            + tasks.get(i), ex);

                    nerrors++;

                    continue;

                }

                // Anything other than a parse error is rethrown.
                throw ex;

            }

        }

        if (nerrors != 0)
            log.error("There were " + nerrors + " errors.");

    }
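    /*
     * A hypothetical example of a file for the -queries option. Each line is
     * a URL; blank lines and lines starting with '#' are ignored. The host
     * and port are parsed but ignored, and the query parameter names shown
     * here are illustrative only; see URLQueryModel for the parameters which
     * are actually recognized:
     *
     * # Write a plain text report onto the named file.
     * http://localhost:8080/?file=report.txt&mimeType=text/plain
     */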

    /**
     * Utility for extracting data from performance counter dumps and running
     * various kinds of reports on those data.
     * <p>
     * Usage:
     * <dl>
     * <dt>-outputDir</dt>
     * <dd>The output directory (default is the current working directory).</dd>
     * <dt>-mimeType</dt>
     * <dd>The default MIME type for the rendered reports. The default is
     * text/plain, but it can be overridden on a query by query basis using
     * {@link URLQueryModel#MIMETYPE}.</dd>
     * <dt>-nsamples</dt>
     * <dd>Override for the default #of history samples to be retained. It is
     * an error if there are more distinct samples in the processed XML
     * counter set files (that is, if the #of time periods sampled exceeds
     * this many samples). If there are fewer, then some internal arrays will
     * be dimensioned larger than is otherwise necessary.</dd>
     * <dt>-events &lt;file&gt;</dt>
     * <dd>A file containing tab-delimited {@link Event}s. The {@link Event}s
     * are not required for simple performance counter views.</dd>
     * <dt>-queries &lt;file&gt;</dt>
     * <dd>A file, or directory of files, containing a list of URLs, each of
     * which is interpreted as a {@link URLQueryModel}.</dd>
     * <dt>&lt;file&gt;(s)</dt>
     * <dd>One or more XML counter set files or directories containing such
     * files. All such files will be processed before the reports are
     * generated.</dd>
     * </dl>
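     * <p>
     * A hypothetical invocation (the file and directory names here are
     * illustrative only):
     * 
     * <pre>
     * java com.bigdata.counters.query.CounterSetQuery -outputDir report \
     *         -queries queries.txt counters/
     * </pre>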
     * 
     * @param args
     *            Command line arguments.
     * 
     * @throws IOException
     * @throws ParserConfigurationException
     * @throws SAXException
     * @throws ExecutionException
     * @throws InterruptedException
     */
    public static void main(final String[] args) throws IOException,
            SAXException, ParserConfigurationException, InterruptedException,
            ExecutionException {

        Banner.banner();

        if (args.length == 0) {

            System.err.println("See javadoc for usage.");

            System.exit(1);

        }

        /*
         * The events read from the file(s).
         */
        final DummyEventReportingService service = new DummyEventReportingService();

        // The default output format (text, html, etc.)
        String defaultMimeType = NanoHTTPD.MIME_TEXT_PLAIN;

        /*
         * The #of slots to allocate (one slot per period of data to be read).
         * 
         * Note: The default is 7 days of data if the period is minutes.
         */
        int nsamples = 60 * 24 * 7;

        // The output directory defaults to the current working directory.
        File outputDir = new File(".");

        // The set of queries to be processed.
        final List<URLQueryModel> queries = new LinkedList<URLQueryModel>();

        // The set of counter set XML files to be processed.
        final List<File> counterSetFiles = new LinkedList<File>();

        for (int i = 0; i < args.length; i++) {

            final String arg = args[i];

            if (arg.startsWith("-")) {

                if (arg.equals("-outputDir")) {

                    outputDir = new File(args[++i]);

                    if (log.isInfoEnabled()) {
                        log.info("outputDir: " + outputDir);
                    }

                    if (!outputDir.exists()) {
                        outputDir.mkdirs();
                    }

                } else if (arg.equals("-mimeType")) {

                    defaultMimeType = args[++i];

                } else if (arg.equals("-nsamples")) {

                    nsamples = Integer.valueOf(args[++i]);

                    if (nsamples <= 0)
                        throw new IllegalArgumentException(
                                "nsamples must be positive.");

                } else if (arg.equals("-events")) {

                    // @todo read list of event files once all args are parsed.
                    QueryUtil.readEvents(service, new File(args[++i]));

                } else if (arg.equals("-queries")) {

                    final File file = new File(args[++i]);

                    final Collection<URL> urls = readURLs(file);

                    for (URL url : urls) {

                        queries.add(URLQueryModel.getInstance(url));

                    }

                } else {

                    System.err.println("Unknown option: " + arg);

                    System.exit(1);

                }

            } else {

                final File file = new File(arg);

                if (!file.exists())
                    throw new FileNotFoundException(file.toString());

                counterSetFiles.add(file);

            }

        }

        if (queries.isEmpty()) {

            throw new RuntimeException("No queries were specified.");

        }

        if (counterSetFiles.isEmpty()) {

            throw new RuntimeException("No counter set files were specified.");

        }

        /*
         * Compute a regular expression which will match anything which would
         * have been matched by the individual URLs, i.e., the OR of the
         * individual regular expressions entailed by each URL when
         * interpreted as a query.
         */
        final Pattern regex;
        {

            final List<Pattern> tmp = new LinkedList<Pattern>();

            for (URLQueryModel model : queries) {

                if (model.pattern != null) {

                    tmp.add(model.pattern);

                }

            }

            regex = QueryUtil.getPattern(tmp);

        }

        /*
         * Read counters accepted by the optional filter into the counter set
         * to be served.
         * 
         * @todo This does not support reading at different periods for each
         * query.
         */

        // The performance counters read from the file(s).
        final CounterSet counterSet = new CounterSet();

        readFiles(counterSetFiles, counterSet, nsamples, PeriodEnum.Minutes,
                regex);

        /*
         * Run each query in turn against the filtered pre-loaded counter set.
         */
        if (log.isInfoEnabled())
            log.info("Evaluating " + queries.size() + " queries.");

        for (URLQueryModel model : queries) {

            try {

                final IRenderer renderer = RendererFactory.get(model,
                        new CounterSetSelector(counterSet), defaultMimeType);

                /*
                 * Render on a file. The file can be specified by a URL query
                 * parameter.
                 * 
                 * @todo Use the munged counter path / counter name (when one
                 * can be identified) as the default filename.
                 */
                File file;
                if (model.file == null) {

                    file = File.createTempFile("query", ".out", outputDir);

                } else {

                    if (!model.file.isAbsolute()) {

                        file = new File(outputDir, model.file.toString());

                    } else {

                        file = model.file;

                    }

                }

                if (file.getParentFile() != null
                        && !file.getParentFile().exists()) {

                    if (log.isInfoEnabled()) {
                        log.info("Creating directory: "
                                + file.getParentFile());
                    }

                    // Make sure the parent directory exists.
                    file.getParentFile().mkdirs();

                }

                if (log.isInfoEnabled()) {
                    log.info("Writing file: " + file + " for query: "
                            + model.uri);
                }

                final Writer w = new BufferedWriter(new FileWriter(file,
                        false/* append */));

                try {

                    renderer.render(w);

                    w.flush();

                } finally {

                    w.close();

                }

            } catch (Throwable t) {

                log.error("Could not run query: " + model.uri, t);

            }

        }

    }

}



