/*
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Apr 6, 2009
*/
package com.bigdata.counters.query;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileFilter;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Writer;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.regex.Pattern;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.log4j.Logger;
import org.xml.sax.SAXException;
import com.bigdata.Banner;
import com.bigdata.counters.CounterSet;
import com.bigdata.counters.IHostCounters;
import com.bigdata.counters.IRequiredHostCounters;
import com.bigdata.counters.PeriodEnum;
import com.bigdata.counters.XMLUtility;
import com.bigdata.counters.httpd.DummyEventReportingService;
import com.bigdata.counters.render.IRenderer;
import com.bigdata.counters.render.RendererFactory;
import com.bigdata.service.Event;
import com.bigdata.util.httpd.NanoHTTPD;
/**
* Utility to extract a batch of performance counters from a collection of
* logged XML counter set files. This utility accepts file(s) listing the URLs
* that would be used to request the corresponding performance counters from
* the live bigdata federation. The URLs listed in that file are parsed. The
* host and port information are ignored, but the URL query parameters are
* extracted and used to configure a set of {@link URLQueryModel}s.
*
* A single pass is made through the specified XML counter set files. Each file
* is read into memory by itself, and each query implied by a listed URL is run
* against the in-memory {@link CounterSet} hierarchy. The results are collected
* in an independent {@link CounterSet} provisioned for the specified reporting
* units, etc. Once the last XML counter set file has been processed, the
* various reports requested by the listed URLs are generated.
*
* For each generated report, the name of the file on which the report will be
* written is taken from the name of the counter whose value was extracted for
* that report. This filename may be overridden by including the URL query
* parameter {@value URLQueryModel#FILE}, which specifies the file on which to
* write the report for that query.
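*
* For example, a URL listed in a query file might resemble the following
* (illustrative host, counter path, and output file name):
*
* <pre>
* http://localhost:8080/?path=/myHost/CPU&amp;file=cpu.txt
* </pre>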
*
* @author Bryan Thompson
* @version $Id$
*
* @see IHostCounters
* @see IRequiredHostCounters
*
* @todo When rendering HTML output using flot, the flot resources need to be
* available in order to view the graphs. They should be written once into
* the output directory and the links in the (X)HTML output should resolve
* them there.
*
* @todo Permit nsamples to be specified in units of minutes, hours, days. E.g.,
* 3d would be 3 days.
*
* @todo Performance for long runs could be improved if we use more efficient
* classes for mutable strings in {@link XMLUtility} and perhaps
* {@link CounterSet}. E.g., mg4j mutable string or
* http://javolution.org/.
*/
public class CounterSetQuery {
static private final Logger log = Logger.getLogger(CounterSetQuery.class);
/**
* Reads a list of {@link URL}s from a file. Blank lines and comment lines
* are ignored.
*
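* An example file (hypothetical URLs) might contain:
*
* <pre>
* # Queries against the host counters.
* http://localhost:8080/?path=/myHost/CPU
* http://localhost:8080/?path=/myHost/Memory
* </pre>
*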
* @param file
* A file containing URLs, blank lines, and comment lines (which
* start with '#').
*
* @return A list of the URLs read from the file.
*
* @throws IOException
*/
private static Collection<URL> readURLsFromFile(final File file) throws IOException {
if(log.isInfoEnabled())
log.info("Reading queries: "+file);
final List<URL> tmp = new LinkedList<URL>();
final BufferedReader r = new BufferedReader(new FileReader(file));
try {
String s;
while ((s = r.readLine()) != null) {
s = s.trim();
if (s.isEmpty())
continue;
if (s.startsWith("#"))
continue;
tmp.add(new URL(s));
}
} finally {
r.close();
}
return tmp;
}
/**
* Reads URLs from a file or from each non-hidden file in a directory.
*
* @param file
* The file or directory.
*
* @return The URLs read from the file(s).
*
* @throws IOException
*/
static private Collection<URL> readURLs(final File file) throws IOException {
/*
* note: duplicates are not filtered out but this preserves the
* evaluation order.
*/
final Collection<URL> urls = new LinkedList<URL>();
if (file.isDirectory()) {
final File[] files = file.listFiles();
for(File f : files) {
if (f.isHidden())
continue;
urls.addAll(readURLsFromFile(f));
}
} else {
urls.addAll(readURLsFromFile(file));
}
return urls;
}
private static void readFiles(final Collection<File> counterSetFiles,
final CounterSet counterSet, final int nsamples,
final PeriodEnum period, final Pattern regex) throws IOException,
SAXException, ParserConfigurationException, InterruptedException,
ExecutionException {
// flatten directories in the list of files.
final Collection<File> flatFileList = QueryUtil.collectFiles(counterSetFiles,
new FileFilter() {
public boolean accept(File pathname) {
return !pathname.isHidden()
&& pathname.getName().endsWith(".xml");
}
});
if (log.isInfoEnabled())
log.info("Reading performance counters from "
+ flatFileList.size() + " sources.");
// read the files
if (false/* sequential */) {
// process the files one at a time.
readFilesSequential(flatFileList, counterSet, nsamples, period,
regex);
} else {
// process the files in parallel.
readFilesParallel(flatFileList, counterSet, nsamples, period,
regex);
}
}
private static void readFilesSequential(
final Collection<File> counterSetFiles,
final CounterSet counterSet, final int nsamples,
final PeriodEnum period, final Pattern regex) throws IOException,
SAXException, ParserConfigurationException {
for (File file : counterSetFiles) {
if(log.isInfoEnabled())
log.info("Reading file: " + file);
QueryUtil.readCountersFromFile(file, counterSet, regex, nsamples,
period);
}
}
private static void readFilesParallel(
final Collection<File> counterSetFiles,
final CounterSet counterSet, final int nsamples,
final PeriodEnum period, final Pattern regex) throws IOException,
SAXException, ParserConfigurationException, InterruptedException, ExecutionException {
final int nfiles = counterSetFiles.size();
final List<Callable<Void>> tasks = new ArrayList<Callable<Void>>(nfiles);
for (File file : counterSetFiles) {
tasks.add(new QueryUtil.ReadCounterSetXMLFileTask(file, counterSet,
nsamples, period, regex));
}
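// Note: the pool is sized to the #of files, so all files are parsed concurrently.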
final ExecutorService service = Executors.newFixedThreadPool(nfiles);
final List<Future<Void>> futures;
try {
// run all tasks.
futures = service.invokeAll(tasks);
} finally {
service.shutdownNow();
}
int i = 0;
int nerrors = 0;
for (Future<Void> future : futures) {
// look for errors in the tasks.
try {
future.get();
} catch (ExecutionException ex) {
if (ex.getCause() instanceof SAXException) {
/*
* Sometimes you can get a partial XML file if the LBS was
* in the process of generating the file when it was copied.
* This shows up as a SAXException. Rather than dying, this
* just logs a warning and continues.
*/
log.warn("Could not parse file (ignored): " + tasks.get(i), ex);
nerrors++;
i++; // keep the task index aligned with the futures.
continue;
}
// Any other cause is unexpected, so propagate it.
throw ex;
}
i++;
}
if (nerrors != 0)
log.error("There were " + nerrors + " errors.");
}
/**
* Utility class for extracting data from performance counter dumps and
* running various kinds of reports on those data.
*
* Usage:
* <dl>
* <dt>-outputDir</dt>
* <dd>The output directory (default is the current working directory).</dd>
* <dt>-mimeType</dt>
* <dd>The default MIME type for the rendered reports. The default is
* <code>text/plain</code>, but it can be overridden on a query-by-query
* basis using {@link URLQueryModel#MIMETYPE}.</dd>
* <dt>-nsamples</dt>
* <dd>Override for the default #of history samples to be retained. It is an
* error if there are more distinct samples in the processed XML counter set
* files (that is, if the #of time periods sampled exceeds this many
* samples). If there are fewer, then some internal arrays will be
* dimensioned larger than is otherwise necessary.</dd>
* <dt>-events &lt;file&gt;</dt>
* <dd>A file containing tab-delimited {@link Event}s. The {@link Event}s
* are not required for simple performance counter views.</dd>
* <dt>-queries &lt;file&gt;</dt>
* <dd>A file, or directory of files, containing a list of URLs, each of
* which is interpreted as a {@link URLQueryModel}.</dd>
* <dt>&lt;file&gt;(s)</dt>
* <dd>One or more XML counter set files or directories containing such
* files. All such files will be processed before the reports are
* generated.</dd>
* </dl>
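*
* For example (illustrative file and directory names, where
* <code>counters</code> is a directory of XML counter set dumps):
*
* <pre>
* java com.bigdata.counters.query.CounterSetQuery -outputDir report -queries queries.txt counters
* </pre>
*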
* @param args
* Command line arguments.
*
* @throws IOException
* @throws ParserConfigurationException
* @throws SAXException
* @throws ExecutionException
* @throws InterruptedException
*/
public static void main(final String[] args) throws IOException,
SAXException, ParserConfigurationException, InterruptedException,
ExecutionException {
Banner.banner();
if (args.length == 0) {
System.err.println("See javadoc for usage.");
System.exit(1);
}
/*
* The events read from the file(s).
*/
final DummyEventReportingService service = new DummyEventReportingService();
// The default output format (text, html, etc.)
String defaultMimeType = NanoHTTPD.MIME_TEXT_PLAIN;
/*
* The #of slots to allocate (one slot per period of data to be read).
*
* Note: The default is 7 days of data if period is minutes.
*/
int nsamples = 60 * 24 * 7;
// The output directory defaults to the current working directory.
File outputDir = new File(".");
// the set of queries to be processed.
final List<URLQueryModel> queries = new LinkedList<URLQueryModel>();
// the set of counter set XML files to be processed.
final List<File> counterSetFiles = new LinkedList<File>();
for (int i = 0; i < args.length; i++) {
final String arg = args[i];
if (arg.startsWith("-")) {
if (arg.equals("-outputDir")) {
outputDir = new File(args[++i]);
if (log.isInfoEnabled()) {
log.info("outputDir: " + outputDir);
}
if(!outputDir.exists()) {
outputDir.mkdirs();
}
} else if (arg.equals("-mimeType")) {
defaultMimeType = args[++i];
} else if (arg.equals("-nsamples")) {
nsamples = Integer.valueOf(args[++i]);
if (nsamples <= 0)
throw new IllegalArgumentException(
"nslots must be positive.");
} else if (arg.equals("-events")) {
// @todo read list of event files once all args are parsed.
QueryUtil.readEvents(service, new File(args[++i]));
} else if (arg.equals("-queries")) {
final File file = new File(args[++i]);
final Collection<URL> urls = readURLs(file);
for (URL url : urls) {
queries.add(URLQueryModel.getInstance(url));
}
} else {
System.err.println("Unknown option: " + arg);
System.exit(1);
}
} else {
final File file = new File(arg);
if (!file.exists())
throw new FileNotFoundException(file.toString());
counterSetFiles.add(file);
}
}
if (queries.isEmpty()) {
throw new RuntimeException("No queries were specified.");
}
if (counterSetFiles.isEmpty()) {
throw new RuntimeException("No counter set files were specified.");
}
/*
* Compute a regular expression which will match anything which would
* have been matched by the individual URLs, i.e., the OR of the
* individual regular expressions entailed by each URL when interpreted
* as a query.
*/
final Pattern regex;
{
final List<Pattern> tmp = new LinkedList<Pattern>();
for (URLQueryModel model : queries) {
if (model.pattern != null) {
tmp.add(model.pattern);
}
}
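// E.g., individual patterns "A.*" and "B.*" combine to behave like
// "(A.*)|(B.*)" (illustrative; the details are in QueryUtil.getPattern).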
regex = QueryUtil.getPattern(tmp);
}
/*
* Read counters accepted by the optional filter into the counter set to
* be served.
*
* @todo this does not support reading at different periods for each
* query.
*/
// The performance counters read from the file(s).
final CounterSet counterSet = new CounterSet();
readFiles(counterSetFiles, counterSet, nsamples, PeriodEnum.Minutes,
regex);
/*
* Run each query in turn against the filtered pre-loaded counter set.
*/
if (log.isInfoEnabled())
log.info("Evaluating " + queries.size() + " queries.");
for (URLQueryModel model : queries) {
try {
final IRenderer renderer = RendererFactory.get(model,
new CounterSetSelector(counterSet), defaultMimeType);
/*
* Render on a file. The file can be specified by a URL query
* parameter.
*
* @todo Use the munged counter path / counter name (when one
* can be identified) as the default filename.
*/
File file;
if (model.file == null) {
file = File.createTempFile("query", ".out", outputDir);
} else {
if (!model.file.isAbsolute()) {
file = new File(outputDir, model.file.toString());
} else {
file = model.file;
}
}
if (file.getParentFile() != null
&& !file.getParentFile().exists()) {
if (log.isInfoEnabled()) {
log.info("Creating directory: " + file.getParentFile());
}
// make sure the parent directory exists.
file.getParentFile().mkdirs();
}
if (log.isInfoEnabled()) {
log.info("Writing file: " + file + " for query: "
+ model.uri);
}
final Writer w = new BufferedWriter(
new FileWriter(file, false/* append */));
try {
renderer.render(w);
w.flush();
} finally {
w.close();
}
} catch (Throwable t) {
log.error("Could not run query: " + model.uri, t);
}
}
}
}