org.dspace.app.statistics.ReportGenerator Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of dspace-api Show documentation
DSpace core data model and service APIs.
There is a newer version: 8.0
/**
 * The contents of this file are subject to the license and copyright
 * detailed in the LICENSE and NOTICE files at the root of the source
 * tree and available online at
 *
 * http://www.dspace.org/license/
 */
package org.dspace.app.statistics;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.sql.SQLException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.dspace.content.Item;
import org.dspace.content.MetadataSchemaEnum;
import org.dspace.content.MetadataValue;
import org.dspace.content.factory.ContentServiceFactory;
import org.dspace.content.service.ItemService;
import org.dspace.core.Context;
import org.dspace.handle.factory.HandleServiceFactory;
import org.dspace.handle.service.HandleService;
import org.dspace.services.ConfigurationService;
import org.dspace.services.factory.DSpaceServicesFactory;

/**
 * This class performs the action of coordinating a usage report being
 * generated using the standard internal aggregation file format as a basis.
 * All it's configuration information must come from that file.  There is the
 * opportunity for different output format options such as HTML.
 *
 * Use the -help flag for more information
 *
 * @author Richard Jones
 */
public class ReportGenerator {
    // set up our class globals

    /////////////////
    // aggregators
    /////////////////

    /**
     * aggregator for all actions performed in the system
     */
    private static Map actionAggregator;

    /**
     * aggregator for all searches performed
     */
    private static Map searchAggregator;

    /**
     * aggregator for user logins
     */
    private static Map userAggregator;

    /**
     * aggregator for item views
     */
    private static Map itemAggregator;

    /**
     * aggregator for current archive state statistics
     */
    private static Map archiveStats;


    //////////////////
    // statistics config data
    //////////////////

    /**
     * bottom limit to output for search word analysis
     */
    private static int searchFloor;

    /**
     * bottom limit to output for item view analysis
     */
    private static int itemFloor;

    /**
     * number of items from most popular to be looked up in the database
     */
    private static int itemLookup;

    /**
     * mode to use for user email display
     */
    private static String userEmail;

    /**
     * URL of the service being analysed
     */
    private static String url;

    /**
     * Name of the service being analysed
     */
    private static String name;

    /**
     * average number of views per item
     */
    private static int avgItemViews;

    /**
     * name of the server being analysed
     */
    private static String serverName;

    /**
     * start date of this report
     */
    private static Date startDate = null;

    /**
     * end date of this report
     */
    private static Date endDate = null;

    /**
     * the time taken to build the aggregation file from the log
     */
    private static int processTime;

    /**
     * the number of log lines analysed
     */
    private static int logLines;

    /**
     * the number of warnings encountered
     */
    private static int warnings;

    /**
     * the list of results to be displayed in the general summary
     */
    private static List generalSummary;

    //////////////////
    // regular expressions
    //////////////////

    /**
     * pattern that matches an unqualified aggregator property
     */
    private static final Pattern real = Pattern.compile("^(.+)=(.+)");

    //////////////////////////
    // Miscellaneous variables
    //////////////////////////

    /**
     * process timing clock
     */
    private static Calendar startTime = null;

    /**
     * a map from log file action to human readable action
     */
    private static Map actionMap = null;

    /////////////////
    // report generator config data
    ////////////////

    /**
     * the input file to build the report from
     */
    private static String input = null;

    /**
     * the log file action to human readable action map
     */
    private static String map;

    private static final ItemService itemService = ContentServiceFactory.getInstance().getItemService();
    private static final HandleService handleService = HandleServiceFactory.getInstance().getHandleService();
    private static final ConfigurationService configurationService
            = DSpaceServicesFactory.getInstance().getConfigurationService();

    /**
     * Default constructor
     */
    private ReportGenerator() { }

    /**
     * main method to be run from command line.  See usage information for
     * details as to how to use the command line flags
     *
     * @param argv the command line arguments given
     * @throws Exception    on generic exception
     * @throws SQLException An exception that provides information on a database access error or other errors.
     */
    public static void main(String[] argv)
        throws Exception, SQLException {
        // create context as super user
        Context context = new Context();
        context.turnOffAuthorisationSystem();

        String myFormat = null;
        String myInput = null;
        String myOutput = null;
        String myMap = null;

        // read in our command line options
        for (int i = 0; i < argv.length; i++) {
            if (argv[i].equals("-format")) {
                myFormat = argv[i + 1].toLowerCase();
            }

            if (argv[i].equals("-in")) {
                myInput = argv[i + 1];
            }

            if (argv[i].equals("-out")) {
                myOutput = argv[i + 1];
            }

            if (argv[i].equals("-map")) {
                myMap = argv[i + 1];
            }

            if (argv[i].equals("-help")) {
                usage();
                System.exit(0);
            }
        }

        processReport(context, myFormat, myInput, myOutput, myMap);
    }

    /**
     * using the pre-configuration information passed here, read in the
     * aggregation data and output a file containing the report in the
     * requested format
     *
     * this method is retained for backwards compatibility, but delegates the actual
     * wprk to a new method
     *
     * @param context  the DSpace context in which this action is performed
     * @param myFormat the desired output format (currently on HTML supported)
     * @param myInput  the aggregation file to be turned into a report
     * @param myOutput the file into which to write the report
     * @param myMap    the map
     * @throws Exception    if error
     * @throws SQLException if database error
     */
    public static void processReport(Context context, String myFormat,
                                     String myInput, String myOutput,
                                     String myMap)
        throws Exception, SQLException {
        if (myMap != null) {
            map = myMap;
        } else {
            map = configurationService.getProperty("dspace.dir")
                    + File.separator + "config" + File.separator + "dstat.map";
        }

        // create the relevant report type
        // FIXME: at the moment we only support HTML report generation
        Report report = null;
        if (myFormat.equals("html")) {
            report = new HTMLReport();
            ((HTMLReport) report).setOutput(myOutput);
        }

        if (report == null) {
            throw new IllegalStateException("Must specify a valid report format");
        }

        ReportGenerator.processReport(context, report, myInput);
    }

    /**
     * using the pre-configuration information passed here, read in the
     * aggregation data and output a file containing the report in the
     * requested format
     *
     * @param context context
     * @param report  report
     * @param myInput input
     * @throws Exception    if error
     * @throws SQLException if database error
     */
    public static void processReport(Context context, Report report,
                                     String myInput)
        throws Exception, SQLException {
        startTime = new GregorianCalendar();

        /** instantiate aggregators */
        actionAggregator = new HashMap<>();
        searchAggregator = new HashMap<>();
        userAggregator = new HashMap<>();
        itemAggregator = new HashMap<>();
        archiveStats = new HashMap<>();
        actionMap = new HashMap<>();

        /** instantiate lists */
        generalSummary = new ArrayList<>();

        // set the parameters for this analysis
        setParameters(myInput);

        // read the input file
        readInput(input);

        // load the log file action to human readable action map
        readMap(map);

        report.setStartDate(startDate);
        report.setEndDate(endDate);
        report.setMainTitle(name, serverName);

        // define our standard variables for re-use
        // FIXME: we probably don't need these once we've finished re-factoring
        Iterator keys = null;
        int i = 0;
        String explanation = null;
        int value;

        // FIXME: All of these sections should probably be buried in their own
        // custom methods

        Statistics overview = new Statistics();

        overview.setSectionHeader("General Overview");

        Iterator summaryEntries = generalSummary.iterator();
        while (summaryEntries.hasNext()) {
            String entry = summaryEntries.next();
            if (actionAggregator.containsKey(entry)) {
                int count = Integer.parseInt(actionAggregator.get(entry));
                overview.add(new Stat(translate(entry), count));
            }
        }

        report.addBlock(overview);

        // prepare the archive statistics package
        if (archiveStats.size() > 0) {
            Statistics archiveInfo = prepareStats(archiveStats, true, false);
            archiveInfo.setSectionHeader("Archive Information");
            archiveInfo.setStatName("Content Type");
            archiveInfo.setResultName("Number of items");

            report.addBlock(archiveInfo);
        }


        // process the items in preparation to be displayed.  This includes sorting
        // by view number, building the links, and getting further info where
        // necessary
        Statistics viewedItems = new Statistics("Item/Handle", "Number of views", itemFloor);
        viewedItems.setSectionHeader("Items Viewed");

        Stat[] items = new Stat[itemAggregator.size()];

        keys = itemAggregator.keySet().iterator();
        i = 0;
        while (keys.hasNext()) {
            String key = keys.next();
            String link = url + "handle/" + key;
            value = Integer.parseInt(itemAggregator.get(key));
            items[i] = new Stat(key, value, link);
            i++;
        }

        Arrays.sort(items);

        String info = null;
        for (i = 0; i < items.length; i++) {
            // Allow negative value to say that all items should be looked up
            if (itemLookup < 0 || i < itemLookup) {
                info = getItemInfo(context, items[i].getKey());
            }

            // if we get something back from the db then set it as the key,
            // else just use the link
            if (info != null) {
                items[i].setKey(info + " (" + items[i].getKey() + ")");
            } else {
                items[i].setKey(items[i].getReference());
            }

            // reset the info register
            info = null;
        }

        viewedItems.add(items);

        report.addBlock(viewedItems);

        // prepare a report of the full action statistics
        Statistics fullInfo = prepareStats(actionAggregator, true, true);
        fullInfo.setSectionHeader("All Actions Performed");
        fullInfo.setStatName("Action");
        fullInfo.setResultName("Number of times");

        report.addBlock(fullInfo);

        // prepare the user login statistics package
        if (!userEmail.equals("off")) {
            Statistics userLogins = prepareStats(userAggregator, true, false);
            userLogins.setSectionHeader("User Logins");
            userLogins.setStatName("User");
            userLogins.setResultName("Number of logins");
            if (userEmail.equals("alias")) {
                explanation = "(distinct addresses)";
                userLogins.setExplanation(explanation);
            }

            report.addBlock(userLogins);
        }

        // prepare the search word statistics package
        Statistics searchWords = prepareStats(searchAggregator, true, false);
        searchWords.setSectionHeader("Words Searched");
        searchWords.setStatName("Word");
        searchWords.setResultName("Number of searches");
        searchWords.setFloor(searchFloor);

        report.addBlock(searchWords);

        // FIXME: because this isn't an aggregator it can't be passed to
        // prepareStats; should we overload this method for use with this kind
        // of data?
        // prepare the average item views statistics
        if (avgItemViews > 0) {
            Statistics avg = new Statistics();
            avg.setSectionHeader("Averaging Information");

            Stat[] average = new Stat[1];

            average[0] = new Stat("Average views per item", avgItemViews);
            avg.add(average);
            report.addBlock(avg);
        }


        // prepare the log line level statistics
        // FIXME: at the moment we only know about warnings, but future versions
        // should aggregate all log line levels and display here
        Statistics levels = new Statistics("Level", "Number of lines");
        levels.setSectionHeader("Log Level Information");

        Stat[] level = new Stat[1];
        level[0] = new Stat("Warnings", warnings);

        levels.add(level);

        report.addBlock(levels);

        // get the display processing time information
        Calendar endTime = new GregorianCalendar();
        long timeInMillis = (endTime.getTimeInMillis() - startTime.getTimeInMillis());
        int outputProcessTime = (int) (timeInMillis / 1000);

        // prepare the processing information statistics
        Statistics process = new Statistics("Operation", "");
        process.setSectionHeader("Processing Information");

        Stat[] proc = new Stat[3];
        proc[0] = new Stat("Log Processing Time", processTime);
        proc[0].setUnits("seconds");
        proc[1] = new Stat("Output Processing Time", outputProcessTime);
        proc[1].setUnits("seconds");
        proc[2] = new Stat("Log File Lines Analysed", logLines);
        proc[2].setUnits("lines");

        process.add(proc);

        report.addBlock(process);

        report.render();
    }


    /**
     * a standard stats block preparation method for use when an aggregator
     * has to be put out in its entirity.  This method will not be able to
     * deal with complex cases, although it will perform sorting by value and
     * translations as per the map file if requested
     *
     * @param aggregator the aggregator that should be converted
     * @param sort       should the resulting stats be sorted by value
     * @param translate  translate the stat name using the map file
     * @return a Statistics object containing all the relevant information
     */
    public static Statistics prepareStats(Map aggregator, boolean sort, boolean translate) {
        Stat[] stats = new Stat[aggregator.size()];
        if (aggregator.size() > 0) {
            int i = 0;
            for (Map.Entry aggregatorEntry : aggregator.entrySet()) {
                String key = aggregatorEntry.getKey();
                int value = Integer.parseInt(aggregatorEntry.getValue());
                if (translate) {
                    stats[i] = new Stat(translate(key), value);
                } else {
                    stats[i] = new Stat(key, value);
                }
                i++;
            }

            if (sort) {
                Arrays.sort(stats);
            }
        }

        // add the results to the statistics object
        Statistics statistics = new Statistics();
        statistics.add(stats);

        return statistics;
    }


    /**
     * look the given text up in the action map table and return a translated
     * value if one exists.  If no translation exists the original text is
     * returned
     *
     * @param text the text to be translated
     * @return a string containing either the translated text or the original
     * text
     */
    public static String translate(String text) {
        if (actionMap.containsKey(text)) {
            return actionMap.get(text);
        } else {
            return text;
        }
    }


    /**
     * read in the action map file which converts log file line actions into
     * actions which are more understandable to humans
     *
     * @param map the map file
     * @throws IOException if IO error
     */
    public static void readMap(String map)
        throws IOException {
        FileReader fr = null;
        BufferedReader br = null;

        try {
            // read in the map file, printing a warning if none is found
            String record = null;
            try {
                fr = new FileReader(map);
                br = new BufferedReader(fr);
            } catch (IOException e) {
                System.err.println("Failed to read map file: log file actions will be displayed without translation");
                return;
            }

            // loop through the map file and read in the values
            while ((record = br.readLine()) != null) {
                Matcher matchReal = real.matcher(record);

                // if the line is real then read it in
                if (matchReal.matches()) {
                    actionMap.put(matchReal.group(1).trim(), matchReal.group(2).trim());
                }
            }
        } finally {
            if (br != null) {
                try {
                    br.close();
                } catch (IOException ioe) {
                    // ignore
                }
            }

            if (fr != null) {
                try {
                    fr.close();
                } catch (IOException ioe) {
                    // ignore
                }
            }
        }
    }


    /**
     * set the passed parameters up as global class variables.  This has to
     * be done in a separate method because the API permits for running from
     * the command line with args or calling the processReport method statically
     * from elsewhere
     *
     * @param myInput regex for log file names
     */
    public static void setParameters(String myInput) {
        if (myInput != null) {
            input = myInput;
        }
    }


    /**
     * read the input file and populate all the class globals with the contents
     * The values that come from this file form the basis of the analysis report
     *
     * @param input the aggregator file
     * @throws IOException    if IO error
     * @throws ParseException if parse error
     */
    public static void readInput(String input)
        throws IOException, ParseException {
        FileReader fr = null;
        BufferedReader br = null;

        // read in the analysis information, throwing an error if we fail to open
        // the given file
        String record = null;
        try {
            fr = new FileReader(input);
            br = new BufferedReader(fr);
        } catch (IOException e) {
            System.out.println("Failed to read input file: " + input);
            return;
        }

        // first initialise a date format object to do our date processing
        // if necessary
        SimpleDateFormat sdf = new SimpleDateFormat("dd'/'MM'/'yyyy");

        // FIXME: although this works, it is not very elegant
        // loop through the aggregator file and read in the values
        while ((record = br.readLine()) != null) {
            // match real lines
            Matcher matchReal = real.matcher(record);

            // pre-prepare our input strings
            String section = null;
            String key = null;
            String value = null;

            // temporary string to hold the left hand side of the equation
            String left = null;

            // match the line or skip this record
            if (matchReal.matches()) {
                // lift the values out of the matcher's result groups
                left = matchReal.group(1).trim();
                value = matchReal.group(2).trim();

                // now analyse the left hand side, splitting by ".", taking the
                // first token as the section and the remainder of the string
                // as they key if it exists
                StringTokenizer tokens = new StringTokenizer(left, ".");
                int numTokens = tokens.countTokens();
                if (tokens.hasMoreTokens()) {
                    section = tokens.nextToken();
                    if (numTokens > 1) {
                        key = left.substring(section.length() + 1);
                    } else {
                        key = "";
                    }
                }
            } else {
                continue;
            }

            // if the line is real, then we carry on
            // read the analysis contents in
            if ("archive".equals(section)) {
                archiveStats.put(key, value);
            } else if ("action".equals(section)) {
                actionAggregator.put(key, value);
            } else if ("user".equals(section)) {
                userAggregator.put(key, value);
            } else if ("search".equals(section)) {
                searchAggregator.put(key, value);
            } else if ("item".equals(section)) {
                itemAggregator.put(key, value);
            } else if ("user_email".equals(section)) {
                userEmail = value;
            } else if ("item_floor".equals(section)) {
                itemFloor = Integer.parseInt(value);
            } else if ("search_floor".equals(section)) {
                searchFloor = Integer.parseInt(value);
            } else if ("host_url".equals(section)) {
                url = value;
            } else if ("item_lookup".equals(section)) {
                itemLookup = Integer.parseInt(value);
            } else if ("avg_item_views".equals(section)) {
                try {
                    avgItemViews = Integer.parseInt(value);
                } catch (NumberFormatException e) {
                    avgItemViews = 0;
                }
            } else if ("server_name".equals(section)) {
                serverName = value;
            } else if ("service_name".equals(section)) {
                name = value;
            } else if ("start_date".equals(section)) {
                startDate = sdf.parse(value);
            } else if ("end_date".equals(section)) {
                endDate = sdf.parse(value);
            } else if ("analysis_process_time".equals(section)) {
                processTime = Integer.parseInt(value);
            } else if ("general_summary".equals(section)) {
                generalSummary.add(value);
            } else if ("log_lines".equals(section)) {
                logLines = Integer.parseInt(value);
            } else if ("warnings".equals(section)) {
                warnings = Integer.parseInt(value);
            }
        }

        // close the inputs
        br.close();
        fr.close();
    }

    /**
     * get the information for the item with the given handle
     *
     * @param context the DSpace context we are operating under
     * @param handle  the handle of the item being looked up, in the form
     *                1234/567 and so forth
     * @return a string containing a reference (almost citation) to the
     * article
     * @throws SQLException if database error
     */
    public static String getItemInfo(Context context, String handle)
        throws SQLException {
        Item item = null;

        // ensure that the handle exists
        try {
            item = (Item) handleService.resolveToObject(context, handle);
        } catch (Exception e) {
            return null;
        }

        // if no handle that matches is found then also return null
        if (item == null) {
            return null;
        }

        // build the referece
        // FIXME: here we have blurred the line between content and presentation
        // and it should probably be un-blurred
        List title = itemService.getMetadata(item, MetadataSchemaEnum.DC.getName(),
                                                            "title", null, Item.ANY);
        List author = itemService
            .getMetadata(item, MetadataSchemaEnum.DC.getName(), "contributor", "author", Item.ANY);

        StringBuilder authors = new StringBuilder();
        if (author.size() > 0) {
            authors.append("(").append(author.get(0).getValue());
        }
        if (author.size() > 1) {
            authors.append(" et al");
        }
        if (author.size() > 0) {
            authors.append(")");
        }

        String content = title.get(0).getValue() + " " + authors.toString();

        return content;
    }


    /**
     * output the usage information to the terminal
     */
    public static void usage() {
        String usage = "Usage Information:\n" +
            "ReportGenerator [options [parameters]]\n" +
            "-format [output format]\n" +
            "\tRequired\n" +
            "\tSpecify the format that you would like the output in\n" +
            "\tOptions:\n" +
            "\t\thtml\n" +
            "-in [aggregation file]\n" +
            "\tRequired\n" +
            "\tSpecify the aggregation data file to display\n" +
            "-out [output file]\n" +
            "\tOptional\n" +
            "\tSpecify the file to output the report to\n" +
            "\tDefault uses [dspace log directory]/report\n" +
            "-map [map file]\n" +
            "\tOptional\n" +
            "\tSpecify the map file to translate log file actions into human readable actions\n" +
            "\tDefault uses [dspace config directory]/dstat.map\n" +
            "-help\n" +
            "\tdisplay this usage information\n";

        System.out.println(usage);
    }
}