All downloads are free. The search and download features use the official Maven repository.

com.day.crx.statistics.loader.AccessLogLoader Maven / Gradle / Ivy

There is a newer version: 2024.11.18751.20241128T090041Z-241100
Show newest version
/*************************************************************************
*
* ADOBE CONFIDENTIAL
* ___________________
*
*  Copyright 1997 Adobe Systems Incorporated
*  All Rights Reserved.
*
* NOTICE:  All information contained herein is, and remains
* the property of Adobe Systems Incorporated and its suppliers,
* if any.  The intellectual and technical concepts contained
* herein are proprietary to Adobe Systems Incorporated and its
* suppliers and are protected by trade secret or copyright law.
* Dissemination of this information or reproduction of this material
* is strictly forbidden unless prior written permission is obtained
* from Adobe Systems Incorporated.
**************************************************************************/
package com.day.crx.statistics.loader;

import com.day.crx.statistics.Entry;
import com.day.crx.statistics.query.Query;
import com.day.crx.statistics.result.ResultSelected;

import java.io.InputStream;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.StringTokenizer;
import java.util.Locale;
import java.text.ParseException;
import java.text.SimpleDateFormat;

/**
 * AccessLogLoader reads a CQSE access.log and turns matching lines into
 * statistics entries. A request whose referer path starts with the query path
 * prefix is treated as a selected search result; otherwise a request whose
 * own path starts with the query path prefix is treated as a search query.
 * <p>
 * NOTE(review): the loader keeps a {@link SimpleDateFormat} as instance state
 * and date formats are not synchronized, so a single instance should
 * presumably not process files from several threads at once.
 *
 * @author mreutegg
 */
public class AccessLogLoader {

    /**
     * Date format as used in the CQSE access.log
     */
    private final SimpleDateFormat dateFormat = new SimpleDateFormat("dd/MMM/yyyy:HH:mm:ss Z", Locale.US);

    /**
     * Optional progress listener.
     */
    private ProgressListener listener;

    /**
     * Query location. Where extracted queries from the access.log are written.
     */
    private final String ql;

    /**
     * Result location. Where extracted results from the access.log are written.
     */
    private final String rl;

    /**
     * Query path prefix. URLs with this path prefix are considered searches.
     */
    private final String qpp;

    /**
     * Query parameter name. The name of the query parameter.
     */
    private final String qpn;

    /**
     * Optional result path prefix. Only result URLs with this prefix are
     * considered. If <code>null</code>, all result URLs are considered.
     */
    private final String rpp;

    /**
     * Creates a new AccessLogLoader.
     *
     * @param ql  the query location.
     * @param rl  the result location.
     * @param qpp the query path prefix.
     * @param qpn the query parameter name.
     * @param rpp the result path prefix or <code>null</code> if result URLs
     *            should not be restricted to a prefix.
     */
    public AccessLogLoader(String ql, String rl, String qpp,
                           String qpn, String rpp) {
        this.ql = ql;
        this.rl = rl;
        this.qpp = qpp;
        this.qpn = qpn;
        this.rpp = rpp;
    }

    /**
     * Processes an access.log and notifies the attached listener, if any,
     * about every line read. The log is decoded as ISO-8859-1. Lines that
     * cannot be parsed are skipped; they still count as processed lines and
     * are reported to the listener with a <code>null</code> entry. The
     * reader wrapping <code>in</code> is closed before this method returns.
     *
     * @param in     the input stream where to read from the access.log.
     * @return an integer array of length 2, where the first value indicates the
     *         number of queries and the second the number of results loaded.
     * @throws IOException         if an error occurs while reading from the
     *                             access.log.
     */
    public int[] processFile(InputStream in) throws IOException {
        BufferedReader reader = new BufferedReader(new InputStreamReader(in, "ISO-8859-1"));
        try {
            String line;
            int lineCount = 0;
            int queryCount = 0;
            int resultCount = 0;
            while ((line = reader.readLine()) != null) {
                Entry entry = null;
                lineCount++;
                try {
                    Access a = Access.fromString(line);
                    URI uri = new URI(a.requestURI);
                    URI referer = new URI(a.referer);
                    // getPath() returns null for opaque URIs (e.g. mailto:),
                    // so both paths must be null-checked before use
                    String path = uri.getPath();
                    String refererPath = referer.getPath();
                    if (refererPath != null && refererPath.startsWith(qpp)) {
                        // result selected: the request was issued from a
                        // search page. The result path prefix is optional,
                        // without one every result path is accepted.
                        if (path != null && (rpp == null || path.startsWith(rpp))) {
                            String query = getQuery(referer.getQuery());
                            if (query != null) {
                                // parse the time before creating the entry so
                                // that a malformed date leaves entry == null
                                long time = parseTimestamp(a);
                                entry = new ResultSelected(rl, stripExtension(path), 1, query);
                                entry.setTimestamp(time);
                                resultCount++;
                            }
                        }
                    } else if (path != null && path.startsWith(qpp)) {
                        // the request itself is a search
                        String query = getQuery(uri.getQuery());
                        if (query != null) {
                            long time = parseTimestamp(a);
                            entry = new Query(ql, query, 0, 0);
                            entry.setTimestamp(time);
                            queryCount++;
                        }
                    }
                } catch (IllegalArgumentException e) {
                    // malformed line: deliberately skipped, no entry created
                } catch (ParseException e) {
                    // malformed date: deliberately skipped, no entry created
                } catch (URISyntaxException e) {
                    // malformed URI: deliberately skipped, no entry created
                }
                notifyListener(lineCount, queryCount, resultCount, entry);
            }
            return new int[]{queryCount, resultCount};
        } finally {
            reader.close();
        }
    }

    /**
     * Parses the date and time zone of an access into milliseconds.
     *
     * @param a the access read from the log.
     * @return the time of the access in milliseconds.
     * @throws ParseException if date and time zone do not match the date
     *                        format of the access.log.
     */
    private long parseTimestamp(Access a) throws ParseException {
        return dateFormat.parse(a.dateTime + " " + a.timeZone).getTime();
    }

    /**
     * Removes everything from the last '.' on. A path without a '.' is
     * returned unchanged.
     *
     * @param path the path of the requested URI.
     * @return the trimmed path up to the last '.' or the unmodified path if
     *         there is no '.'.
     */
    private static String stripExtension(String path) {
        int extIdx = path.lastIndexOf('.');
        if (extIdx == -1) {
            return path;
        }
        return path.substring(0, extIdx).trim();
    }

    /**
     * Notifies the progress listener if one is set.
     *
     * @param lineCount   the number of lines parsed so far.
     * @param queryCount  the number of queries loaded so far.
     * @param resultCount the number of results loaded so far.
     * @param entry       the entry read from the log or null if
     *                    the line did not create an entry.
     */
    private void notifyListener(int lineCount,
                                int queryCount,
                                int resultCount,
                                Entry entry) {
        if (listener != null) {
            listener.lineProcessed(lineCount, queryCount, resultCount, entry);
        }
    }

    /**
     * Extracts the query string from the URI query part. The query part is
     * split at '&amp;' and the first non-empty value of the query parameter
     * is returned with '+' replaced by a space and surrounding whitespace
     * removed.
     *
     * @param uriQueryPart the URI query part, may be <code>null</code>.
     * @return the query string as typed in by the user or <code>null</code>
     *         if there is no query part or no non-empty value for the query
     *         parameter.
     */
    private String getQuery(String uriQueryPart) {
        if (uriQueryPart == null) {
            return null;
        }
        String prefix = qpn + "=";
        StringTokenizer tokenizer = new StringTokenizer(uriQueryPart, "&");
        while (tokenizer.hasMoreTokens()) {
            String param = tokenizer.nextToken();
            if (param.startsWith(prefix)) {
                String query = param.substring(prefix.length());
                if (query.length() > 0) {
                    // '+' is the form encoding of a space; a plain character
                    // replacement is sufficient, no regular expression needed
                    return query.replace('+', ' ').trim();
                }
            }
        }
        return null;
    }

    /**
     * Sets a progress listener.
     *
     * @param listener the progress listener or <code>null</code> to remove
     *                 a previously set listener.
     */
    public void setProgressListener(ProgressListener listener) {
        this.listener = listener;
    }

    /**
     * Simple interface to get notifications about the progress of reading the
     * access.log file.
     */
    public interface ProgressListener {

        /**
         * This method is called whenever a line from the access.log is
         * processed.
         *
         * @param line        the number of lines parsed so far.
         * @param queryCount  the number of queries loaded so far.
         * @param resultCount the number of results loaded so far.
         * @param entry       the entry read from the log or null
         *                    if the line did not create an entry.
         */
        void lineProcessed(int line, int queryCount,
                           int resultCount, Entry entry);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy