All downloads are free. Search and download functionality uses the official Maven repository.

liquibase.ezproxy.penn.EzproxyLogLoading.groovy Maven / Gradle / Ivy

The newest version!
/**
 * Copyright 2010 Trustees of the University of Pennsylvania Licensed under the
 * Educational Community License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License. You may
 * obtain a copy of the License at
 *
 * http://www.osedu.org/licenses/ECL-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS IS"
 * BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */
import metridoc.utils.*
import org.apache.camel.Exchange
import java.text.SimpleDateFormat

jobs{

    // Top-level job: delegates to the "setup" and "ezproxyFileIngestion" jobs,
    // passed to runJobs in that order.
    job("loadEzproxyData") {
        runJobs("setup", "ezproxyFileIngestion")
    }

    // Prepares everything the ingestion job needs: loads the "metridoc" and
    // "ezproxy" property sets, then registers three services by name:
    //   repository                      - the data source built from the repository.* settings
    //   ezproxySqlUpdate                - endpoint string that inserts into ezproxy_log
    //   ezproxyAssertionErrorRepository - endpoint string that inserts into ez_loading_errors
    job("setup") {

        loadProperties("metridoc", "ezproxy")

        services.put(
            "repository",
            dataSource(
                user: repository.user,
                password: repository.password,
                driverClass: repository.driverClass,
                jdbcUrl: repository.jdbcUrl
            )
        )

        // Each #name is a placeholder named after its target column
        // (presumably filled from the message body map by key - confirm
        // against the sqlplus component).
        def logInsertEndpoint = [
            "sqlplus:insert into ezproxy_log(",
            "  patron_ip, city, state, country, ",
            "  patron_id, proxy_time, http_method, url, response_code, ",
            "  response_size, ref_url, ezproxy_id, cookies, source_file, line_num, agent) ",
            "values (",
            "  #patron_ip, #city, #state, #country, ",
            "  #patron_id, #proxy_time, #http_method, #url, #response_code, ",
            "  #response_size, #ref_url, #ezproxy_id, #cookies, #source_file, #line_num, #agent)?dataSource=repository"
        ].join("")
        services.put("ezproxySqlUpdate", logInsertEndpoint)

        // Same placeholder scheme, targeting the table that records rejected lines.
        def errorInsertEndpoint = [
            "sqlplus: insert into ez_loading_errors(",
            " error_message, exchange, source_file, line_num, stack_trace) ",
            "values (",
            " #error_message, #exchange, #source_file, #line_num, #stack_trace)?dataSource=repository"
        ].join("")
        services.put("ezproxyAssertionErrorRepository", errorInsertEndpoint)
    }

    job("ezproxyFileIngestion") {
        log.info "running ezproxy file ingestion"
        // Parses one "||"-delimited EZproxy log line (the exchange's in-body) into
        // a column-name -> value map and stores it as the out-body; the in-headers
        // are copied to the out message unchanged.
        //   line_num    - 1-based line position, derived from the CamelSplitIndex property
        //   source_file - value of the CamelFileNameOnly header
        //   proxy_time  - timestamp from field 6, re-formatted as "yy-MM-dd HH:mm:ss"
        // Field 4 of the line is not used.
        // Throws java.text.ParseException when the timestamp is malformed and
        // ArrayIndexOutOfBoundsException when the line has fewer than 15 fields.
        // Logs a progress message every 25000 lines.
        def parser = {
            // Limit -1 keeps trailing empty fields, so a line whose last field
            // (cookies) is empty still yields all 15 elements.
            def splitLine = it.in.body.split("\\|\\|", -1)
            def logLine = it.getProperty("CamelSplitIndex") + 1
            def fileName = it.in.getHeader("CamelFileNameOnly")

            // Characters 1..20 of field 6 hold the timestamp as dd/MMM/yyyy:HH:mm:ss
            // (one leading character, presumably '[', is skipped). It is parsed
            // directly in that layout: the previous textual rewrite replaced every
            // "11:" with "11 ", which only worked for the year 2011 and corrupted
            // any time containing "11:". Locale.ENGLISH makes the month
            // abbreviation independent of the JVM's default locale.
            // SimpleDateFormat is not thread-safe, so instances are created per call.
            def rawTime = splitLine[6].substring(1, 21)
            def proxyTime = new SimpleDateFormat("dd/MMM/yyyy:HH:mm:ss", Locale.ENGLISH).parse(rawTime)

            def parsedLine = [
                patron_ip: splitLine[0],
                city: splitLine[1],
                line_num: logLine,
                source_file: fileName,
                state: splitLine[2],
                country: splitLine[3],
                patron_id: splitLine[5],
                proxy_time: new SimpleDateFormat("yy-MM-dd HH:mm:ss").format(proxyTime),
                http_method: splitLine[7],
                url: splitLine[8],
                response_code: splitLine[9],
                response_size: splitLine[10],
                ref_url: splitLine[11],
                agent: splitLine[12],
                ezproxy_id: splitLine[13],
                cookies: splitLine[14],
            ]
            it.out.body = parsedLine
            it.out.headers = it.in.headers

            if (logLine % 25000 == 0) {
                log.info "${logLine} lines have been processed for file ${fileName}"
            }
        }

        // Checks the parsed-line map (the exchange's in-body) before it is inserted.
        // Text fields are checked against a maximum length, and response_code and
        // response_size must be numeric strings. A failed check raises
        // AssertionError. The cookies and line_num entries are not checked here.
        // NOTE(review): the length limits presumably mirror the ezproxy_log column
        // widths - confirm against the table definition.
        // NOTE: the assertion text ends up in the failure message, so rewording
        // these expressions changes what gets reported for a rejected line.
        def validator = {
            def body = it.in.body
            assert body["patron_ip"].length() <= 32
            assert body["city"].length() <= 32
            assert body["state"].length() <= 2
            assert body["country"].length() <= 32
            assert body["patron_id"].length() <= 32
            // proxy_time length check is disabled (left commented out by the original author).
            //assert body["proxy_time"].length() <= 32
            assert body["http_method"].length() <= 12
            assert body["url"].length() <= 2000
            assert body["response_code"].isNumber()
            assert body["response_size"].isNumber()
            assert body["ref_url"].length() <= 2000
            assert body["agent"].length() <= 1024
            assert body["source_file"].length() <= 32
            assert body["ezproxy_id"].length() <= 32
        }

        // Converts a failed exchange into a map describing the failure: the
        // exception message and stack trace, a textual dump of the exchange, and
        // the source_file / line_num entries of the in-body. The map becomes the
        // out-body; the in-headers are carried over to the out message.
        def parseError = { exchange ->
            def failedRecord = exchange.in.body
            def cause = exchange.getProperty(Exchange.EXCEPTION_CAUGHT)
            def trace = ExceptionUtils.getStackTrace(cause)
            def exchangeDump = ExchangeUtils.format(exchange)

            exchange.out.body = [
                error_message: cause.message,
                exchange: exchangeDump,
                source_file: failedRecord["source_file"],
                line_num: failedRecord["line_num"],
                stack_trace: trace
            ]
            exchange.out.headers = exchange.in.headers
        }

        // Writes the message of the exception stored under
        // Exchange.EXCEPTION_CAUGHT to the log at WARN level.
        def logException = { exchange ->
            def caught = exchange.getProperty(Exchange.EXCEPTION_CAUGHT)
            log.warn(caught.message)
        }

        // Wires the Camel routes for the ingestion. Error handling is declared
        // first, followed by the error route, the parse/insert route, the file
        // source and finally the dead-letter consumer.
        runRoute {
            // Failures not matched by an onException clause go to the dead-letter
            // endpoint seda:ezproxyErrors.
            errorHandler(deadLetterChannel("seda:ezproxyErrors"))
            // AssertionError (raised by the validator's assert statements) is marked
            // handled and diverted to its own queue.
            onException(AssertionError.class).handled(true).to("seda:ezproxyAssertionErrors")

            // Validation failures: build the error map, log the message, then hand
            // the result to the endpoint registered as ezproxyAssertionErrorRepository.
            // NOTE(review): aggregateBody() is not a core Camel DSL method -
            // presumably a metridoc extension that batches bodies; confirm.
            from("seda:ezproxyAssertionErrors?concurrentConsumers=10").process(parseError).process(logException)
                .aggregateBody().to(ezproxyAssertionErrorRepository)

            // Main path: parse a raw line, validate it, then send it to the
            // endpoint registered as ezproxySqlUpdate.
            from("direct:parseLogs").process(parser).process(validator)
                .aggregateBody().to(ezproxySqlUpdate)

            // Source of raw lines. ezproxyDirectory is not defined in this script -
            // presumably supplied by the loaded properties; verify. Lines are
            // dispatched through a thread pool (threads(50, 60), queue capped at
            // 100) to direct:parseLogs.
            from(ezproxyDirectory).splitByLine().threads(50, 60).maxQueueSize(100).to("direct:parseLogs")


            // Dead-letter consumer: only logs the exception message.
            from("seda:ezproxyErrors").process(logException)
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy