All downloads are free. Search and download functionality uses the official Maven repository.

metridoc.modules.EzproxyModule.groovy Maven / Gradle / Ivy

There is a newer version: 0.30
Show newest version
/*
 * Copyright 2010 Trustees of the University of Pennsylvania Licensed under the
 * Educational Community License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License. You may
 * obtain a copy of the License at
 *
 * http://www.osedu.org/licenses/ECL-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS IS"
 * BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */
package metridoc.modules

import groovy.util.logging.Slf4j
import org.apache.camel.Exchange
import metridoc.workflows.*

/**
 * Script module that configures the "ezproxy" project: it declares how a
 * delimited ezproxy log line is split, mapped to named columns and validated.
 *
 * All settings are plain script-level assignments (no {@code def}), so they
 * are stored as script properties for other code to read.
 * NOTE(review): {@code importModule}, {@code transformListToMap},
 * {@code validateMap}, {@code validationErrorHandler}, {@code schemaUpdate}
 * and {@code fileToLoadingTable} are not defined in this file; presumably they
 * are contributed by the imported modules -- confirm against those modules.
 *
 * Created by IntelliJ IDEA.
 * User: tbarker
 * Date: 2/29/12
 * Time: 1:19 PM
 */
@Slf4j
class EzproxyModule extends Script {
    /**
     * Populates the script properties that describe the ezproxy load.
     *
     * @return the value of the last statement, i.e. the {@code pipeline} list
     */
    @Override
    Object run() {
        projectName = "ezproxy"

        importModule CamelContextModule
        importModule PropertiesModule
        importModule DataSourceModule
        importModule MapperModule
        importModule SchemaUpdateModule
        importModule FileToLoadingTableModule
        importModule ValidatorModule

        // Expected number of fields per log line, and the regex for the
        // literal "||" separator between them.
        ezproxyLogWidth = 15
        delimiter = /\|\|/

        // Splits the first element of the incoming body into fields. Passing
        // ezproxyLogWidth as the split limit caps the result at that many
        // elements; any extra delimiters stay inside the last element.
        lineToArray = {
            String line = it.in.body.get(0)
            def splitLine = line.split(delimiter, ezproxyLogWidth)
            it.out.headers.putAll(it.in.headers)
            it.out.body = splitLine
        }

        // Column name -> source of its value. Three kinds of entries:
        //   * list    : names of other columns (the *_key entries)
        //               NOTE(review): presumably combined into a key by the
        //               mapper module -- confirm
        //   * integer : index into the split line (index 4 is not mapped)
        //   * closure : computed from the exchange
        listToMapMapping = [
            ref_url_key: ["ref_url"],
            url_key: ["url"],
            ezproxy_id_key: ["ezproxy_id"],
            source_file_key: ["source_file"],
            agent_key: ["agent"],
            response_key: ["http_method", "http_status", "response_size"],
            patron_ip_key: ["patron_ip"],
            patron_address_key: ["city", "state", "country"],
            patron_id_key: ["patron_id"],
            patron_ip: 0,
            city: 1,
            state: 2,
            country: 3,
            patron_id: 5,
            proxy_time: {
                String timeText = it.in.body[6]
                try {
                    // HH (hour-of-day, 0-23) rather than hh (1-12 am/pm hour):
                    // the pattern has no am/pm marker, and with hh an hour of
                    // "12" would be read as 0, shifting noon-hour entries to
                    // just after midnight.
                    return Date.parse("[dd/MMM/yyyy:HH:mm:ss Z]", timeText)
                } catch (Exception e) {
                    // Best effort: an unparseable timestamp yields null rather
                    // than failing the line here.
                    log.warn "could not parse the date {}", timeText
                    return null
                }
            },
            http_method: 7,
            url: 8,
            http_status: 9,
            response_size: 10,
            ref_url: 11,
            agent: 12,
            ezproxy_id: 13,
            cookies: 14,
            source_file: {
                String fileName = it.in.getHeader(Exchange.FILE_NAME_ONLY, String.class)
                assert fileName: "ezproxy file name should not be null"
                return fileName
            },
            line_num: {
                // The split index is shifted by one so line numbers start at 1.
                return it.getProperty(Exchange.SPLIT_INDEX, int.class) + 1
            }
        ]

        // Shared validation specs reused by several columns below.
        defaultValidationMap = [type: String, length: 32]
        defaultKeyType = [type: byte[]]

        // Column name -> validation spec (type, and optionally length or regex).
        // NOTE(review): how these specs are enforced is defined outside this
        // file, presumably by ValidatorModule -- confirm.
        validationMap = [

            ref_url_key: defaultKeyType,
            url_key: defaultKeyType,
            source_file_key: defaultKeyType,
            ezproxy_id_key: defaultKeyType,
            response_key: defaultKeyType,
            agent_key: defaultKeyType,
            patron_address_key: defaultKeyType,
            patron_id_key: defaultKeyType,
            patron_ip_key: defaultKeyType,
            patron_ip: [
                regex: /\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/,
                type: String
            ],
            city: defaultValidationMap,
            http_method: [
                length: 12,
                type: String
            ],
            state: [
                length: 2,
                type: String
            ],
            agent: [
                length: 1024,
                type: String
            ],
            source_file: defaultValidationMap,
            line_num: [
                type: Integer,
            ],
            country: defaultValidationMap,
            patron_id: defaultValidationMap,
            http_status: [
                type: Integer,
            ],
            response_size: [
                type: Integer,
            ],
            ref_url: [
                type: String,
                length: 2000
            ],
            url: [
                type: String,
                length: 2000
            ],
            ezproxy_id: defaultValidationMap,
        ]

        // Predicate: true only when the split line has exactly ezproxyLogWidth
        // fields. A mismatch is passed to validationErrorHandler and the
        // closure returns false instead of throwing.
        validArray = {
            def isValid = true

            try {
                def body = it.in.body
                assert ezproxyLogWidth == body.size()
            } catch (AssertionError e) {
                validationErrorHandler.handle(e, it)
                isValid = false
            }

            return isValid
        }

        // Dots are escaped so they match a literal '.' only; unescaped they
        // would match any character.
        fileNameFilter = /ezproxy\.log\.\d{8}\.gz/

        // Per-line processing chain: split, check width, map to columns,
        // then validate the resulting map.
        processLine = {
            it.process(lineToArray)
                .filter(validArray)
                .process(transformListToMap)
                .filter(validateMap)
        }

        normalizations = [
            ez_patron: [
                key: ["ez_patron_id"],
                columns: ["ez_patron"]
            ]
        ]

        pipeline = [schemaUpdate, fileToLoadingTable]
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy