All Downloads are FREE. Search and download functionalities are using the official Maven repository.

liquibase.ezproxy.penn.EzproxyNormalize.groovy Maven / Gradle / Ivy

The newest version!
/**
 * Copyright 2010 Trustees of the University of Pennsylvania Licensed under the
 * Educational Community License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License. You may
 * obtain a copy of the License at
 *
 * http://www.osedu.org/licenses/ECL-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an "AS IS"
 * BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */
import metridoc.utils.*
import groovy.sql.Sql

jobs{

    job("normalizeEzproxyData") {
        runJobs "setup", "phaseI", "phaseII", "phaseIII", "phaseIV", "cleanup"
    }

    job("setup") {

        loadProperties "metridoc", "ezproxy"

        services.put "repository",
            dataSource(user:repository.user, password:repository.password, driverClass:repository.driverClass, jdbcUrl:repository.jdbcUrl)

        sql = Sql.newInstance(services."repository")
    }

    job("phaseI") {
        sql.execute("insert into data_file(data_file_name) select distinct source_file from ezproxy_log where source_file not in (select data_file_name from data_file)")
	sql.execute("insert into ip_address(ip_address) select distinct patron_ip from ezproxy_log where patron_ip not in (select ip_address from ip_address)")
        sql.execute("insert into location(city, state, country) select distinct city, state, country from ezproxy_log where country not in (select country from location) or state not in (select state from location) or city not in (select city from location)")
        sql.execute("insert into resource(resource, mini_resource) select distinct url, md5(url) from ezproxy_log e where md5(url) not in (select mini_resource from resource)")
        sql.execute("insert into resource(resource, mini_resource) select distinct ref_url, md5(ref_url) from ezproxy_log e where md5(ref_url) not in (select mini_resource from resource)")
        sql.execute("insert into agent(agent, mini_agent) select distinct agent, md5(agent) from ezproxy_log e where md5(agent) not in (select mini_agent from agent)")
        sql.execute("insert into cookies(cookies, mini_cookies) select distinct cookies, md5(cookies) from ezproxy_log e where md5(cookies) not in (select mini_cookies from cookies)")
        sql.execute("insert into ezproxy_identifier(ezproxy_id) select distinct ezproxy_id from ezproxy_log where ezproxy_id not in (select ezproxy_id from ezproxy_identifier)")
        sql.execute("insert into ez_log_master (data_file_id, line_number, ip_address_id, resource_id, proxy_time) select distinct d.data_file_id, e.line_num, i.ip_address_id, r.resource_id, e.proxy_time from ezproxy_log e, resource r, ip_address i, data_file d where md5(e.url) = r.mini_resource and e.patron_ip = i.ip_address and e.source_file = d.data_file_name")

    }

    job("phaseII") {
        sql.execute("insert into ez_master select ez_log_master_id, ez_key from ez_log_master m, ezproxy_log e, data_file d where e.source_file = d.data_file_name and d.data_file_id = m.data_file_id and e.line_num = m.line_number")
        sql.execute("insert into ez_http_master(master_id, method, response_code, response_size) select distinct m.master_id, e.http_method, e.response_code, response_size from ez_master m, ezproxy_log e where e.ez_key = m.ez_key")
        sql.execute("insert into ez_location_master(master_id, location_id) select distinct m.master_id, l.location_id from ez_master m, location l, ezproxy_log e where l.city=e.city and l.state=e.state and l.country=e.country and e.ez_key=m.ez_key")
        sql.execute("insert into ez_resource_reference_master(master_id, resource_id) select distinct m.master_id, r.resource_id from ez_master m, resource r, ezproxy_log e where r.mini_resource = md5(e.ref_url) and e.ez_key = m.ez_key and length(e.ref_url) > 2")
        sql.execute("insert into ez_agent_master(master_id, agent_id) select distinct m.master_id, a.agent_id from ez_master m, agent a, ezproxy_log e where a.mini_agent = md5(e.agent) and e.ez_key = m.ez_key and length(e.agent) > 2")
        sql.execute("insert into ez_cookies_master(master_id, cookies_id) select distinct m.master_id, c.cookies_id from ez_master m, cookies c, ezproxy_log e where c.mini_cookies = md5(e.cookies) and e.ez_key = m.ez_key and length(e.cookies) > 2")
        sql.execute("insert into ezproxy_id_master(master_id, ezproxy_id) select distinct m.master_id, p.ezproxy_identifier_id from ez_master m, ezproxy_identifier p, ezproxy_log e where p.ezproxy_identifier_id = e.ezproxy_id and e.ez_key = m.ez_key and length(e.ezproxy_id) > 1")
    }

    job("phaseIII") {
    }

    job("phaseIV") {
        sql.execute("insert into ez_rank_master(master_id, rank_id) select distinct m.master_id, r.rank_id from ez_master m, patron_rank r, ezproxy_log e where r.patron_id = e.patron_id and e.ez_key = m.ez_key")
        sql.execute("insert into ez_org_master(org_id, master_id) select distinct o.org_id, m.master_id from ez_master m, patron p, ezproxy_log e, organization o where o.org_code = p.org and p.id = e.patron_id and e.ez_key = m.ez_key")
    }

    job("cleanup") {
	sql.execute("truncate table ez_master")
	sql.execute("truncate table ezproxy_log")
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy