All downloads are free. Search and download functionality uses the official Maven repository.

org.archive.spring.PathSharingContext Maven / Gradle / Ivy

Go to download

The Archive Commons Code Libraries project contains general Java utility libraries, as used by the Heritrix crawler and other projects.

The newest version!
/*
 *  This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 *  Licensed to the Internet Archive (IA) by one or more individual 
 *  contributors. 
 *
 *  The IA licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

package org.archive.spring;

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map.Entry;
import java.util.concurrent.ConcurrentHashMap;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.apache.commons.io.FileUtils;
import org.archive.util.ArchiveUtils;
import org.springframework.beans.BeansException;
import org.springframework.beans.factory.config.ConfigurableListableBeanFactory;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.FileSystemXmlApplicationContext;
import org.springframework.validation.BeanPropertyBindingResult;
import org.springframework.validation.Errors;
import org.springframework.validation.Validator;

/**
 * Spring ApplicationContext extended for Heritrix use.
 * 
 * Notable extensions:
 * 
 * Remembers its primary XML configuration file, and can report its filesystem
 * path.
 * 
 * Reports a summary of Errors collected from self-Validating Beans.
 * 
 * Generates launchId from timestamp, creates launch directory
 * {jobDir}/{launchId}, and snapshots crawl configuration file into the launch
 * directory. Other configuration files, if any, are automatically snapshotted
 * into the launch directory when they are read (see
 * {@link ConfigFile#obtainReader()}). The token ${launchId} will be
 * interpolated in configuration-relative paths (see
 * {@link ConfigPathConfigurer}) so that launch-specific paths can be used for
 * logs, reports, warcs, etc.
 * 
 * @author gojomo
 */
public class PathSharingContext extends FileSystemXmlApplicationContext {
    private static final Logger LOGGER =
        Logger.getLogger(PathSharingContext.class.getName());

    /**
     * Creates a context from a single configuration location and refreshes
     * it.
     *
     * @param configLocation location of the primary XML configuration
     * @throws BeansException if context creation or refresh fails
     */
    public PathSharingContext(String configLocation) throws BeansException {
        this(new String[] { configLocation }, true, null);
    }

    /**
     * Creates a context with the given parent and refreshes it.
     *
     * @param configLocations configuration locations; the first one is
     *            treated as the primary configuration
     * @param parent parent context
     * @throws BeansException if context creation or refresh fails
     */
    public PathSharingContext(String[] configLocations, ApplicationContext parent) throws BeansException {
        this(configLocations, true, parent);
    }

    /**
     * Creates a context with the given parent, optionally refreshing it.
     * All other constructors delegate here.
     *
     * @param configLocations configuration locations; the first one is
     *            treated as the primary configuration
     * @param refresh whether to refresh the context immediately
     * @param parent parent context
     * @throws BeansException if context creation or refresh fails
     */
    public PathSharingContext(String[] configLocations, boolean refresh, ApplicationContext parent) throws BeansException {
        // Always construct the superclass unrefreshed: a refresh performed
        // inside the superclass constructor would run before init() has
        // registered its post-processor, so that refresh would not apply it.
        super(configLocations, false, parent);
        init();
        if (refresh) {
            refresh();
        }
    }

    /**
     * Creates a context without a parent, optionally refreshing it.
     *
     * @param configLocations configuration locations; the first one is
     *            treated as the primary configuration
     * @param refresh whether to refresh the context immediately
     * @throws BeansException if context creation or refresh fails
     */
    public PathSharingContext(String[] configLocations, boolean refresh) throws BeansException {
        this(configLocations, refresh, null);
    }

    /**
     * Creates a context without a parent and refreshes it.
     *
     * @param configLocations configuration locations; the first one is
     *            treated as the primary configuration
     * @throws BeansException if context creation or refresh fails
     */
    public PathSharingContext(String[] configLocations) throws BeansException {
        this(configLocations, true, null);
    }

    /**
     * Registers a bean-factory post-processor that adds a
     * RequiredAnnotationBeanPostProcessor to the bean factory.
     */
    private void init() {
        // enforce @Required annotation
        // NOTE(review): RequiredAnnotationBeanPostProcessor is not imported in
        // this file, so it presumably resolves from this package -- confirm.
        addBeanFactoryPostProcessor(beanFactory -> beanFactory.addBeanPostProcessor(new RequiredAnnotationBeanPostProcessor()));
    }

    /**
     * @return the first configured location, exactly as it was supplied
     *         (it may carry a "file:" URI scheme)
     */
    public String getPrimaryConfigurationPath() {
        return getConfigLocations()[0];
    }

    //
    // Cascading self-validation
    //
    /** Errors from the most recent {@link #validate()}: bean name -> Errors. */
    protected HashMap<String, Errors> allErrors;

    /**
     * Runs the Validator of every HasValidator bean against that bean,
     * replaces {@link #allErrors} with the results of this run (only beans
     * that reported errors are recorded), and logs each error at FINE level.
     */
    public void validate() {
        allErrors = new HashMap<>();

        for (Entry<String, HasValidator> entry : getBeansOfType(HasValidator.class).entrySet()) {
            String name = entry.getKey();
            HasValidator hv = entry.getValue();
            Validator v = hv.getValidator();
            Errors errors = new BeanPropertyBindingResult(hv, name);
            v.validate(hv, errors);
            if (errors.hasErrors()) {
                allErrors.put(name, errors);
            }
        }
        for (Entry<String, Errors> entry : allErrors.entrySet()) {
            for (Object obj : entry.getValue().getAllErrors()) {
                LOGGER.fine("validation error for '" + entry.getKey() + "': " + obj);
            }
        }
    }

    /**
     * Prepares a fresh launch directory, then starts the context.
     */
    @Override
    public void start() {
        initLaunchDir();
        super.start();
    }

    /**
     * @return the errors collected by the most recent {@link #validate()},
     *         keyed by bean name; null if validate() has not been called
     */
    public HashMap<String, Errors> getAllErrors() {
        return allErrors;
    }

    /** Identifier of the current launch; null until {@link #initLaunchId()} runs. */
    protected transient String currentLaunchId;

    /**
     * Assigns a new launch id obtained from
     * ArchiveUtils.getUnique14DigitDate() and logs it.
     */
    protected void initLaunchId() {
        currentLaunchId = ArchiveUtils.getUnique14DigitDate();
        LOGGER.info("launch id " + currentLaunchId);
    }

    /**
     * @return the current launch id, or null if none has been assigned yet
     */
    public String getCurrentLaunchId() {
        return currentLaunchId;
    }

    /**
     * Directory of the current launch; null before the first launch or when
     * the launch directory could not be initialized.
     */
    protected transient File currentLaunchDir;

    /**
     * @return the current launch directory, or null if there is none
     */
    public File getCurrentLaunchDir() {
        return currentLaunchDir;
    }

    /**
     * Resolves the primary configuration path to a File. A path starting
     * with "file:" is parsed as a URI; anything else is used as a plain
     * filesystem path.
     *
     * @return the primary configuration file
     * @throws RuntimeException wrapping the URISyntaxException if a "file:"
     *             path is not a valid URI
     */
    protected File getConfigurationFile() {
        String primaryConfigurationPath = getPrimaryConfigurationPath();
        if (primaryConfigurationPath.startsWith("file:")) {
            // strip URI-scheme if present (as is usual)
            try {
                return new File(new URI(primaryConfigurationPath));
            } catch (URISyntaxException e) {
                throw new RuntimeException(e);
            }
        } else {
            return new File(primaryConfigurationPath);
        }
    }

    /**
     * Assigns a new launch id, creates the directory {jobDir}/{launchId}
     * next to the primary configuration file, copies that file into it, and
     * attempts to point a "latest" symlink in the job directory at it.
     *
     * A failure to create the symlink is only logged as a warning. Any other
     * I/O failure is logged as severe and leaves
     * {@link #getCurrentLaunchDir()} returning null.
     */
    protected void initLaunchDir() {
        initLaunchId();
        try {
            File configFile = getConfigurationFile();
            File jobDir = configFile.getParentFile();

            currentLaunchDir = new File(jobDir, getCurrentLaunchId());
            if (!currentLaunchDir.mkdir()) {
                throw new IOException("failed to create directory " + currentLaunchDir);
            }

            // copy cxml to launch dir
            FileUtils.copyFileToDirectory(configFile, currentLaunchDir);

            // attempt to symlink "latest" to launch dir; the link target is
            // the bare directory name, i.e. relative to the job directory
            File latestSymlink = new File(jobDir, "latest");
            // best effort: remove any previous link, result intentionally ignored
            latestSymlink.delete();
            try {
                Files.createSymbolicLink(latestSymlink.toPath(), Paths.get(currentLaunchDir.getName()));
            } catch (IOException | UnsupportedOperationException e) {
                LOGGER.log(Level.WARNING, "failed to create symlink from " + latestSymlink + " to " + currentLaunchDir, e);
            }
        } catch (IOException e) {
            // pass the exception as the thrown argument so the stack trace is kept
            LOGGER.log(Level.SEVERE, "failed to initialize launch directory: " + e, e);
            currentLaunchDir = null;
        }
    }

    /**
     * Initialize the LifecycleProcessor.
     * Uses HeritrixLifecycleProcessor, which prevents an automatic lifecycle
     * start(), if none defined in the context.
     * @see org.springframework.context.support.DefaultLifecycleProcessor
     */
    @Override
    protected void initLifecycleProcessor() {
        ConfigurableListableBeanFactory beanFactory = getBeanFactory();
        if (!beanFactory.containsLocalBean(LIFECYCLE_PROCESSOR_BEAN_NAME)) {
            HeritrixLifecycleProcessor obj = (HeritrixLifecycleProcessor) beanFactory.createBean(HeritrixLifecycleProcessor.class);
            beanFactory.registerSingleton(LIFECYCLE_PROCESSOR_BEAN_NAME, obj);
        }
        super.initLifecycleProcessor();
    }

    /** Lazily created shared map; obtain it through {@link #getData()}. */
    protected ConcurrentHashMap<Object, Object> data;

    /**
     * Synchronized so that concurrent first callers all receive the same
     * map instance.
     *
     * @return a shared map for arbitrary use during a crawl; for example, could
     *         be used for state persisting for the duration of the crawl,
     *         shared among ScriptedProcessor, scripting console, etc scripts
     */
    public synchronized ConcurrentHashMap<Object, Object> getData() {
        if (data == null) {
            data = new ConcurrentHashMap<>();
        }
        return data;
    }


}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy