All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.oodt.cas.pushpull.retrievalmethod.RemoteCrawler Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package org.apache.oodt.cas.pushpull.retrievalmethod;

//OODT imports
import org.apache.oodt.cas.filemgr.structs.exceptions.CatalogException;
import org.apache.oodt.cas.metadata.Metadata;
import org.apache.oodt.cas.protocol.exceptions.ProtocolException;
import org.apache.oodt.cas.pushpull.config.DataFilesInfo;
import org.apache.oodt.cas.pushpull.config.DownloadInfo;
import org.apache.oodt.cas.pushpull.exceptions.AlreadyInDatabaseException;
import org.apache.oodt.cas.pushpull.exceptions.ParserException;
import org.apache.oodt.cas.pushpull.exceptions.RetrievalMethodException;
import org.apache.oodt.cas.pushpull.exceptions.ToManyFailedDownloadsException;
import org.apache.oodt.cas.pushpull.exceptions.UndefinedTypeException;
import org.apache.oodt.cas.pushpull.filerestrictions.FileRestrictions;
import org.apache.oodt.cas.pushpull.filerestrictions.Parser;
import org.apache.oodt.cas.pushpull.filerestrictions.VirtualFile;
import org.apache.oodt.cas.pushpull.filerestrictions.VirtualFileStructure;
import org.apache.oodt.cas.protocol.ProtocolFile;
import org.apache.oodt.cas.protocol.util.ProtocolFileFilter;
import org.apache.oodt.cas.pushpull.protocol.ProtocolPath;
import org.apache.oodt.cas.pushpull.protocol.RemoteSite;
import org.apache.oodt.cas.pushpull.protocol.RemoteSiteFile;
import org.apache.oodt.cas.pushpull.retrievalsystem.DataFileToPropFileLinker;
import org.apache.oodt.cas.pushpull.retrievalsystem.FileRetrievalSystem;


//JDK imports
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.List;
import java.util.Stack;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 *
 * @author bfoster
 * @version $Revision$
 *
 * 

* Describe your class here *

. */ public class RemoteCrawler implements RetrievalMethod { private static final Logger LOG = Logger.getLogger(RemoteCrawler.class .getName()); /** * Starts the crawler and creates a default DirStruct if null was supplied * in constructor * * @throws java.net.MalformedURLException * @throws org.apache.oodt.cas.pushpull.exceptions.ProtocolException * @throws org.apache.oodt.cas.pushpull.exceptions.ProtocolFileException */ @Override public void processPropFile(FileRetrievalSystem frs, Parser propFileParser, File propFile, DataFilesInfo dfi, DataFileToPropFileLinker linker) throws FileNotFoundException, ParserException, ProtocolException, MalformedURLException, RetrievalMethodException { RemoteSite remoteSite; // parse property file Metadata fileMetadata = new Metadata(); VirtualFileStructure vfs = propFileParser.parse(new FileInputStream( propFile), fileMetadata); // determine RemoteSite DownloadInfo di = dfi.getDownloadInfo(); if (!di.isAllowAliasOverride() || (remoteSite = vfs.getRemoteSite()) == null) { remoteSite = di.getRemoteSite(); } // modify vfs to be root based if HOME directory based if (!vfs.isRootBased()) { String homeDirPath = frs.getHomeDir(remoteSite).getPath(); VirtualFile root = new VirtualFile(homeDirPath, true); root.addChild(vfs.getRootVirtualFile()); vfs = new VirtualFileStructure(homeDirPath + "/" + vfs.getPathToRoot(), root.getRootDir()); frs.changeToHOME(remoteSite); } // initialize variables final String initialCdPath = vfs.getPathToRoot(); final VirtualFile vf = vfs.getRootVirtualFile(); // change to initial directory (takes care of Linux auto-mounting) frs.changeToDir(initialCdPath, remoteSite); // add starting directory to stack Stack files = new Stack(); files.add(new RemoteSiteFile(frs.getCurrentFile(remoteSite), remoteSite)); // start crawling while (!files.isEmpty()) { RemoteSiteFile file = files.peek(); try { // if directory, then add its children to the crawl list if (file.isDir()) { // get next page worth of children List children 
= frs.getNextPage(file, new ProtocolFileFilter() { @Override public boolean accept(ProtocolFile pFile) { return FileRestrictions.isAllowed(new ProtocolPath(pFile .getPath(), pFile.isDir()), vf); } }); // if directory had more children then add them if (children.size() > 0) { files.addAll(children); }// otherwise remove the directory from the crawl list else { files.pop(); } // if file, then download it } else { linker.addPropFileToDataFileLink(propFile, file); if (!frs.addToDownloadQueue(files.pop(), di .getRenamingConv(), di.getStagingArea(), dfi .getQueryMetadataElementName(), di .deleteFromServer(), fileMetadata)) { linker.eraseLinks(propFile); } } } catch (ToManyFailedDownloadsException e) { throw new RetrievalMethodException( "Connection appears to be down. . .unusual number of download failures. . .stopping : " + e.getMessage()); } catch (CatalogException e) { throw new RetrievalMethodException( "Failed to communicate with database : " + e.getMessage()); } catch (AlreadyInDatabaseException e) { LOG.log(Level.WARNING, "Skipping file : " + e.getMessage()); } catch (UndefinedTypeException e) { LOG.log(Level.WARNING, "Skipping file : " + e.getMessage()); } catch (Exception e) { linker.markAsFailed(propFile, e.getMessage()); throw new RetrievalMethodException("Uknown error accured while downloading " + file + " from " + remoteSite + " -- bailing out : " + e.getMessage(), e); } } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy