examples.PgeConfigFiles.pge-config.xml Maven / Gradle / Ivy

Go to download
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more contributor
license agreements.  See the NOTICE.txt file distributed with this work for
additional information regarding copyright ownership.  The ASF licenses this
file to you under the Apache License, Version 2.0 (the "License"); you may not
use this file except in compliance with the License.  You may obtain a copy of
the License at

     http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
License for the specific language governing permissions and limitations under
the License.

TODO(bfoster): Make this a working example.
-->
<pgeConfig>
    
    <!-- import another pgeConfig file; per the placeholders below, the
         namespace is optional and the file path may be absolute or relative
         to this file -->
    <import namespace="optional-NS" file="absolute-or-relative-to-this-file-another-pgeConfig-import-file-path"/>
    
    <!-- input files for the science PGE that need to be created -->
    <dynInputFiles>
        
        <!-- one or more of these:
             path        - path (including file name) of the file to create
             writerClass - class path of the SciPgeConfigFileWriter to use
             args        - zero or more comma-separated arguments
         -->
        <file path="path-with-name-to-create" writerClass="class-path-to-SciPgeConfigFileWriter" args="zero-or-more-args-comma-segregated"/>
        
    </dynInputFiles>
    
    <!-- how to run the science PGE:
         dir       - working directory to set for the script
         shellType - type of script
     -->
    <exe dir="set-working-dir-for-script" shellType="script-type">
        
        <!-- one or more of these; each cmd is one line in the script file -->
        <cmd>line-in-script-file</cmd>
        
    </exe>
    
    <!-- files to ingest -->
    <output>

        <!-- one or more of these -->
        <!-- If a dir path is specified WITHOUT an associated set of files
             tags (along with the regExp, metFileWriterClass and args
             required to produce metadata), CAS-PGE will REQUIRE that you
             provide, as part of the tasks.xml definition for this task:
             
             PGETask/Ingest/MimeExtractorRepo - Path to AutoDetectProductCrawler's 
                                                MimeExtractorRepo XML config
                                                
             An AutoDetectProductCrawler will then be created in lieu of the
             StdProductCrawler. In addition, you will not see the extracted
             metadata produced as .met files in the job working directory for
             these CAS-PGE jobs, as the extracted metadata won't be serialized.
         -->
        <dir path="path-to-an-output-dir" createBeforeExe="true-or-false" />
        
        <dir path="path-to-an-output-dir" createBeforeExe="true-or-false">
          <!-- Backward compatibility re-introduced in OODT-667:
            by specifying one or more files you cause CAS-PGE to generate a StdProductCrawler
            to crawl generated metadata files derived from calling the metFileWriterClass implementation
            on files in path-to-an-output-dir that match a Java regex (regExp) or an exact file name (name).
            Met files are serialized and stored in the job directory where the CAS-PGE job ran, and are
            then ingested into the file manager along with the data file they were derived from.
            
           -->
          <files regExp="java-regex" 
                 name="file-name"
                 metFileWriterClass="org.apache.oodt.cas.pge.writers.PcsMetFileWriter.subclass" 
                 args="zero-or-more-args-comma-segregated">
                 
             <!-- 
               A convention for renaming the files as they are processed on output.
               If none is given, the PathUtilsNamingConvention is used by default.
               Each nested metadata element supplies a key and one or more
               comma-separated values.
              -->
             <renamingConv namingExpr="/[SomePathUtils]/[Metadata]" envReplace="true">
               <metadata key="some-key" val="some-val"/>
               <metadata key="some-other-key" val="zero-or-more-vals-comma-segregated"/>                 
             </renamingConv>      
          </files>
        </dir>

    </output>
    
    <!-- custom metadata keys (and their values) that you want to set -->
    <customMetadata>
        
        <!-- one or more of these; key is the metadata field name, val its value -->
        <metadata key="name-of-metadata-field" val="metadata-value"/>
        
    </customMetadata>
    
</pgeConfig>