All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.cloudgraph.mapreduce.GraphXmlMapper Maven / Gradle / Ivy

/**
 * Copyright 2017 TerraMeta Software, Inc.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.cloudgraph.mapreduce;

import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Supplies fully realized data {@link GraphWritable graphs} as the input value
 * to MapReduce Mapper client subclasses, the input key being an
 * offset into the processed file and the value being a {@link GraphWritable}
 * assembled from a single SDO graph XML line read from the file. Supports
 * detection of changes to the input data graph, and propagation of mutations to
 * the underlying data store.
 * 
 * The data graphs supplied to the code>Mapper are ready to further
 * modify or simply commit as is, for Mapper clients wishing to
 * modify input graphs and commit changes within the map phase. See the below
 * code sample based on the Wikipedia domain model which adds a page link to
 * each input graph.
 * 

* *

 * public class PageGraphImporter extends GraphXmlMapper<LongWritable, GraphWritable> {
 *   @Override
 *   public void map(LongWritable offset, GraphWritable graph, Context context) throws IOException {
 * 
 *     Page page = (Page) graph.getDataGraph().getRootObject();
 *     page.setPageTitle("New Page1");
 * 
 *     // commit above changes
 *     super.commit(row, graph, context);
 *   }
 * }
 * 
* *

* *

* Data graphs of any size of complexity may be supplied to MapReduce jobs * including graphs where the underlying domain model contains instances of * multiple inheritance. The set of data graphs is provided to a MapReduce job * using a query, typically * supplied using {@link GraphMapReduceSetup}. *

*

* Data graphs are assembled within a {@link GraphXmlRecordReader} based on the * line oriented XML graph data read from an underlying file, and are passed to * client {@link GraphXmlMapper} extensions. *

* * @param * the output key type * @param * the output value type * * @see org.cloudgraph.mapreduce.GraphWritable * @see org.cloudgraph.mapreduce.GraphXmlRecordReader * @see org.cloudgraph.hbase.mapreduce.GraphMapReduceSetup * * @author Scott Cinnamond * @since 0.5.8 */ public class GraphXmlMapper extends Mapper { private static Log log = LogFactory.getLog(GraphXmlMapper.class); public GraphXmlMapper() { } @Override public void map(LongWritable row, GraphWritable graph, Context context) throws IOException { // no behavior } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy