All Downloads are FREE. Search and download functionalities are using the official Maven repository.

eu.stratosphere.addons.hbase.example.HBaseReadExample Maven / Gradle / Ivy

The newest version!
/***********************************************************************************************************************
 *
 * Copyright (C) 2010 by the Stratosphere project (http://stratosphere.eu)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 *
 **********************************************************************************************************************/

package eu.stratosphere.addons.hbase.example;

import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;

import eu.stratosphere.addons.hbase.TableInputFormat;
import eu.stratosphere.addons.hbase.common.HBaseKey;
import eu.stratosphere.addons.hbase.common.HBaseResult;
import eu.stratosphere.api.common.Plan;
import eu.stratosphere.api.common.Program;
import eu.stratosphere.api.common.ProgramDescription;
import eu.stratosphere.api.java.record.operators.FileDataSink;
import eu.stratosphere.api.java.record.operators.GenericDataSource;
import eu.stratosphere.api.java.record.io.CsvOutputFormat;
import eu.stratosphere.configuration.Configuration;
import eu.stratosphere.types.Record;
import eu.stratosphere.types.StringValue;

/**
 * Example program that reads rows from the HBase table {@code twitter} and
 * dumps them to a file sink, one record per line with the fields separated by
 * a single space.
 * <p>
 * Each emitted record carries four string fields: the row key, the
 * {@code meta:user} cell, the {@code meta:timestamp} cell and the
 * {@code text:tweet} cell.
 */
public class HBaseReadExample implements Program, ProgramDescription {
	
	/**
	 * Table input format that scans the {@code meta:user}, {@code meta:timestamp}
	 * and {@code text:tweet} columns and maps every HBase result to a record of
	 * four {@link StringValue} fields.
	 */
	public static class MyTableInputFormat extends  TableInputFormat {
		
		private static final long serialVersionUID = 1L;

		/** Stand-in for cells that are absent from a row, so they map to an empty string. */
		private static final byte[] EMPTY_BYTES = new byte[0];

		private static final byte[] META_FAMILY = "meta".getBytes();
		
		private static final byte[] USER_COLUMN = "user".getBytes();
		
		private static final byte[] TIMESTAMP_COLUMN = "timestamp".getBytes();
		
		private static final byte[] TEXT_FAMILY = "text".getBytes();
		
		private static final byte[] TWEET_COLUMN = "tweet".getBytes();
		
		public MyTableInputFormat() {
			super();
			
		}
		
		/**
		 * Creates the table to read from by delegating to the parent implementation.
		 *
		 * @param parameters the configuration handed to the input format
		 * @return the table created by the parent class
		 */
		@Override
		protected HTable createTable(Configuration parameters) {
			return super.createTable(parameters);
		}
		
		/**
		 * Builds a scan restricted to the three columns this example reads.
		 *
		 * @param parameters the configuration handed to the input format (not used here)
		 * @return a scan over {@code meta:user}, {@code meta:timestamp} and {@code text:tweet}
		 */
		@Override
		protected Scan createScanner(Configuration parameters) {
			Scan scan = new Scan ();
			scan.addColumn (META_FAMILY, USER_COLUMN);
			scan.addColumn (META_FAMILY, TIMESTAMP_COLUMN);
			scan.addColumn (TEXT_FAMILY, TWEET_COLUMN);
			return scan;
		}
		
		// Holders that are overwritten and handed to the record on every call of
		// mapResultToRecord, one per output field.
		StringValue row_string = new StringValue();
		StringValue user_string = new StringValue();
		StringValue timestamp_string = new StringValue();
		StringValue tweet_string = new StringValue();
		
		/**
		 * Copies the row key and the three scanned cells of the given result into
		 * fields 0 to 3 of the record. A cell that is missing from the row is
		 * written as an empty string.
		 *
		 * @param record the record to fill
		 * @param key the key of the current row (not used here)
		 * @param result the wrapper around the HBase result of the current row
		 */
		@Override
		public void mapResultToRecord(Record record, HBaseKey key,
				HBaseResult result) {
			Result res = result.getResult();
			record.setField(0, toString(row_string, res.getRow()));
			record.setField(1, toString (user_string, res.getValue(META_FAMILY, USER_COLUMN)));
			record.setField(2, toString (timestamp_string, res.getValue(META_FAMILY, TIMESTAMP_COLUMN)));
			record.setField(3, toString (tweet_string, res.getValue(TEXT_FAMILY, TWEET_COLUMN)));
		}
		
		/**
		 * Stores the given bytes in the given string holder and returns the holder.
		 *
		 * @param string the holder to overwrite
		 * @param bytes the bytes to store; {@code null} is treated as an empty value
		 * @return the same holder that was passed in
		 */
		private StringValue toString (StringValue string, byte[] bytes) {
			// Result.getValue yields null for a column the row does not contain;
			// substitute an empty array instead of failing on bytes.length.
			final byte[] source = (bytes != null ? bytes : EMPTY_BYTES);
			string.setValueAscii(source, 0, source.length);
			return string;
		}
		
	}
	

	/**
	 * Assembles the plan: an HBase source on table {@code twitter} feeding a
	 * file sink that writes the four string fields of every record.
	 *
	 * @param args optional job parameters: {@code args[0]} is the default
	 *             parallelism (defaults to 1), {@code args[1]} is the output
	 *             path (defaults to the empty string)
	 * @return the plan for this example
	 */
	@Override
	public Plan getPlan(String... args) {
		// parse job parameters
		int numSubTasks   = (args.length > 0 ? Integer.parseInt(args[0]) : 1);
		String output    = (args.length > 1 ? args[1] : "");

		GenericDataSource source = new GenericDataSource(new MyTableInputFormat(), "HBase Input");
		source.setParameter(TableInputFormat.INPUT_TABLE, "twitter");
		source.setParameter(TableInputFormat.CONFIG_LOCATION, "/etc/hbase/conf/hbase-site.xml");
		FileDataSink out = new FileDataSink(new CsvOutputFormat(), output, source, "HBase String dump");
		CsvOutputFormat.configureRecordFormat(out)
			.recordDelimiter('\n')
			.fieldDelimiter(' ')
			.field(StringValue.class, 0)
			.field(StringValue.class, 1)
			.field(StringValue.class, 2)
			.field(StringValue.class, 3);
		
		Plan plan = new Plan(out, "HBase access Example");
		plan.setDefaultParallelism(numSubTasks);
		return plan;
	}


	/**
	 * Describes the positional parameters that {@link #getPlan(String...)} reads.
	 *
	 * @return a one-line usage string
	 */
	@Override
	public String getDescription() {
		return "Parameters: [numSubTasks] [output]";
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy