All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.addons.hbase.HBaseTableSource Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.flink.addons.hbase;

import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.typeutils.RowTypeInfo;
import org.apache.flink.table.sources.BatchTableSource;
import org.apache.flink.table.sources.ProjectableTableSource;
import org.apache.flink.types.Row;
import org.apache.flink.util.Preconditions;
import org.apache.hadoop.conf.Configuration;

import java.util.Map;

/**
 * Creates a TableSource to scan an HBase table.
 *
 * The table name and required HBase configuration is passed during {@link HBaseTableSource} construction.
 * Use {@link #addColumn(String, String, Class)} to specify the family, qualifier, and type of columns to scan.
 *
 * The TableSource returns {@link Row} with nested Rows for each column family.
 *
 * The HBaseTableSource is used as shown in the example below.
 *
 * 
 * {@code
 * HBaseTableSource hSrc = new HBaseTableSource(conf, "hTable");
 * hSrc.addColumn("fam1", "col1", byte[].class);
 * hSrc.addColumn("fam1", "col2", Integer.class);
 * hSrc.addColumn("fam2", "col1", String.class);
 *
 * tableEnv.registerTableSource("hTable", hSrc);
 * Table res = tableEnv.sql("SELECT t.fam2.col1, SUM(t.fam1.col2) FROM hTable AS t GROUP BY t.fam2.col1");
 * }
 * 
* */ public class HBaseTableSource implements BatchTableSource, ProjectableTableSource { private Configuration conf; private String tableName; private HBaseTableSchema schema; /** * The HBase configuration and the name of the table to read. * * @param conf hbase configuration * @param tableName the tableName */ public HBaseTableSource(Configuration conf, String tableName) { this.conf = conf; this.tableName = Preconditions.checkNotNull(tableName, "Table name"); this.schema = new HBaseTableSchema(); } /** * Adds a column defined by family, qualifier, and type to the table schema. * * @param family the family name * @param qualifier the qualifier name * @param clazz the data type of the qualifier */ public void addColumn(String family, String qualifier, Class clazz) { this.schema.addColumn(family, qualifier, clazz); } /** * Specifies the charset to parse Strings to HBase byte[] keys and String values. * * @param charset Name of the charset to use. */ public void setCharset(String charset) { this.schema.setCharset(charset); } @Override public TypeInformation getReturnType() { String[] famNames = schema.getFamilyNames(); TypeInformation[] typeInfos = new TypeInformation[famNames.length]; int i = 0; for (String family : famNames) { typeInfos[i] = new RowTypeInfo(schema.getQualifierTypes(family), schema.getQualifierNames(family)); i++; } return new RowTypeInfo(typeInfos, famNames); } @Override public DataSet getDataSet(ExecutionEnvironment execEnv) { return execEnv.createInput(new HBaseRowInputFormat(conf, tableName, schema), getReturnType()); } @Override public HBaseTableSource projectFields(int[] fields) { String[] famNames = schema.getFamilyNames(); HBaseTableSource newTableSource = new HBaseTableSource(this.conf, tableName); // Extract the family from the given fields for(int field : fields) { String family = famNames[field]; Map> familyInfo = schema.getFamilyInfo(family); for(String qualifier : familyInfo.keySet()) { // create the newSchema newTableSource.addColumn(family, qualifier, familyInfo.get(qualifier).getTypeClass()); } } return newTableSource; } @Override public String explainSource() { return ""; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy