// Many resources are needed to download a project. Please understand that we have to cover our
// server costs. Thank you in advance. The project price is only $1.
// You can buy this project and download or modify it as often as you want.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/package org.apache.paimon.flink.source;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.connector.source.Boundedness;
import org.apache.flink.api.connector.source.Source;
import org.apache.flink.api.connector.source.SourceReader;
import org.apache.flink.api.connector.source.SourceReaderContext;
import org.apache.flink.api.connector.source.SplitEnumerator;
import org.apache.flink.api.connector.source.SplitEnumeratorContext;
import org.apache.flink.api.connector.source.lib.util.IteratorSourceEnumerator;
import org.apache.flink.api.connector.source.lib.util.IteratorSourceReader;
import org.apache.flink.api.connector.source.lib.util.IteratorSourceSplit;
import org.apache.flink.api.java.typeutils.ResultTypeQueryable;
import org.apache.flink.core.io.SimpleVersionedSerializer;
import org.apache.flink.core.memory.DataInputDeserializer;
import org.apache.flink.core.memory.DataInputView;
import org.apache.flink.core.memory.DataOutputSerializer;
import org.apache.flink.core.memory.DataOutputView;
import org.apache.flink.table.data.GenericRowData;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.runtime.typeutils.InternalTypeInfo;
import org.apache.flink.table.types.logical.BigIntType;
import org.apache.flink.table.types.logical.RowType;
import org.apache.flink.util.NumberSequenceIterator;
import org.apache.flink.util.SplittableIterator;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.NoSuchElementException;

import static org.apache.flink.util.Preconditions.checkArgument;
import static org.apache.flink.util.Preconditions.checkNotNull;
/** A data source that produces a sequence of numbers (longs) to {@link RowData}. */publicclassNumberSequenceRowSourceimplementsSource<
RowData,
NumberSequenceRowSource.NumberSequenceSplit,
Collection>,
ResultTypeQueryable{
privatestaticfinallong serialVersionUID = 1L;
/** The starting number in the sequence, inclusive. */privatefinallong from;
/** The end number in the sequence, inclusive. */privatefinallong to;
/**
* Creates a new {@code NumberSequenceSource} that produces parallel sequences covering the
* range {@code from} to {@code to} (both boundaries are inclusive).
*/publicNumberSequenceRowSource(long from, long to){
checkArgument(from <= to, "'from' must be <= 'to'");
this.from = from;
this.to = to;
}
publiclonggetFrom(){
return from;
}
publiclonggetTo(){
return to;
}
// ------------------------------------------------------------------------// source methods// ------------------------------------------------------------------------@Overridepublic TypeInformationgetProducedType(){
return InternalTypeInfo.of(RowType.of(new BigIntType(false)));
}
@Overridepublic Boundedness getBoundedness(){
return Boundedness.BOUNDED;
}
@Overridepublic SourceReadercreateReader(
SourceReaderContext readerContext){
returnnew IteratorSourceReader<>(readerContext);
}
@Overridepublic SplitEnumerator> createEnumerator(
final SplitEnumeratorContext enumContext) {
final List splits =
splitNumberRange(from, to, enumContext.currentParallelism());
returnnew IteratorSourceEnumerator<>(enumContext, splits);
}
@Overridepublic SplitEnumerator> restoreEnumerator(
final SplitEnumeratorContext enumContext,
Collection checkpoint) {
returnnew IteratorSourceEnumerator<>(enumContext, checkpoint);
}
@Overridepublic SimpleVersionedSerializergetSplitSerializer(){
returnnew SplitSerializer();
}
@Overridepublic SimpleVersionedSerializer>
getEnumeratorCheckpointSerializer() {
returnnew CheckpointSerializer();
}
protected ListsplitNumberRange(long from, long to, int numSplits){
final NumberSequenceIterator[] subSequences =
new NumberSequenceIterator(from, to).split(numSplits);
final ArrayList splits = new ArrayList<>(subSequences.length);
int splitId = 1;
for (NumberSequenceIterator seq : subSequences) {
if (seq.hasNext()) {
splits.add(
new NumberSequenceSplit(
String.valueOf(splitId++), seq.getCurrent(), seq.getTo()));
}
}
return splits;
}
// ------------------------------------------------------------------------// splits & checkpoint// ------------------------------------------------------------------------/** A split of the source, representing a number sub-sequence. */publicstaticclassNumberSequenceSplitimplementsIteratorSourceSplit<
RowData, NumberSequenceRowSource.NumberSequenceIterator> {
privatefinal String splitId;
privatefinallong from;
privatefinallong to;
publicNumberSequenceSplit(String splitId, long from, long to){
checkArgument(from <= to, "'from' must be <= 'to'");
this.splitId = checkNotNull(splitId);
this.from = from;
this.to = to;
}
@Overridepublic String splitId(){
return splitId;
}
publiclongfrom(){
return from;
}
publiclongto(){
return to;
}
@SuppressWarnings("ClassEscapesDefinedScope")
@Overridepublic NumberSequenceRowSource.NumberSequenceIterator getIterator(){
returnnew NumberSequenceRowSource.NumberSequenceIterator(from, to);
}
@SuppressWarnings("ClassEscapesDefinedScope")
@Overridepublic IteratorSourceSplitgetUpdatedSplitForIterator(
final NumberSequenceRowSource.NumberSequenceIterator iterator){
returnnew NumberSequenceSplit(splitId, iterator.getCurrent(), iterator.getTo());
}
@Overridepublic String toString(){
return String.format("NumberSequenceSplit [%d, %d] (%s)", from, to, splitId);
}
}
privatestaticfinalclassSplitSerializerimplementsSimpleVersionedSerializer{
privatestaticfinalint CURRENT_VERSION = 1;
@OverridepublicintgetVersion(){
return CURRENT_VERSION;
}
@Overridepublicbyte[] serialize(NumberSequenceSplit split) throws IOException {
checkArgument(
split.getClass() == NumberSequenceSplit.class, "cannot serialize subclasses");
// We will serialize 2 longs (16 bytes) plus the UFT representation of the string (2 +// length)final DataOutputSerializer out =
new DataOutputSerializer(split.splitId().length() + 18);
serializeV1(out, split);
return out.getCopyOfBuffer();
}
@Overridepublic NumberSequenceSplit deserialize(int version, byte[] serialized)throws IOException {
if (version != CURRENT_VERSION) {
thrownew IOException("Unrecognized version: " + version);
}
final DataInputDeserializer in = new DataInputDeserializer(serialized);
return deserializeV1(in);
}
staticvoidserializeV1(DataOutputView out, NumberSequenceSplit split)throws IOException {
out.writeUTF(split.splitId());
out.writeLong(split.from());
out.writeLong(split.to());
}
static NumberSequenceSplit deserializeV1(DataInputView in)throws IOException {
returnnew NumberSequenceSplit(in.readUTF(), in.readLong(), in.readLong());
}
}
privatestaticfinalclassCheckpointSerializerimplementsSimpleVersionedSerializer> {
privatestaticfinalint CURRENT_VERSION = 1;
@OverridepublicintgetVersion(){
return CURRENT_VERSION;
}
@Overridepublicbyte[] serialize(Collection checkpoint) throws IOException {
// Each split needs 2 longs (16 bytes) plus the UFT representation of the string (2 +// length)// Assuming at most 4 digit split IDs, 22 bytes per split avoids any intermediate array// resizing.// plus four bytes for the length fieldfinal DataOutputSerializer out = new DataOutputSerializer(checkpoint.size() * 22 + 4);
out.writeInt(checkpoint.size());
for (NumberSequenceSplit split : checkpoint) {
SplitSerializer.serializeV1(out, split);
}
return out.getCopyOfBuffer();
}
@Overridepublic Collectiondeserialize(int version, byte[] serialized)throws IOException {
if (version != CURRENT_VERSION) {
thrownew IOException("Unrecognized version: " + version);
}
final DataInputDeserializer in = new DataInputDeserializer(serialized);
finalint num = in.readInt();
final ArrayList result = new ArrayList<>(num);
for (int remaining = num; remaining > 0; remaining--) {
result.add(SplitSerializer.deserializeV1(in));
}
return result;
}
}
privatestaticclassNumberSequenceIteratorextendsSplittableIterator{
privatestaticfinallong serialVersionUID = 1L;
/** The last number returned by the iterator. */privatefinallong to;
/** The next number to be returned. */privatelong current;
/**
* Creates a new splittable iterator, returning the range [from, to]. Both boundaries of the
* interval are inclusive.
*
* @param from The first number returned by the iterator.
* @param to The last number returned by the iterator.
*/publicNumberSequenceIterator(long from, long to){
if (from > to) {
thrownew IllegalArgumentException(
"The 'to' value must not be smaller than the 'from' value.");
}
this.current = from;
this.to = to;
}
/**
* Internal constructor to allow for empty iterators.
*
* @param from The first number returned by the iterator.
* @param to The last number returned by the iterator.
* @param unused A dummy parameter to disambiguate the constructor.
*/@SuppressWarnings("unused")
privateNumberSequenceIterator(long from, long to, boolean unused){
this.current = from;
this.to = to;
}
publiclonggetCurrent(){
returnthis.current;
}
publiclonggetTo(){
returnthis.to;
}
@OverridepublicbooleanhasNext(){
return current <= to;
}
@Overridepublic RowData next(){
if (current <= to) {
return GenericRowData.of(current++);
} else {
thrownew NoSuchElementException();
}
}
@Overridepublicvoidremove(){
thrownew UnsupportedOperationException();
}
@Overridepublic NumberSequenceIterator[] split(int numPartitions) {
if (numPartitions < 1) {
thrownew IllegalArgumentException("The number of partitions must be at least 1.");
}
if (numPartitions == 1) {
returnnew NumberSequenceIterator[] {new NumberSequenceIterator(current, to)};
}
// here, numPartitions >= 2 !!!long elementsPerSplit;
if (to - current + 1 >= 0) {
elementsPerSplit = (to - current + 1) / numPartitions;
} else {
// long overflow of the range.// we compute based on half the distance, to prevent the overflow.// in most cases it holds that: current < 0 and to > 0, except for: to == 0 and// current// == Long.MIN_VALUE// the later needs a special casefinallong halfDiff; // must be positiveif (current == Long.MIN_VALUE) {
// this means to >= 0
halfDiff = (Long.MAX_VALUE / 2 + 1) + to / 2;
} else {
long posFrom = -current;
if (posFrom > to) {
halfDiff = to + ((posFrom - to) / 2);
} else {
halfDiff = posFrom + ((to - posFrom) / 2);
}
}
elementsPerSplit = halfDiff / numPartitions * 2;
}
// figure out how many get one in additionlong numWithExtra = -(elementsPerSplit * numPartitions) + to - current + 1;
// based on rounding errors, we may have lost oneif (numWithExtra > numPartitions) {
elementsPerSplit++;
numWithExtra -= numPartitions;
if (numWithExtra > numPartitions) {
thrownew RuntimeException("Bug in splitting logic. Too much rounding loss.");
}
}
NumberSequenceIterator[] iters = new NumberSequenceIterator[numPartitions];
long curr = current;
int i = 0;
for (; i < numWithExtra; i++) {
long next = curr + elementsPerSplit + 1;
iters[i] = new NumberSequenceIterator(curr, next - 1);
curr = next;
}
for (; i < numPartitions; i++) {
long next = curr + elementsPerSplit;
iters[i] = new NumberSequenceIterator(curr, next - 1, true);
curr = next;
}
return iters;
}
@OverridepublicintgetMaximumNumberOfSplits(){
if (to >= Integer.MAX_VALUE
|| current <= Integer.MIN_VALUE
|| to - current + 1 >= Integer.MAX_VALUE) {
return Integer.MAX_VALUE;
} else {
return (int) (to - current + 1);
}
}
}
}