
org.pragmaticminds.crunch.api.pipe.EvaluationPipeline Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of crunch-api Show documentation
Show all versions of crunch-api Show documentation
Programming interfaces for other CRUNCH projects.
The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.pragmaticminds.crunch.api.pipe;
import com.google.common.base.Preconditions;
import org.pragmaticminds.crunch.api.exceptions.IdentifierAlreadyExistsException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
/**
* This is a meta class. It describes the structures to build a pipeline for Crunch processing.
* It holds a list of SubStreams, which are holding lists of {@link EvaluationFunction}s.
* With this class one can create something like a Flink pipeline.
*
* @author Erwin Wagasow
* Created by Erwin Wagasow on 01.08.2018
*/
public class EvaluationPipeline implements Serializable {
private final String identifier;
private final List> subStreams;
/**
* private constructor for the builder
* @param identifier of the {@link EvaluationPipeline}
* @param subStreams of the {@link EvaluationPipeline}, containing all {@link EvaluationFunction}s of the
* pipeline to process
*/
private EvaluationPipeline(String identifier, List> subStreams) {
this.identifier = identifier;
this.subStreams = subStreams;
}
// getter
public String getIdentifier() {
return identifier;
}
public List> getSubStreams() {
return subStreams;
}
/**
* Creates a builder for this class
* @return a builder
*/
public static Builder builder() { return new Builder<>(); }
/**
* this Builder creates new instances of {@link EvaluationPipeline} class
*/
public static final class Builder implements Serializable {
private String identifier;
private List> subStreams;
private Builder() {}
/**
* set the identifier
* @param identifier
* @return
*/
public Builder withIdentifier(String identifier) {
this.identifier = identifier;
return this;
}
public Builder withSubStreams(List> subStreams) {
if(this.subStreams == null){
this.subStreams = subStreams;
}else{
this.subStreams.addAll(subStreams);
}
return this;
}
public Builder withSubStream(SubStream subStream) {
if(this.subStreams == null){
this.subStreams = new ArrayList<>();
}
this.subStreams.add(subStream);
return this;
}
public Builder but() {
return new Builder().withIdentifier(identifier).withSubStreams(subStreams);
}
public EvaluationPipeline build() {
checkConstructorParameters(identifier, subStreams);
return new EvaluationPipeline(identifier, subStreams);
}
/**
* Checks if {@link SubStream} identifiers are null or used multiple times.
* Checks if the identifier of the {@link EvaluationPipeline} is not null
* @param identifier of the {@link EvaluationPipeline}
* @param subStreams list of {@link SubStream}s of the {@link EvaluationPipeline}
* @throws IdentifierAlreadyExistsException as it says
*/
private void checkConstructorParameters(String identifier, List> subStreams) {
Preconditions.checkNotNull(identifier, "the identifier of the EvaluationPipeline is not set");
Preconditions.checkNotNull(subStreams, "the SubStreams of the EvaluationPipeline are not set");
Preconditions.checkArgument(!subStreams.isEmpty(), "the SubStream of the EvaluationPipeline is empty");
Set identifiers = subStreams.stream()
.map(SubStream::getIdentifier)
.peek(identifier1 -> Preconditions.checkNotNull(
identifier1,
String.format("An identifier is null in EvaluationPipeline: %s", identifier)
))
.collect(Collectors.toSet());
if(identifiers.size() != subStreams.size()){
throw new IdentifierAlreadyExistsException(
String.format("A SubStream identifier is already in use in EvaluationPipeline: %s", identifier)
);
}
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy