
// com.dimajix.flowman.model.ResourceIdentifier.scala Maven / Gradle / Ivy
/*
* Copyright (C) 2018 The Flowman Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.dimajix.flowman.model
import java.net.URI
import java.net.URL
import java.util.Locale
import java.util.regex.Pattern
import scala.annotation.tailrec
import org.apache.hadoop.fs.Path
import com.dimajix.flowman.catalog.TableIdentifier
import com.dimajix.flowman.fs.File
import com.dimajix.flowman.fs.FileSystem
import com.dimajix.flowman.fs.GlobPattern
/**
 * Factory methods for building ResourceIdentifier instances. Each factory fixes the category string and the
 * matching strategy (simple, globbing or regex) which is used for a specific kind of resource.
 */
object ResourceIdentifier {
    /**
     * Creates an identifier which only matches other resources by exact name
     */
    def apply(category: String, name: String, partition: Map[String, String]) : ResourceIdentifier =
        SimpleResourceIdentifier(category, name, partition)

    // ---- Files ------------------------------------------------------------------------------------------------

    def ofFile(file:File): GlobbingResourceIdentifier = fileResource(file.toString)

    def ofFile(file:Path): GlobbingResourceIdentifier = fileResource(file.toString)

    /**
     * Creates an identifier for a local file. A "file:" prefix is added when the path carries no URI scheme.
     */
    def ofLocal(file:Path): GlobbingResourceIdentifier = {
        val location = Option(file.toUri.getScheme) match {
            case None => s"file:${file.toString}"
            case Some(_) => file.toString
        }
        fileResource(location)
    }

    def ofLocal(file:java.io.File): GlobbingResourceIdentifier =
        fileResource(s"file:${file.toString}")

    // ---- Hive (names are matched case insensitively) ----------------------------------------------------------

    def ofHiveDatabase(database:String): RegexResourceIdentifier =
        RegexResourceIdentifier("hiveDatabase", database, caseSensitive=false)

    def ofHiveTable(table:TableIdentifier): RegexResourceIdentifier =
        ofHiveTable(table.table, table.space.headOption)

    def ofHiveTable(table:String): RegexResourceIdentifier =
        RegexResourceIdentifier("hiveTable", table, caseSensitive=false)

    def ofHiveTable(table:String, database:Option[String]): RegexResourceIdentifier =
        RegexResourceIdentifier("hiveTable", fqTable(table, database), caseSensitive=false)

    def ofHivePartition(table:TableIdentifier, partition:Map[String,Any]): RegexResourceIdentifier =
        ofHivePartition(table.table, table.space.headOption, partition)

    def ofHivePartition(table:String, partition:Map[String,Any]): RegexResourceIdentifier =
        RegexResourceIdentifier("hiveTablePartition", table, stringify(partition), caseSensitive=false)

    def ofHivePartition(table:String, database:Option[String], partition:Map[String,Any]): RegexResourceIdentifier =
        RegexResourceIdentifier("hiveTablePartition", fqTable(table, database), stringify(partition), caseSensitive=false)

    // ---- JDBC (names are matched case sensitively) ------------------------------------------------------------

    def ofJdbcDatabase(database:String): RegexResourceIdentifier =
        RegexResourceIdentifier("jdbcDatabase", database)

    def ofJdbcTable(table:TableIdentifier): RegexResourceIdentifier =
        ofJdbcTable(table.table, table.space.headOption)

    def ofJdbcTable(table:String, database:Option[String]): RegexResourceIdentifier =
        RegexResourceIdentifier("jdbcTable", fqTable(table, database))

    def ofJdbcTable(table:String): RegexResourceIdentifier =
        RegexResourceIdentifier("jdbcTable", table)

    /**
     * Creates an identifier for a JDBC query. The query text itself is not used, every query is mapped to the
     * same identifier with an empty name.
     */
    def ofJdbcQuery(query:String): SimpleResourceIdentifier =
        SimpleResourceIdentifier("jdbcQuery", "")

    def ofJdbcTablePartition(table:TableIdentifier, partition:Map[String,Any]): RegexResourceIdentifier =
        ofJdbcTablePartition(table.table, table.space.headOption, partition)

    def ofJdbcTablePartition(table:String, database:Option[String], partition:Map[String,Any]): RegexResourceIdentifier =
        RegexResourceIdentifier("jdbcTablePartition", fqTable(table, database), stringify(partition))

    // ---- URLs -------------------------------------------------------------------------------------------------

    def ofURL(url:URL): RegexResourceIdentifier =
        RegexResourceIdentifier("url", url.toURI.normalize().toURL.toString)

    def ofURI(uri:URI): RegexResourceIdentifier =
        RegexResourceIdentifier("url", uri.normalize().toString)

    // ---- Helpers ----------------------------------------------------------------------------------------------

    /** Wraps a location string into a globbing identifier of category "file" */
    private def fileResource(location:String) : GlobbingResourceIdentifier =
        GlobbingResourceIdentifier("file", location)

    /** Converts all partition values to their string representation */
    private def stringify(partition:Map[String,Any]) : Map[String,String] =
        partition.map(kv => kv._1 -> kv._2.toString)

    /** Prepends the database name and a dot to the table name, unless the database is missing or empty */
    private def fqTable(table:String, database:Option[String]) : String = {
        val prefix = database.filter(_.nonEmpty).fold("")(db => s"$db.")
        s"$prefix$table"
    }
}
/**
 * A ResourceIdentifier is used to identify a physical resource which is either produced or consumed by a
 * target during a lifecycle phase. ResourceIdentifiers therefore play a crucial role in determining the correct
 * execution order of all targets.
 *
 * Two identifiers are related via [[contains]] if they share the same category, the name of this identifier
 * covers the name of the other one and all partition values of this identifier are also present in the other one.
 */
abstract class ResourceIdentifier extends Product with Serializable {
    /** The kind of resource. Only identifiers of the same category can contain each other. */
    val category:String
    /** The name of the resource. How names are compared is defined by `containsName`. */
    val name:String
    /** Partition column names mapped to their values. An empty map places no restriction on partitions. */
    val partition:Map[String,String]

    /** Returns true if the name of this resource is empty */
    final def isEmpty : Boolean = name.isEmpty
    /** Returns true if the name of this resource is not empty */
    final def nonEmpty : Boolean = name.nonEmpty

    /**
     * Provides a nice textual representation of the ResourceIdentifier used for console output
     * @return a string of the form `category:name`, followed by `[key=value,...]` if partitions are present
     */
    def text : String = {
        val part =
            if (partition.nonEmpty)
                partition.map(kv => kv._1 + "=" + kv._2).mkString("[",",","]")
            else
                ""
        s"$category:$name$part"
    }

    /**
     * Create new ResourceIdentifiers by exploding the powerset of all partitions
     * @return one identifier per subset of the partition keys, including the empty and the full subset
     */
    def explodePartitions() : Seq[ResourceIdentifier] = {
        @tailrec
        def pwr(t: Set[String], ps: Set[Set[String]]): Set[Set[String]] =
            if (t.isEmpty) ps
            else pwr(t.tail, ps ++ (ps map (_ + t.head)))

        val ps = pwr(partition.keySet, Set(Set.empty[String])) //Powerset of ∅ is {∅}
        // Use a strict `filter` instead of `filterKeys`: in Scala 2.12 `filterKeys` only returns a lazy view,
        // which is not serializable and therefore would break the Serializable contract of the created
        // identifiers (and would re-evaluate the predicate on every access).
        ps.toSeq.map(keys => withPartition(partition.filter { case (key, _) => keys.contains(key) }))
    }

    /**
     * Makes a copy of this resource with a different partition
     * @param partition the partition values of the new identifier
     * @return a new identifier which only differs in its partition
     */
    def withPartition(partition:Map[String,String]) : ResourceIdentifier

    /**
     * Returns true if this ResourceIdentifier is either equal to the other one or if it describes a resource which
     * actually contains the other one.
     * @param other the identifier which is tested for being contained in this one
     * @return true if category, name and partition of this identifier all cover the other identifier
     */
    final def contains(other:ResourceIdentifier) : Boolean = {
        category == other.category &&
            containsName(other) &&
            containsPartition(other)
    }

    /**
     * Returns true if one of the two identifiers contains the other one
     * @param other the identifier to compare with
     * @return true if `this.contains(other)` or `other.contains(this)`
     */
    final def intersects(other:ResourceIdentifier) : Boolean = {
        this.contains(other) || other.contains(this)
    }

    /**
     * Checks if the name of this resource covers the name of the other resource. The default implementation
     * requires both names to be equal, subclasses may provide more relaxed matching strategies.
     * @param other the identifier whose name is tested
     * @return true if the name of the other resource is covered by this name
     */
    protected def containsName(other:ResourceIdentifier) : Boolean = {
        name == other.name
    }

    /**
     * Check that the current partition also holds the partition of the other resource. This is the case if all
     * partition values are also set in the other resource
     * @param other the identifier whose partition is tested
     * @return true if every partition entry of this identifier is present with the same value in the other one
     */
    protected def containsPartition(other:ResourceIdentifier) : Boolean = {
        partition.forall(p => other.partition.get(p._1).contains(p._2))
    }
}
/**
 * The most basic ResourceIdentifier. It does not override name matching, so two resources only match if their
 * names are exactly equal.
 * @param category the kind of resource
 * @param name the exact name of the resource
 * @param partition partition column names mapped to their values, empty by default
 */
final case class SimpleResourceIdentifier(override val category:String, override val name:String, override val partition:Map[String,String] = Map())
    extends ResourceIdentifier
{
    /** Creates a new SimpleResourceIdentifier with the same category and name, but the given partition */
    override def withPartition(partition:Map[String,String]) : ResourceIdentifier =
        SimpleResourceIdentifier(category, name, partition)
}
/**
 * This ResourceIdentifier performs matches using globbing logic in order to detect if another resource is contained
 * within this resource. Globbing only makes sense for file based resources, for other types you should either use
 * the SimpleResourceIdentifier or the RegexResourceIdentifier
 * @param category the kind of resource
 * @param name the location of the resource, which may contain glob wildcards
 * @param partition partition column names mapped to their values, empty by default
 */
final case class GlobbingResourceIdentifier(override val category:String, override val name:String, override val partition:Map[String,String] = Map())
    extends ResourceIdentifier
{
    // Created on first use only
    private lazy val globPattern = GlobPattern(name)

    /** Creates a new GlobbingResourceIdentifier with the same category and name, but the given partition */
    override def withPartition(partition:Map[String,String]) : ResourceIdentifier =
        GlobbingResourceIdentifier(category, name, partition)

    /**
     * The name of the other resource is covered if it is equal to this name, if this name is found when walking
     * up the parents of the other name, or if this name contains wildcards which match the other name.
     */
    override protected def containsName(other:ResourceIdentifier) : Boolean = {
        // Walks up from the candidate until it is equal to the ancestor or until no parent is left
        @tailrec
        def isParent(ancestor:String, candidate:String) : Boolean = {
            if (ancestor == candidate)
                true
            else
                FileSystem.getParent(candidate) match {
                    case null => false
                    case up => isParent(ancestor, up)
                }
        }

        // The parent test already covers the perfect match, since it starts by comparing both names directly
        val nameOrParentMatches = isParent(name, other.name)
        // The glob pattern is only consulted if the cheaper tests have failed
        nameOrParentMatches || (globPattern.hasWildcard && globPattern.matches(other.name))
    }
}
/**
 * The RegexResourceIdentifier performs matches against other resources using a regular expression. This can be useful
 * for table names or similar resources.
 *
 * Note that the name is compiled as a regular expression as soon as a comparison does not find equal names, so
 * a name which is not a valid regular expression will raise a `PatternSyntaxException` at that point.
 * @param category the kind of resource
 * @param name the name of the resource, which is also used as a regular expression
 * @param partition partition column names mapped to their values, empty by default
 * @param caseSensitive set to false in order to ignore case when comparing names, true by default
 */
final case class RegexResourceIdentifier(override val category:String, override val name:String, override val partition:Map[String,String] = Map(), caseSensitive:Boolean=true)
    extends ResourceIdentifier
{
    // Compiled on first use only. DOTALL is always set, CASE_INSENSITIVE is added on demand
    private lazy val regex = {
        val caseFlag = if (caseSensitive) 0 else Pattern.CASE_INSENSITIVE
        Pattern.compile(name, Pattern.DOTALL | caseFlag)
    }

    /** Creates a new RegexResourceIdentifier with the same category, name and case sensitivity, but the given partition */
    override def withPartition(partition:Map[String,String]) : ResourceIdentifier =
        RegexResourceIdentifier(category, name, partition, caseSensitive)

    /**
     * The name of the other resource is covered if both names are equal (respecting the case sensitivity of this
     * identifier) or if this name, interpreted as a regular expression, matches the whole name of the other resource.
     */
    override protected def containsName(other:ResourceIdentifier) : Boolean = {
        // Cheap test first: are both names identical?
        val sameName =
            if (caseSensitive)
                name == other.name
            else
                name.toLowerCase(Locale.ROOT) == other.name.toLowerCase(Locale.ROOT)

        // Only fall back to the regular expression if the names differ
        sameName || regex.matcher(other.name).matches
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy