// io.github.randyridgley.cdk.datalake.constructs.package-info (Maven / Gradle / Ivy)
/**
* cdk-datalake-constructs
*
* Very experimental until version 1.0.
* This is my attempt at simplifying the deployment of various data lake strategies on AWS with the CDK.
*
* Table of Contents
*
*
* - Features
* - Installation
* - Usage
*
*
* - Documentation
*
*
* - Supporting this project
* - License
*
*
*
* Features
*
*
* - Easy to Start - Create a Datalake in a few lines.
* - Easy to Expand - Expand into multiple accounts and into a data mesh.
* - Easy to Admin - Initial governance created on deploy.
*
*
*
* Installation
*
* TypeScript/JavaScript
*
*
* $ npm install @randyridgley/cdk-datalake-constructs
*
*
* Python
*
*
* $ pip install cdk-datalake-constructs
*
*
* .NET
*
*
* $ nuget install CDK.Datalake.Constructs
*
* # See more: https://www.nuget.org/packages/CDK.Datalake.Constructs/
*
*
*
* Usage
*
*
* Basic
*
*
* import * as dl from '@randyridgley/cdk-datalake-constructs';
*
* const taxiPipes: Array<dl.Pipeline> = [
* pipelines.YellowPipeline(),
* pipelines.GreenPipeline(),
* ]
*
* const dataProducts: Array<dl.DataProduct> = [{
* pipelines: taxiPipes,
* accountId: lakeAccountId,
* dataCatalogAccountId: '123456789012',
* databaseName: 'taxi-product'
* }]
*
* // deploy to local account
* new dl.DataLake(this, 'LocalDataLake', {
* name: 'data-lake',
* accountId: centralAccountId,
* region: 'us-east-1',
* policyTags: {
* "classification": "public,confidential,highlyconfidential,restricted,critical",
* "owner": "product,central,consumer"
* },
* stageName: Stage.PROD,
* dataProducts: dataProducts,
* createDefaultDatabase: false
* });
*
*
*
* Data Mesh
*
* You can set up cross-account access and pre-created policy tags for tag-based access control (TBAC) in Lake Formation.
*
*
* const lakeAccountId = app.node.tryGetContext('lakeAccountId')
* const centralAccountId = app.node.tryGetContext('centralAccountId')
* const consumerAccountId = app.node.tryGetContext('consumerAccountId')
*
* const taxiPipes: Array<dl.Pipeline> = [
* pipelines.YellowPipeline(),
* pipelines.GreenPipeline(),
* ]
*
* const dataProducts: Array<dl.DataProduct> = [{
* pipelines: taxiPipes,
* accountId: lakeAccountId,
* dataCatalogAccountId: centralAccountId,
* databaseName: 'taxi-product'
* }]
*
* // deploy to the central account
* new dl.DataLake(this, 'CentralDataLake', {
* name: 'central-lake',
* accountId: centralAccountId,
* region: 'us-east-1',
* policyTags: {
* "classification": "public,confidential,highlyconfidential,restricted,critical",
* "owner": "product,central,consumer"
* },
* stageName: Stage.PROD,
* crossAccount: {
* consumerAccountIds: [consumerAccountId, lakeAccountId],
* dataCatalogOwnerAccountId: centralAccountId,
* region: 'us-east-1', // this is still only single region today
* },
* dataProducts: dataProducts,
* createDefaultDatabase: true
* });
*
* // deploy to the data product account
* const datalake = new dl.DataLake(this, 'LocalDataLake', {
* name: 'local-lake',
* accountId: lakeAccountId,
* region: 'us-east-1',
* stageName: Stage.PROD,
* dataProducts: dataProducts,
* createDefaultDatabase: true
* });
*
* // Optionally add custom resource to download public data set products
* datalake.createDownloaderCustomResource(accountId, region, props.stageName)
*
* // deploy to consumer account
* const consumerDatalake = new dl.DataLake(this, 'ConsumerDataLake', {
* name: 'consumer-lake',
* accountId: consumerAccountId,
* region: 'us-east-1',
* stageName: Stage.PROD,
* policyTags: {
* "access": "analyst,engineer,marketing"
* },
* createDefaultDatabase: true
* });
*
*
*
* Documentation
*
*
* Construct API Reference
*
* See API.md.
*
*
* Supporting this project
*
* I'm working on this project in my free time. If you like it or find it helpful and would like to support me, any contributions are much appreciated! ❤️
*
*
* License
*
* This project is distributed under the MIT License.
*/
@software.amazon.jsii.Stability(software.amazon.jsii.Stability.Level.Stable)
package io.github.randyridgley.cdk.datalake.constructs;