// google.privacy.dlp.v2.storage.proto Maven / Gradle / Ivy
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of proto-google-cloud-dlp-v2 Show documentation
// Show all versions of proto-google-cloud-dlp-v2 Show documentation
// PROTO library for proto-google-cloud-dlp-v2
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.privacy.dlp.v2;
import "google/api/resource.proto";
import "google/protobuf/timestamp.proto";
option csharp_namespace = "Google.Cloud.Dlp.V2";
option go_package = "cloud.google.com/go/dlp/apiv2/dlppb;dlppb";
option java_multiple_files = true;
option java_outer_classname = "DlpStorage";
option java_package = "com.google.privacy.dlp.v2";
option php_namespace = "Google\\Cloud\\Dlp\\V2";
option ruby_package = "Google::Cloud::Dlp::V2";
// A type of information that the API detects.
message InfoType {
  // Name of the information type. This is either a name of your choosing
  // (when creating a CustomInfoType) or, when specifying a built-in type, one
  // of the names listed at
  // https://cloud.google.com/sensitive-data-protection/docs/infotypes-reference
  //
  // When Cloud DLP results are sent to Data Catalog, infoType names should
  // conform to the pattern `[A-Za-z0-9$_-]{1,64}`.
  string name = 1;

  // Optional version name for this InfoType.
  string version = 2;

  // Optional custom sensitivity for this InfoType.
  // Applies to data profiling only.
  SensitivityScore sensitivity_score = 3;
}
// Sensitivity score, calculated from all elements in the data profile.
// The higher the level, the more sensitive the data.
message SensitivityScore {
  // The sensitivity score levels that can be assigned to a resource.
  enum SensitivityScoreLevel {
    // Unused.
    SENSITIVITY_SCORE_UNSPECIFIED = 0;

    // No sensitive information was detected, and the resource isn't publicly
    // accessible.
    SENSITIVITY_LOW = 10;

    // Sensitivity could not be determined.
    SENSITIVITY_UNKNOWN = 12;

    // Medium risk. The resource contains personally identifiable information
    // (PII), potentially sensitive data, or free-text fields that are at a
    // higher risk of having intermittent sensitive data. Consider limiting
    // access.
    SENSITIVITY_MODERATE = 20;

    // High risk. Sensitive personally identifiable information (SPII) can be
    // present. Exfiltration of data can lead to user data loss, and
    // re-identification of users might be possible. Consider limiting usage
    // and/or removing SPII.
    SENSITIVITY_HIGH = 30;
  }

  // The sensitivity score applied to the resource.
  SensitivityScoreLevel score = 1;
}
// Coarse-grained confidence level describing how well a particular finding
// satisfies the criteria to match a particular infoType.
//
// Likelihood is derived from the number of signals a finding has that imply
// it matches the infoType. For example, a string containing both an '@' and a
// '.com' is more likely to be an email address than a string containing only
// an '@'.
//
// In general, the highest likelihood level carries the strongest signals of a
// match; a finding with a high likelihood has a low chance of being a false
// positive.
//
// For more information about each likelihood level and how likelihood works,
// see [Match
// likelihood](https://cloud.google.com/sensitive-data-protection/docs/likelihood).
enum Likelihood {
  // Default value; same as POSSIBLE.
  LIKELIHOOD_UNSPECIFIED = 0;

  // Highest chance of a false positive.
  VERY_UNLIKELY = 1;

  // High chance of a false positive.
  UNLIKELY = 2;

  // Some matching signals. The default value.
  POSSIBLE = 3;

  // Low chance of a false positive.
  LIKELY = 4;

  // Confidence level is high. Lowest chance of a false positive.
  VERY_LIKELY = 5;
}
// Reference to a StoredInfoType to use with scanning.
message StoredType {
  // Resource name of the requested `StoredInfoType`, for example
  // `organizations/433245324/storedInfoTypes/432452342` or
  // `projects/project-id/storedInfoTypes/432452342`.
  string name = 1;

  // Creation timestamp of the `StoredInfoType` version that was used for
  // inspection. Output-only field, populated by the system.
  google.protobuf.Timestamp create_time = 2;
}
// User-provided custom information type. Used to find domain-specific
// sensitive information configurable to the data in question.
message CustomInfoType {
  // Custom information type based on a dictionary of words or phrases. Use it
  // to match sensitive information specific to the data, such as a list of
  // employee IDs or job titles.
  //
  // Dictionary words are case-insensitive. When scanning for matches, all
  // characters other than letters and digits in the unicode [Basic Multilingual
  // Plane](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane)
  // are replaced with whitespace, so the dictionary phrase "Sam Johnson"
  // matches all three phrases "sam johnson", "Sam, Johnson", and
  // "Sam (Johnson)". In addition, the characters surrounding any match must be
  // of a different type than the adjacent characters within the word: letters
  // must be next to non-letters and digits next to non-digits. For example,
  // the dictionary word "jen" matches the first three letters of the text
  // "jen123" but returns no matches for "jennifer".
  //
  // Dictionary words containing a large number of characters that are not
  // letters or digits may result in unexpected findings because such
  // characters are treated as whitespace. The
  // [limits](https://cloud.google.com/sensitive-data-protection/limits) page
  // describes the size limits of dictionaries. For dictionaries that do not
  // fit within these constraints, consider using
  // `LargeCustomDictionaryConfig` in the `StoredInfoType` API.
  message Dictionary {
    // A list of words or phrases to search for in the data.
    message WordList {
      // Words or phrases defining the dictionary. The dictionary must contain
      // at least one phrase, and every phrase must contain at least 2
      // characters that are letters or digits. [required]
      repeated string words = 1;
    }

    // The potential places the data can be read from.
    oneof source {
      // List of words or phrases to search for.
      WordList word_list = 1;

      // Newline-delimited file of words in Cloud Storage. Only a single file
      // is accepted.
      CloudStoragePath cloud_storage_path = 3;
    }
  }

  // A custom regular expression.
  message Regex {
    // Pattern defining the regular expression. Its syntax
    // (https://github.com/google/re2/wiki/Syntax) can be found under the
    // google/re2 repository on GitHub.
    string pattern = 1;

    // The index of the submatch to extract as findings. When not specified,
    // the entire match is returned. No more than 3 may be included.
    repeated int32 group_indexes = 2;
  }

  // Detects output from deidentification transformations such as
  // [`CryptoReplaceFfxFpeConfig`](https://cloud.google.com/sensitive-data-protection/docs/reference/rest/v2/organizations.deidentifyTemplates#cryptoreplaceffxfpeconfig).
  // These are the transformations that perform pseudonymization and thereby
  // produce a "surrogate" as output. This should be used in conjunction with
  // a field on the transformation such as `surrogate_info_type`. This
  // CustomInfoType does not support the use of `detection_rules`.
  message SurrogateType {}

  // Deprecated; use `InspectionRuleSet` instead. Rule for modifying a
  // `CustomInfoType` to alter behavior under certain circumstances, depending
  // on the specific details of the rule. Not supported for the
  // `surrogate_type` custom infoType.
  message DetectionRule {
    // Specifies a window around a finding to which a detection rule applies.
    message Proximity {
      // Number of characters before the finding to consider. For tabular
      // data, if you want to modify the likelihood of an entire column of
      // findings, set this to 1. For more information, see
      // [Hotword example: Set the match likelihood of a table column]
      // (https://cloud.google.com/sensitive-data-protection/docs/creating-custom-infotypes-likelihood#match-column-values).
      int32 window_before = 1;

      // Number of characters after the finding to consider.
      int32 window_after = 2;
    }

    // Specifies an adjustment to the likelihood of a finding as part of a
    // detection rule.
    message LikelihoodAdjustment {
      // How the likelihood will be modified.
      oneof adjustment {
        // Set the likelihood of a finding to a fixed value.
        Likelihood fixed_likelihood = 1;

        // Increase or decrease the likelihood by the specified number of
        // levels. For example, if a finding would be `POSSIBLE` without the
        // detection rule and `relative_likelihood` is 1, then it is upgraded
        // to `LIKELY`, while a value of -1 would downgrade it to `UNLIKELY`.
        // Likelihood may never drop below `VERY_UNLIKELY` or exceed
        // `VERY_LIKELY`, so applying an adjustment of 1 followed by an
        // adjustment of -1 when base likelihood is `VERY_LIKELY` will result
        // in a final likelihood of `LIKELY`.
        int32 relative_likelihood = 2;
      }
    }

    // Rule that adjusts the likelihood of findings within a certain proximity
    // of hotwords.
    message HotwordRule {
      // Regular expression pattern defining what qualifies as a hotword.
      Regex hotword_regex = 1;

      // Range of characters within which the entire hotword must reside.
      // The total length of the window cannot exceed 1000 characters.
      // The finding itself is included in the window, so that hotwords can be
      // used to match substrings of the finding itself. Suppose you
      // want Cloud DLP to promote the likelihood of the phone number
      // regex "\(\d{3}\) \d{3}-\d{4}" if the area code is known to be the
      // area code of a company's office. In this case, use the hotword regex
      // "\(xxx\)", where "xxx" is the area code in question.
      //
      // For tabular data, if you want to modify the likelihood of an entire
      // column of findings, see
      // [Hotword example: Set the match likelihood of a table column]
      // (https://cloud.google.com/sensitive-data-protection/docs/creating-custom-infotypes-likelihood#match-column-values).
      Proximity proximity = 2;

      // Likelihood adjustment to apply to all matching findings.
      LikelihoodAdjustment likelihood_adjustment = 3;
    }

    // Type of hotword rule.
    oneof type {
      // Hotword-based detection rule.
      HotwordRule hotword_rule = 1;
    }
  }

  // Type of exclusion rule.
  enum ExclusionType {
    // A finding of this custom info type will not be excluded from results.
    EXCLUSION_TYPE_UNSPECIFIED = 0;

    // A finding of this custom info type will be excluded from final results,
    // but can still affect rule execution.
    EXCLUSION_TYPE_EXCLUDE = 1;
  }

  // CustomInfoType can either be a new infoType, or an extension of a
  // built-in infoType, when the name matches one of the existing infoTypes
  // and that infoType is specified in the `InspectContent.info_types` field.
  // Specifying the latter adds findings to the ones detected by the system.
  // If the built-in info type is not specified in the
  // `InspectContent.info_types` list, then the name is treated as a custom
  // info type.
  InfoType info_type = 1;

  // Likelihood to return for this CustomInfoType. This base value can be
  // altered by a detection rule if the finding meets the criteria specified
  // by the rule. Defaults to `VERY_LIKELY` if not specified.
  Likelihood likelihood = 6;

  // Type of custom detector.
  oneof type {
    // A list of phrases to detect as a CustomInfoType.
    Dictionary dictionary = 2;

    // Regular expression based CustomInfoType.
    Regex regex = 3;

    // Message for detecting output from deidentification transformations
    // that support reversing.
    SurrogateType surrogate_type = 4;

    // Load an existing `StoredInfoType` resource for use in
    // `InspectDataSource`. Not currently supported in `InspectContent`.
    StoredType stored_type = 5;
  }

  // Set of detection rules to apply to all findings of this CustomInfoType.
  // Rules are applied in the order in which they are specified. Not supported
  // for the `surrogate_type` CustomInfoType.
  repeated DetectionRule detection_rules = 7;

  // If set to EXCLUSION_TYPE_EXCLUDE this infoType will not cause a finding
  // to be returned. It can still be used for rules matching.
  ExclusionType exclusion_type = 8;

  // Sensitivity for this CustomInfoType. If this CustomInfoType extends an
  // existing InfoType, the sensitivity here takes precedence over that of the
  // original InfoType. If unset for a CustomInfoType, it defaults to HIGH.
  // Applies to data profiling only.
  SensitivityScore sensitivity_score = 9;
}
// Generic identifier for a data field in a storage service.
message FieldId {
  // Name describing the field.
  string name = 1;
}
// Datastore partition ID.
// A partition ID identifies a grouping of entities. Entities are always
// grouped by project and namespace; the namespace ID, however, may be empty.
//
// A partition ID contains several dimensions:
// project ID and namespace ID.
message PartitionId {
  // The ID of the project to which the entities belong.
  string project_id = 2;

  // If not empty, the ID of the namespace to which the entities belong.
  string namespace_id = 4;
}
// Represents a Datastore kind.
message KindExpression {
  // The name of the kind.
  string name = 1;
}
// Options that define a data set within Google Cloud Datastore.
message DatastoreOptions {
  // A partition ID identifies a grouping of entities. Entities are always
  // grouped by project and namespace; the namespace ID, however, may be empty.
  PartitionId partition_id = 1;

  // The kind to process.
  KindExpression kind = 2;
}
// Definitions of file type groups to scan. New types will be added to this
// list.
enum FileType {
  // Includes all files.
  FILE_TYPE_UNSPECIFIED = 0;

  // Includes all file extensions not covered by another entry. Binary
  // scanning attempts to convert the content of the file to utf_8 to scan
  // the file.
  // If you wish to avoid this fall back, specify one or more of the other
  // file types in your storage scan.
  BINARY_FILE = 1;

  // Included file extensions:
  //   asc, asp, aspx, brf, c, cc, cfm, cgi, cpp, csv, cxx, c++, cs, css, dart,
  //   dat, dot, eml, epbub, ged, go, h, hh, hpp, hxx, h++, hs, html, htm,
  //   mkd, markdown, m, ml, mli, perl, pl, plist, pm, php, phtml, pht,
  //   properties, py, pyw, rb, rbw, rs, rss, rc, scala, sh, sql, swift, tex,
  //   shtml, shtm, xhtml, lhs, ics, ini, java, js, json, jsonl, kix, kml,
  //   ocaml, md, txt, text, tsv, vb, vcard, vcs, wml, xcodeproj, xml, xsl, xsd,
  //   yml, yaml.
  TEXT_FILE = 2;

  // Included file extensions:
  //   bmp, gif, jpg, jpeg, jpe, png. Setting
  // [bytes_limit_per_file][google.privacy.dlp.v2.CloudStorageOptions.bytes_limit_per_file]
  // or
  // [bytes_limit_per_file_percent][google.privacy.dlp.v2.CloudStorageOptions.bytes_limit_per_file_percent]
  // has no effect on image files. Image inspection is restricted to the
  // `global`, `us`, `asia`, and `europe` regions.
  IMAGE = 3;

  // Microsoft Word files larger than 30 MB will be scanned as binary files.
  // Included file extensions:
  //   docx, dotx, docm, dotm. Setting `bytes_limit_per_file` or
  //   `bytes_limit_per_file_percent` has no effect on Word files.
  WORD = 5;

  // PDF files larger than 30 MB will be scanned as binary files.
  // Included file extensions:
  //   pdf. Setting `bytes_limit_per_file` or `bytes_limit_per_file_percent`
  // has no effect on PDF files.
  PDF = 6;

  // Included file extensions:
  //   avro
  AVRO = 7;

  // Included file extensions:
  //   csv
  CSV = 8;

  // Included file extensions:
  //   tsv
  TSV = 9;

  // Microsoft PowerPoint files larger than 30 MB will be scanned as binary
  // files. Included file extensions:
  //   pptx, pptm, potx, potm, pot. Setting `bytes_limit_per_file` or
  //   `bytes_limit_per_file_percent` has no effect on PowerPoint files.
  POWERPOINT = 11;

  // Microsoft Excel files larger than 30 MB will be scanned as binary files.
  // Included file extensions:
  //   xlsx, xlsm, xltx, xltm. Setting `bytes_limit_per_file` or
  //   `bytes_limit_per_file_percent` has no effect on Excel files.
  EXCEL = 12;
}
// A set of files in a Cloud Storage bucket, selected with regular expressions
// that give fine-grained control over which files in the bucket to include.
//
// Included files are those that match at least one item in `include_regex` and
// do not match any items in `exclude_regex`. Note that a file that matches
// items from both lists will _not_ be included. For a match to occur, the
// entire file path (i.e., everything in the url after the bucket name) must
// match the regular expression.
//
// For example, given the input `{bucket_name: "mybucket", include_regex:
// ["directory1/.*"], exclude_regex:
// ["directory1/excluded.*"]}`:
//
// * `gs://mybucket/directory1/myfile` will be included
// * `gs://mybucket/directory1/directory2/myfile` will be included (`.*` matches
// across `/`)
// * `gs://mybucket/directory0/directory1/myfile` will _not_ be included (the
// full path doesn't match any items in `include_regex`)
// * `gs://mybucket/directory1/excludedfile` will _not_ be included (the path
// matches an item in `exclude_regex`)
//
// If `include_regex` is left empty, it will match all files by default
// (this is equivalent to setting `include_regex: [".*"]`).
//
// Some other common use cases:
//
// * `{bucket_name: "mybucket", exclude_regex: [".*\.pdf"]}` will include all
// files in `mybucket` except for .pdf files
// * `{bucket_name: "mybucket", include_regex: ["directory/[^/]+"]}` will
// include all files directly under `gs://mybucket/directory/`, without matching
// across `/`
message CloudStorageRegexFileSet {
  // The name of a Cloud Storage bucket. Required.
  string bucket_name = 1;

  // Regular expressions matching file paths to include. Every file in the
  // bucket that matches at least one of these expressions is included in the
  // set of files, unless it also matches an item in `exclude_regex`. Leaving
  // this field empty will match all files by default (this is equivalent to
  // including `.*` in the list).
  //
  // Regular expressions use RE2
  // [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
  // under the google/re2 repository on GitHub.
  repeated string include_regex = 2;

  // Regular expressions matching file paths to exclude. Every file in the
  // bucket that matches at least one of these expressions is excluded from
  // the scan.
  //
  // Regular expressions use RE2
  // [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
  // under the google/re2 repository on GitHub.
  repeated string exclude_regex = 3;
}
// Options defining a file or a set of files within a Cloud Storage
// bucket.
message CloudStorageOptions {
  // Set of files to scan.
  message FileSet {
    // The Cloud Storage url of the file(s) to scan, in the format
    // `gs://[BUCKET_NAME]/[PATH]`. Trailing wildcard in the path is allowed.
    //
    // If the url ends in a trailing slash, the bucket or directory represented
    // by the url will be scanned non-recursively (content in sub-directories
    // will not be scanned). This means that `gs://mybucket/` is equivalent to
    // `gs://mybucket/*`, and `gs://mybucket/directory/` is equivalent to
    // `gs://mybucket/directory/*`.
    //
    // Exactly one of `url` or `regex_file_set` must be set.
    string url = 1;

    // The regex-filtered set of files to scan. Exactly one of `url` or
    // `regex_file_set` must be set.
    CloudStorageRegexFileSet regex_file_set = 2;
  }

  // How to sample bytes if not all bytes are scanned. Meaningful only when
  // used in conjunction with bytes_limit_per_file. If not specified, scanning
  // would start from the top.
  enum SampleMethod {
    // No sampling.
    SAMPLE_METHOD_UNSPECIFIED = 0;

    // Scan from the top (default).
    TOP = 1;

    // For each file larger than bytes_limit_per_file, randomly pick the offset
    // to start scanning. The scanned bytes are contiguous.
    RANDOM_START = 2;
  }

  // The set of one or more files to scan.
  FileSet file_set = 1;

  // Max number of bytes to scan from a file. If a scanned file's size is
  // bigger than this value then the rest of the bytes are omitted. Only one of
  // `bytes_limit_per_file` and `bytes_limit_per_file_percent` can be
  // specified. This field can't be set if de-identification is requested. For
  // certain file types, setting this field has no effect. For more
  // information, see [Limits on bytes scanned per
  // file](https://cloud.google.com/sensitive-data-protection/docs/supported-file-types#max-byte-size-per-file).
  int64 bytes_limit_per_file = 4;

  // Max percentage of bytes to scan from a file. The rest are omitted. The
  // number of bytes scanned is rounded down. Must be between 0 and 100,
  // inclusively. Both 0 and 100 mean no limit. Defaults to 0. Only one of
  // `bytes_limit_per_file` and `bytes_limit_per_file_percent` can be
  // specified. This field can't be set if de-identification is requested. For
  // certain file types, setting this field has no effect. For more
  // information, see [Limits on bytes scanned per
  // file](https://cloud.google.com/sensitive-data-protection/docs/supported-file-types#max-byte-size-per-file).
  int32 bytes_limit_per_file_percent = 8;

  // List of file type groups to include in the scan.
  // If empty, all files are scanned and available data format processors
  // are applied. In addition, the binary content of the selected files
  // is always scanned as well.
  // Images are scanned only as binary if the specified region
  // does not support image inspection and no file_types were specified.
  // Image inspection is restricted to 'global', 'us', 'asia', and 'europe'.
  repeated FileType file_types = 5;

  // How to sample the data.
  SampleMethod sample_method = 6;

  // Limits the number of files to scan to this percentage of the input
  // FileSet. Number of files scanned is rounded down. Must be between 0 and
  // 100, inclusively. Both 0 and 100 mean no limit. Defaults to 0.
  int32 files_limit_percent = 7;
}
// Message representing a set of files in Cloud Storage.
message CloudStorageFileSet {
  // The url, in the format `gs://[BUCKET_NAME]/[PATH]`. Trailing wildcard in
  // the path is allowed.
  string url = 1;
}
// A single file or path in Cloud Storage.
message CloudStoragePath {
  // URL of a file or path (no wildcards) in Cloud Storage.
  // Example: `gs://[BUCKET_NAME]/dictionary.txt`
  string path = 1;
}
// Options that define a BigQuery table and its row identifiers.
message BigQueryOptions {
  // How to sample rows if not all rows are scanned. Meaningful only when used
  // in conjunction with either rows_limit or rows_limit_percent. If not
  // specified, rows are scanned in the order BigQuery reads them.
  enum SampleMethod {
    // No sampling.
    SAMPLE_METHOD_UNSPECIFIED = 0;

    // Scan groups of rows in the order BigQuery provides (default). Multiple
    // groups of rows may be scanned in parallel, so results may not appear in
    // the same order the rows are read.
    TOP = 1;

    // Randomly pick groups of rows to scan.
    RANDOM_START = 2;
  }

  // Complete BigQuery table reference.
  BigQueryTable table_reference = 1;

  // Table fields that may uniquely identify a row within the table. When
  // `actions.saveFindings.outputConfig.table` is specified, the values of the
  // columns specified here are available in the output table under
  // `location.content_locations.record_location.record_key.id_values`. Nested
  // fields such as `person.birthdate.year` are allowed.
  repeated FieldId identifying_fields = 2;

  // Max number of rows to scan. If the table has more rows than this value,
  // the rest of the rows are omitted. If not set, or if set to 0, all rows
  // will be scanned. Only one of rows_limit and rows_limit_percent can be
  // specified. Cannot be used in conjunction with TimespanConfig.
  int64 rows_limit = 3;

  // Max percentage of rows to scan. The rest are omitted. The number of rows
  // scanned is rounded down. Must be between 0 and 100, inclusively. Both 0
  // and 100 means no limit. Defaults to 0. Only one of rows_limit and
  // rows_limit_percent can be specified. Cannot be used in conjunction with
  // TimespanConfig.
  //
  // Caution: A [known
  // issue](https://cloud.google.com/sensitive-data-protection/docs/known-issues#bq-sampling)
  // is causing the `rowsLimitPercent` field to behave unexpectedly. We
  // recommend using `rowsLimit` instead.
  int32 rows_limit_percent = 6;

  // How to sample the data.
  SampleMethod sample_method = 4;

  // References to fields excluded from scanning. This lets you skip
  // inspection of entire columns which you know have no findings.
  // When inspecting a table, we recommend that you inspect all columns.
  // Otherwise, findings might be affected because hints from excluded columns
  // will not be used.
  repeated FieldId excluded_fields = 5;

  // Limit scanning only to these fields.
  // When inspecting a table, we recommend that you inspect all columns.
  // Otherwise, findings might be affected because hints from excluded columns
  // will not be used.
  repeated FieldId included_fields = 7;
}
// Shared message indicating Cloud storage type.
message StorageConfig {
  // Configures the timespan of the items to include in scanning.
  // Currently only supported when inspecting Cloud Storage and BigQuery.
  message TimespanConfig {
    // Exclude files, tables, or rows older than this value.
    // If not set, no lower time limit is applied.
    google.protobuf.Timestamp start_time = 1;

    // Exclude files, tables, or rows newer than this value.
    // If not set, no upper time limit is applied.
    google.protobuf.Timestamp end_time = 2;

    // Specification of the field containing the timestamp of scanned items.
    // Used for data sources like Datastore and BigQuery.
    //
    // **For BigQuery**
    //
    // If this value is not specified and the table was modified between the
    // given start and end times, the entire table will be scanned. If this
    // value is specified, then rows are filtered based on the given start and
    // end times. Rows with a `NULL` value in the provided BigQuery column are
    // skipped.
    // Valid data types of the provided BigQuery column are: `INTEGER`,
    // `DATE`, `TIMESTAMP`, and `DATETIME`.
    //
    // If your BigQuery table is [partitioned at ingestion
    // time](https://cloud.google.com/bigquery/docs/partitioned-tables#ingestion_time),
    // you can use any of the following pseudo-columns as your timestamp
    // field. When used with Cloud DLP, these pseudo-column names are case
    // sensitive.
    //
    // - `_PARTITIONTIME`
    // - `_PARTITIONDATE`
    // - `_PARTITION_LOAD_TIME`
    //
    // **For Datastore**
    //
    // If this value is specified, then entities are filtered based on the
    // given start and end times. If an entity does not contain the provided
    // timestamp property or contains empty or invalid values, then it is
    // included. Valid data types of the provided timestamp property are:
    // `TIMESTAMP`.
    //
    // See the
    // [known
    // issue](https://cloud.google.com/sensitive-data-protection/docs/known-issues#bq-timespan)
    // related to this operation.
    FieldId timestamp_field = 3;

    // When the job is started by a JobTrigger we will automatically figure
    // out a valid start_time to avoid scanning files that have not been
    // modified since the last time the JobTrigger executed. This will be
    // based on the time of the execution of the last run of the JobTrigger or
    // the timespan end_time used in the last run of the JobTrigger.
    //
    // **For BigQuery**
    //
    // Inspect jobs triggered by automatic population will scan data that is
    // at least three hours old when the job starts. This is because streaming
    // buffer rows are not read during inspection and reading up to the
    // current timestamp will result in skipped rows.
    //
    // See the [known
    // issue](https://cloud.google.com/sensitive-data-protection/docs/known-issues#recently-streamed-data)
    // related to this operation.
    bool enable_auto_population_of_timespan_config = 4;
  }

  // Type of storage system to inspect.
  oneof type {
    // Google Cloud Datastore options.
    DatastoreOptions datastore_options = 2;

    // Cloud Storage options.
    CloudStorageOptions cloud_storage_options = 3;

    // BigQuery options.
    BigQueryOptions big_query_options = 4;

    // Hybrid inspection options.
    HybridOptions hybrid_options = 9;
  }

  // Configuration of the timespan of the items to include in scanning.
  TimespanConfig timespan_config = 6;
}
// Configuration to control jobs where the content being inspected is outside
// of Google Cloud Platform.
message HybridOptions {
  // A short description of where the data is coming from. Will be stored once
  // in the job. 256 max length.
  string description = 1;

  // These are labels that each inspection request must include within their
  // 'finding_labels' map. Request may contain others, but any missing one of
  // these will be rejected.
  //
  // Label keys must be between 1 and 63 characters long and must conform
  // to the following regular expression: `[a-z]([-a-z0-9]*[a-z0-9])?`.
  //
  // No more than 10 keys can be required.
  repeated string required_finding_label_keys = 2;

  // To organize findings, these labels will be added to each finding.
  //
  // Label keys must be between 1 and 63 characters long and must conform
  // to the following regular expression: `[a-z]([-a-z0-9]*[a-z0-9])?`.
  //
  // Label values must be between 0 and 63 characters long and must conform
  // to the regular expression `([a-z]([-a-z0-9]*[a-z0-9])?)?`.
  //
  // No more than 10 labels can be associated with a given finding.
  //
  // Examples:
  //
  // * `"environment" : "production"`
  // * `"pipeline" : "etl"`
  //
  // Both keys and values are strings, so the map is declared with explicit
  // `string` key and value types (a bare `map` is not valid protobuf).
  map<string, string> labels = 3;

  // If the container is a table, additional information to make findings
  // meaningful such as the columns that are primary keys.
  TableOptions table_options = 4;
}
// Row key that identifies a record in a BigQuery table.
message BigQueryKey {
  // Complete BigQuery table reference.
  BigQueryTable table_reference = 1;

  // Row number inferred at the time the table was scanned. This value is
  // nondeterministic, cannot be queried, and may be null for inspection
  // jobs. To locate findings within a table, specify
  // `inspect_job.storage_config.big_query_options.identifying_fields` in
  // `CreateDlpJobRequest`.
  int64 row_number = 2;
}
// Record key of a finding in Cloud Datastore.
message DatastoreKey {
  // Datastore entity key.
  Key entity_key = 1;
}
// Unique identifier of a Datastore entity.
// If a key's partition ID or any of its path kinds or names are
// reserved/read-only, the key is reserved/read-only.
// A reserved/read-only key is forbidden in certain documented contexts.
message Key {
  // A (kind, ID/name) pair used to construct a key path.
  //
  // The element is complete if either name or ID is set, and incomplete if
  // neither is set.
  message PathElement {
    // The kind of the entity.
    // A kind matching regex `__.*__` is reserved/read-only.
    // A kind must not contain more than 1500 bytes when UTF-8 encoded.
    // Cannot be `""`.
    string kind = 1;

    // The type of ID.
    oneof id_type {
      // The auto-allocated ID of the entity.
      // Never equal to zero. Values less than zero are discouraged and may
      // not be supported in the future.
      int64 id = 2;

      // The name of the entity.
      // A name matching regex `__.*__` is reserved/read-only.
      // A name must not be more than 1500 bytes when UTF-8 encoded.
      // Cannot be `""`.
      string name = 3;
    }
  }

  // Entities are partitioned into subsets, currently identified by a project
  // ID and namespace ID.
  // Queries are scoped to a single partition.
  PartitionId partition_id = 1;

  // The entity path.
  // An entity path consists of one or more elements composed of a kind and a
  // string or numerical identifier, which identify entities. The first
  // element identifies a _root entity_, the second element identifies
  // a _child_ of the root entity, the third element identifies a child of the
  // second entity, and so forth. The entities identified by all prefixes of
  // the path are called the element's _ancestors_.
  //
  // A path can never be empty, and a path can have at most 100 elements.
  repeated PathElement path = 2;
}
// Message for a unique key indicating a record that contains a finding.
message RecordKey {
  // Type of key
  oneof type {
    // Datastore key
    DatastoreKey datastore_key = 2;

    // BigQuery key
    BigQueryKey big_query_key = 3;
  }

  // Values of identifying columns in the given row. Order of values matches
  // the order of `identifying_fields` specified in the scanning request.
  repeated string id_values = 5;
}
// Message defining the location of a BigQuery table. A table is uniquely
// identified by its project_id, dataset_id, and table_id. Within a query
// a table is often referenced with a string in the format of:
// `<project_id>:<dataset_id>.<table_id>` or
// `<project_id>.<dataset_id>.<table_id>`.
message BigQueryTable {
  // The Google Cloud Platform project ID of the project containing the table.
  // If omitted, project ID is inferred from the API call.
  string project_id = 1;

  // Dataset ID of the table.
  string dataset_id = 2;

  // Name of the table.
  string table_id = 3;
}
// Location of a BigQuery table whose projectId is inferred from the parent
// project.
message TableReference {
  // Dataset ID of the table.
  string dataset_id = 1;

  // Name of the table.
  string table_id = 2;
}
// A field of a BigQuery table.
message BigQueryField {
  // Source table of the field.
  BigQueryTable table = 1;

  // Designated field in the BigQuery table.
  FieldId field = 2;
}
// An entity in a dataset is a field or set of fields that correspond to a
// single person. In medical records, for example, the `EntityId` might be a
// patient identifier; in financial records it might be an account identifier.
// This message is used when generalizations or analysis must take into
// account that multiple rows correspond to the same entity.
message EntityId {
  // Composite key indicating which field contains the entity identifier.
  FieldId field = 1;
}
// Instructions regarding the table content being inspected.
message TableOptions {
  // The columns that are the primary keys for table objects included in
  // ContentItem. A copy of this cell's value will be stored alongside each
  // finding so that the finding can be traced to the specific row it came
  // from. No more than 3 may be provided.
  repeated FieldId identifying_fields = 1;
}