All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.druid.query.lookup.namespace.UriExtractionNamespace Maven / Gradle / Ivy

There is a newer version: 0.12.3
Show newest version
/*
 * Licensed to Metamarkets Group Inc. (Metamarkets) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. Metamarkets licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package io.druid.query.lookup.namespace;

import com.fasterxml.jackson.annotation.JacksonInject;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonSubTypes;
import com.fasterxml.jackson.annotation.JsonTypeInfo;
import com.fasterxml.jackson.annotation.JsonTypeName;
import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import io.druid.guice.annotations.Json;
import io.druid.java.util.common.IAE;
import io.druid.java.util.common.UOE;
import io.druid.java.util.common.jackson.JacksonUtils;
import io.druid.java.util.common.parsers.CSVParser;
import io.druid.java.util.common.parsers.DelimitedParser;
import io.druid.java.util.common.parsers.JSONPathFieldSpec;
import io.druid.java.util.common.parsers.JSONPathFieldType;
import io.druid.java.util.common.parsers.JSONPathParser;
import io.druid.java.util.common.parsers.JSONPathSpec;
import io.druid.java.util.common.parsers.Parser;
import org.joda.time.Period;

import javax.annotation.Nullable;
import javax.validation.constraints.Min;
import javax.validation.constraints.NotNull;
import java.io.IOException;
import java.net.URI;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

/**
 *
 */
@JsonTypeName("uri")
public class UriExtractionNamespace implements ExtractionNamespace
{
  @JsonProperty
  private final URI uri;
  @JsonProperty
  private final URI uriPrefix;
  @JsonProperty
  private final FlatDataParser namespaceParseSpec;
  @JsonProperty
  private final String fileRegex;
  @JsonProperty
  private final Period pollPeriod;

  @JsonCreator
  public UriExtractionNamespace(
      @JsonProperty(value = "uri", required = false)
          URI uri,
      @JsonProperty(value = "uriPrefix", required = false)
          URI uriPrefix,
      @JsonProperty(value = "fileRegex", required = false)
          String fileRegex,
      @JsonProperty(value = "namespaceParseSpec", required = true)
          FlatDataParser namespaceParseSpec,
      @Min(0) @Nullable @JsonProperty(value = "pollPeriod", required = false)
          Period pollPeriod,
      @Deprecated
      @JsonProperty(value = "versionRegex", required = false)
          String versionRegex
  )
  {
    this.uri = uri;
    this.uriPrefix = uriPrefix;
    if ((uri != null) == (uriPrefix != null)) {
      throw new IAE("Either uri xor uriPrefix required");
    }
    this.namespaceParseSpec = Preconditions.checkNotNull(namespaceParseSpec, "namespaceParseSpec");
    this.pollPeriod = pollPeriod == null ? Period.ZERO : pollPeriod;
    this.fileRegex = fileRegex == null ? versionRegex : fileRegex;
    if (fileRegex != null && versionRegex != null) {
      throw new IAE("Cannot specify both versionRegex and fileRegex. versionRegex is deprecated");
    }

    if (uri != null && this.fileRegex != null) {
      throw new IAE("Cannot define both uri and fileRegex");
    }

    if (this.fileRegex != null) {
      try {
        Pattern.compile(this.fileRegex);
      }
      catch (PatternSyntaxException ex) {
        throw new IAE(ex, "Could not parse `fileRegex` [%s]", this.fileRegex);
      }
    }
  }

  public String getFileRegex()
  {
    return fileRegex;
  }

  public FlatDataParser getNamespaceParseSpec()
  {
    return this.namespaceParseSpec;
  }

  public URI getUri()
  {
    return uri;
  }

  public URI getUriPrefix()
  {
    return uriPrefix;
  }

  @Override
  public long getPollMs()
  {
    return pollPeriod.toStandardDuration().getMillis();
  }

  @Override
  public String toString()
  {
    return "UriExtractionNamespace{" +
           "uri=" + uri +
           ", uriPrefix=" + uriPrefix +
           ", namespaceParseSpec=" + namespaceParseSpec +
           ", fileRegex='" + fileRegex + '\'' +
           ", pollPeriod=" + pollPeriod +
           '}';
  }

  @Override
  public boolean equals(Object o)
  {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }

    UriExtractionNamespace that = (UriExtractionNamespace) o;

    if (getUri() != null ? !getUri().equals(that.getUri()) : that.getUri() != null) {
      return false;
    }
    if (getUriPrefix() != null ? !getUriPrefix().equals(that.getUriPrefix()) : that.getUriPrefix() != null) {
      return false;
    }
    if (!getNamespaceParseSpec().equals(that.getNamespaceParseSpec())) {
      return false;
    }
    if (getFileRegex() != null ? !getFileRegex().equals(that.getFileRegex()) : that.getFileRegex() != null) {
      return false;
    }
    return pollPeriod.equals(that.pollPeriod);

  }

  @Override
  public int hashCode()
  {
    int result = getUri() != null ? getUri().hashCode() : 0;
    result = 31 * result + (getUriPrefix() != null ? getUriPrefix().hashCode() : 0);
    result = 31 * result + getNamespaceParseSpec().hashCode();
    result = 31 * result + (getFileRegex() != null ? getFileRegex().hashCode() : 0);
    result = 31 * result + pollPeriod.hashCode();
    return result;
  }

  private static class DelegateParser implements Parser
  {
    private final Parser delegate;
    private final String key;
    private final String value;

    private DelegateParser(
        Parser delegate,
        @NotNull String key,
        @NotNull String value
    )
    {
      this.delegate = delegate;
      this.key = key;
      this.value = value;
    }

    @Override
    public Map parseToMap(String input)
    {
      final Map inner = delegate.parseToMap(input);
      final String k = Preconditions.checkNotNull(
          inner.get(key),
          "Key column [%s] missing data in line [%s]",
          key,
          input
      ).toString(); // Just in case is long
      final Object val = inner.get(value);
      if (val == null) {
        // Skip null or missing values, treat them as if there were no row at all.
        return ImmutableMap.of();
      }
      return ImmutableMap.of(k, val.toString());
    }

    @Override
    public void setFieldNames(Iterable fieldNames)
    {
      delegate.setFieldNames(fieldNames);
    }

    @Override
    public List getFieldNames()
    {
      return delegate.getFieldNames();
    }
  }

  @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "format")
  @JsonSubTypes(value = {
      @JsonSubTypes.Type(name = "csv", value = CSVFlatDataParser.class),
      @JsonSubTypes.Type(name = "tsv", value = TSVFlatDataParser.class),
      @JsonSubTypes.Type(name = "customJson", value = JSONFlatDataParser.class),
      @JsonSubTypes.Type(name = "simpleJson", value = ObjectMapperFlatDataParser.class)
  })
  public interface FlatDataParser
  {
    Parser getParser();
  }

  @JsonTypeName("csv")
  public static class CSVFlatDataParser implements FlatDataParser
  {
    private final Parser parser;
    private final List columns;
    private final String keyColumn;
    private final String valueColumn;

    @JsonCreator
    public CSVFlatDataParser(
        @JsonProperty("columns") List columns,
        @JsonProperty("keyColumn") final String keyColumn,
        @JsonProperty("valueColumn") final String valueColumn,
        @JsonProperty("hasHeaderRow") boolean hasHeaderRow,
        @JsonProperty("skipHeaderRows") int skipHeaderRows
    )
    {
      Preconditions.checkArgument(
          Preconditions.checkNotNull(columns, "`columns` list required").size() > 1,
          "Must specify more than one column to have a key value pair"
      );

      Preconditions.checkArgument(
          !(Strings.isNullOrEmpty(keyColumn) ^ Strings.isNullOrEmpty(valueColumn)),
          "Must specify both `keyColumn` and `valueColumn` or neither `keyColumn` nor `valueColumn`"
      );
      this.columns = columns;
      this.keyColumn = Strings.isNullOrEmpty(keyColumn) ? columns.get(0) : keyColumn;
      this.valueColumn = Strings.isNullOrEmpty(valueColumn) ? columns.get(1) : valueColumn;
      Preconditions.checkArgument(
          columns.contains(this.keyColumn),
          "Column [%s] not found int columns: %s",
          this.keyColumn,
          Arrays.toString(columns.toArray())
      );
      Preconditions.checkArgument(
          columns.contains(this.valueColumn),
          "Column [%s] not found int columns: %s",
          this.valueColumn,
          Arrays.toString(columns.toArray())
      );

      this.parser = new DelegateParser(
          new CSVParser(null, columns, hasHeaderRow, skipHeaderRows),
          this.keyColumn,
          this.valueColumn
      );
    }

    @VisibleForTesting
    CSVFlatDataParser(
        List columns,
        String keyColumn,
        String valueColumn
    )
    {
      this(columns, keyColumn, valueColumn, false, 0);
    }

    @JsonProperty
    public List getColumns()
    {
      return columns;
    }

    @JsonProperty
    public String getKeyColumn()
    {
      return this.keyColumn;
    }

    @JsonProperty
    public String getValueColumn()
    {
      return this.valueColumn;
    }

    @Override
    public Parser getParser()
    {
      return parser;
    }

    @Override
    public boolean equals(final Object o)
    {
      if (this == o) {
        return true;
      }
      if (o == null || getClass() != o.getClass()) {
        return false;
      }
      final CSVFlatDataParser that = (CSVFlatDataParser) o;
      return Objects.equals(columns, that.columns) &&
             Objects.equals(keyColumn, that.keyColumn) &&
             Objects.equals(valueColumn, that.valueColumn);
    }

    @Override
    public int hashCode()
    {
      return Objects.hash(columns, keyColumn, valueColumn);
    }

    @Override
    public String toString()
    {
      return "CSVFlatDataParser{" +
             "columns=" + columns +
             ", keyColumn='" + keyColumn + '\'' +
             ", valueColumn='" + valueColumn + '\'' +
             '}';
    }
  }

  @JsonTypeName("tsv")
  public static class TSVFlatDataParser implements FlatDataParser
  {
    private final Parser parser;
    private final List columns;
    private final String delimiter;
    private final String listDelimiter;
    private final String keyColumn;
    private final String valueColumn;

    @JsonCreator
    public TSVFlatDataParser(
        @JsonProperty("columns") List columns,
        @JsonProperty("delimiter") String delimiter,
        @JsonProperty("listDelimiter") String listDelimiter,
        @JsonProperty("keyColumn") final String keyColumn,
        @JsonProperty("valueColumn") final String valueColumn,
        @JsonProperty("hasHeaderRow") boolean hasHeaderRow,
        @JsonProperty("skipHeaderRows") int skipHeaderRows
    )
    {
      Preconditions.checkArgument(
          Preconditions.checkNotNull(columns, "`columns` list required").size() > 1,
          "Must specify more than one column to have a key value pair"
      );
      final DelimitedParser delegate = new DelimitedParser(
          Strings.emptyToNull(delimiter),
          Strings.emptyToNull(listDelimiter),
          hasHeaderRow,
          skipHeaderRows
      );
      Preconditions.checkArgument(
          !(Strings.isNullOrEmpty(keyColumn) ^ Strings.isNullOrEmpty(valueColumn)),
          "Must specify both `keyColumn` and `valueColumn` or neither `keyColumn` nor `valueColumn`"
      );
      delegate.setFieldNames(columns);
      this.columns = columns;
      this.delimiter = delimiter;
      this.listDelimiter = listDelimiter;
      this.keyColumn = Strings.isNullOrEmpty(keyColumn) ? columns.get(0) : keyColumn;
      this.valueColumn = Strings.isNullOrEmpty(valueColumn) ? columns.get(1) : valueColumn;
      Preconditions.checkArgument(
          columns.contains(this.keyColumn),
          "Column [%s] not found int columns: %s",
          this.keyColumn,
          Arrays.toString(columns.toArray())
      );
      Preconditions.checkArgument(
          columns.contains(this.valueColumn),
          "Column [%s] not found int columns: %s",
          this.valueColumn,
          Arrays.toString(columns.toArray())
      );

      this.parser = new DelegateParser(delegate, this.keyColumn, this.valueColumn);
    }

    @VisibleForTesting
    TSVFlatDataParser(
        List columns,
        String delimiter,
        String listDelimiter,
        String keyColumn,
        String valueColumn
    )
    {
      this(columns, delimiter, listDelimiter, keyColumn, valueColumn, false, 0);
    }

    @JsonProperty
    public List getColumns()
    {
      return columns;
    }

    @JsonProperty
    public String getKeyColumn()
    {
      return this.keyColumn;
    }

    @JsonProperty
    public String getValueColumn()
    {
      return this.valueColumn;
    }

    @JsonProperty
    public String getListDelimiter()
    {
      return listDelimiter;
    }

    @JsonProperty
    public String getDelimiter()
    {
      return delimiter;
    }

    @Override
    public Parser getParser()
    {
      return parser;
    }

    @Override
    public boolean equals(final Object o)
    {
      if (this == o) {
        return true;
      }
      if (o == null || getClass() != o.getClass()) {
        return false;
      }
      final TSVFlatDataParser that = (TSVFlatDataParser) o;
      return Objects.equals(columns, that.columns) &&
             Objects.equals(delimiter, that.delimiter) &&
             Objects.equals(listDelimiter, that.listDelimiter) &&
             Objects.equals(keyColumn, that.keyColumn) &&
             Objects.equals(valueColumn, that.valueColumn);
    }

    @Override
    public int hashCode()
    {
      return Objects.hash(columns, delimiter, listDelimiter, keyColumn, valueColumn);
    }

    @Override
    public String toString()
    {
      return "TSVFlatDataParser{" +
             "columns=" + columns +
             ", delimiter='" + delimiter + '\'' +
             ", listDelimiter='" + listDelimiter + '\'' +
             ", keyColumn='" + keyColumn + '\'' +
             ", valueColumn='" + valueColumn + '\'' +
             '}';
    }
  }

  @JsonTypeName("customJson")
  public static class JSONFlatDataParser implements FlatDataParser
  {
    private final Parser parser;
    private final String keyFieldName;
    private final String valueFieldName;

    @JsonCreator
    public JSONFlatDataParser(
        @JacksonInject @Json ObjectMapper jsonMapper,
        @JsonProperty("keyFieldName") final String keyFieldName,
        @JsonProperty("valueFieldName") final String valueFieldName
    )
    {
      Preconditions.checkArgument(!Strings.isNullOrEmpty(keyFieldName), "[keyFieldName] cannot be empty");
      Preconditions.checkArgument(!Strings.isNullOrEmpty(valueFieldName), "[valueFieldName] cannot be empty");
      this.keyFieldName = keyFieldName;
      this.valueFieldName = valueFieldName;

      // Copy jsonMapper; don't want to share canonicalization tables, etc., with the global ObjectMapper.
      this.parser = new DelegateParser(
          new JSONPathParser(
              new JSONPathSpec(
                  false,
                  ImmutableList.of(
                      new JSONPathFieldSpec(JSONPathFieldType.ROOT, keyFieldName, keyFieldName),
                      new JSONPathFieldSpec(JSONPathFieldType.ROOT, valueFieldName, valueFieldName)
                  )
              ),
              jsonMapper.copy()
          ),
          keyFieldName,
          valueFieldName
      );
    }

    @JsonProperty
    public String getKeyFieldName()
    {
      return this.keyFieldName;
    }

    @JsonProperty
    public String getValueFieldName()
    {
      return this.valueFieldName;
    }

    @Override
    public Parser getParser()
    {
      return this.parser;
    }

    @Override
    public boolean equals(final Object o)
    {
      if (this == o) {
        return true;
      }
      if (o == null || getClass() != o.getClass()) {
        return false;
      }
      final JSONFlatDataParser that = (JSONFlatDataParser) o;
      return Objects.equals(keyFieldName, that.keyFieldName) &&
             Objects.equals(valueFieldName, that.valueFieldName);
    }

    @Override
    public int hashCode()
    {
      return Objects.hash(keyFieldName, valueFieldName);
    }

    @Override
    public String toString()
    {
      return "JSONFlatDataParser{" +
             "keyFieldName='" + keyFieldName + '\'' +
             ", valueFieldName='" + valueFieldName + '\'' +
             '}';
    }
  }

  @JsonTypeName("simpleJson")
  public static class ObjectMapperFlatDataParser implements FlatDataParser
  {
    private final Parser parser;

    @JsonCreator
    public ObjectMapperFlatDataParser(
        final @JacksonInject @Json ObjectMapper jsonMapper
    )
    {
      // There's no point canonicalizing field names, we expect them to all be unique.
      final JsonFactory jsonFactory = jsonMapper.getFactory().copy();
      jsonFactory.configure(JsonFactory.Feature.CANONICALIZE_FIELD_NAMES, false);

      parser = new Parser()
      {
        @Override
        public Map parseToMap(String input)
        {
          try {
            return jsonFactory.createParser(input).readValueAs(JacksonUtils.TYPE_REFERENCE_MAP_STRING_STRING);
          }
          catch (IOException e) {
            throw Throwables.propagate(e);
          }
        }

        @Override
        public void setFieldNames(Iterable fieldNames)
        {
          throw new UOE("No field names available");
        }

        @Override
        public List getFieldNames()
        {
          throw new UOE("No field names available");
        }
      };
    }

    @Override
    public Parser getParser()
    {
      return parser;
    }

    @Override
    public boolean equals(Object o)
    {
      if (this == o) {
        return true;
      }
      if (o == null || getClass() != o.getClass()) {
        return false;
      }

      return true;
    }

    @Override
    public int hashCode()
    {
      return 0;
    }

    @Override
    public String toString()
    {
      return "ObjectMapperFlatDataParser{}";
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy