All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.trino.plugin.google.sheets.SheetsClient Maven / Gradle / Ivy

There is a newer version: 468
Show newest version
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.plugin.google.sheets;

import com.google.api.client.auth.oauth2.Credential;
import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
import com.google.api.client.http.HttpRequestInitializer;
import com.google.api.client.json.JsonFactory;
import com.google.api.client.json.jackson2.JacksonFactory;
import com.google.api.services.sheets.v4.Sheets;
import com.google.api.services.sheets.v4.SheetsScopes;
import com.google.api.services.sheets.v4.model.ValueRange;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.util.concurrent.UncheckedExecutionException;
import com.google.inject.Inject;
import io.airlift.log.Logger;
import io.trino.cache.EvictableCacheBuilder;
import io.trino.cache.NonEvictableLoadingCache;
import io.trino.spi.TrinoException;
import io.trino.spi.type.VarcharType;

import java.io.ByteArrayInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.security.GeneralSecurityException;
import java.util.Base64;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.Set;

import static com.google.api.client.googleapis.javanet.GoogleNetHttpTransport.newTrustedTransport;
import static com.google.common.base.Throwables.throwIfInstanceOf;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static io.trino.cache.SafeCaches.buildNonEvictableCache;
import static io.trino.plugin.google.sheets.SheetsErrorCode.SHEETS_BAD_CREDENTIALS_ERROR;
import static io.trino.plugin.google.sheets.SheetsErrorCode.SHEETS_INSERT_ERROR;
import static io.trino.plugin.google.sheets.SheetsErrorCode.SHEETS_METASTORE_ERROR;
import static io.trino.plugin.google.sheets.SheetsErrorCode.SHEETS_TABLE_LOAD_ERROR;
import static io.trino.plugin.google.sheets.SheetsErrorCode.SHEETS_UNKNOWN_TABLE_ERROR;
import static java.lang.Math.toIntExact;
import static java.util.Locale.ENGLISH;
import static java.util.Objects.requireNonNull;
import static java.util.concurrent.TimeUnit.MILLISECONDS;

public class SheetsClient
{
    public static final String DEFAULT_RANGE = "$1:$10000";
    public static final String RANGE_SEPARATOR = "#";
    private static final Logger log = Logger.get(SheetsClient.class);

    private static final String APPLICATION_NAME = "trino google sheets integration";
    private static final JsonFactory JSON_FACTORY = JacksonFactory.getDefaultInstance();

    private static final List SCOPES = ImmutableList.of(SheetsScopes.SPREADSHEETS);

    private static final String INSERT_VALUE_OPTION = "RAW";
    private static final String INSERT_DATA_OPTION = "INSERT_ROWS";

    private final NonEvictableLoadingCache> tableSheetMappingCache;
    private final LoadingCache>> sheetDataCache;

    private final Optional metadataSheetId;

    private final Sheets sheetsService;

    @Inject
    public SheetsClient(SheetsConfig config)
    {
        this.metadataSheetId = config.getMetadataSheetId();

        try {
            this.sheetsService = new Sheets.Builder(newTrustedTransport(), JSON_FACTORY, setTimeout(getCredentials(config), config)).setApplicationName(APPLICATION_NAME).build();
        }
        catch (GeneralSecurityException | IOException e) {
            throw new TrinoException(SHEETS_BAD_CREDENTIALS_ERROR, e);
        }
        long expiresAfterWriteMillis = config.getSheetsDataExpireAfterWrite().toMillis();
        long maxCacheSize = config.getSheetsDataMaxCacheSize();

        this.tableSheetMappingCache = buildNonEvictableCache(
                CacheBuilder.newBuilder().expireAfterWrite(expiresAfterWriteMillis, MILLISECONDS).maximumSize(maxCacheSize),
                new CacheLoader<>()
                {
                    @Override
                    public Optional load(String tableName)
                    {
                        return getSheetExpressionForTable(tableName);
                    }

                    @Override
                    public Map> loadAll(Iterable tableList)
                    {
                        return getAllTableSheetExpressionMapping();
                    }
                });

        this.sheetDataCache = EvictableCacheBuilder.newBuilder()
                .expireAfterWrite(expiresAfterWriteMillis, MILLISECONDS)
                .maximumSize(maxCacheSize)
                .build(CacheLoader.from(this::readAllValuesFromSheetExpression));
    }

    public Optional getTable(SheetsConnectorTableHandle tableHandle)
    {
        if (tableHandle instanceof SheetsNamedTableHandle namedTableHandle) {
            return getTable(namedTableHandle.tableName());
        }
        if (tableHandle instanceof SheetsSheetTableHandle sheetTableHandle) {
            return getTableFromValues(readAllValuesFromSheet(sheetTableHandle.getSheetExpression()));
        }
        throw new IllegalStateException("Found unexpected table handle type " + tableHandle);
    }

    public Optional getTable(String tableName)
    {
        List> values = readAllValues(tableName);
        return getTableFromValues(values);
    }

    public Optional getTableFromValues(List> values)
    {
        List> stringValues = convertToStringValues(values);
        if (stringValues.size() > 0) {
            ImmutableList.Builder columns = ImmutableList.builder();
            Set columnNames = new HashSet<>();
            // Assuming 1st line is always header
            List header = stringValues.get(0);
            int count = 0;
            for (int i = 0; i < header.size(); i++) {
                String columnValue = header.get(i).toLowerCase(ENGLISH);
                // when empty or repeated column header, adding a placeholder column name
                if (columnValue.isEmpty() || columnNames.contains(columnValue)) {
                    columnValue = "column_" + ++count;
                }
                columnNames.add(columnValue);
                columns.add(new SheetsColumnHandle(columnValue, VarcharType.VARCHAR, i));
            }
            List> dataValues = stringValues.subList(1, values.size()); // removing header info
            return Optional.of(new SheetsTable(columns.build(), dataValues));
        }
        return Optional.empty();
    }

    public Set getTableNames()
    {
        if (metadataSheetId.isEmpty()) {
            return ImmutableSet.of();
        }
        ImmutableSet.Builder tables = ImmutableSet.builder();
        try {
            List> tableMetadata = sheetDataCache.getUnchecked(metadataSheetId.get());
            for (int i = 1; i < tableMetadata.size(); i++) {
                if (tableMetadata.get(i).size() > 0) {
                    tables.add(String.valueOf(tableMetadata.get(i).get(0)));
                }
            }
            return tables.build();
        }
        catch (UncheckedExecutionException e) {
            throwIfInstanceOf(e.getCause(), TrinoException.class);
            throw new TrinoException(SHEETS_METASTORE_ERROR, e);
        }
    }

    public List> readAllValues(String tableName)
    {
        try {
            String sheetExpression = getCachedSheetExpressionForTable(tableName);
            return readAllValuesFromSheet(sheetExpression);
        }
        catch (UncheckedExecutionException e) {
            throwIfInstanceOf(e.getCause(), TrinoException.class);
            throw new TrinoException(SHEETS_TABLE_LOAD_ERROR, "Error loading data for table: " + tableName, e);
        }
    }

    public List> readAllValuesFromSheet(String sheetExpression)
    {
        try {
            return sheetDataCache.getUnchecked(sheetExpression);
        }
        catch (UncheckedExecutionException e) {
            throwIfInstanceOf(e.getCause(), TrinoException.class);
            throw new TrinoException(SHEETS_TABLE_LOAD_ERROR, "Error loading data for sheet: " + sheetExpression, e);
        }
    }

    public void insertIntoSheet(String sheetExpression, List> rows)
    {
        ValueRange body = new ValueRange().setValues(rows);
        SheetsSheetIdAndRange sheetIdAndRange = new SheetsSheetIdAndRange(sheetExpression);
        try {
            sheetsService.spreadsheets().values().append(sheetIdAndRange.getSheetId(), sheetIdAndRange.getRange(), body)
                    .setValueInputOption(INSERT_VALUE_OPTION)
                    .setInsertDataOption(INSERT_DATA_OPTION)
                    .execute();
        }
        catch (IOException e) {
            throw new TrinoException(SHEETS_INSERT_ERROR, "Error inserting data to sheet: ", e);
        }

        // Flush the cache contents for the table that was written to.
        // This is a best-effort solution, since the Google Sheets API seems to be eventually consistent.
        // If the table written to will be queried directly afterward the inserts might not have been propagated yet.
        // and the users needs to wait till the cached version alters out.
        sheetDataCache.invalidate(sheetExpression);
    }

    public static List> convertToStringValues(List> values)
    {
        return values.stream()
                .map(columns -> columns.stream().map(String::valueOf).collect(toImmutableList()))
                .collect(toImmutableList());
    }

    private Optional getSheetExpressionForTable(String tableName)
    {
        Map> tableSheetMap = getAllTableSheetExpressionMapping();
        if (!tableSheetMap.containsKey(tableName)) {
            return Optional.empty();
        }
        return tableSheetMap.get(tableName);
    }

    public String getCachedSheetExpressionForTable(String tableName)
    {
        return tableSheetMappingCache.getUnchecked(tableName)
                .orElseThrow(() -> new TrinoException(SHEETS_UNKNOWN_TABLE_ERROR, "Sheet expression not found for table " + tableName));
    }

    private Map> getAllTableSheetExpressionMapping()
    {
        if (metadataSheetId.isEmpty()) {
            return ImmutableMap.of();
        }
        ImmutableMap.Builder> tableSheetMap = ImmutableMap.builder();
        List> data = readAllValuesFromSheetExpression(metadataSheetId.get());
        // first line is assumed to be sheet header
        for (int i = 1; i < data.size(); i++) {
            if (data.get(i).size() >= 2) {
                String tableId = String.valueOf(data.get(i).get(0));
                String sheetId = String.valueOf(data.get(i).get(1));
                tableSheetMap.put(tableId.toLowerCase(Locale.ENGLISH), Optional.of(sheetId));
            }
        }
        return tableSheetMap.buildOrThrow();
    }

    private static Credential getCredentials(SheetsConfig sheetsConfig)
    {
        if (sheetsConfig.getCredentialsFilePath().isPresent()) {
            try (InputStream in = new FileInputStream(sheetsConfig.getCredentialsFilePath().get())) {
                return credentialFromStream(in);
            }
            catch (IOException e) {
                throw new TrinoException(SHEETS_BAD_CREDENTIALS_ERROR, e);
            }
        }

        if (sheetsConfig.getCredentialsKey().isPresent()) {
            try {
                return credentialFromStream(
                                new ByteArrayInputStream(Base64.getDecoder().decode(sheetsConfig.getCredentialsKey().get())));
            }
            catch (IOException e) {
                throw new TrinoException(SHEETS_BAD_CREDENTIALS_ERROR, e);
            }
        }

        throw new TrinoException(SHEETS_BAD_CREDENTIALS_ERROR, "No sheets credentials were provided");
    }

    private static Credential credentialFromStream(InputStream inputStream)
            throws IOException
    {
        return GoogleCredential.fromStream(inputStream).createScoped(SCOPES);
    }

    private List> readAllValuesFromSheetExpression(String sheetExpression)
    {
        try {
            // by default loading up to 10k rows from the first tab of the sheet
            String defaultRange = DEFAULT_RANGE;
            String[] tableOptions = sheetExpression.split(RANGE_SEPARATOR);
            String sheetId = tableOptions[0];
            if (tableOptions.length > 1) {
                defaultRange = tableOptions[1];
            }
            log.debug("Accessing sheet id [%s] with range [%s]", sheetId, defaultRange);
            List> values = sheetsService.spreadsheets().values().get(sheetId, defaultRange).execute().getValues();
            if (values == null) {
                throw new TrinoException(SHEETS_TABLE_LOAD_ERROR, "No non-empty cells found in sheet: " + sheetExpression);
            }
            return values;
        }
        catch (IOException e) {
            // TODO: improve error to a {Table|Sheet}NotFoundException
            // is a backwards incompatible error code change from SHEETS_UNKNOWN_TABLE_ERROR -> NOT_FOUND
            throw new TrinoException(SHEETS_UNKNOWN_TABLE_ERROR, "Failed reading data from sheet: " + sheetExpression, e);
        }
    }

    private HttpRequestInitializer setTimeout(HttpRequestInitializer requestInitializer, SheetsConfig config)
    {
        requireNonNull(config.getConnectionTimeout(), "connectionTimeout is null");
        requireNonNull(config.getReadTimeout(), "readTimeout is null");
        requireNonNull(config.getWriteTimeout(), "writeTimeout is null");

        return httpRequest -> {
            requestInitializer.initialize(httpRequest);
            httpRequest.setConnectTimeout(toIntExact(config.getConnectionTimeout().toMillis()));
            httpRequest.setReadTimeout(toIntExact(config.getReadTimeout().toMillis()));
            httpRequest.setWriteTimeout(toIntExact(config.getWriteTimeout().toMillis()));
        };
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy