All downloads are free. The search and download functionality uses the official Maven repository.

nl.basjes.parse.useragent.trino.ParseUserAgentFunctionClientHints Maven / Gradle / Ivy

/*
 * Yet Another UserAgent Analyzer
 * Copyright (C) 2013-2024 Niels Basjes
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package nl.basjes.parse.useragent.trino;

import io.airlift.slice.Slice;
import io.airlift.slice.Slices;
import io.trino.spi.block.Block;
import io.trino.spi.block.BufferedMapValueBuilder;
import io.trino.spi.block.SqlMap;
import io.trino.spi.block.VariableWidthBlock;
import io.trino.spi.function.Description;
import io.trino.spi.function.ScalarFunction;
import io.trino.spi.function.SqlType;
import io.trino.spi.function.TypeParameter;
import io.trino.spi.type.MapType;
import io.trino.spi.type.Type;
import nl.basjes.parse.useragent.UserAgent;
import nl.basjes.parse.useragent.UserAgentAnalyzer;
import nl.basjes.parse.useragent.Version;

import java.util.Map;
import java.util.TreeMap;

import static io.trino.spi.type.VarcharType.VARCHAR;
import static nl.basjes.parse.useragent.UserAgent.USERAGENT_HEADER;

/**
 * Trino scalar function {@code parse_user_agent} that analyzes a User-Agent
 * (optionally accompanied by Client Hints request headers) with Yauaa and
 * returns all extracted fields as a {@code map(varchar, varchar)}.
 *
 * <p>The function takes a single {@code array(varchar)} argument which is read as:
 * <ul>
 *   <li>an optional first element holding the bare User-Agent value, and/or</li>
 *   <li>any number of {@code headerName, headerValue} pairs, where the header name
 *       is either the User-Agent header or one of the Client Hints headers
 *       reported by the analyzer.</li>
 * </ul>
 */
public final class ParseUserAgentFunctionClientHints {

    /** Reusable builder for the {@code map(varchar, varchar)} result value. */
    private final BufferedMapValueBuilder mapValueBuilder;

    /**
     * Creates the function instance.
     *
     * @param mapType the {@code map(varchar,varchar)} type supplied by Trino; it is cast to {@link MapType}.
     */
    public ParseUserAgentFunctionClientHints(@TypeParameter("map(varchar,varchar)") Type mapType) {
        mapValueBuilder = BufferedMapValueBuilder.createBuffered((MapType) mapType);
    }

    // NOTE: We currently cannot make an instance with only the wanted fields.
    //       We only know the required parameters the moment the call is done.
    //       At that point it is too late to create an optimized instance.
    // Each thread lazily builds and then reuses its own analyzer instance.
    private static final ThreadLocal<UserAgentAnalyzer> threadLocalUserAgentAnalyzer =
        ThreadLocal.withInitial(() ->
            UserAgentAnalyzer
                .newBuilder()
                .hideMatcherLoadStats()
                .withCache(10000)
                .immediateInitialization()
                .build());

    /**
     * Reads the varchar value at logical position {@code i} of {@code elements}.
     *
     * <p>The position is translated with {@link Block#getUnderlyingValuePosition(int)} because
     * the position in a wrapping block (for example a dictionary encoded one) is not necessarily
     * the position in the underlying value block.
     *
     * @param elements the block holding the varchar values
     * @param i        the logical position within {@code elements}
     * @return the slice stored at that position
     */
    private static Slice getSlice(Block elements, int i) {
        VariableWidthBlock valueBlock = (VariableWidthBlock) elements.getUnderlyingValueBlock();
        return valueBlock.getSlice(elements.getUnderlyingValuePosition(i));
    }

    /**
     * Checks (case insensitively) whether {@code name} is the User-Agent header name
     * or one of the Client Hints header names the analyzer reports as supported.
     */
    private static boolean isHeaderName(UserAgentAnalyzer userAgentAnalyzer, String name) {
        return USERAGENT_HEADER.equalsIgnoreCase(name) ||
            userAgentAnalyzer.supportedClientHintHeaders().stream().anyMatch(name::equalsIgnoreCase);
    }

    /**
     * Parses the provided User-Agent / Client Hints values and returns every field
     * the analyzer can produce.
     *
     * @param input the {@code array(varchar)} argument: an optional leading User-Agent value
     *              and/or {@code headerName, headerValue} pairs
     * @return a map from field name to field value for all possible field names of the analyzer
     * @throws IllegalArgumentException if an element in a header-name position is NULL or empty,
     *                                  if a header name is the last element (no value follows), or
     *                                  if an element that is not at the first position is not a
     *                                  known header name
     */
    @ScalarFunction("parse_user_agent")
    @Description("Tries to parse and analyze the provided useragent string and extract as many attributes " +
        "as possible. Uses Yauaa (Yet Another UserAgent Analyzer) version " + Version.PROJECT_VERSION + ". " +
        "See https://yauaa.basjes.nl/udf/trino/ for documentation.")
    @SqlType("map(varchar, varchar)")
    public SqlMap parseUserAgent(@SqlType("array(varchar)") Block input) throws IllegalArgumentException {
        UserAgentAnalyzer userAgentAnalyzer = threadLocalUserAgentAnalyzer.get();

        Map<String, String> requestHeaders = new TreeMap<>();

        int inputLength = input.getPositionCount();
        int i = 0;
        while (i < inputLength) {
            // Ask the block itself about NULL; the slice accessor is not expected to
            // return a Java null for a NULL position (NOTE(review): confirm).
            if (input.isNull(i)) {
                throw new IllegalArgumentException("Null argument provided to ParseUserAgent.");
            }
            String parameter = getSlice(input, i).toStringUtf8();
            if (parameter.isEmpty()) {
                throw new IllegalArgumentException("Empty argument provided to ParseUserAgent.");
            }

            if (isHeaderName(userAgentAnalyzer, parameter)) {
                // A header name must always be followed by its value.
                if (i + 1 >= inputLength) {
                    throw new IllegalArgumentException("Invalid last element in argument list (was a header name which requires a value to follow)");
                }
                // The value is deliberately not validated: an empty value is passed on as is.
                requestHeaders.put(parameter, getSlice(input, i + 1).toStringUtf8());
                i += 2; // Skip both the header name and its value.
                continue;
            }

            // Only the very first element may be a bare value: it is used as the User-Agent.
            if (i == 0) {
                requestHeaders.put(USERAGENT_HEADER, parameter);
                i++;
                continue;
            }

            throw new IllegalArgumentException("Bad argument list for ParseUserAgent: \"" + parameter + "\"");
        }

        UserAgent userAgent = userAgentAnalyzer.parse(requestHeaders);

        Map<String, String> resultMap = userAgent.toMap(userAgentAnalyzer.getAllPossibleFieldNamesSorted());

        return mapValueBuilder.build(resultMap.size(), (keyBuilder, valueBuilder) -> {
            resultMap.forEach((key, value) -> {
                VARCHAR.writeSlice(keyBuilder, Slices.utf8Slice(key));
                VARCHAR.writeSlice(valueBuilder, Slices.utf8Slice(value));
            });
        });
    }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy