// All downloads are free. Search and download functionality uses the official Maven repository.
//
// org.opensearch.ingest.useragent.UserAgentParser Maven / Gradle / Ivy
//
// The newest version!
/*
 * SPDX-License-Identifier: Apache-2.0
 *
 * The OpenSearch Contributors require contributions made to
 * this file be licensed under the Apache-2.0 license or a
 * compatible open source license.
 */

/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*
 * Modifications Copyright OpenSearch Contributors. See
 * GitHub history for details.
 */

package org.opensearch.ingest.useragent;

import org.opensearch.OpenSearchParseException;
import org.opensearch.common.xcontent.LoggingDeprecationHandler;
import org.opensearch.common.xcontent.XContentType;
import org.opensearch.core.xcontent.NamedXContentRegistry;
import org.opensearch.core.xcontent.XContentParser;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

final class UserAgentParser {

    private final UserAgentCache cache;
    private final List uaPatterns = new ArrayList<>();
    private final List osPatterns = new ArrayList<>();
    private final List devicePatterns = new ArrayList<>();
    private final String name;

    UserAgentParser(String name, InputStream regexStream, UserAgentCache cache) {
        this.name = name;
        this.cache = cache;

        try {
            init(regexStream);
        } catch (IOException e) {
            throw new OpenSearchParseException("error parsing regular expression file", e);
        }
    }

    private void init(InputStream regexStream) throws IOException {
        // EMPTY is safe here because we don't use namedObject
        XContentParser yamlParser = XContentType.YAML.xContent()
            .createParser(NamedXContentRegistry.EMPTY, LoggingDeprecationHandler.INSTANCE, regexStream);

        XContentParser.Token token = yamlParser.nextToken();

        if (token == XContentParser.Token.START_OBJECT) {
            token = yamlParser.nextToken();

            for (; token != null; token = yamlParser.nextToken()) {
                if (token == XContentParser.Token.FIELD_NAME && yamlParser.currentName().equals("user_agent_parsers")) {
                    List> parserConfigurations = readParserConfigurations(yamlParser);

                    for (Map map : parserConfigurations) {
                        uaPatterns.add(
                            new UserAgentSubpattern(
                                compilePattern(map.get("regex"), map.get("regex_flag")),
                                map.get("family_replacement"),
                                map.get("v1_replacement"),
                                map.get("v2_replacement"),
                                map.get("v3_replacement"),
                                map.get("v4_replacement")
                            )
                        );
                    }
                } else if (token == XContentParser.Token.FIELD_NAME && yamlParser.currentName().equals("os_parsers")) {
                    List> parserConfigurations = readParserConfigurations(yamlParser);

                    for (Map map : parserConfigurations) {
                        osPatterns.add(
                            new UserAgentSubpattern(
                                compilePattern(map.get("regex"), map.get("regex_flag")),
                                map.get("os_replacement"),
                                map.get("os_v1_replacement"),
                                map.get("os_v2_replacement"),
                                map.get("os_v3_replacement"),
                                map.get("os_v4_replacement")
                            )
                        );
                    }
                } else if (token == XContentParser.Token.FIELD_NAME && yamlParser.currentName().equals("device_parsers")) {
                    List> parserConfigurations = readParserConfigurations(yamlParser);

                    for (Map map : parserConfigurations) {
                        devicePatterns.add(
                            new UserAgentSubpattern(
                                compilePattern(map.get("regex"), map.get("regex_flag")),
                                map.get("device_replacement"),
                                null,
                                null,
                                null,
                                null
                            )
                        );
                    }
                }
            }
        }

        if (uaPatterns.isEmpty() && osPatterns.isEmpty() && devicePatterns.isEmpty()) {
            throw new OpenSearchParseException("not a valid regular expression file");
        }
    }

    private Pattern compilePattern(String regex, String regex_flag) {
        // Only flag present in the current default regexes.yaml
        if (regex_flag != null && regex_flag.equals("i")) {
            return Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
        } else {
            return Pattern.compile(regex);
        }
    }

    private List> readParserConfigurations(XContentParser yamlParser) throws IOException {
        List> patternList = new ArrayList<>();

        XContentParser.Token token = yamlParser.nextToken();
        if (token != XContentParser.Token.START_ARRAY) {
            throw new OpenSearchParseException("malformed regular expression file, should continue with 'array' after 'object'");
        }

        token = yamlParser.nextToken();
        if (token != XContentParser.Token.START_OBJECT) {
            throw new OpenSearchParseException("malformed regular expression file, expecting 'object'");
        }

        while (token == XContentParser.Token.START_OBJECT) {
            token = yamlParser.nextToken();

            if (token != XContentParser.Token.FIELD_NAME) {
                throw new OpenSearchParseException("malformed regular expression file, should continue with 'field_name' after 'array'");
            }

            Map regexMap = new HashMap<>();
            for (; token == XContentParser.Token.FIELD_NAME; token = yamlParser.nextToken()) {
                String fieldName = yamlParser.currentName();

                token = yamlParser.nextToken();
                String fieldValue = yamlParser.text();
                regexMap.put(fieldName, fieldValue);
            }

            patternList.add(regexMap);

            token = yamlParser.nextToken();
        }

        return patternList;
    }

    List getUaPatterns() {
        return uaPatterns;
    }

    List getOsPatterns() {
        return osPatterns;
    }

    List getDevicePatterns() {
        return devicePatterns;
    }

    String getName() {
        return name;
    }

    public Details parse(String agentString) {
        Details details = cache.get(name, agentString);

        if (details == null) {
            VersionedName userAgent = findMatch(uaPatterns, agentString);
            VersionedName operatingSystem = findMatch(osPatterns, agentString);
            VersionedName device = findMatch(devicePatterns, agentString);

            details = new Details(userAgent, operatingSystem, device);

            cache.put(name, agentString, details);
        }

        return details;
    }

    private VersionedName findMatch(List possiblePatterns, String agentString) {
        VersionedName name;
        for (UserAgentSubpattern pattern : possiblePatterns) {
            name = pattern.match(agentString);

            if (name != null) {
                return name;
            }
        }

        return null;
    }

    static final class Details {
        public final VersionedName userAgent;
        public final VersionedName operatingSystem;
        public final VersionedName device;

        Details(VersionedName userAgent, VersionedName operatingSystem, VersionedName device) {
            this.userAgent = userAgent;
            this.operatingSystem = operatingSystem;
            this.device = device;
        }
    }

    static final class VersionedName {
        public final String name;
        public final String major;
        public final String minor;
        public final String patch;
        public final String build;

        VersionedName(String name, String major, String minor, String patch, String build) {
            this.name = name;
            this.major = major;
            this.minor = minor;
            this.patch = patch;
            this.build = build;
        }
    }

    /**
     * One of: user agent, operating system, device
     */
    static final class UserAgentSubpattern {
        private final Pattern pattern;
        private final String nameReplacement, v1Replacement, v2Replacement, v3Replacement, v4Replacement;

        UserAgentSubpattern(
            Pattern pattern,
            String nameReplacement,
            String v1Replacement,
            String v2Replacement,
            String v3Replacement,
            String v4Replacement
        ) {
            this.pattern = pattern;
            this.nameReplacement = nameReplacement;
            this.v1Replacement = v1Replacement;
            this.v2Replacement = v2Replacement;
            this.v3Replacement = v3Replacement;
            this.v4Replacement = v4Replacement;
        }

        public VersionedName match(String agentString) {
            String name = null, major = null, minor = null, patch = null, build = null;
            Matcher matcher = pattern.matcher(agentString);

            if (!matcher.find()) {
                return null;
            }

            int groupCount = matcher.groupCount();

            if (nameReplacement != null) {
                if (nameReplacement.contains("$1") && groupCount >= 1 && matcher.group(1) != null) {
                    name = nameReplacement.replaceFirst("\\$1", Matcher.quoteReplacement(matcher.group(1)));
                } else {
                    name = nameReplacement;
                }
            } else if (groupCount >= 1) {
                name = matcher.group(1);
            }

            if (v1Replacement != null) {
                major = v1Replacement;
            } else if (groupCount >= 2) {
                major = matcher.group(2);
            }

            if (v2Replacement != null) {
                minor = v2Replacement;
            } else if (groupCount >= 3) {
                minor = matcher.group(3);
            }

            if (v3Replacement != null) {
                patch = v3Replacement;
            } else if (groupCount >= 4) {
                patch = matcher.group(4);
            }

            if (v4Replacement != null) {
                build = v4Replacement;
            } else if (groupCount >= 5) {
                build = matcher.group(5);
            }

            return name == null ? null : new VersionedName(name, major, minor, patch, build);
        }
    }
}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy