All downloads are free. Search and download functionality uses the official Maven repository.

org.elasticsearch.ingest.useragent.UserAgentParser Maven / Gradle / Ivy

There is a newer version: 7.10.2
Show newest version
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.ingest.useragent;

import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentType;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

final class UserAgentParser {

    private final UserAgentCache cache;
    private final List uaPatterns = new ArrayList<>();
    private final List osPatterns = new ArrayList<>();
    private final List devicePatterns = new ArrayList<>();
    private final String name;

    UserAgentParser(String name, InputStream regexStream, UserAgentCache cache) {
        this.name = name;
        this.cache = cache;

        try {
            init(regexStream);
        } catch (IOException e) {
            throw new ElasticsearchParseException("error parsing regular expression file", e);
        }
    }

    private void init(InputStream regexStream) throws IOException {
        // EMPTY is safe here because we don't use namedObject
        XContentParser yamlParser = XContentFactory.xContent(XContentType.YAML).createParser(NamedXContentRegistry.EMPTY,
            LoggingDeprecationHandler.INSTANCE, regexStream);

        XContentParser.Token token = yamlParser.nextToken();

        if (token == XContentParser.Token.START_OBJECT) {
            token = yamlParser.nextToken();

            for (; token != null; token = yamlParser.nextToken()) {
                if (token == XContentParser.Token.FIELD_NAME && yamlParser.currentName().equals("user_agent_parsers")) {
                    List> parserConfigurations = readParserConfigurations(yamlParser);

                    for (Map map : parserConfigurations) {
                        uaPatterns.add(new UserAgentSubpattern(compilePattern(map.get("regex"), map.get("regex_flag")),
                                map.get("family_replacement"), map.get("v1_replacement"), map.get("v2_replacement"),
                                map.get("v3_replacement"), map.get("v4_replacement")));
                    }
                }
                else if (token == XContentParser.Token.FIELD_NAME && yamlParser.currentName().equals("os_parsers")) {
                    List> parserConfigurations = readParserConfigurations(yamlParser);

                    for (Map map : parserConfigurations) {
                        osPatterns.add(new UserAgentSubpattern(compilePattern(map.get("regex"), map.get("regex_flag")),
                                map.get("os_replacement"), map.get("os_v1_replacement"), map.get("os_v2_replacement"),
                                map.get("os_v3_replacement"), map.get("os_v4_replacement")));
                    }
                }
                else if (token == XContentParser.Token.FIELD_NAME && yamlParser.currentName().equals("device_parsers")) {
                    List> parserConfigurations = readParserConfigurations(yamlParser);

                    for (Map map : parserConfigurations) {
                        devicePatterns.add(new UserAgentSubpattern(compilePattern(map.get("regex"), map.get("regex_flag")),
                                map.get("device_replacement"), null, null, null, null));
                    }
                }
            }
        }

        if (uaPatterns.isEmpty() && osPatterns.isEmpty() && devicePatterns.isEmpty()) {
            throw new ElasticsearchParseException("not a valid regular expression file");
        }
    }

    private Pattern compilePattern(String regex, String regex_flag) {
        // Only flag present in the current default regexes.yaml
        if (regex_flag != null && regex_flag.equals("i")) {
            return Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
        } else {
            return Pattern.compile(regex);
        }
    }

    private List> readParserConfigurations(XContentParser yamlParser) throws IOException {
        List > patternList = new ArrayList<>();

        XContentParser.Token token = yamlParser.nextToken();
        if (token != XContentParser.Token.START_ARRAY) {
            throw new ElasticsearchParseException("malformed regular expression file, should continue with 'array' after 'object'");
        }

        token = yamlParser.nextToken();
        if (token != XContentParser.Token.START_OBJECT) {
            throw new ElasticsearchParseException("malformed regular expression file, expecting 'object'");
        }

        while (token == XContentParser.Token.START_OBJECT) {
            token = yamlParser.nextToken();

            if (token != XContentParser.Token.FIELD_NAME) {
                throw new ElasticsearchParseException("malformed regular expression file, should continue with 'field_name' after 'array'");
            }

            Map regexMap = new HashMap<>();
            for (; token == XContentParser.Token.FIELD_NAME; token = yamlParser.nextToken()) {
                String fieldName = yamlParser.currentName();

                token = yamlParser.nextToken();
                String fieldValue = yamlParser.text();
                regexMap.put(fieldName, fieldValue);
            }

            patternList.add(regexMap);

            token = yamlParser.nextToken();
        }

        return patternList;
    }

    List getUaPatterns() {
        return uaPatterns;
    }

    List getOsPatterns() {
        return osPatterns;
    }

    List getDevicePatterns() {
        return devicePatterns;
    }

    String getName() {
        return name;
    }

    public Details parse(String agentString) {
        Details details = cache.get(name, agentString);

        if (details == null) {
            VersionedName userAgent = findMatch(uaPatterns, agentString);
            VersionedName operatingSystem = findMatch(osPatterns, agentString);
            VersionedName device = findMatch(devicePatterns, agentString);

            details = new Details(userAgent, operatingSystem, device);

            cache.put(name, agentString, details);
        }

        return details;
    }

    private VersionedName findMatch(List possiblePatterns, String agentString) {
        VersionedName name;
        for (UserAgentSubpattern pattern : possiblePatterns) {
            name = pattern.match(agentString);

            if (name != null) {
                return name;
            }
        }

        return null;
    }

    static final class Details {
        public final VersionedName userAgent;
        public final VersionedName operatingSystem;
        public final VersionedName device;

        Details(VersionedName userAgent, VersionedName operatingSystem, VersionedName device) {
            this.userAgent = userAgent;
            this.operatingSystem = operatingSystem;
            this.device = device;
        }
    }

    static final class VersionedName {
        public final String name;
        public final String major;
        public final String minor;
        public final String patch;
        public final String build;

        VersionedName(String name, String major, String minor, String patch, String build) {
            this.name = name;
            this.major = major;
            this.minor = minor;
            this.patch = patch;
            this.build = build;
        }
    }

    /**
     * One of: user agent, operating system, device
     */
    static final class UserAgentSubpattern {
        private final Pattern pattern;
        private final String nameReplacement, v1Replacement, v2Replacement, v3Replacement, v4Replacement;

        UserAgentSubpattern(Pattern pattern, String nameReplacement,
                String v1Replacement, String v2Replacement, String v3Replacement, String v4Replacement) {
          this.pattern = pattern;
          this.nameReplacement = nameReplacement;
          this.v1Replacement = v1Replacement;
          this.v2Replacement = v2Replacement;
          this.v3Replacement = v3Replacement;
          this.v4Replacement = v4Replacement;
        }

        public VersionedName match(String agentString) {
          String name = null, major = null, minor = null, patch = null, build = null;
          Matcher matcher = pattern.matcher(agentString);

          if (!matcher.find()) {
            return null;
          }

          int groupCount = matcher.groupCount();

          if (nameReplacement != null) {
            if (nameReplacement.contains("$1") && groupCount >= 1 && matcher.group(1) != null) {
              name = nameReplacement.replaceFirst("\\$1", Matcher.quoteReplacement(matcher.group(1)));
            } else {
              name = nameReplacement;
            }
          } else if (groupCount >= 1) {
            name = matcher.group(1);
          }

          if (v1Replacement != null) {
            major = v1Replacement;
          } else if (groupCount >= 2) {
            major = matcher.group(2);
          }

          if (v2Replacement != null) {
            minor = v2Replacement;
          } else if (groupCount >= 3) {
            minor = matcher.group(3);
          }

          if (v3Replacement != null) {
              patch = v3Replacement;
          } else if (groupCount >= 4) {
              patch = matcher.group(4);
          }

          if (v4Replacement != null) {
              build = v4Replacement;
          } else if (groupCount >= 5) {
              build = matcher.group(5);
          }

          return name == null ? null : new VersionedName(name, major, minor, patch, build);
        }
      }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy