All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.metaeffekt.artifact.analysis.version.token.VersionTokenizer Maven / Gradle / Ivy

There is a newer version: 0.132.0
Show newest version
/*
 * Copyright 2021-2024 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.metaeffekt.artifact.analysis.version.token;

import com.metaeffekt.artifact.analysis.version.VersionModifier;
import org.metaeffekt.core.inventory.processor.model.Constants;

import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;

public abstract class VersionTokenizer {

    private final static int TOKENIZER_STATE_NONE = 0;
    private final static int TOKENIZER_STATE_NUMBER = 1;
    private final static int TOKENIZER_STATE_LETTER = 2;
    /**
     * Regular expression pattern for Git commit hashes.
     * 

* This pattern matches Git commit hashes that: *

    *
  • Are preceded by a dash, underscore, or space
  • *
  • Contain 7 to 40 hexadecimal digits (a-f, A-F, 0-9)
  • *
  • Contain at least one letter (a-f or A-F)
  • *
  • Contain at least one digit (0-9)
  • *
*/ private final static Pattern GIT_HASH_PATTERN = Pattern.compile("[-_ ](?=.*[a-fA-F])(?=.*[0-9])[a-fA-F0-9]{7,40}$"); public static String buildEffectiveVersionString(String version, String update) { final String effectiveVersion = Constants.ASTERISK.equals(version) || "-".equals(version) ? null : version; final String effectiveUpdate = Constants.ASTERISK.equals(update) || "-".equals(update) ? null : update; return effectiveVersion != null && effectiveUpdate != null ? effectiveVersion + "_" + effectiveUpdate : effectiveVersion != null ? effectiveVersion : effectiveUpdate; } public static List tokenize(String version, String update) { final String versionString = buildEffectiveVersionString(version, update); return tokenize(versionString); } public static List tokenize(String versionString) { final List tokens = new ArrayList<>(); if (versionString == null) { return tokens; } final String gitHashRemovedVersionString; if (containsGitHash(versionString)) { gitHashRemovedVersionString = GIT_HASH_PATTERN.matcher(versionString).replaceAll(""); } else { gitHashRemovedVersionString = versionString; } final String versionModifiersPreprocessed = gitHashRemovedVersionString .replace("release-candidate", "rc"); final String effectiveVersionString = versionModifiersPreprocessed; final StringBuilder buffer = new StringBuilder(); int state = TOKENIZER_STATE_NONE; VersionTokenType currentTokenType = VersionTokenType.OTHER; for (int i = 0; i < effectiveVersionString.length(); i++) { final char currentChar = effectiveVersionString.charAt(i); final char previousChar = i > 0 ? effectiveVersionString.charAt(i - 1) : (char) -1; final char nextChar = i + 1 < effectiveVersionString.length() ? effectiveVersionString.charAt(i + 1) : (char) -1; final int bufferLength = buffer.length(); if (tokenizerIsValidNumberCharacterForSemVer(bufferLength, state, bufferLength > 0 && buffer.charAt(bufferLength - 1) == '.', currentChar) || (currentChar == '.' 
&& tokenizerIsValidNumberCharacterForSemVer(bufferLength, state, true, previousChar) && tokenizerIsValidNumberCharacterForSemVer(bufferLength, state, true, nextChar))) { if (state == TOKENIZER_STATE_NUMBER) { buffer.append(currentChar); } else { appendBufferToTokens(buffer, currentTokenType, tokens); buffer.append(currentChar); state = TOKENIZER_STATE_NUMBER; } currentTokenType = VersionTokenType.NUMBER_OR_SEMVER; } else if (Character.isLetter(currentChar)) { if (state == TOKENIZER_STATE_LETTER) { buffer.append(currentChar); } else { appendBufferToTokens(buffer, currentTokenType, tokens); buffer.append(currentChar); state = TOKENIZER_STATE_LETTER; } currentTokenType = VersionTokenType.STRING; } else { appendBufferToTokens(buffer, currentTokenType, tokens); buffer.append(currentChar); state = TOKENIZER_STATE_NONE; if (tokenizerIsSeparator(currentChar)) { currentTokenType = VersionTokenType.SEPARATOR; } else { currentTokenType = VersionTokenType.OTHER; } } } appendBufferToTokens(buffer, currentTokenType, tokens); // now locate certain patterns in the tokens and replace them with special tokens: // - find all "r" tokens before a NUMBER_OR_SEMVER token and replace their value with "rev" // - find VERSION_MODIFIERS tokens and the token after them and replace them with a single token of type VERSION_MODIFIER // - find NUMBER_OR_SEMVER tokens that are yyyyMMddXX, where the XX represent optional digits and replace them with a single token of type DATE // - find all STRING tokens that come after a VERSION_MODIFIER token and append the STRING token to the VERSION_MODIFIER token // - remove all STRING tokens == "v" that are before a NUMBER_OR_SEMVER token // - remove all SEPARATOR tokens at the end // - find chained VERSION_MODIFIERS and join them into a single token by adding the second one to the first one final List effectiveTokens = new ArrayList<>(); for (int i = 0; i < tokens.size(); i++) { final VersionToken token = tokens.get(i); if (token.getType() == 
VersionTokenType.STRING) { final String tokenValue = token.getValue(); final String lowercaseTokenValue = tokenValue.toLowerCase(); final VersionToken nextToken = i + 1 < tokens.size() ? tokens.get(i + 1) : null; final VersionToken nextNonSeparatorToken = findNextNonSeparatorToken(tokens, i); final VersionToken latestEffectiveToken = effectiveTokens.isEmpty() ? null : effectiveTokens.get(effectiveTokens.size() - 1); if (lowercaseTokenValue.equals("r") || lowercaseTokenValue.equals("u") || lowercaseTokenValue.equals("t") || lowercaseTokenValue.equals("p") || lowercaseTokenValue.equals("m")) { final boolean isNextNumberOrDate = nextNonSeparatorToken != null && (nextNonSeparatorToken.getType() == VersionTokenType.NUMBER_OR_SEMVER || nextNonSeparatorToken.getType() == VersionTokenType.DATE); if (isNextNumberOrDate) { tokens.remove(i); if (lowercaseTokenValue.equals("r")) { tokens.add(i, new VersionToken("rev", VersionTokenType.STRING)); } else if (lowercaseTokenValue.equals("t")) { tokens.add(i, new VersionToken("trial", VersionTokenType.STRING)); } else if (lowercaseTokenValue.equals("p")) { tokens.add(i, new VersionToken("patch", VersionTokenType.STRING)); } else if (lowercaseTokenValue.equals("m")) { tokens.add(i, new VersionToken("milestone", VersionTokenType.STRING)); } else { // lowercaseTokenValue.equals("u") tokens.add(i, new VersionToken("update", VersionTokenType.STRING)); } i--; continue; } } final VersionModifier versionModifier = VersionModifier.fromStringName(lowercaseTokenValue); if (versionModifier != null) { if (nextNonSeparatorToken != null) { if (nextNonSeparatorToken.getType() == VersionTokenType.NUMBER_OR_SEMVER) { effectiveTokens.add(new VersionToken(versionModifier.getNames()[0], VersionTokenType.VERSION_MODIFIER, nextNonSeparatorToken)); tokens.remove(nextNonSeparatorToken); continue; } } effectiveTokens.add(new VersionToken(lowercaseTokenValue, VersionTokenType.VERSION_MODIFIER)); continue; } if (lowercaseTokenValue.equals("v") && nextToken != 
null) { if (nextToken.getType() == VersionTokenType.NUMBER_OR_SEMVER) { continue; } } if (latestEffectiveToken != null) { if (latestEffectiveToken.getType() == VersionTokenType.VERSION_MODIFIER) { latestEffectiveToken.addSubToken(token); continue; } } } if (token.getType() == VersionTokenType.NUMBER_OR_SEMVER) { final String tokenValue = token.getValue(); if (tokenValue.length() >= 8) { // extract yyyyMMdd part final String datePart = tokenValue.substring(0, 8); try { // if parsing is successful, the date is valid LocalDate.parse(datePart, DateTimeFormatter.BASIC_ISO_DATE); effectiveTokens.add(new VersionToken(tokenValue, VersionTokenType.DATE)); continue; } catch (DateTimeParseException ignored) { // the date is not valid, ignore this token } } } if (token.getType() != VersionTokenType.SEPARATOR) { effectiveTokens.add(token); } } for (int i = effectiveTokens.size() - 1; i >= 0; i--) { final VersionToken token = effectiveTokens.get(i); final VersionToken previousToken = i > 0 ? effectiveTokens.get(i - 1) : null; if (token.getType() == VersionTokenType.VERSION_MODIFIER && previousToken != null && previousToken.getType() == VersionTokenType.VERSION_MODIFIER) { previousToken.addSubToken(token); effectiveTokens.remove(i); } } return effectiveTokens; } private static boolean containsGitHash(String version) { return GIT_HASH_PATTERN.matcher(version).find(); } private static VersionToken findNextNonSeparatorToken(List tokens, int startIndex) { for (int i = startIndex + 1; i < tokens.size(); i++) { final VersionToken token = tokens.get(i); if (token.getType() != VersionTokenType.SEPARATOR) { return token; } } return null; } private final static char[] VERSION_SEPARATORS = new char[]{'.', '-', '_', ' ', '/', '\\', ':', '+', '~', '(', ')', '[', ']', '{', '}'}; private static boolean tokenizerIsSeparator(char currentChar) { for (char separator : VERSION_SEPARATORS) { if (currentChar == separator) { return true; } } return false; } private static boolean 
tokenizerIsValidNumberCharacterForSemVer(int bufferLength, int state, boolean allowXtoMatch, char character) { return Character.isDigit(character) || (bufferLength > 0 && state == TOKENIZER_STATE_NUMBER && (allowXtoMatch) && character == 'x'); } private static void appendBufferToTokens(StringBuilder buffer, VersionTokenType type, List tokens) { final String trimmedBuffer = buffer.toString().trim(); if (!trimmedBuffer.isEmpty()) { tokens.add(new VersionToken(trimmedBuffer, type)); buffer.setLength(0); // clear the buffer } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy