All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.vertx.json.schema.impl.Format Maven / Gradle / Ivy

package io.vertx.json.schema.impl;

import java.net.IDN;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.regex.Pattern;

import static java.util.regex.Pattern.CASE_INSENSITIVE;

public class Format {

  private static final Pattern BASE16 = Pattern.compile("[0-9a-f]", CASE_INSENSITIVE);
  private static final Pattern BASE32 = Pattern.compile("^(?:[A-Z2-7]{8})*(?:[A-Z2-7]{2}={6}|[A-Z2-7]{4}={4}|[A-Z2-7]{5}={3}|[A-Z2-7]{7}=)?$");
  private static final Pattern BASE64 = Pattern.compile("^([A-Za-z0-9+/]{4})*([A-Za-z0-9+/]{4}|[A-Za-z0-9+/]{3}=|[A" +
    "-Za-z0-9+/]{2}==)$");
  private static final Pattern RELATIVE_JSON_POINTER = Pattern.compile("^(?:0|[1-9][0-9]*)(?:#|(?:\\/(?:[^~/]|~0|~1)" +
    "*)*)$");
  private static final Pattern JSON_POINTER_URI_FRAGMENT = Pattern.compile("^#(?:\\/(?:[a-z0-9_\\-.!$&'()*+,;" +
    ":=@]|%[0-9a-f]{2}|~0|~1)*)*$", CASE_INSENSITIVE);
  private static final Pattern JSON_POINTER = Pattern.compile("^(?:\\/(?:[^~/]|~0|~1)*)*$");
  private static final Pattern UUID = Pattern.compile("^(?:urn:uuid:)?[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}$", CASE_INSENSITIVE);
  private static final Pattern Z_ANCHOR = Pattern.compile("[^\\\\]\\\\Z");
  // optimized http://stackoverflow.com/questions/53497/regular-expression-that-matches-valid-ipv6-addresses
  private static final Pattern IPV6 = Pattern.compile("^((([0-9a-f]{1,4}:){7}([0-9a-f]{1,4}|:))|(([0-9a-f]{1,4}:){6}(:[0-9a-f]{1,4}|((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3})|:))|(([0-9a-f]{1,4}:){5}(((:[0-9a-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3})|:))|(([0-9a-f]{1,4}:){4}(((:[0-9a-f]{1,4}){1,3})|((:[0-9a-f]{1,4})?:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3}))|:))|(([0-9a-f]{1,4}:){3}(((:[0-9a-f]{1,4}){1,4})|((:[0-9a-f]{1,4}){0,2}:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3}))|:))|(([0-9a-f]{1,4}:){2}(((:[0-9a-f]{1,4}){1,5})|((:[0-9a-f]{1,4}){0,3}:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3}))|:))|(([0-9a-f]{1,4}:){1}(((:[0-9a-f]{1,4}){1,6})|((:[0-9a-f]{1,4}){0,4}:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3}))|:))|(:(((:[0-9a-f]{1,4}){1,7})|((:[0-9a-f]{1,4}){0,5}:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3}))|:)))$", CASE_INSENSITIVE);

  // optimized https://www.safaribooksonline.com/library/view/regular-expressions-cookbook/9780596802837/ch07s16.html
  private static final Pattern IPV4 = Pattern.compile("^(?:(?:25[0-5]|2[0-4]\\d|[1]?\\d\\d?)\\.){3}(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?)$");
  private static final Pattern HOSTNAME = Pattern.compile("^(?=.{1,253}\\.?$)[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?(?:\\.[a-z0-9](?:[-0-9a-z]{0,61}[0-9a-z])?)*\\.?$", CASE_INSENSITIVE);

  // https://github.com/ExodusMovement/schemasafe/blob/master/src/formats.js
  private static final Pattern EMAIL_HOST = Pattern.compile("^[a-z0-9.-]+$", CASE_INSENSITIVE);
  private static final Pattern EMAIL_NAME = Pattern.compile("^[a-z0-9.!#$%&'*+/=?^_`\\{|\\}~-]+$", CASE_INSENSITIVE);
  private static final Pattern EMAIL_HOST_PART = Pattern.compile("^[a-z0-9]([a-z0-9-]{0,61}[a-z0-9])?$", CASE_INSENSITIVE);
  // For the source: https://gist.github.com/dperini/729294
  // For test cases: https://mathiasbynens.be/demo/url-regex
  private static final Pattern URL_ = Pattern.compile("^(?:(?:https?|ftp):\\/\\/)(?:\\S+(?::\\S*)?@)?(?:(?!10(?:\\.\\d{1,3}){3})(?!127(?:\\.\\d{1,3}){3})(?!169\\.254(?:\\.\\d{1,3}){2})(?!192\\.168(?:\\.\\d{1,3}){2})(?!172\\.(?:1[6-9]|2\\d|3[0-1])(?:\\.\\d{1,3}){2})(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}(?:\\.(?:[1-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))|(?:(?:[a-z\\u00a1-\\uffff0-9]+-?)*[a-z\\u00a1-\\uffff0-9]+)(?:\\.(?:[a-z\\u00a1-\\uffff0-9]+-?)*[a-z\\u00a1-\\uffff0-9]+)*(?:\\.(?:[a-z\\u00a1-\\uffff]{2,})))(?::\\d{2,5})?(?:\\/[^\\s]*)?$", CASE_INSENSITIVE | Pattern.UNICODE_CASE);
  //IDN emails can have - and . throughout unlike a normal email address. The email host part can also have the same.
  private static final Pattern IDN_EMAIL_PUNY = Pattern.compile("^xn--[a-z0-9-.]*@[a-z0-9-.]*$");
  private static final Pattern IDN_EMAIL_HOST = Pattern.compile("^[a-z0-9-.]*");
  private static final Pattern IDN_EMAIL_NAME = Pattern.compile("^xn--[a-z0-9-.]*$");
  private static final Pattern IDN_EMAIL_HOST_PART = Pattern.compile("^[a-z0-9-]([a-z0-9-]{0,61}[a-z0-9-])?$");
  private static final Pattern NOT_URI_FRAGMENT = Pattern.compile("\\/|:");
  private static final Pattern URI_PATTERN = Pattern.compile("^(?:[a-z][a-z0-9+\\-.]*:)(?:\\/?\\/(?:(?:[a-z0-9\\-._~!$&'()*+,;=:]|%[0-9a-f]{2})*@)?(?:\\[(?:(?:(?:(?:[0-9a-f]{1,4}:){6}|::(?:[0-9a-f]{1,4}:){5}|(?:[0-9a-f]{1,4})?::(?:[0-9a-f]{1,4}:){4}|(?:(?:[0-9a-f]{1,4}:){0,1}[0-9a-f]{1,4})?::(?:[0-9a-f]{1,4}:){3}|(?:(?:[0-9a-f]{1,4}:){0,2}[0-9a-f]{1,4})?::(?:[0-9a-f]{1,4}:){2}|(?:(?:[0-9a-f]{1,4}:){0,3}[0-9a-f]{1,4})?::[0-9a-f]{1,4}:|(?:(?:[0-9a-f]{1,4}:){0,4}[0-9a-f]{1,4})?::)(?:[0-9a-f]{1,4}:[0-9a-f]{1,4}|(?:(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\.){3}(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?))|(?:(?:[0-9a-f]{1,4}:){0,5}[0-9a-f]{1,4})?::[0-9a-f]{1,4}|(?:(?:[0-9a-f]{1,4}:){0,6}[0-9a-f]{1,4})?::)|[Vv][0-9a-f]+\\.[a-z0-9\\-._~!$&'()*+,;=:]+)\\]|(?:(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\.){3}(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?)|(?:[a-z0-9\\-._~!$&'()*+,;=]|%[0-9a-f]{2})*)(?::\\d*)?(?:\\/(?:[a-z0-9\\-._~!$&'()*+,;=:@]|%[0-9a-f]{2})*)*|\\/(?:(?:[a-z0-9\\-._~!$&'()*+,;=:@]|%[0-9a-f]{2})+(?:\\/(?:[a-z0-9\\-._~!$&'()*+,;=:@]|%[0-9a-f]{2})*)*)?|(?:[a-z0-9\\-._~!$&'()*+,;=:@]|%[0-9a-f]{2})+(?:\\/(?:[a-z0-9\\-._~!$&'()*+,;=:@]|%[0-9a-f]{2})*)*)(?:\\?(?:[a-z0-9\\-._~!$&'()*+,;=:@/?]|%[0-9a-f]{2})*)?(?:#(?:[a-z0-9\\-._~!$&'()*+,;=:@/?]|%[0-9a-f]{2})*)?$", CASE_INSENSITIVE);
  private static final Pattern URIREF = Pattern.compile("^(?:[a-z][a-z0-9+\\-.]*:)?(?:\\/?\\/(?:(?:[a-z0-9\\-._~!$&'()*+,;=:]|%[0-9a-f]{2})*@)?(?:\\[(?:(?:(?:(?:[0-9a-f]{1,4}:){6}|::(?:[0-9a-f]{1,4}:){5}|(?:[0-9a-f]{1,4})?::(?:[0-9a-f]{1,4}:){4}|(?:(?:[0-9a-f]{1,4}:){0,1}[0-9a-f]{1,4})?::(?:[0-9a-f]{1,4}:){3}|(?:(?:[0-9a-f]{1,4}:){0,2}[0-9a-f]{1,4})?::(?:[0-9a-f]{1,4}:){2}|(?:(?:[0-9a-f]{1,4}:){0,3}[0-9a-f]{1,4})?::[0-9a-f]{1,4}:|(?:(?:[0-9a-f]{1,4}:){0,4}[0-9a-f]{1,4})?::)(?:[0-9a-f]{1,4}:[0-9a-f]{1,4}|(?:(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\.){3}(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?))|(?:(?:[0-9a-f]{1,4}:){0,5}[0-9a-f]{1,4})?::[0-9a-f]{1,4}|(?:(?:[0-9a-f]{1,4}:){0,6}[0-9a-f]{1,4})?::)|[Vv][0-9a-f]+\\.[a-z0-9\\-._~!$&'()*+,;=:]+)\\]|(?:(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\.){3}(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?)|(?:[a-z0-9\\-._~!$&'\"()*+,;=]|%[0-9a-f]{2})*)(?::\\d*)?(?:\\/(?:[a-z0-9\\-._~!$&'\"()*+,;=:@]|%[0-9a-f]{2})*)*|\\/(?:(?:[a-z0-9\\-._~!$&'\"()*+,;=:@]|%[0-9a-f]{2})+(?:\\/(?:[a-z0-9\\-._~!$&'\"()*+,;=:@]|%[0-9a-f]{2})*)*)?|(?:[a-z0-9\\-._~!$&'\"()*+,;=:@]|%[0-9a-f]{2})+(?:\\/(?:[a-z0-9\\-._~!$&'\"()*+,;=:@]|%[0-9a-f]{2})*)*)?(?:\\?(?:[a-z0-9\\-._~!$&'\"()*+,;=:@/?]|%[0-9a-f]{2})*)?(?:#(?:[a-z0-9\\-._~!$&'\"()*+,;=:@/?]|%[0-9a-f]{2})*)?$", CASE_INSENSITIVE);
  // uri-template: https://tools.ietf.org/html/rfc6570
  private static final Pattern URITEMPLATE = Pattern.compile("^(?:(?:[^\\x00-\\x20\"'<>%\\\\^`\\{|\\}]|%[0-9a-f]{2})|\\{[+#./;?&=,!@|]?(?:[a-z0-9_]|%[0-9a-f]{2})+(?::[1-9][0-9]{0,3}|\\*)?(?:,(?:[a-z0-9_]|%[0-9a-f]{2})+(?::[1-9][0-9]{0,3}|\\*)?)*\\})*$", CASE_INSENSITIVE);
  private static final Pattern DURATION_A = Pattern.compile("^P\\d+([.,]\\d+)?W$");
  private static final Pattern DURATION_B = Pattern.compile("^P[\\dYMDTHS]*(\\d[.,]\\d+)?[YMDHS]$");
  private static final Pattern DURATION_C = Pattern.compile("^P([.,\\d]+Y)?([.,\\d]+M)?([.,\\d]+D)?(T([.,\\d]+H)?([.,\\d]+M)?([.,\\d]+S)?)?$");
  private static final Pattern FASTDATETIME = Pattern.compile("^\\d\\d\\d\\d-[0-1]\\d-[0-3]\\d[t\\s](?:[0-2]\\d:[0-5]\\d:[0-5]\\d|23:59:60)(?:\\.\\d+)?(?:z?|[+-]\\d\\d(?::?\\d\\d)?)$", CASE_INSENSITIVE);
  //IDN Puny code is only ever in this format. xn--abc.xyz
  private static final Pattern IDN_HOSTNAME_PUNY = Pattern.compile("^xn--[a-z0-9-.]*$");

  // This is checking various other combinations of invalid characters/combinations of invalid characters.
  private static final Pattern IDN_HOSTNAME_UNICODE = Pattern
    .compile("^.*\\u302e.*|(^.*?[^l]\\u00b7.|.*l\\u00b7[^l]|\\u00b7$|^\\u00b7.)|(.*\\u30fB[^\\u3041\\u30A1\\u4e08].*|^\\u30fB$)|(^[\\u05f3\\u05f4].*)$"
      , Pattern.UNICODE_CHARACTER_CLASS);

  // This is checking the start of the word for Mc,Me or Mn characters.
  // Mc -> Spacing_Combining_Mark
  // Me -> Enclosing_Mark
  // Mn -> Non_Spacing_Mark (excluding if Virma \\u094d follows
  private static final Pattern IDN_HOSTNAME_STARTING_ERRORS = Pattern
    .compile("^\\p{gc=Mc}|^\\p{gc=Me}|^\\p{gc=Mn}(?!\\u094d)", Pattern.UNICODE_CHARACTER_CLASS);
  // date-time: http://tools.ietf.org/html/rfc3339#section-5.6
  private static final Pattern FASTTIME = Pattern.compile("^(?:[0-2]\\d:[0-5]\\d:[0-5]\\d|23:59:60)(?:\\.\\d+)?(?:z|[+-]\\d\\d(?::?\\d\\d)?)?$", CASE_INSENSITIVE);
  private static final Pattern VALID_LEAP_SECONDS = Pattern.compile("^23:59:60(?:\\.\\d+)?(?:z|[+-]00(?::?00)?)?$", CASE_INSENSITIVE);

  // date: http://tools.ietf.org/html/rfc3339#section-5.6
  private static final Pattern FASTDATE = Pattern.compile("^\\d\\d\\d\\d-[0-1]\\d-[0-3]\\d$");

  public static boolean fastFormat(String format, String value) {
    switch (format) {
      case "byte":
        return checkPattern(value, BASE64);
      case "date":
        return testDate(value);
      case "time":
        return testTime(value);
      case "date-time":
        return testDateTime(value);
      case "duration":
        return testDuration(value);
      case "uri":
        // http://jmrware.com/articles/2009/uri_regexp/URI_regex.html + optional protocol + required "."
        return checkPattern(value, NOT_URI_FRAGMENT) && checkPattern(value, URI_PATTERN);
      case "uri-reference":
        return checkPattern(value, URIREF);
      case "uri-template":
        return checkPattern(value, URITEMPLATE);
      case "url":
        return checkPattern(value,URL_);
      case "email":
        return testEmail(value);
      case "hostname":
        return checkPattern(value, HOSTNAME);
      case "ipv4":
        return checkPattern(value, IPV4);
      case "ipv6":
        return checkPattern(value, IPV6);
      case "regex":
        return testRegex(value);
      case "uuid":
        return checkPattern(value, UUID);
      case "json-pointer":
        return checkPattern(value, JSON_POINTER);
      case "json-pointer-uri-fragment":
        return checkPattern(value, JSON_POINTER_URI_FRAGMENT);
      case "relative-json-pointer":
        return checkPattern(value, RELATIVE_JSON_POINTER);
      case "idn-hostname":
        return testIdnHostname(value);
      case "idn-email":
        return testIdnEmail(value);
      default:
        // unknown formats are assumed true, e.g.: idn-hostname, binary
        return true;
    }
  }

  public static boolean testContentEncoding(String format, String value) {
    switch(format) {
      case "base64":
        return checkPattern(value, BASE64);
      case "base32":
        return checkPattern(value, BASE32);
      case "base16":
        return checkPattern(value, BASE16);
      default:
        return true;
    }
  }

  private static boolean checkPattern(String value, Pattern p) {
    return p.matcher(value).find();
  }

  private static boolean testRegex(String value) {
    if (checkPattern(value, Z_ANCHOR)) {
      return false;
    }
    try {
      Pattern.compile(value);
      return true;
    } catch (RuntimeException e) {
      return false;
    }
  }

  private static boolean testEmail(String value) {
    return testEmail(value, EMAIL_HOST, EMAIL_NAME, EMAIL_HOST_PART);
  }

  private static boolean testEmail(String value, Pattern emailHostMatcher, Pattern emailNameMatcher, Pattern emailHostPartMatcher) {
    if (value.charAt(0) == '"') {
      return false;
    }
    final String[] parts = value.split("@");
    if (
      parts.length != 2 ||
        Utils.Strings.empty(parts[0]) ||
        Utils.Strings.empty(parts[1]) ||
        parts[0].length() > 64 ||
        parts[1].length() > 253
    ) {
      return false;
    }
    if (parts[0].charAt(0) == '.' || parts[0].endsWith(".") || parts[0].contains("..")) {
      return false;
    }
    if (
      !checkPattern( parts[1], emailHostMatcher)||
        !checkPattern(parts[0], emailNameMatcher)
    ) {
      return false;
    }
    for (String part : parts[1].split("\\.")) {
      if (!checkPattern(part, emailHostPartMatcher)) {
        return false;
      }
    }
    return true;
  }

  private static boolean testDuration(String value) {
    return value.length() > 1 &&
      value.length() < 80 &&
      (checkPattern(value, DURATION_A) ||
        (checkPattern(value, DURATION_B) &&
          checkPattern(value, DURATION_C)));
  }

  private static boolean testDateTime(String value) {
    if(!checkPattern(value, FASTDATETIME)) {
      return false;
    }
    return validateISOTime(DateTimeFormatter.ISO_DATE_TIME, value);
  }

  private static boolean testTime(String value) {
    if(!checkPattern(value, FASTTIME)) {
      return false;
    }

    //The built-in ISO_TIME does not account for leap seconds.
    // So if it IS a leap second (or matches) just assume that it's OK
    if(checkPattern(value, VALID_LEAP_SECONDS)) {
      return true;
    }

    return validateISOTime(DateTimeFormatter.ISO_TIME, value);
  }

  private static boolean testDate(String value) {
    if (!checkPattern(value, FASTDATE)) {
      return false;
    }

    return validateISOTime(DateTimeFormatter.ISO_DATE, value);
  }

  private static boolean validateISOTime(DateTimeFormatter formatter, String value) {
    try {
      formatter.parse(value);
      return true;
    } catch (DateTimeParseException e) {
      return false;
    }
  }

  private static boolean testIdnHostname(String value) {
    try {
      return !checkPattern(value, IDN_HOSTNAME_STARTING_ERRORS) && !checkPattern(value, IDN_HOSTNAME_UNICODE) && checkPattern(IDN.toASCII(value), IDN_HOSTNAME_PUNY);
    } catch(Exception e) {
      return false;
    }
  }

  private static boolean testIdnEmail(String value) {
    try {
      String asciiVersion = IDN.toASCII(value);
      //we were given a non IDN email, so just check if that email is valid or not. No need to do any other checks.
      if(asciiVersion.equals(value)) {
        return testEmail(asciiVersion);
      }

      return checkPattern(asciiVersion, IDN_EMAIL_PUNY) && testEmail(asciiVersion, IDN_EMAIL_HOST, IDN_EMAIL_NAME, IDN_EMAIL_HOST_PART);
    } catch(Exception e) {
      return false;
    }
  }


}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy