org.java_websocket.util.Charsetfunctions Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of Java-WebSocket Show documentation
A barebones WebSocket client and server implementation written 100% in Java
There is a newer version: 1.5.7
/*
 * Copyright (c) 2010-2020 Nathan Rajlich
 *
 *  Permission is hereby granted, free of charge, to any person
 *  obtaining a copy of this software and associated documentation
 *  files (the "Software"), to deal in the Software without
 *  restriction, including without limitation the rights to use,
 *  copy, modify, merge, publish, distribute, sublicense, and/or sell
 *  copies of the Software, and to permit persons to whom the
 *  Software is furnished to do so, subject to the following
 *  conditions:
 *
 *  The above copyright notice and this permission notice shall be
 *  included in all copies or substantial portions of the Software.
 *
 *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 *  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 *  HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 *  WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 *  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 *  OTHER DEALINGS IN THE SOFTWARE.
 */

package org.java_websocket.util;

import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import org.java_websocket.exceptions.InvalidDataException;
import org.java_websocket.framing.CloseFrame;

public class Charsetfunctions {

  /**
   * Private constructor for real static class
   */
  private Charsetfunctions() {
  }

  private static final CodingErrorAction codingErrorAction = CodingErrorAction.REPORT;

  /*
   * @return UTF-8 encoding in bytes
   */
  public static byte[] utf8Bytes(String s) {
    return s.getBytes(StandardCharsets.UTF_8);
  }

  /*
   * @return ASCII encoding in bytes
   */
  public static byte[] asciiBytes(String s) {
    return s.getBytes(StandardCharsets.US_ASCII);
  }

  public static String stringAscii(byte[] bytes) {
    return stringAscii(bytes, 0, bytes.length);
  }

  public static String stringAscii(byte[] bytes, int offset, int length) {
    return new String(bytes, offset, length, StandardCharsets.US_ASCII);
  }

  public static String stringUtf8(byte[] bytes) throws InvalidDataException {
    return stringUtf8(ByteBuffer.wrap(bytes));
  }

  public static String stringUtf8(ByteBuffer bytes) throws InvalidDataException {
    CharsetDecoder decode = StandardCharsets.UTF_8.newDecoder();
    decode.onMalformedInput(codingErrorAction);
    decode.onUnmappableCharacter(codingErrorAction);
    String s;
    try {
      bytes.mark();
      s = decode.decode(bytes).toString();
      bytes.reset();
    } catch (CharacterCodingException e) {
      throw new InvalidDataException(CloseFrame.NO_UTF8, e);
    }
    return s;
  }

  /**
   * Implementation of the "Flexible and Economical UTF-8 Decoder" algorithm by Björn Höhrmann
   * (http://bjoern.hoehrmann.de/utf-8/decoder/dfa/)
   */
  private static final int[] utf8d = {
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, // 00..1f
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, // 20..3f
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, // 40..5f
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, // 60..7f
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
      9, // 80..9f
      7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
      7, // a0..bf
      8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
      2, // c0..df
      0xa, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x4, 0x3, 0x3, // e0..ef
      0xb, 0x6, 0x6, 0x6, 0x5, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, // f0..ff
      0x0, 0x1, 0x2, 0x3, 0x5, 0x8, 0x7, 0x1, 0x1, 0x1, 0x4, 0x6, 0x1, 0x1, 0x1, 0x1, // s0..s0
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1,
      1, // s1..s2
      1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1,
      1, // s3..s4
      1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1,
      1, // s5..s6
      1, 3, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
      // s7..s8
  };

  /**
   * Check if the provided BytebBuffer contains a valid utf8 encoded string.
   * 
   * Using the algorithm "Flexible and Economical UTF-8 Decoder" by Björn Höhrmann
   * (http://bjoern.hoehrmann.de/utf-8/decoder/dfa/)
   *
   * @param data the ByteBuffer
   * @param off  offset (for performance reasons)
   * @return does the ByteBuffer contain a valid utf8 encoded string
   */
  public static boolean isValidUTF8(ByteBuffer data, int off) {
    int len = data.remaining();
    if (len < off) {
      return false;
    }
    int state = 0;
    for (int i = off; i < len; ++i) {
      state = utf8d[256 + (state << 4) + utf8d[(0xff & data.get(i))]];
      if (state == 1) {
        return false;
      }
    }
    return true;
  }

  /**
   * Calling isValidUTF8 with offset 0
   *
   * @param data the ByteBuffer
   * @return does the ByteBuffer contain a valid utf8 encoded string
   */
  public static boolean isValidUTF8(ByteBuffer data) {
    return isValidUTF8(data, 0);
  }

}