All Downloads are FREE. The search and download functionality uses the official Maven repository.

net.sf.mmm.util.io.api.ByteOrderMark Maven / Gradle / Ivy

/* Copyright (c) The m-m-m Team, Licensed under the Apache License, Version 2.0
 * http://www.apache.org/licenses/LICENSE-2.0 */
package net.sf.mmm.util.io.api;

/**
 * This type represents a Byte-Order-Mark (BOM) of a
 * Unicode-Transformation-Format (UTF).
 *
 * @author Joerg Hohwiller (hohwille at users.sourceforge.net)
 * @since 1.0.1
 */
public enum ByteOrderMark {

  /**
   * The {@link ByteOrderMark} for {@link EncodingUtil#ENCODING_UTF_8 UTF-8}: {@code 0xef 0xbb 0xbf}
   */
  UTF_8() {

    @Override
    public String getEncoding() {

      return EncodingUtil.ENCODING_UTF_8;
    }

    @Override
    public byte[] getBytes() {

      return MAGIC_BYTES_UTF8;
    }
  },

  /**
   * The {@link ByteOrderMark} for {@link EncodingUtil#ENCODING_UTF_16_BE UTF-16BE}: {@code 0xfe 0xff}
   */
  UTF_16_BE() {

    @Override
    public String getEncoding() {

      return EncodingUtil.ENCODING_UTF_16_BE;
    }

    @Override
    public byte[] getBytes() {

      return MAGIC_BYTES_UTF16_BE;
    }
  },

  /**
   * The {@link ByteOrderMark} for {@link EncodingUtil#ENCODING_UTF_16_LE UTF-16LE}: {@code 0xff 0xfe}
   */
  UTF_16_LE() {

    @Override
    public String getEncoding() {

      return EncodingUtil.ENCODING_UTF_16_LE;
    }

    @Override
    public byte[] getBytes() {

      return MAGIC_BYTES_UTF16_LE;
    }
  },

  /**
   * The {@link ByteOrderMark} for {@link EncodingUtil#ENCODING_UTF_32_BE UTF-32BE}:
   * {@code 0x00 0x00 0xfe 0xff}
   */
  UTF_32_BE() {

    @Override
    public String getEncoding() {

      return EncodingUtil.ENCODING_UTF_32_BE;
    }

    @Override
    public byte[] getBytes() {

      return MAGIC_BYTES_UTF32_BE;
    }
  },

  /**
   * The {@link ByteOrderMark} for {@link EncodingUtil#ENCODING_UTF_32_LE UTF-32LE}:
   * {@code 0xff 0xfe 0x00 0x00}
   */
  UTF_32_LE() {

    @Override
    public String getEncoding() {

      return EncodingUtil.ENCODING_UTF_32_LE;
    }

    @Override
    public byte[] getBytes() {

      return MAGIC_BYTES_UTF32_LE;
    }
  };

  /** The magic bytes of {@link #UTF_8}. */
  private static final byte[] MAGIC_BYTES_UTF8 = new byte[] { (byte) 0xef, (byte) 0xbb, (byte) 0xbf };

  /** The magic bytes of {@link #UTF_16_BE}. */
  private static final byte[] MAGIC_BYTES_UTF16_BE = new byte[] { (byte) 0xfe, (byte) 0xff };

  /** The magic bytes of {@link #UTF_16_LE}. */
  private static final byte[] MAGIC_BYTES_UTF16_LE = new byte[] { (byte) 0xff, (byte) 0xfe };

  /** The magic bytes of {@link #UTF_32_BE}. */
  private static final byte[] MAGIC_BYTES_UTF32_BE = new byte[] { 0x00, 0x00, (byte) 0xfe, (byte) 0xff };

  /** The magic bytes of {@link #UTF_32_LE}. */
  private static final byte[] MAGIC_BYTES_UTF32_LE = new byte[] { (byte) 0xff, (byte) 0xfe, 0x00, 0x00 };

  /**
   * This method gets the encoding indicated by this {@link ByteOrderMark}.
   *
   * @return the encoding.
   */
  public abstract String getEncoding();

  /**
   * This method gets the number of bytes of this {@link ByteOrderMark}.
   *
   * @return the length.
   */
  public final int getLength() {

    return getBytes().length;
  }

  /**
   * This method detects if this {@link ByteOrderMark} is present in the given {@code bytes}.<br>
   * <b>NOTE:</b><br>
   * A BOM may only occur at the head of your data (file, payload, etc.).<br>
   * <b>ATTENTION:</b><br>
   * Please note that binary data may accidentally have header bytes that represent this {@link ByteOrderMark}.
   * This method can NOT know this and will return {@code true} even if the data is NOT encoded with the
   * {@link #getEncoding() according encoding}. Therefore you should only use this method for the header of
   * textual data.
   *
   * @param bytes is the buffer with the bytes to check.
   * @param offset is the index of the first data-byte in {@code bytes}. Will typically be {@code 0}. If the
   *        offset is negative or leaves less than {@link #getLength()} bytes in the buffer, {@code false} is
   *        returned.
   * @return {@code true} if this {@link ByteOrderMark BOM} was detected in the given {@code bytes} starting at
   *         {@code offset}, {@code false} otherwise.
   */
  public final boolean isPresent(byte[] bytes, int offset) {

    byte[] bom = getBytes();
    int length = bytes.length;
    // Compare against "length - offset" instead of "offset + bom.length": the sum can overflow int for a
    // huge offset, which would pass the check and then fail with an ArrayIndexOutOfBoundsException.
    if ((offset < 0) || (bom.length > length - offset)) {
      return false;
    }
    for (int i = 0; i < bom.length; i++) {
      if (bytes[offset + i] != bom[i]) {
        return false;
      }
    }
    return true;
  }

  /**
   * This method gets the bytes of this BOM.
   *
   * @return the magic bytes of this BOM.
   */
  protected abstract byte[] getBytes();

  /**
   * This method detects the {@link ByteOrderMark} that may be {@link #isPresent(byte[], int) present} in the
   * given {@code bytes} starting at {@code offset}. If more than one {@link ByteOrderMark} matches, the one with
   * the greatest {@link #getLength() length} is returned. This matters because the bytes of {@link #UTF_16_LE}
   * ({@code 0xff 0xfe}) are a prefix of the bytes of {@link #UTF_32_LE} ({@code 0xff 0xfe 0x00 0x00}), so the
   * first match in declaration order would never yield {@link #UTF_32_LE}.<br>
   * <b>ATTENTION:</b><br>
   * Please note that binary data may accidentally have header bytes that represent a {@link ByteOrderMark}.
   * This method can NOT know this and will return that {@link ByteOrderMark} even if the data is NOT encoded
   * with the {@link #getEncoding() according encoding}. Therefore you should only use this method for the header
   * of textual data.
   *
   * @param bytes is the buffer with the bytes to check.
   * @param offset is the index of the first data-byte in {@code bytes}. Will typically be {@code 0}.
   * @return the detected {@link ByteOrderMark} or {@code null} if the given {@code bytes} have no BOM.
   */
  public static ByteOrderMark detect(byte[] bytes, int offset) {

    ByteOrderMark longestMatch = null;
    for (ByteOrderMark bom : values()) {
      if (bom.isPresent(bytes, offset)) {
        // Prefer the longest matching signature so a shorter BOM that is a prefix of a longer one cannot
        // shadow it.
        if ((longestMatch == null) || (bom.getLength() > longestMatch.getLength())) {
          longestMatch = bom;
        }
      }
    }
    return longestMatch;
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy