All downloads are FREE. The search and download functionality uses the official Maven repository.

org.myire.util.Ascii Maven / Gradle / Ivy

/*
 * Copyright 2011, 2016 Peter Franzen. All rights reserved.
 *
 * Licensed under the Apache License v2.0: http://www.apache.org/licenses/LICENSE-2.0
 */
package org.myire.util;

import org.myire.annotation.Unreachable;


/**
 * Utility methods for checking if a {@code byte} or a {@code char} is within the range of ASCII
 * characters ({@code 0x00-0x7F}) or a subrange thereof.
 * <p>
 * Every check is a pure bit operation on the argument. The subrange checks work in two steps:
 * first the high bits of the value are compared with the bit pattern shared by all values in the
 * subrange, then the remaining low bits are used as a bit index into a constant in which exactly
 * the bits corresponding to valid low bit values are set.
 *
 * @author Peter Franzen
 */
public final class Ascii
{
    /**
     * Bits 1-26 are set. A value whose five low bits are in the range 1-26 is at one of the 26
     * letter positions of its 32-value block, which is the case for {@code 0x41-0x5A} (A-Z) and
     * for {@code 0x61-0x7A} (a-z).
     */
    private static final int LETTER_OFFSETS = 0x07fffffe;

    /**
     * Bits 1-26 and 33-58 are set. A value in the range {@code 0x40-0x7F} whose six low bits are
     * in one of those ranges is in {@code 0x41-0x5A} (A-Z) or in {@code 0x61-0x7A} (a-z).
     */
    private static final long ANY_CASE_LETTER_OFFSETS = 0x07fffffe07fffffeL;

    /**
     * Bits 0-9 are set. A value in the range {@code 0x30-0x3F} whose four low bits are in the
     * range 0-9 is in {@code 0x30-0x39} (0-9).
     */
    private static final int DIGIT_OFFSETS = 0x000003ff;


    /**
     * Private constructor to disallow instantiations of utility method class.
     */
    @Unreachable
    private Ascii()
    {
        // Empty default ctor, defined to override access scope.
    }


    /**
     * Check if a {@code byte} is in the ASCII range {@code 0x00-0x7F}.
     *
     * @param pByte The {@code byte} to check.
     *
     * @return  True if {@code pByte} is in the range {@code 0x00-0x7F}, false otherwise.
     */
    public static boolean isAscii(byte pByte)
    {
        // Only the 7 lowest bits may be set. A byte with the high bit set is negative and is
        // sign-extended when widened to an int for the bitwise operation (JLS 5.1.2), so 0x80
        // becomes 0xffffff80. The mask only selects the high bit of the least significant byte,
        // and that bit is the same with or without sign extension.
        return (pByte & 0x80) == 0;
    }


    /**
     * Check if a {@code char} is an ASCII character.
     *
     * @param pChar The {@code char} to check.
     *
     * @return  True if {@code pChar} is in the range {@code 0x00-0x7F}, false otherwise.
     */
    public static boolean isAscii(char pChar)
    {
        // Only the 7 lowest bits may be set.
        // A char is widened to an int by zero-extending the char value (JLS 5.1.2).
        return (pChar & 0xff80) == 0;
    }


    /**
     * Check if a {@code byte} is in the ASCII range A-Z ({@code 0x41-0x5A}).
     *
     * @param pByte The {@code byte} to check.
     *
     * @return  True if {@code pByte} is in the range {@code 0x41-0x5A}, false otherwise.
     */
    public static boolean isAsciiAZ(byte pByte)
    {
        // The three most significant bits must be 010, otherwise the high nibble cannot be
        // 0x4 or 0x5 (0100 or 0101). The five low bits must then denote a letter position.
        return (pByte & 0xe0) == 0x40 && hasLetterOffset(pByte);
    }


    /**
     * Check if a {@code char} is in the ASCII range A-Z ({@code 0x41-0x5A}).
     *
     * @param pChar The {@code char} to check.
     *
     * @return  True if {@code pChar} is in the range {@code 0x41-0x5A}, false otherwise.
     */
    public static boolean isAsciiAZ(char pChar)
    {
        // The high byte must be 0 and the three most significant bits in the low byte must be 010,
        // otherwise the high nibble in the low byte cannot be 0x4 or 0x5 (0100 or 0101). The five
        // low bits must then denote a letter position.
        return (pChar & 0xffe0) == 0x40 && hasLetterOffset(pChar);
    }


    /**
     * Check if a {@code byte} is in the ASCII range a-z ({@code 0x61-0x7A}).
     *
     * @param pByte The {@code byte} to check.
     *
     * @return  True if {@code pByte} is in the range {@code 0x61-0x7A}, false otherwise.
     */
    public static boolean isAsciiaz(byte pByte)
    {
        // The three most significant bits must be 011, otherwise the high nibble cannot be
        // 0x6 or 0x7 (0110 or 0111). The five low bits follow the same pattern as for A-Z.
        return (pByte & 0xe0) == 0x60 && hasLetterOffset(pByte);
    }


    /**
     * Check if a {@code char} is in the ASCII range a-z ({@code 0x61-0x7A}).
     *
     * @param pChar The {@code char} to check.
     *
     * @return  True if {@code pChar} is in the range {@code 0x61-0x7A}, false otherwise.
     */
    public static boolean isAsciiaz(char pChar)
    {
        // The high byte must be 0 and the three most significant bits in the low byte must be 011,
        // otherwise the high nibble in the low byte cannot be 0x6 or 0x7 (0110 or 0111). The five
        // low bits follow the same pattern as for A-Z.
        return (pChar & 0xffe0) == 0x60 && hasLetterOffset(pChar);
    }


    /**
     * Check if a {@code byte} is in the ASCII range A-Z ({@code 0x41-0x5A}) or a-z
     * ({@code 0x61-0x7A}).
     *
     * @param pByte The {@code byte} to check.
     *
     * @return  True if {@code pByte} is in the range {@code 0x41-0x5A} or in the range
     *          {@code 0x61-0x7A}, false otherwise.
     */
    public static boolean isAsciiAZaz(byte pByte)
    {
        // The two most significant bits must be 01, otherwise the high nibble cannot be 0x4, 0x5,
        // 0x6 or 0x7 (0100, 0101, 0110 or 0111). The six low bits must then denote a letter
        // position in either the upper case or the lower case block.
        return (pByte & 0xc0) == 0x40 && hasAnyCaseLetterOffset(pByte);
    }


    /**
     * Check if a {@code char} is in the ASCII range A-Z ({@code 0x41-0x5A}) or a-z
     * ({@code 0x61-0x7A}).
     *
     * @param pChar The {@code char} to check.
     *
     * @return  True if {@code pChar} is in the range {@code 0x41-0x5A} or in the range
     *          {@code 0x61-0x7A}, false otherwise.
     */
    public static boolean isAsciiAZaz(char pChar)
    {
        // The high byte must be 0 and the two most significant bits in the low byte must be 01,
        // otherwise the high nibble in the low byte cannot be 0x4, 0x5, 0x6 or 0x7 (0100, 0101,
        // 0110 or 0111). The six low bits must then denote a letter position in either the upper
        // case or the lower case block.
        return (pChar & 0xffc0) == 0x40 && hasAnyCaseLetterOffset(pChar);
    }


    /**
     * Check if a {@code byte} is in the ASCII range 0-9 ({@code 0x30-0x39}).
     *
     * @param pByte The {@code byte} to check.
     *
     * @return  True if {@code pByte} is in the range {@code 0x30-0x39}, false otherwise.
     */
    public static boolean isAsciiDigit(byte pByte)
    {
        // The high nibble must be 0011, otherwise the byte cannot be in the range 0x30-0x39
        // (00110000 - 00111001). The four low bits must then denote a digit.
        return (pByte & 0xf0) == 0x30 && hasDigitOffset(pByte);
    }


    /**
     * Check if a {@code char} is in the ASCII range 0-9 ({@code 0x30-0x39}).
     *
     * @param pChar The {@code char} to check.
     *
     * @return  True if {@code pChar} is in the range {@code 0x30-0x39}, false otherwise.
     */
    public static boolean isAsciiDigit(char pChar)
    {
        // The high byte must be 0 and the high nibble in the low byte must be 0011, otherwise the
        // char cannot be in the range 0x30-0x39 (00110000 - 00111001). The four low bits must then
        // denote a digit.
        return (pChar & 0xfff0) == 0x30 && hasDigitOffset(pChar);
    }


    /**
     * Check if the five least significant bits of a value are in the range {@code 00001-11010}
     * (1-26), which are the low bits of the values {@code 0x41-0x5A} and {@code 0x61-0x7A}.
     * <p>
     * The check sets the bit whose index is the value of the five low bits in a 32-bit value and
     * masks it with {@link #LETTER_OFFSETS}. Example: {@code 0x52} has the low bits {@code 10010}
     * (18), and {@code 0x00040000 & 0x07fffffe} is non-zero, meaning that the low bits are in the
     * range. The value {@code 0x5b} has the low bits {@code 11011} (27), and
     * {@code 0x08000000 & 0x07fffffe} is zero, meaning that the low bits are not in the range.
     *
     * @param pValue    The value to check the five low bits of. All other bits are ignored.
     *
     * @return  True if the five low bits of {@code pValue} have a value between 1 and 26, false
     *          otherwise.
     */
    private static boolean hasLetterOffset(int pValue)
    {
        return ((1 << (pValue & 0x1f)) & LETTER_OFFSETS) != 0;
    }


    /**
     * Check if the six least significant bits of a value are in the range {@code 000001-011010}
     * (1-26) or in the range {@code 100001-111010} (33-58), which are the low bits of the values
     * {@code 0x41-0x5A} and {@code 0x61-0x7A}, respectively.
     * <p>
     * The check sets the bit whose index is the value of the six low bits in a 64-bit value and
     * masks it with {@link #ANY_CASE_LETTER_OFFSETS}. Example: {@code 0x52} has the low bits
     * {@code 010010} (18), and {@code 0x0000000000040000 & 0x07fffffe07fffffe} is non-zero,
     * meaning that the low bits are in one of the ranges. The value {@code 0x7b} has the low bits
     * {@code 111011} (59), and {@code 0x0800000000000000 & 0x07fffffe07fffffe} is zero, meaning
     * that the low bits are in neither range.
     *
     * @param pValue    The value to check the six low bits of. All other bits are ignored.
     *
     * @return  True if the six low bits of {@code pValue} have a value between 1 and 26 or between
     *          33 and 58, false otherwise.
     */
    private static boolean hasAnyCaseLetterOffset(int pValue)
    {
        // The shifted operand must be a long, an int shift would only use the five low bits of
        // the shift distance (JLS 15.19).
        return ((1L << (pValue & 0x3f)) & ANY_CASE_LETTER_OFFSETS) != 0;
    }


    /**
     * Check if the four least significant bits of a value are in the range {@code 0000-1001}
     * (0-9), which are the low bits of the values {@code 0x30-0x39}.
     * <p>
     * The check sets the bit whose index is the value of the four low bits in a 32-bit value and
     * masks it with {@link #DIGIT_OFFSETS}. Example: {@code 0x32} has the low bits {@code 0010}
     * (2), and {@code 0x00000004 & 0x000003ff} is non-zero, meaning that the low bits are in the
     * range. The value {@code 0x3c} has the low bits {@code 1100} (12), and
     * {@code 0x00001000 & 0x000003ff} is zero, meaning that the low bits are not in the range.
     *
     * @param pValue    The value to check the four low bits of. All other bits are ignored.
     *
     * @return  True if the four low bits of {@code pValue} have a value between 0 and 9, false
     *          otherwise.
     */
    private static boolean hasDigitOffset(int pValue)
    {
        return ((1 << (pValue & 0x0f)) & DIGIT_OFFSETS) != 0;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy