![JAR search and dependency download from the Maven repository](/logo.png)
edu.princeton.cs.algs4.Alphabet Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of algorithm Show documentation
Show all versions of algorithm Show documentation
Simple implementation of basic algorithm.
The newest version!
/******************************************************************************
* Compilation: javac Alphabet.java
* Execution: java Alphabet
* Dependencies: StdOut.java
*
* A data type for alphabets, for use with string-processing code
* that must convert between an alphabet of size R and the integers
* 0 through R-1.
*
* Warning: supports only the basic multilingual plane (BMP), i.e,
* Unicode characters between U+0000 and U+FFFF.
*
******************************************************************************/
package edu.princeton.cs.algs4;
public class Alphabet {
/**
* The binary alphabet { 0, 1 }.
*/
public static final Alphabet BINARY = new Alphabet("01");
/**
* The octal alphabet { 0, 1, 2, 3, 4, 5, 6, 7 }.
*/
public static final Alphabet OCTAL = new Alphabet("01234567");
/**
* The decimal alphabet { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }.
*/
public static final Alphabet DECIMAL = new Alphabet("0123456789");
/**
* The hexadecimal alphabet { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, A, B, C, D, E, F }.
*/
public static final Alphabet HEXADECIMAL = new Alphabet("0123456789ABCDEF");
/**
* The DNA alphabet { A, C, T, G }.
*/
public static final Alphabet DNA = new Alphabet("ACGT");
/**
* The lowercase alphabet { a, b, c, ..., z }.
*/
public static final Alphabet LOWERCASE = new Alphabet("abcdefghijklmnopqrstuvwxyz");
/**
* The uppercase alphabet { A, B, C, ..., Z }.
*/
public static final Alphabet UPPERCASE = new Alphabet("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
/**
* The protein alphabet { A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R, S, T, V, W, Y }.
*/
public static final Alphabet PROTEIN = new Alphabet("ACDEFGHIKLMNPQRSTVWY");
/**
* The base-64 alphabet (64 characters).
*/
public static final Alphabet BASE64 = new Alphabet("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/");
/**
* The ASCII alphabet (0-127).
*/
public static final Alphabet ASCII = new Alphabet(128);
/**
* The extended ASCII alphabet (0-255).
*/
public static final Alphabet EXTENDED_ASCII = new Alphabet(256);
/**
* The Unicode 16 alphabet (0-65,535).
*/
public static final Alphabet UNICODE16 = new Alphabet(65536);
private char[] alphabet; // the characters in the alphabet
private int[] inverse; // indices
private final int R; // the radix of the alphabet
/**
* Initializes a new alphabet from the given set of characters.
*
* @param alpha the set of characters
*/
public Alphabet(String alpha) {
// check that alphabet contains no duplicate chars
boolean[] unicode = new boolean[Character.MAX_VALUE];
for (int i = 0; i < alpha.length(); i++) {
char c = alpha.charAt(i);
if (unicode[c])
throw new IllegalArgumentException("Illegal alphabet: repeated character = '" + c + "'");
unicode[c] = true;
}
alphabet = alpha.toCharArray();
R = alpha.length();
inverse = new int[Character.MAX_VALUE];
for (int i = 0; i < inverse.length; i++)
inverse[i] = -1;
// can't use char since R can be as big as 65,536
for (int c = 0; c < R; c++)
inverse[alphabet[c]] = c;
}
/**
* Initializes a new alphabet using characters 0 through R-1.
*
* @param radix the number of characters in the alphabet (the radix R)
*/
private Alphabet(int radix) {
this.R = radix;
alphabet = new char[R];
inverse = new int[R];
// can't use char since R can be as big as 65,536
for (int i = 0; i < R; i++)
alphabet[i] = (char) i;
for (int i = 0; i < R; i++)
inverse[i] = i;
}
/**
* Initializes a new alphabet using characters 0 through 255.
*/
public Alphabet() {
this(256);
}
/**
* Returns true if the argument is a character in this alphabet.
*
* @param c the character
* @return {@code true} if {@code c} is a character in this alphabet;
* {@code false} otherwise
*/
public boolean contains(char c) {
return inverse[c] != -1;
}
/**
* Returns the number of characters in this alphabet (the radix).
*
* @return the number of characters in this alphabet
* @deprecated Replaced by {@link #radix()}.
*/
@Deprecated
public int R() {
return R;
}
/**
* Returns the number of characters in this alphabet (the radix).
*
* @return the number of characters in this alphabet
*/
public int radix() {
return R;
}
/**
* Returns the binary logarithm of the number of characters in this alphabet.
*
* @return the binary logarithm (rounded up) of the number of characters in this alphabet
*/
public int lgR() {
int lgR = 0;
for (int t = R-1; t >= 1; t /= 2)
lgR++;
return lgR;
}
/**
* Returns the index corresponding to the argument character.
*
* @param c the character
* @return the index corresponding to the character {@code c}
* @throws IllegalArgumentException unless {@code c} is a character in this alphabet
*/
public int toIndex(char c) {
if (c >= inverse.length || inverse[c] == -1) {
throw new IllegalArgumentException("Character " + c + " not in alphabet");
}
return inverse[c];
}
/**
* Returns the indices corresponding to the argument characters.
*
* @param s the characters
* @return the indices corresponding to the characters {@code s}
* @throws IllegalArgumentException unless every character in {@code s}
* is a character in this alphabet
*/
public int[] toIndices(String s) {
char[] source = s.toCharArray();
int[] target = new int[s.length()];
for (int i = 0; i < source.length; i++)
target[i] = toIndex(source[i]);
return target;
}
/**
* Returns the character corresponding to the argument index.
*
* @param index the index
* @return the character corresponding to the index {@code index}
* @throws IllegalArgumentException unless {@code 0 <= index < R}
*/
public char toChar(int index) {
if (index < 0 || index >= R) {
throw new IllegalArgumentException("index must be between 0 and " + R + ": " + index);
}
return alphabet[index];
}
/**
* Returns the characters corresponding to the argument indices.
*
* @param indices the indices
* @return the characters corresponding to the indices {@code indices}
* @throws IllegalArgumentException unless {@code 0 < indices[i] < R}
* for every {@code i}
*/
public String toChars(int[] indices) {
StringBuilder s = new StringBuilder(indices.length);
for (int i = 0; i < indices.length; i++)
s.append(toChar(indices[i]));
return s.toString();
}
/**
* Unit tests the {@code Alphabet} data type.
*
* @param args the command-line arguments
*/
public static void main(String[] args) {
int[] encoded1 = Alphabet.BASE64.toIndices("NowIsTheTimeForAllGoodMen");
String decoded1 = Alphabet.BASE64.toChars(encoded1);
StdOut.println(decoded1);
int[] encoded2 = Alphabet.DNA.toIndices("AACGAACGGTTTACCCCG");
String decoded2 = Alphabet.DNA.toChars(encoded2);
StdOut.println(decoded2);
int[] encoded3 = Alphabet.DECIMAL.toIndices("01234567890123456789");
String decoded3 = Alphabet.DECIMAL.toChars(encoded3);
StdOut.println(decoded3);
}
}
/******************************************************************************
* Copyright 2002-2018, Robert Sedgewick and Kevin Wayne.
*
* This file is part of algs4.jar, which accompanies the textbook
*
* Algorithms, 4th edition by Robert Sedgewick and Kevin Wayne,
* Addison-Wesley Professional, 2011, ISBN 0-321-57351-X.
* http://algs4.cs.princeton.edu
*
*
* algs4.jar is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* algs4.jar is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with algs4.jar. If not, see http://www.gnu.org/licenses.
******************************************************************************/
© 2015 - 2025 Weber Informatics LLC | Privacy Policy