All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.luaj.vm2.LuaString Maven / Gradle / Ivy

The newest version!
/*******************************************************************************
* Copyright (c) 2009-2011 Luaj.org. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
* 
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
******************************************************************************/
package org.luaj.vm2;


import java.io.ByteArrayInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;

import org.luaj.vm2.lib.MathLib;

/**
 * Subclass of {@link LuaValue} for representing lua strings. 
 * 

* Because lua string values are more nearly sequences of bytes than * sequences of characters or unicode code points, the {@link LuaString} * implementation holds the string value in an internal byte array. *

* {@link LuaString} values are not considered mutable once constructed, * so multiple {@link LuaString} values can chare a single byte array. *

* Currently {@link LuaString}s are pooled via a centrally managed weak table. * To ensure that as many string values as possible take advantage of this, * Constructors are not exposed directly. As with number, booleans, and nil, * instance construction should be via {@link LuaValue#valueOf(byte[])} or similar API. *

* Because of this pooling, users of LuaString must not directly alter the * bytes in a LuaString, or undefined behavior will result. *

* When Java Strings are used to initialize {@link LuaString} data, the UTF8 encoding is assumed. * The functions * {@link #lengthAsUtf8(char[])}, * {@link #encodeToUtf8(char[], int, byte[], int)}, and * {@link #decodeAsUtf8(byte[], int, int)} * are used to convert back and forth between UTF8 byte arrays and character arrays. * * @see LuaValue * @see LuaValue#valueOf(String) * @see LuaValue#valueOf(byte[]) */ public class LuaString extends LuaValue { /** The singleton instance for string metatables that forwards to the string functions. * Typically, this is set to the string metatable as a side effect of loading the string * library, and is read-write to provide flexible behavior by default. When used in a * server environment where there may be roge scripts, this should be replaced with a * read-only table since it is shared across all lua code in this Java VM. */ public static LuaValue s_metatable; /** The bytes for the string. These must not be mutated directly because * the backing may be shared by multiple LuaStrings, and the hash code is * computed only at construction time. * It is exposed only for performance and legacy reasons. */ public final byte[] m_bytes; /** The offset into the byte array, 0 means start at the first byte */ public final int m_offset; /** The number of bytes that comprise this string */ public final int m_length; /** The hashcode for this string. Computed at construct time. */ private final int m_hashcode; /** Size of cache of recent short strings. This is the maximum number of LuaStrings that * will be retained in the cache of recent short strings. Exposed to package for testing. */ static final int RECENT_STRINGS_CACHE_SIZE = 128; /** Maximum length of a string to be considered for recent short strings caching. * This effectively limits the total memory that can be spent on the recent strings cache, * because no LuaString whose backing exceeds this length will be put into the cache. * Exposed to package for testing. */ static final int RECENT_STRINGS_MAX_LENGTH = 32; /** Simple cache of recently created strings that are short. * This is simply a list of strings, indexed by their hash codes modulo the cache size * that have been recently constructed. If a string is being constructed frequently * from different contexts, it will generally show up as a cache hit and resolve * to the same value. */ private static final class RecentShortStrings { private static final LuaString recent_short_strings[] = new LuaString[RECENT_STRINGS_CACHE_SIZE]; } /** * Get a {@link LuaString} instance whose bytes match * the supplied Java String using the UTF8 encoding. * @param string Java String containing characters to encode as UTF8 * @return {@link LuaString} with UTF8 bytes corresponding to the supplied String */ public static LuaString valueOf(String string) { char[] c = string.toCharArray(); byte[] b = new byte[lengthAsUtf8(c)]; encodeToUtf8(c, c.length, b, 0); return valueUsing(b, 0, b.length); } /** Construct a {@link LuaString} for a portion of a byte array. *

* The array is first be used as the backing for this object, so clients must not change contents. * If the supplied value for 'len' is more than half the length of the container, the * supplied byte array will be used as the backing, otherwise the bytes will be copied to a * new byte array, and cache lookup may be performed. *

* @param bytes byte buffer * @param off offset into the byte buffer * @param len length of the byte buffer * @return {@link LuaString} wrapping the byte buffer */ public static LuaString valueOf(byte[] bytes, int off, int len) { if (len > RECENT_STRINGS_MAX_LENGTH) return valueFromCopy(bytes, off, len); final int hash = hashCode(bytes, off, len); final int bucket = hash & (RECENT_STRINGS_CACHE_SIZE - 1); final LuaString t = RecentShortStrings.recent_short_strings[bucket]; if (t != null && t.m_hashcode == hash && t.byteseq(bytes, off, len)) return t; final LuaString s = valueFromCopy(bytes, off, len); RecentShortStrings.recent_short_strings[bucket] = s; return s; } /** Construct a new LuaString using a copy of the bytes array supplied */ private static LuaString valueFromCopy(byte[] bytes, int off, int len) { final byte[] copy = new byte[len]; for (int i=0; i * The caller must ensure that the array is not mutated after the call. * However, if the string is short enough the short-string cache is checked * for a match which may be used instead of the supplied byte array. *

* @param bytes byte buffer * @return {@link LuaString} wrapping the byte buffer, or an equivalent string. */ static public LuaString valueUsing(byte[] bytes, int off, int len) { if (bytes.length > RECENT_STRINGS_MAX_LENGTH) return new LuaString(bytes, off, len); final int hash = hashCode(bytes, off, len); final int bucket = hash & (RECENT_STRINGS_CACHE_SIZE - 1); final LuaString t = RecentShortStrings.recent_short_strings[bucket]; if (t != null && t.m_hashcode == hash && t.byteseq(bytes, off, len)) return t; final LuaString s = new LuaString(bytes, off, len); RecentShortStrings.recent_short_strings[bucket] = s; return s; } /** Construct a {@link LuaString} using the supplied characters as byte values. *

* Only the low-order 8-bits of each character are used, the remainder is ignored. *

* This is most useful for constructing byte sequences that do not conform to UTF8. * @param bytes array of char, whose values are truncated at 8-bits each and put into a byte array. * @return {@link LuaString} wrapping a copy of the byte buffer */ public static LuaString valueOf(char[] bytes) { return valueOf(bytes, 0, bytes.length); } /** Construct a {@link LuaString} using the supplied characters as byte values. *

* Only the low-order 8-bits of each character are used, the remainder is ignored. *

* This is most useful for constructing byte sequences that do not conform to UTF8. * @param bytes array of char, whose values are truncated at 8-bits each and put into a byte array. * @return {@link LuaString} wrapping a copy of the byte buffer */ public static LuaString valueOf(char[] bytes, int off, int len) { byte[] b = new byte[len]; for ( int i=0; i * The LuaString returned will either be a new LuaString containing a copy * of the bytes array, or be an existing LuaString used already having the same value. *

* @param bytes byte buffer * @return {@link LuaString} wrapping the byte buffer */ public static LuaString valueOf(byte[] bytes) { return valueOf(bytes, 0, bytes.length); } /** Construct a {@link LuaString} for all the bytes in a byte array, possibly using * the supplied array as the backing store. *

* The LuaString returned will either be a new LuaString containing the byte array, * or be an existing LuaString used already having the same value. *

* The caller must not mutate the contents of the byte array after this call, as * it may be used elsewhere due to recent short string caching. * @param bytes byte buffer * @return {@link LuaString} wrapping the byte buffer */ public static LuaString valueUsing(byte[] bytes) { return valueUsing(bytes, 0, bytes.length); } /** Construct a {@link LuaString} around a byte array without copying the contents. *

* The array is used directly after this is called, so clients must not change contents. *

* @param bytes byte buffer * @param offset offset into the byte buffer * @param length length of the byte buffer * @return {@link LuaString} wrapping the byte buffer */ private LuaString(byte[] bytes, int offset, int length) { this.m_bytes = bytes; this.m_offset = offset; this.m_length = length; this.m_hashcode = hashCode(bytes, offset, length); } public boolean isstring() { return true; } public LuaValue getmetatable() { return s_metatable; } public int type() { return LuaValue.TSTRING; } public String typename() { return "string"; } public String tojstring() { return decodeAsUtf8(m_bytes, m_offset, m_length); } // unary operators public LuaValue neg() { double d = scannumber(); return Double.isNaN(d)? super.neg(): valueOf(-d); } // basic binary arithmetic public LuaValue add( LuaValue rhs ) { double d = scannumber(); return Double.isNaN(d)? arithmt(ADD,rhs): rhs.add(d); } public LuaValue add( double rhs ) { return valueOf( checkarith() + rhs ); } public LuaValue add( int rhs ) { return valueOf( checkarith() + rhs ); } public LuaValue sub( LuaValue rhs ) { double d = scannumber(); return Double.isNaN(d)? arithmt(SUB,rhs): rhs.subFrom(d); } public LuaValue sub( double rhs ) { return valueOf( checkarith() - rhs ); } public LuaValue sub( int rhs ) { return valueOf( checkarith() - rhs ); } public LuaValue subFrom( double lhs ) { return valueOf( lhs - checkarith() ); } public LuaValue mul( LuaValue rhs ) { double d = scannumber(); return Double.isNaN(d)? arithmt(MUL,rhs): rhs.mul(d); } public LuaValue mul( double rhs ) { return valueOf( checkarith() * rhs ); } public LuaValue mul( int rhs ) { return valueOf( checkarith() * rhs ); } public LuaValue pow( LuaValue rhs ) { double d = scannumber(); return Double.isNaN(d)? arithmt(POW,rhs): rhs.powWith(d); } public LuaValue pow( double rhs ) { return MathLib.dpow(checkarith(),rhs); } public LuaValue pow( int rhs ) { return MathLib.dpow(checkarith(),rhs); } public LuaValue powWith( double lhs ) { return MathLib.dpow(lhs, checkarith()); } public LuaValue powWith( int lhs ) { return MathLib.dpow(lhs, checkarith()); } public LuaValue div( LuaValue rhs ) { double d = scannumber(); return Double.isNaN(d)? arithmt(DIV,rhs): rhs.divInto(d); } public LuaValue div( double rhs ) { return LuaDouble.ddiv(checkarith(),rhs); } public LuaValue div( int rhs ) { return LuaDouble.ddiv(checkarith(),rhs); } public LuaValue divInto( double lhs ) { return LuaDouble.ddiv(lhs, checkarith()); } public LuaValue mod( LuaValue rhs ) { double d = scannumber(); return Double.isNaN(d)? arithmt(MOD,rhs): rhs.modFrom(d); } public LuaValue mod( double rhs ) { return LuaDouble.dmod(checkarith(), rhs); } public LuaValue mod( int rhs ) { return LuaDouble.dmod(checkarith(), rhs); } public LuaValue modFrom( double lhs ) { return LuaDouble.dmod(lhs, checkarith()); } // relational operators, these only work with other strings public LuaValue lt( LuaValue rhs ) { return rhs.strcmp(this)>0? LuaValue.TRUE: FALSE; } public boolean lt_b( LuaValue rhs ) { return rhs.strcmp(this)>0; } public boolean lt_b( int rhs ) { typerror("attempt to compare string with number"); return false; } public boolean lt_b( double rhs ) { typerror("attempt to compare string with number"); return false; } public LuaValue lteq( LuaValue rhs ) { return rhs.strcmp(this)>=0? LuaValue.TRUE: FALSE; } public boolean lteq_b( LuaValue rhs ) { return rhs.strcmp(this)>=0; } public boolean lteq_b( int rhs ) { typerror("attempt to compare string with number"); return false; } public boolean lteq_b( double rhs ) { typerror("attempt to compare string with number"); return false; } public LuaValue gt( LuaValue rhs ) { return rhs.strcmp(this)<0? LuaValue.TRUE: FALSE; } public boolean gt_b( LuaValue rhs ) { return rhs.strcmp(this)<0; } public boolean gt_b( int rhs ) { typerror("attempt to compare string with number"); return false; } public boolean gt_b( double rhs ) { typerror("attempt to compare string with number"); return false; } public LuaValue gteq( LuaValue rhs ) { return rhs.strcmp(this)<=0? LuaValue.TRUE: FALSE; } public boolean gteq_b( LuaValue rhs ) { return rhs.strcmp(this)<=0; } public boolean gteq_b( int rhs ) { typerror("attempt to compare string with number"); return false; } public boolean gteq_b( double rhs ) { typerror("attempt to compare string with number"); return false; } // concatenation public LuaValue concat(LuaValue rhs) { return rhs.concatTo(this); } public Buffer concat(Buffer rhs) { return rhs.concatTo(this); } public LuaValue concatTo(LuaNumber lhs) { return concatTo(lhs.strvalue()); } public LuaValue concatTo(LuaString lhs) { byte[] b = new byte[lhs.m_length+this.m_length]; System.arraycopy(lhs.m_bytes, lhs.m_offset, b, 0, lhs.m_length); System.arraycopy(this.m_bytes, this.m_offset, b, lhs.m_length, this.m_length); return valueUsing(b, 0, b.length); } // string comparison public int strcmp(LuaValue lhs) { return -lhs.strcmp(this); } public int strcmp(LuaString rhs) { for ( int i=0, j=0; i= m_length / 2? valueUsing(m_bytes, off, len): valueOf(m_bytes, off, len); } public int hashCode() { return m_hashcode; } /** Compute the hash code of a sequence of bytes within a byte array using * lua's rules for string hashes. For long strings, not all bytes are hashed. * @param bytes byte array containing the bytes. * @param offset offset into the hash for the first byte. * @param length number of bytes starting with offset that are part of the string. * @return hash for the string defined by bytes, offset, and length. */ public static int hashCode(byte[] bytes, int offset, int length) { int h = length; /* seed */ int step = (length>>5)+1; /* if string is too long, don't hash all its chars */ for (int l1=length; l1>=step; l1-=step) /* compute hash */ h = h ^ ((h<<5)+(h>>2)+(((int) bytes[offset+l1-1] ) & 0x0FF )); return h; } // object comparison, used in key comparison public boolean equals( Object o ) { if ( o instanceof LuaString ) { return raweq( (LuaString) o ); } return false; } // equality w/ metatable processing public LuaValue eq( LuaValue val ) { return val.raweq(this)? TRUE: FALSE; } public boolean eq_b( LuaValue val ) { return val.raweq(this); } // equality w/o metatable processing public boolean raweq( LuaValue val ) { return val.raweq(this); } public boolean raweq( LuaString s ) { if ( this == s ) return true; if ( s.m_length != m_length ) return false; if ( s.m_bytes == m_bytes && s.m_offset == m_offset ) return true; if ( s.hashCode() != hashCode() ) return false; for ( int i=0; i=0 ) if ( a[i++]!=b[j++] ) return false; return true; } public void write(DataOutputStream writer, int i, int len) throws IOException { writer.write(m_bytes,m_offset+i,len); } public LuaValue len() { return LuaInteger.valueOf(m_length); } public int length() { return m_length; } public int rawlen() { return m_length; } public int luaByte(int index) { return m_bytes[m_offset + index] & 0x0FF; } public int charAt( int index ) { if ( index < 0 || index >= m_length ) throw new IndexOutOfBoundsException(); return luaByte( index ); } public String checkjstring() { return tojstring(); } public LuaString checkstring() { return this; } /** Convert value to an input stream. * * @return {@link InputStream} whose data matches the bytes in this {@link LuaString} */ public InputStream toInputStream() { return new ByteArrayInputStream(m_bytes, m_offset, m_length); } /** * Copy the bytes of the string into the given byte array. * @param strOffset offset from which to copy * @param bytes destination byte array * @param arrayOffset offset in destination * @param len number of bytes to copy */ public void copyInto( int strOffset, byte[] bytes, int arrayOffset, int len ) { System.arraycopy( m_bytes, m_offset+strOffset, bytes, arrayOffset, len ); } /** Java version of strpbrk - find index of any byte that in an accept string. * @param accept {@link LuaString} containing characters to look for. * @return index of first match in the {@code accept} string, or -1 if not found. */ public int indexOfAny( LuaString accept ) { final int ilimit = m_offset + m_length; final int jlimit = accept.m_offset + accept.m_length; for ( int i = m_offset; i < ilimit; ++i ) { for ( int j = accept.m_offset; j < jlimit; ++j ) { if ( m_bytes[i] == accept.m_bytes[j] ) { return i - m_offset; } } } return -1; } /** * Find the index of a byte starting at a point in this string * @param b the byte to look for * @param start the first index in the string * @return index of first match found, or -1 if not found. */ public int indexOf( byte b, int start ) { for ( int i=start; i < m_length; ++i ) { if ( m_bytes[m_offset+i] == b ) return i; } return -1; } /** * Find the index of a string starting at a point in this string * @param s the string to search for * @param start the first index in the string * @return index of first match found, or -1 if not found. */ public int indexOf( LuaString s, int start ) { final int slen = s.length(); final int limit = m_length - slen; for ( int i=start; i <= limit; ++i ) { if ( equals( m_bytes, m_offset+i, s.m_bytes, s.m_offset, slen ) ) return i; } return -1; } /** * Find the last index of a string in this string * @param s the string to search for * @return index of last match found, or -1 if not found. */ public int lastIndexOf( LuaString s ) { final int slen = s.length(); final int limit = m_length - slen; for ( int i=limit; i >= 0; --i ) { if ( equals( m_bytes, m_offset+i, s.m_bytes, s.m_offset, slen ) ) return i; } return -1; } /** * Convert to Java String interpreting as utf8 characters. * * @param bytes byte array in UTF8 encoding to convert * @param offset starting index in byte array * @param length number of bytes to convert * @return Java String corresponding to the value of bytes interpreted using UTF8 * @see #lengthAsUtf8(char[]) * @see #encodeToUtf8(char[], int, byte[], int) * @see #isValidUtf8() */ public static String decodeAsUtf8(byte[] bytes, int offset, int length) { int i,j,n,b; for ( i=offset,j=offset+length,n=0; i=0||i>=j)? b: (b<-32||i+1>=j)? (((b&0x3f) << 6) | (bytes[i++]&0x3f)): (((b&0xf) << 12) | ((bytes[i++]&0x3f)<<6) | (bytes[i++]&0x3f))); } return new String(chars); } /** * Count the number of bytes required to encode the string as UTF-8. * @param chars Array of unicode characters to be encoded as UTF-8 * @return count of bytes needed to encode using UTF-8 * @see #encodeToUtf8(char[], int, byte[], int) * @see #decodeAsUtf8(byte[], int, int) * @see #isValidUtf8() */ public static int lengthAsUtf8(char[] chars) { int i,b; char c; for ( i=b=chars.length; --i>=0; ) if ( (c=chars[i]) >=0x80 ) b += (c>=0x800)? 2: 1; return b; } /** * Encode the given Java string as UTF-8 bytes, writing the result to bytes * starting at offset. *

* The string should be measured first with lengthAsUtf8 * to make sure the given byte array is large enough. * @param chars Array of unicode characters to be encoded as UTF-8 * @param nchars Number of characters in the array to convert. * @param bytes byte array to hold the result * @param off offset into the byte array to start writing * @return number of bytes converted. * @see #lengthAsUtf8(char[]) * @see #decodeAsUtf8(byte[], int, int) * @see #isValidUtf8() */ public static int encodeToUtf8(char[] chars, int nchars, byte[] bytes, int off) { char c; int j = off; for ( int i=0; i>6) & 0x1f)); bytes[j++] = (byte) (0x80 | ( c & 0x3f)); } else { bytes[j++] = (byte) (0xE0 | ((c>>12) & 0x0f)); bytes[j++] = (byte) (0x80 | ((c>>6) & 0x3f)); bytes[j++] = (byte) (0x80 | ( c & 0x3f)); } } return j - off; } /** Check that a byte sequence is valid UTF-8 * @return true if it is valid UTF-8, otherwise false * @see #lengthAsUtf8(char[]) * @see #encodeToUtf8(char[], int, byte[], int) * @see #decodeAsUtf8(byte[], int, int) */ public boolean isValidUtf8() { for (int i=m_offset,j=m_offset+m_length; i= 0 ) continue; if ( ((c & 0xE0) == 0xC0) && i=j ) return Double.NaN; if ( m_bytes[i]=='0' && i+1 36 ) return Double.NaN; int i=m_offset,j=m_offset+m_length; while ( i=j ) return Double.NaN; return scanlong( base, i, j ); } /** * Scan and convert a long value, or return Double.NaN if not found. * @param base the base to use, such as 10 * @param start the index to start searching from * @param end the first index beyond the search range * @return double value if conversion is valid, * or Double.NaN if not */ private double scanlong( int base, int start, int end ) { long x = 0; boolean neg = (m_bytes[start] == '-'); for ( int i=(neg?start+1:start); i='0'&&m_bytes[i]<='9')? '0': m_bytes[i]>='A'&&m_bytes[i]<='Z'? ('A'-10): ('a'-10)); if ( digit < 0 || digit >= base ) return Double.NaN; x = x * base + digit; if ( x < 0 ) return Double.NaN; // overflow } return neg? -x: x; } /** * Scan and convert a double value, or return Double.NaN if not a double. * @param start the index to start searching from * @param end the first index beyond the search range * @return double value if conversion is valid, * or Double.NaN if not */ private double scandouble(int start, int end) { if ( end>start+64 ) end=start+64; for ( int i=start; i





© 2015 - 2024 Weber Informatics LLC | Privacy Policy