All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.luaj.vm2.LuaString Maven / Gradle / Ivy

There is a newer version: 3.0.1
Show newest version
/*******************************************************************************
* Copyright (c) 2009-2011 Luaj.org. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
* 
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
******************************************************************************/
package org.luaj.vm2;


import java.io.ByteArrayInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.lang.ref.WeakReference;
import java.util.Hashtable;

import org.luaj.vm2.lib.MathLib;
import org.luaj.vm2.lib.StringLib;

/**
 * Subclass of {@link LuaValue} for representing lua strings. 
 * 

* Because lua string values are more nearly sequences of bytes than * sequences of characters or unicode code points, the {@link LuaString} * implementation holds the string value in an internal byte array. *

* {@link LuaString} values are generally not mutable once constructed, * so multiple {@link LuaString} values can chare a single byte array. *

* Currently {@link LuaString}s are pooled via a centrally managed weak table. * To ensure that as many string values as possible take advantage of this, * Constructors are not exposed directly. As with number, booleans, and nil, * instance construction should be via {@link LuaValue#valueOf(byte[])} or similar API. *

* When Java Strings are used to initialize {@link LuaString} data, the UTF8 encoding is assumed. * The functions * {@link LuaString#lengthAsUtf8(char[]), * {@link LuaString#encodeToUtf8(char[], byte[], int)}, and * {@link LuaString#decodeAsUtf8(byte[], int, int) * are used to convert back and forth between UTF8 byte arrays and character arrays. * * @see LuaValue * @see LuaValue#valueOf(String) * @see LuaValue#valueOf(byte[]) */ public class LuaString extends LuaValue { /** The singleton instance representing lua {@code true} */ public static LuaValue s_metatable; /** The bytes for the string */ public final byte[] m_bytes; /** The offset into the byte array, 0 means start at the first byte */ public final int m_offset; /** The number of bytes that comprise this string */ public final int m_length; private static final Hashtable index_java = new Hashtable(); private final static LuaString index_get(Hashtable indextable, Object key) { WeakReference w = (WeakReference) indextable.get(key); return w!=null? (LuaString) w.get(): null; } private final static void index_set(Hashtable indextable, Object key, LuaString value) { indextable.put(key, new WeakReference(value)); } /** * Get a {@link LuaString} instance whose bytes match * the supplied Java String using the UTF8 encoding. * @param string Java String containing characters to encode as UTF8 * @return {@link LuaString} with UTF8 bytes corresponding to the supplied String */ public static LuaString valueOf(String string) { LuaString s = index_get( index_java, string ); if ( s != null ) return s; char[] c = string.toCharArray(); byte[] b = new byte[lengthAsUtf8(c)]; encodeToUtf8(c, b, 0); s = valueOf(b, 0, b.length); index_set( index_java, string, s ); return s; } // TODO: should this be deprecated or made private? /** Construct a {@link LuaString} around a byte array without copying the contents. *

* The array is used directly after this is called, so clients must not change contents. *

* @param bytes byte buffer * @param off offset into the byte buffer * @param len length of the byte buffer * @return {@link LuaString} wrapping the byte buffer */ public static LuaString valueOf(byte[] bytes, int off, int len) { return new LuaString(bytes, off, len); } /** Construct a {@link LuaString} using the supplied characters as byte values. *

* Only th elow-order 8-bits of each character are used, the remainder is ignored. *

* This is most useful for constructing byte sequences that do not conform to UTF8. * @param bytes array of char, whose values are truncated at 8-bits each and put into a byte array. * @return {@link LuaString} wrapping a copy of the byte buffer */ public static LuaString valueOf(char[] bytes) { int n = bytes.length; byte[] b = new byte[n]; for ( int i=0; i * The array is used directly after this is called, so clients must not change contents. *

* @param bytes byte buffer * @return {@link LuaString} wrapping the byte buffer */ public static LuaString valueOf(byte[] bytes) { return valueOf(bytes, 0, bytes.length); } /** Construct a {@link LuaString} around a byte array without copying the contents. *

* The array is used directly after this is called, so clients must not change contents. *

* @param bytes byte buffer * @param offset offset into the byte buffer * @param length length of the byte buffer * @return {@link LuaString} wrapping the byte buffer */ private LuaString(byte[] bytes, int offset, int length) { this.m_bytes = bytes; this.m_offset = offset; this.m_length = length; } public boolean isstring() { return true; } public LuaValue getmetatable() { return s_metatable; } public int type() { return LuaValue.TSTRING; } public String typename() { return "string"; } public String tojstring() { return decodeAsUtf8(m_bytes, m_offset, m_length); } // get is delegated to the string library public LuaValue get(LuaValue key) { return s_metatable!=null? gettable(this,key): StringLib.instance.get(key); } // unary operators public LuaValue neg() { double d = scannumber(); return Double.isNaN(d)? super.neg(): valueOf(-d); } // basic binary arithmetic public LuaValue add( LuaValue rhs ) { double d = scannumber(); return Double.isNaN(d)? arithmt(ADD,rhs): rhs.add(d); } public LuaValue add( double rhs ) { return valueOf( checkarith() + rhs ); } public LuaValue add( int rhs ) { return valueOf( checkarith() + rhs ); } public LuaValue sub( LuaValue rhs ) { double d = scannumber(); return Double.isNaN(d)? arithmt(SUB,rhs): rhs.subFrom(d); } public LuaValue sub( double rhs ) { return valueOf( checkarith() - rhs ); } public LuaValue sub( int rhs ) { return valueOf( checkarith() - rhs ); } public LuaValue subFrom( double lhs ) { return valueOf( lhs - checkarith() ); } public LuaValue mul( LuaValue rhs ) { double d = scannumber(); return Double.isNaN(d)? arithmt(MUL,rhs): rhs.mul(d); } public LuaValue mul( double rhs ) { return valueOf( checkarith() * rhs ); } public LuaValue mul( int rhs ) { return valueOf( checkarith() * rhs ); } public LuaValue pow( LuaValue rhs ) { double d = scannumber(); return Double.isNaN(d)? arithmt(POW,rhs): rhs.powWith(d); } public LuaValue pow( double rhs ) { return MathLib.dpow(checkarith(),rhs); } public LuaValue pow( int rhs ) { return MathLib.dpow(checkarith(),rhs); } public LuaValue powWith( double lhs ) { return MathLib.dpow(lhs, checkarith()); } public LuaValue powWith( int lhs ) { return MathLib.dpow(lhs, checkarith()); } public LuaValue div( LuaValue rhs ) { double d = scannumber(); return Double.isNaN(d)? arithmt(DIV,rhs): rhs.divInto(d); } public LuaValue div( double rhs ) { return LuaDouble.ddiv(checkarith(),rhs); } public LuaValue div( int rhs ) { return LuaDouble.ddiv(checkarith(),rhs); } public LuaValue divInto( double lhs ) { return LuaDouble.ddiv(lhs, checkarith()); } public LuaValue mod( LuaValue rhs ) { double d = scannumber(); return Double.isNaN(d)? arithmt(MOD,rhs): rhs.modFrom(d); } public LuaValue mod( double rhs ) { return LuaDouble.dmod(checkarith(), rhs); } public LuaValue mod( int rhs ) { return LuaDouble.dmod(checkarith(), rhs); } public LuaValue modFrom( double lhs ) { return LuaDouble.dmod(lhs, checkarith()); } // relational operators, these only work with other strings public LuaValue lt( LuaValue rhs ) { return rhs.strcmp(this)>0? LuaValue.TRUE: FALSE; } public boolean lt_b( LuaValue rhs ) { return rhs.strcmp(this)>0; } public boolean lt_b( int rhs ) { typerror("attempt to compare string with number"); return false; } public boolean lt_b( double rhs ) { typerror("attempt to compare string with number"); return false; } public LuaValue lteq( LuaValue rhs ) { return rhs.strcmp(this)>=0? LuaValue.TRUE: FALSE; } public boolean lteq_b( LuaValue rhs ) { return rhs.strcmp(this)>=0; } public boolean lteq_b( int rhs ) { typerror("attempt to compare string with number"); return false; } public boolean lteq_b( double rhs ) { typerror("attempt to compare string with number"); return false; } public LuaValue gt( LuaValue rhs ) { return rhs.strcmp(this)<0? LuaValue.TRUE: FALSE; } public boolean gt_b( LuaValue rhs ) { return rhs.strcmp(this)<0; } public boolean gt_b( int rhs ) { typerror("attempt to compare string with number"); return false; } public boolean gt_b( double rhs ) { typerror("attempt to compare string with number"); return false; } public LuaValue gteq( LuaValue rhs ) { return rhs.strcmp(this)<=0? LuaValue.TRUE: FALSE; } public boolean gteq_b( LuaValue rhs ) { return rhs.strcmp(this)<=0; } public boolean gteq_b( int rhs ) { typerror("attempt to compare string with number"); return false; } public boolean gteq_b( double rhs ) { typerror("attempt to compare string with number"); return false; } // concatenation public LuaValue concat(LuaValue rhs) { return rhs.concatTo(this); } public Buffer concat(Buffer rhs) { return rhs.concatTo(this); } public LuaValue concatTo(LuaNumber lhs) { return concatTo(lhs.strvalue()); } public LuaValue concatTo(LuaString lhs) { byte[] b = new byte[lhs.m_length+this.m_length]; System.arraycopy(lhs.m_bytes, lhs.m_offset, b, 0, lhs.m_length); System.arraycopy(this.m_bytes, this.m_offset, b, lhs.m_length, this.m_length); return new LuaString(b, 0, b.length); } // string comparison public int strcmp(LuaValue lhs) { return -lhs.strcmp(this); } public int strcmp(LuaString rhs) { for ( int i=0, j=0; i>5)+1; /* if string is too long, don't hash all its chars */ for (int l1=m_length; l1>=step; l1-=step) /* compute hash */ h = h ^ ((h<<5)+(h>>2)+(((int) m_bytes[m_offset+l1-1] ) & 0x0FF )); return h; } // object comparison, used in key comparison public boolean equals( Object o ) { if ( o instanceof LuaString ) { return raweq( (LuaString) o ); } return false; } // equality w/ metatable processing public LuaValue eq( LuaValue val ) { return val.raweq(this)? TRUE: FALSE; } public boolean eq_b( LuaValue val ) { return val.raweq(this); } // equality w/o metatable processing public boolean raweq( LuaValue val ) { return val.raweq(this); } public boolean raweq( LuaString s ) { if ( this == s ) return true; if ( s.m_length != m_length ) return false; if ( s.m_bytes == m_bytes && s.m_offset == m_offset ) return true; if ( s.hashCode() != hashCode() ) return false; for ( int i=0; i=0 ) if ( a[i++]!=b[j++] ) return false; return true; } public void write(DataOutputStream writer, int i, int len) throws IOException { writer.write(m_bytes,m_offset+i,len); } public LuaValue len() { return LuaInteger.valueOf(m_length); } public int length() { return m_length; } public int rawlen() { return m_length; } public int luaByte(int index) { return m_bytes[m_offset + index] & 0x0FF; } public int charAt( int index ) { if ( index < 0 || index >= m_length ) throw new IndexOutOfBoundsException(); return luaByte( index ); } public String checkjstring() { return tojstring(); } public LuaString checkstring() { return this; } /** Convert value to an input stream. * * @return {@link InputStream} whose data matches the bytes in this {@link LuaString} */ public InputStream toInputStream() { return new ByteArrayInputStream(m_bytes, m_offset, m_length); } /** * Copy the bytes of the string into the given byte array. * @param strOffset offset from which to copy * @param bytes destination byte array * @param arrayOffset offset in destination * @param len number of bytes to copy */ public void copyInto( int strOffset, byte[] bytes, int arrayOffset, int len ) { System.arraycopy( m_bytes, m_offset+strOffset, bytes, arrayOffset, len ); } /** Java version of strpbrk - find index of any byte that in an accept string. * @param accept {@link LuaString} containing characters to look for. * @return index of first match in the {@code accept} string, or -1 if not found. */ public int indexOfAny( LuaString accept ) { final int ilimit = m_offset + m_length; final int jlimit = accept.m_offset + accept.m_length; for ( int i = m_offset; i < ilimit; ++i ) { for ( int j = accept.m_offset; j < jlimit; ++j ) { if ( m_bytes[i] == accept.m_bytes[j] ) { return i - m_offset; } } } return -1; } /** * Find the index of a byte starting at a point in this string * @param b the byte to look for * @param start the first index in the string * @return index of first match found, or -1 if not found. */ public int indexOf( byte b, int start ) { for ( int i=0, j=m_offset+start; i < m_length; ++i ) { if ( m_bytes[j++] == b ) return i; } return -1; } /** * Find the index of a string starting at a point in this string * @param s the string to search for * @param start the first index in the string * @return index of first match found, or -1 if not found. */ public int indexOf( LuaString s, int start ) { final int slen = s.length(); final int limit = m_offset + m_length - slen; for ( int i = m_offset + start; i <= limit; ++i ) { if ( equals( m_bytes, i, s.m_bytes, s.m_offset, slen ) ) { return i; } } return -1; } /** * Find the last index of a string in this string * @param s the string to search for * @return index of last match found, or -1 if not found. */ public int lastIndexOf( LuaString s ) { final int slen = s.length(); final int limit = m_offset + m_length - slen; for ( int i = limit; i >= m_offset; --i ) { if ( equals( m_bytes, i, s.m_bytes, s.m_offset, slen ) ) { return i; } } return -1; } /** * Convert to Java String interpreting as utf8 characters. * * @param bytes byte array in UTF8 encoding to convert * @param offset starting index in byte array * @param length number of bytes to convert * @return Java String corresponding to the value of bytes interpreted using UTF8 * @see #lengthAsUtf8(char[]) * @see #encodeToUtf8(char[], byte[], int) * @see #isValidUtf8() */ public static String decodeAsUtf8(byte[] bytes, int offset, int length) { int i,j,n,b; for ( i=offset,j=offset+length,n=0; i=0||i>=j)? b: (b<-32||i+1>=j)? (((b&0x3f) << 6) | (bytes[i++]&0x3f)): (((b&0xf) << 12) | ((bytes[i++]&0x3f)<<6) | (bytes[i++]&0x3f))); } return new String(chars); } /** * Count the number of bytes required to encode the string as UTF-8. * @param chars Array of unicode characters to be encoded as UTF-8 * @return count of bytes needed to encode using UTF-8 * @see #encodeToUtf8(char[], byte[], int) * @see #decodeAsUtf8(byte[], int, int) * @see #isValidUtf8() */ public static int lengthAsUtf8(char[] chars) { int i,b; char c; for ( i=b=chars.length; --i>=0; ) if ( (c=chars[i]) >=0x80 ) b += (c>=0x800)? 2: 1; return b; } /** * Encode the given Java string as UTF-8 bytes, writing the result to bytes * starting at offset. *

* The string should be measured first with lengthAsUtf8 * to make sure the given byte array is large enough. * @param chars Array of unicode characters to be encoded as UTF-8 * @param bytes byte array to hold the result * @param off offset into the byte array to start writing * @see #lengthAsUtf8(char[]) * @see #decodeAsUtf8(byte[], int, int) * @see #isValidUtf8() */ public static void encodeToUtf8(char[] chars, byte[] bytes, int off) { final int n = chars.length; char c; for ( int i=0, j=off; i>6) & 0x1f)); bytes[j++] = (byte) (0x80 | ( c & 0x3f)); } else { bytes[j++] = (byte) (0xE0 | ((c>>12) & 0x0f)); bytes[j++] = (byte) (0x80 | ((c>>6) & 0x3f)); bytes[j++] = (byte) (0x80 | ( c & 0x3f)); } } } /** Check that a byte sequence is valid UTF-8 * @return true if it is valid UTF-8, otherwise false * @see #lengthAsUtf8(char[]) * @see #encodeToUtf8(char[], byte[], int) * @see #decodeAsUtf8(byte[], int, int) */ public boolean isValidUtf8() { int i,j,n,b,e=0; for ( i=m_offset,j=m_offset+m_length,n=0; i= 0 ) continue; if ( ((c & 0xE0) == 0xC0) && i=j ) return Double.NaN; if ( m_bytes[i]=='0' && i+1 36 ) return Double.NaN; int i=m_offset,j=m_offset+m_length; while ( i=j ) return Double.NaN; return scanlong( base, i, j ); } /** * Scan and convert a long value, or return Double.NaN if not found. * @param base the base to use, such as 10 * @param start the index to start searching from * @param end the first index beyond the search range * @return double value if conversion is valid, * or Double.NaN if not */ private double scanlong( int base, int start, int end ) { long x = 0; boolean neg = (m_bytes[start] == '-'); for ( int i=(neg?start+1:start); i='0'&&m_bytes[i]<='9')? '0': m_bytes[i]>='A'&&m_bytes[i]<='Z'? ('A'-10): ('a'-10)); if ( digit < 0 || digit >= base ) return Double.NaN; x = x * base + digit; } return neg? -x: x; } /** * Scan and convert a double value, or return Double.NaN if not a double. * @param start the index to start searching from * @param end the first index beyond the search range * @return double value if conversion is valid, * or Double.NaN if not */ private double scandouble(int start, int end) { if ( end>start+64 ) end=start+64; for ( int i=start; i





© 2015 - 2024 Weber Informatics LLC | Privacy Policy