
org.jruby.util.io.EncodingUtils Maven / Gradle / Ivy
package org.jruby.util.io;
import org.jcodings.Encoding;
import org.jcodings.EncodingDB;
import org.jcodings.Ptr;
import org.jcodings.ascii.AsciiTables;
import org.jcodings.exception.EncodingError;
import org.jcodings.exception.EncodingException;
import org.jcodings.exception.ErrorCodes;
import org.jcodings.specific.ASCIIEncoding;
import org.jcodings.specific.USASCIIEncoding;
import org.jcodings.specific.UTF16BEEncoding;
import org.jcodings.specific.UTF16LEEncoding;
import org.jcodings.specific.UTF32BEEncoding;
import org.jcodings.specific.UTF32LEEncoding;
import org.jcodings.specific.UTF8Encoding;
import org.jcodings.transcode.EConv;
import org.jcodings.transcode.EConvFlags;
import org.jcodings.transcode.EConvResult;
import org.jcodings.transcode.Transcoder;
import org.jcodings.transcode.TranscoderDB;
import org.jcodings.transcode.Transcoding;
import org.jcodings.unicode.UnicodeEncoding;
import org.jruby.Ruby;
import org.jruby.RubyArray;
import org.jruby.RubyBasicObject;
import org.jruby.RubyConverter;
import org.jruby.RubyEncoding;
import org.jruby.RubyFixnum;
import org.jruby.RubyHash;
import org.jruby.RubyIO;
import org.jruby.RubyMethod;
import org.jruby.RubyNumeric;
import org.jruby.RubyProc;
import org.jruby.RubyString;
import org.jruby.RubySymbol;
import org.jruby.exceptions.RaiseException;
import org.jruby.platform.Platform;
import org.jruby.runtime.Block;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;
import org.jruby.runtime.encoding.EncodingCapable;
import org.jruby.runtime.encoding.EncodingService;
import org.jruby.util.ByteList;
import org.jruby.util.ByteListHolder;
import org.jruby.util.CodeRangeSupport;
import org.jruby.util.CodeRangeable;
import org.jruby.util.StringSupport;
import org.jruby.util.TypeConverter;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import static org.jruby.RubyString.*;
import static org.jruby.util.StringSupport.CR_UNKNOWN;
import static org.jruby.util.StringSupport.searchNonAscii;
public class EncodingUtils {
public static final int ECONV_DEFAULT_NEWLINE_DECORATOR = Platform.IS_WINDOWS ? EConvFlags.CRLF_NEWLINE_DECORATOR : 0;
public static final int DEFAULT_TEXTMODE = Platform.IS_WINDOWS ? OpenFile.TEXTMODE : 0;
public static final int TEXTMODE_NEWLINE_DECORATOR_ON_WRITE = Platform.IS_WINDOWS ? EConvFlags.CRLF_NEWLINE_DECORATOR : 0;
private static final byte[] NULL_BYTE_ARRAY = ByteList.NULL_ARRAY;
// rb_to_encoding
public static Encoding rbToEncoding(ThreadContext context, IRubyObject enc) {
if (enc instanceof RubyEncoding) return ((RubyEncoding) enc).getEncoding();
return toEncoding(context, enc);
}
// to_encoding
public static Encoding toEncoding(ThreadContext context, IRubyObject enc) {
RubyString encStr = enc.convertToString();
if (!encStr.getEncoding().isAsciiCompatible()) {
throw context.runtime.newArgumentError("invalid name encoding (non ASCII)");
}
Encoding idx = context.runtime.getEncodingService().getEncodingFromObject(encStr);
// check for missing encoding is in getEncodingFromObject
return idx;
}
public static IRubyObject[] openArgsToArgs(Ruby runtime, IRubyObject firstElement, RubyHash options) {
IRubyObject value = hashARef(runtime, options, "open_args");
if (value.isNil()) return new IRubyObject[] { firstElement, options };
RubyArray array = value.convertToArray();
IRubyObject[] openArgs = new IRubyObject[array.size()];
value.convertToArray().toArray(openArgs);
IRubyObject[] args = new IRubyObject[openArgs.length + 1];
args[0] = firstElement;
System.arraycopy(openArgs, 0, args, 1, openArgs.length);
return args;
}
// FIXME: This could be smarter amount determining whether optionsArg is a RubyHash and !null (invariant)
// mri: extract_binmode
public static void extractBinmode(Ruby runtime, IRubyObject optionsArg, int[] fmode_p) {
int fmodeMask = 0;
IRubyObject v = hashARef(runtime, optionsArg, "textmode");
if (!v.isNil() && v.isTrue()) fmodeMask |= OpenFile.TEXTMODE;
v = hashARef(runtime, optionsArg, "binmode");
if (!v.isNil() && v.isTrue()) fmodeMask |= OpenFile.BINMODE;
if ((fmodeMask & OpenFile.BINMODE) != 0 && (fmodeMask & OpenFile.TEXTMODE) != 0) {
throw runtime.newArgumentError("both textmode and binmode specified");
}
fmode_p[0] |= fmodeMask;
}
private static IRubyObject hashARef(Ruby runtime, IRubyObject hash, String symbol) {
if (hash == null || !(hash instanceof RubyHash)) return runtime.getNil();
IRubyObject value = ((RubyHash) hash).fastARef(runtime.newSymbol(symbol));
return value == null ? runtime.getNil() : value;
}
public static Encoding ascii8bitEncoding(Ruby runtime) {
return runtime.getEncodingService().getAscii8bitEncoding();
}
static final int VMODE = 0;
static final int PERM = 1;
public static Object vmodeVperm(IRubyObject vmode, IRubyObject vperm) {
return new IRubyObject[] {vmode, vperm};
}
public static IRubyObject vmode(Object vmodeVperm) {
return ((IRubyObject[])vmodeVperm)[VMODE];
}
public static void vmode(Object vmodeVperm, IRubyObject vmode) {
((IRubyObject[])vmodeVperm)[VMODE] = vmode;
}
public static IRubyObject vperm(Object vmodeVperm) {
return ((IRubyObject[])vmodeVperm)[PERM];
}
public static void vperm(Object vmodeVperm, IRubyObject vperm) {
((IRubyObject[])vmodeVperm)[PERM] = vperm;
}
public static final int MODE_BTMODE(int fmode, int a, int b, int c) {
if ((fmode & OpenFile.BINMODE) != 0) {
return b;
} else if ((fmode & OpenFile.TEXTMODE) != 0) {
return c;
}
return a;
}
public static int SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(Encoding enc2, int ecflags) {
if (enc2 != null && (ecflags & ECONV_DEFAULT_NEWLINE_DECORATOR) != 0) {
return ecflags | EConvFlags.UNIVERSAL_NEWLINE_DECORATOR;
}
return ecflags;
}
/*
* This is a wacky method which is a very near port from MRI. pm passes in
* a permissions value and a mode value. As a side-effect mode will get set
* if this found any 'mode'-like stuff so the caller can know whether mode
* has been handled yet. The same story for permission value. If it has
* not been set then we know it needs to default permissions from the caller.
*/
// mri: rb_io_extract_modeenc
public static void extractModeEncoding(ThreadContext context,
IOEncodable ioEncodable, Object vmodeAndVperm_p, IRubyObject options, int[] oflags_p, int[] fmode_p) {
Ruby runtime = context.runtime;
int ecflags;
IRubyObject[] ecopts_p = {context.nil};
boolean hasEnc = false, hasVmode = false;
IRubyObject intmode;
// Give default encodings
ioExtIntToEncs(context, ioEncodable, null, null, 0);
vmode_handle: do {
if (vmode(vmodeAndVperm_p) == null || vmode(vmodeAndVperm_p).isNil()) {
fmode_p[0] = OpenFile.READABLE;
oflags_p[0] = ModeFlags.RDONLY;
} else {
intmode = TypeConverter.checkIntegerType(context, vmode(vmodeAndVperm_p));
if (!intmode.isNil()) {
vmode(vmodeAndVperm_p, intmode);
oflags_p[0] = RubyNumeric.num2int(intmode);
fmode_p[0] = ModeFlags.getOpenFileFlagsFor(oflags_p[0]);
} else {
String p = vmode(vmodeAndVperm_p).convertToString().asJavaString();
fmode_p[0] = OpenFile.ioModestrFmode(runtime, p);
oflags_p[0] = OpenFile.ioFmodeOflags(fmode_p[0]);
int colonSplit = p.indexOf(":");
if (colonSplit != -1) {
hasEnc = true;
parseModeEncoding(context, ioEncodable, p.substring(colonSplit + 1), fmode_p);
} else {
Encoding e = (fmode_p[0] & OpenFile.BINMODE) != 0 ? ascii8bitEncoding(runtime) : null;
ioExtIntToEncs(context, ioEncodable, e, null, fmode_p[0]);
}
}
}
if (options == null || options.isNil()) {
ecflags = (fmode_p[0] & OpenFile.READABLE) != 0
? MODE_BTMODE(fmode_p[0], ECONV_DEFAULT_NEWLINE_DECORATOR, 0, EConvFlags.UNIVERSAL_NEWLINE_DECORATOR)
: 0;
if (TEXTMODE_NEWLINE_DECORATOR_ON_WRITE != 0) {
ecflags |= (fmode_p[0] & OpenFile.WRITABLE) != 0
? MODE_BTMODE(fmode_p[0], TEXTMODE_NEWLINE_DECORATOR_ON_WRITE, 0, TEXTMODE_NEWLINE_DECORATOR_ON_WRITE)
: 0;
}
ecflags = SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(ioEncodable.getEnc2(), ecflags);
ecopts_p[0] = context.nil;
} else {
if (!hasVmode) {
IRubyObject v = ((RubyHash) options).op_aref(context, runtime.newSymbol("mode"));
if (!v.isNil()) {
if (vmode(vmodeAndVperm_p) != null && !vmode(vmodeAndVperm_p).isNil()) {
throw runtime.newArgumentError("mode specified twice");
}
hasVmode = true;
vmode(vmodeAndVperm_p, v);
continue vmode_handle;
}
}
IRubyObject v = ((RubyHash) options).op_aref(context, runtime.newSymbol("flags"));
if (!v.isNil()) {
v = v.convertToInteger();
oflags_p[0] |= RubyNumeric.num2int(v);
vmode(vmodeAndVperm_p, runtime.newFixnum(oflags_p[0]));
fmode_p[0] = ModeFlags.getOpenFileFlagsFor(oflags_p[0]);
}
extractBinmode(runtime, options, fmode_p);
// Differs from MRI but we open with ModeFlags
if ((fmode_p[0] & OpenFile.BINMODE) != 0) {
oflags_p[0] |= ModeFlags.BINARY;
if (!hasEnc) {
ioExtIntToEncs(context, ioEncodable, ascii8bitEncoding(runtime), null, fmode_p[0]);
}
} else if (DEFAULT_TEXTMODE != 0 && (vmode(vmodeAndVperm_p) == null || vmode(vmodeAndVperm_p).isNil())) {
fmode_p[0] |= DEFAULT_TEXTMODE;
}
v = hashARef(runtime, options, "perm");
if (!v.isNil()) {
if (vperm(vmodeAndVperm_p) != null) {
if (!vperm(vmodeAndVperm_p).isNil()) throw runtime.newArgumentError("perm specified twice");
vperm(vmodeAndVperm_p, v);
}
}
IRubyObject extraFlags = hashARef(runtime, options, "flags");
if (!extraFlags.isNil()) {
oflags_p[0] |= extraFlags.convertToInteger().getIntValue();
}
ecflags = (fmode_p[0] & OpenFile.READABLE) != 0 ?
MODE_BTMODE(fmode_p[0], ECONV_DEFAULT_NEWLINE_DECORATOR, 0, EConvFlags.UNIVERSAL_NEWLINE_DECORATOR) : 0;
if (TEXTMODE_NEWLINE_DECORATOR_ON_WRITE != -1) {
ecflags |= (fmode_p[0] & OpenFile.WRITABLE) != 0 ?
MODE_BTMODE(fmode_p[0], TEXTMODE_NEWLINE_DECORATOR_ON_WRITE, 0, TEXTMODE_NEWLINE_DECORATOR_ON_WRITE) : 0;
}
if (ioExtractEncodingOption(context, ioEncodable, options, fmode_p)) {
if (hasEnc) throw runtime.newArgumentError("encoding specified twice");
}
ecflags = SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(ioEncodable.getEnc2(), ecflags);
ecflags = econvPrepareOptions(context, options, ecopts_p, ecflags);
}
EncodingUtils.validateEncodingBinmode(context, fmode_p, ecflags, ioEncodable);
ioEncodable.setEcflags(ecflags);
ioEncodable.setEcopts(ecopts_p[0]);
return;
} while (true);
}
// mri: rb_io_extract_encoding_option
public static boolean ioExtractEncodingOption(ThreadContext context, IOEncodable ioEncodable, IRubyObject options, int[] fmode_p) {
Ruby runtime = context.runtime;
IRubyObject encoding = context.nil;
IRubyObject extenc = null;
IRubyObject intenc = null;
IRubyObject tmp;
boolean extracted = false;
Encoding extencoding = null;
Encoding intencoding = null;
if (options != null && !options.isNil()) {
RubyHash opts = (RubyHash) options;
IRubyObject encodingOpt = opts.op_aref(context, runtime.newSymbol("encoding"));
if (!encodingOpt.isNil()) encoding = encodingOpt;
IRubyObject externalOpt = opts.op_aref(context, runtime.newSymbol("external_encoding"));
if (!externalOpt.isNil()) extenc = externalOpt;
IRubyObject internalOpt = opts.op_aref(context, runtime.newSymbol("internal_encoding"));
if (!internalOpt.isNil()) intenc = internalOpt;
}
if ((extenc != null || intenc != null) && !encoding.isNil()) {
if (runtime.isVerbose()) {
runtime.getWarnings().warn("Ignoring encoding parameter '" + encoding + "': " +
(extenc == null ? "internal" : "external") + "_encoding is used");
}
encoding = context.nil;
}
if (extenc != null && !extenc.isNil()) {
extencoding = rbToEncoding(context, extenc);
}
if (intenc != null) {
if (intenc.isNil()) {
intencoding = null;
} else if (!(tmp = intenc.checkStringType()).isNil()) {
String p = tmp.toString();
if (p.equals("-")) {
intencoding = null;
} else {
intencoding = rbToEncoding(context, intenc);
}
} else {
intencoding = rbToEncoding(context, intenc);
}
if (extencoding == intencoding) {
intencoding = null;
}
}
if (!encoding.isNil()) {
extracted = true;
if (!(tmp = encoding.checkStringType()).isNil()) {
parseModeEncoding(context, ioEncodable, tmp.asJavaString(), fmode_p);
} else {
ioExtIntToEncs(context, ioEncodable, rbToEncoding(context, encoding), null, 0);
}
} else if (extenc != null || intenc != null) {
extracted = true;
ioExtIntToEncs(context, ioEncodable, extencoding, intencoding, 0);
}
return extracted;
}
// MRI: rb_external_str_new_with_enc
public static RubyString newExternalStringWithEncoding(Ruby runtime, String string, Encoding encoding) {
if (string == null) return newEmptyString(runtime, encoding);
/* ASCII-8BIT case, no conversion */
if ((encoding == ASCIIEncoding.INSTANCE) ||
(encoding == USASCIIEncoding.INSTANCE && searchNonAscii(string) != -1)) {
return newBinaryString(runtime, string);
}
/* no default_internal or same encoding, no conversion */
Encoding internalEncoding = runtime.getDefaultInternalEncoding();
if (internalEncoding == null || encoding == internalEncoding) return newString(runtime, string, encoding);
/* ASCII compatible, and ASCII only string, no conversion in
* default_internal */
if ((encoding == ASCIIEncoding.INSTANCE) ||
(encoding == USASCIIEncoding.INSTANCE) ||
(encoding.isAsciiCompatible() && searchNonAscii(string) == -1)) {
return newString(runtime, string, internalEncoding);
}
/* convert from the given encoding to default_internal */
RubyString convertedString = newEmptyString(runtime, internalEncoding);
/* when the conversion failed for some reason, just ignore the
* default_internal and result in the given encoding as-is. */
try {
convertedString.cat19(encodeBytelist(string, encoding), CR_UNKNOWN);
} catch (org.jruby.exceptions.EncodingError.CompatibilityError ce) {
return newString(runtime, string, encoding);
}
return convertedString;
}
// MRI: rb_external_str_new_with_enc
public static RubyString newExternalStringWithEncoding(Ruby runtime, ByteList bytelist, Encoding encoding) {
if (bytelist == null) return newEmptyString(runtime, encoding);
/* ASCII-8BIT case, no conversion */
if ((encoding == ASCIIEncoding.INSTANCE) ||
(encoding == USASCIIEncoding.INSTANCE && searchNonAscii(bytelist) != -1)) {
return newBinaryString(runtime, bytelist);
}
/* no default_internal or same encoding, no conversion */
Encoding internalEncoding = runtime.getDefaultInternalEncoding();
if (internalEncoding == null || encoding == internalEncoding) return newString(runtime, bytelist, encoding);
/* ASCII compatible, and ASCII only string, no conversion in
* default_internal */
if ((encoding == ASCIIEncoding.INSTANCE) ||
(encoding == USASCIIEncoding.INSTANCE) ||
(encoding.isAsciiCompatible() && searchNonAscii(bytelist) == -1)) {
return newString(runtime, bytelist, internalEncoding);
}
/* convert from the given encoding to default_internal */
RubyString convertedString = newEmptyString(runtime, internalEncoding);
/* when the conversion failed for some reason, just ignore the
* default_internal and result in the given encoding as-is. */
try {
convertedString.cat19(encodeBytelist(bytelist, encoding), CR_UNKNOWN);
} catch (org.jruby.exceptions.EncodingError.CompatibilityError ce) {
return newString(runtime, bytelist, encoding);
}
return convertedString;
}
// mri: rb_io_ext_int_to_encs
public static void ioExtIntToEncs(ThreadContext context, IOEncodable encodable, Encoding external, Encoding internal, int fmode) {
boolean defaultExternal = false;
if (external == null) {
external = context.runtime.getDefaultExternalEncoding();
defaultExternal = true;
}
if (external == ascii8bitEncoding(context.runtime)) {
internal = null;
} else if (internal == null) {
internal = context.runtime.getDefaultInternalEncoding();
}
if (internal == null ||
((fmode & OpenFile.SETENC_BY_BOM) == 0 && internal == external)) {
encodable.setEnc((defaultExternal && internal != external) ? null : external);
encodable.setEnc2(null);
} else {
encodable.setEnc(internal);
encodable.setEnc2(external);
}
}
// mri: parse_mode_enc
public static void parseModeEncoding(ThreadContext context, IOEncodable ioEncodable, String option, int[] fmode_p) {
final Ruby runtime = context.runtime;
EncodingService service = runtime.getEncodingService();
Encoding intEnc, extEnc;
if (fmode_p == null) fmode_p = new int[]{0};
List encs = StringSupport.split(option, ':', 2);
String estr = encs.size() == 2 ? encs.get(0) : option;
if (estr.toLowerCase().startsWith("bom|")) {
estr = estr.substring(4);
if (estr.toLowerCase().startsWith("utf-")) {
fmode_p[0] |= OpenFile.SETENC_BY_BOM;
ioEncodable.setBOM(true);
} else {
runtime.getWarnings().warn("BOM with non-UTF encoding " + estr + " is nonsense");
fmode_p[0] &= ~OpenFile.SETENC_BY_BOM;
}
}
Encoding idx = service.findEncodingNoError(new ByteList(estr.getBytes(), false));
if (idx == null) {
runtime.getWarnings().warn("Unsupported encoding " + estr + " ignored");
extEnc = null;
} else {
extEnc = idx;
}
intEnc = null;
if (encs.size() == 2) {
String istr = encs.get(1);
if (istr.equals("-")) {
intEnc = null;
} else {
idx = service.getEncodingFromString(istr);
if (idx == null) {
runtime.getWarnings().warn("ignoring internal encoding " + idx + ": it is identical to external encoding " + idx);
intEnc = null;
} else {
intEnc = idx;
}
}
}
ioExtIntToEncs(context, ioEncodable, extEnc, intEnc, fmode_p[0]);
}
// rb_econv_str_convert
public static ByteList econvStrConvert(ThreadContext context, EConv ec, ByteList src, int flags) {
return econvSubstrAppend(context, ec, src, null, flags);
}
// rb_econv_str_convert with source bytes
public static ByteList econvByteConvert(ThreadContext context, EConv ec, byte[] bytes, int start, int length, int flags) {
return econvAppend(context, ec, bytes, start, length, new ByteList(length, ec.destinationEncoding), flags);
}
// rb_econv_substr_append
public static ByteList econvSubstrAppend(ThreadContext context, EConv ec, ByteList src, ByteList dst, int flags) {
return econvAppend(context, ec, src, dst, flags);
}
// rb_econv_append
public static ByteList econvAppend(ThreadContext context, EConv ec, ByteList sByteList, ByteList dst, int flags) {
int len = sByteList.getRealSize();
if (dst == null) {
dst = new ByteList(len, ec.destinationEncoding);
}
return econvAppend(context, ec, sByteList.unsafeBytes(), sByteList.begin(), len, dst, flags);
}
// rb_econv_append with source bytes
public static ByteList econvAppend(ThreadContext context, EConv ec, byte[] bytes, int start, int length, ByteList dst, int flags) {
Ptr sp = new Ptr(0);
int se;
int ds;
int ss = start;
byte[] dBytes;
Ptr dp = new Ptr(0);
int de;
EConvResult res;
int maxOutput;
if (ec.lastTranscoding != null) {
maxOutput = ec.lastTranscoding.transcoder.maxOutput;
} else {
maxOutput = 1;
}
do {
int dlen = dst.getRealSize();
if ((dst.getUnsafeBytes().length - dst.getBegin()) - dlen < length + maxOutput) {
long newCapa = dlen + length + maxOutput;
if (Integer.MAX_VALUE < newCapa) {
throw context.runtime.newArgumentError("too long string");
}
dst.ensure((int)newCapa);
dst.setRealSize(dlen);
}
sp.p = ss;
se = sp.p + length;
dBytes = dst.getUnsafeBytes();
ds = dst.getBegin();
de = dBytes.length;
dp.p = ds += dlen;
res = ec.convert(bytes, sp, se, dBytes, dp, de, flags);
length -= sp.p - ss;
ss = sp.p;
dst.setRealSize(dlen + (dp.p - ds));
EncodingUtils.econvCheckError(context, ec);
} while (res == EConvResult.DestinationBufferFull);
return dst;
}
// rb_econv_check_error
public static void econvCheckError(ThreadContext context, EConv ec) {
RaiseException re = makeEconvException(context.runtime, ec);
if (re != null) throw re;
}
// rb_econv_prepare_opts
public static int econvPrepareOpts(ThreadContext context, IRubyObject opthash, IRubyObject[] opts) {
return econvPrepareOptions(context, opthash, opts, 0);
}
// rb_econv_prepare_options
public static int econvPrepareOptions(ThreadContext context, IRubyObject opthash, IRubyObject[] opts, int ecflags) {
IRubyObject newhash = context.nil;
IRubyObject v;
if (opthash.isNil()) {
opts[0] = context.nil;
return ecflags;
}
RubyHash optHash2 = (RubyHash)opthash;
ecflags = econvOpts(context, opthash, ecflags);
v = optHash2.op_aref(context, context.runtime.newSymbol("replace"));
if (!v.isNil()) {
RubyString v_str = v.convertToString();
if (v_str.scanForCodeRange() == StringSupport.CR_BROKEN) {
throw context.runtime.newArgumentError("replacement string is broken: " + v_str);
}
v = v_str.freeze(context);
newhash = RubyHash.newHash(context.runtime);
((RubyHash)newhash).op_aset(context, context.runtime.newSymbol("replace"), v);
}
v = optHash2.op_aref(context, context.runtime.newSymbol("fallback"));
if (!v.isNil()) {
IRubyObject h = TypeConverter.checkHashType(context.runtime, v);
boolean condition;
if (h.isNil()) {
condition = (v instanceof RubyProc || v instanceof RubyMethod || v.respondsTo("[]"));
} else {
v = h;
condition = true;
}
if (condition) {
if (newhash.isNil()) {
newhash = RubyHash.newHash(context.runtime);
}
((RubyHash)newhash).op_aset(context, context.runtime.newSymbol("fallback"), v);
}
}
if (!newhash.isNil()) {
newhash.setFrozen(true);
}
opts[0] = newhash;
return ecflags;
}
// econv_opts
public static int econvOpts(ThreadContext context, IRubyObject opt, int ecflags) {
Ruby runtime = context.runtime;
IRubyObject v;
v = ((RubyHash)opt).op_aref(context, runtime.newSymbol("invalid"));
if (v.isNil()) {
} else if (v.toString().equals("replace")) {
ecflags |= EConvFlags.INVALID_REPLACE;
} else {
throw runtime.newArgumentError("unknown value for invalid character option");
}
v = ((RubyHash)opt).op_aref(context, runtime.newSymbol("undef"));
if (v.isNil()) {
} else if (v.toString().equals("replace")) {
ecflags |= EConvFlags.UNDEF_REPLACE;
} else {
throw runtime.newArgumentError("unknown value for undefined character option");
}
v = ((RubyHash)opt).op_aref(context, runtime.newSymbol("replace"));
if (!v.isNil() && (ecflags & EConvFlags.INVALID_REPLACE) != 0) {
ecflags |= EConvFlags.UNDEF_REPLACE;
}
v = ((RubyHash)opt).op_aref(context, runtime.newSymbol("xml"));
if (!v.isNil()) {
if (v.toString().equals("text")) {
ecflags |= EConvFlags.XML_TEXT_DECORATOR | EConvFlags.UNDEF_HEX_CHARREF;
} else if (v.toString().equals("attr")) {
ecflags |= EConvFlags.XML_ATTR_CONTENT_DECORATOR | EConvFlags.XML_ATTR_QUOTE_DECORATOR | EConvFlags.UNDEF_HEX_CHARREF;
} else {
throw runtime.newArgumentError("unexpected value for xml option: " + v);
}
}
v = ((RubyHash)opt).op_aref(context, runtime.newSymbol("newline"));
if (!v.isNil()) {
ecflags &= ~EConvFlags.NEWLINE_DECORATOR_MASK;
if (v.toString().equals("universal")) {
ecflags |= EConvFlags.UNIVERSAL_NEWLINE_DECORATOR;
} else if (v.toString().equals("crlf")) {
ecflags |= EConvFlags.CRLF_NEWLINE_DECORATOR;
} else if (v.toString().equals("cr")) {
ecflags |= EConvFlags.CR_NEWLINE_DECORATOR;
} else if (v.toString().equals("lf")) {
// ecflags |= ECONV_LF_NEWLINE_DECORATOR;
} else if (v instanceof RubySymbol) {
throw runtime.newArgumentError("unexpected value for newline option: " + ((RubySymbol) v).to_s(context).toString());
} else {
throw runtime.newArgumentError("unexpected value for newline option");
}
}
int setflags = 0;
boolean newlineflag = false;
v = ((RubyHash)opt).op_aref(context, runtime.newSymbol("universal_newline"));
if (v.isTrue()) {
setflags |= EConvFlags.UNIVERSAL_NEWLINE_DECORATOR;
}
newlineflag |= !v.isNil();
v = ((RubyHash)opt).op_aref(context, runtime.newSymbol("crlf_newline"));
if (v.isTrue()) {
setflags |= EConvFlags.CRLF_NEWLINE_DECORATOR;
}
newlineflag |= !v.isNil();
v = ((RubyHash)opt).op_aref(context, runtime.newSymbol("cr_newline"));
if (v.isTrue()) {
setflags |= EConvFlags.CR_NEWLINE_DECORATOR;
}
newlineflag |= !v.isNil();
if (newlineflag) {
ecflags &= ~EConvFlags.NEWLINE_DECORATOR_MASK;
ecflags |= setflags;
}
return ecflags;
}
// rb_econv_open_opts
public static EConv econvOpenOpts(ThreadContext context, byte[] sourceEncoding, byte[] destinationEncoding, int ecflags, IRubyObject opthash) {
Ruby runtime = context.runtime;
IRubyObject replacement;
if (opthash == null || opthash.isNil()) {
replacement = context.nil;
} else {
if (!(opthash instanceof RubyHash) || !opthash.isFrozen()) {
throw runtime.newRuntimeError("bug: EncodingUtils.econvOpenOpts called with invalid opthash");
}
replacement = ((RubyHash)opthash).op_aref(context, runtime.newSymbol("replace"));
}
EConv ec = TranscoderDB.open(sourceEncoding, destinationEncoding, ecflags);
if (ec == null) return ec;
if (!replacement.isNil()) {
int ret;
RubyString replStr = (RubyString)replacement;
ByteList replBL = replStr.getByteList();
ec.makeReplacement();
ret = ec.setReplacement(replBL.getUnsafeBytes(), replBL.getBegin(), replBL.getRealSize(), replBL.getEncoding().getName());
if (ret == -1) {
ec.close();
return null;
}
}
return ec;
}
// rb_econv_open_exc
public static RaiseException econvOpenExc(ThreadContext context, byte[] sourceEncoding, byte[] destinationEncoding, int ecflags) {
String message = econvDescription(context, sourceEncoding, destinationEncoding, ecflags, "code converter not found (") + ")";
return context.runtime.newConverterNotFoundError(message);
}
// rb_econv_description
public static String econvDescription(ThreadContext context, byte[] sourceEncoding, byte[] destinationEncoding, int ecflags, String message) {
// limited port for now
return message + new String(sourceEncoding) + " to " + new String(destinationEncoding);
}
// rb_econv_asciicompat_encoding
// Missing proper logic from transcoding subsystem
public static Encoding econvAsciicompatEncoding(Encoding enc) {
return RubyConverter.NONASCII_TO_ASCII.get(enc);
}
// rb_enc_asciicompat
public static boolean encAsciicompat(Encoding enc) {
return encMbminlen(enc) == 1 && !encDummy(enc);
}
// rb_enc_ascget
public static int encAscget(byte[] pBytes, int p, int e, int[] len, Encoding enc) {
int c;
int l;
if (e <= p) {
return -1;
}
if (encAsciicompat(enc)) {
c = pBytes[p] & 0xFF;
if (!Encoding.isAscii((byte)c)) {
return -1;
}
if (len != null) len[0] = 1;
return c;
}
l = StringSupport.preciseLength(enc, pBytes, p, e);
if (!StringSupport.MBCLEN_CHARFOUND_P(l)) {
return -1;
}
c = enc.mbcToCode(pBytes, p, e);
if (!Encoding.isAscii(c)) {
return -1;
}
if (len != null) len[0] = l;
return c;
}
// rb_enc_mbminlen
public static int encMbminlen(Encoding encoding) {
return encoding.minLength();
}
// rb_enc_dummy_p
public static boolean encDummy(Encoding enc) {
return enc.isDummy();
}
// rb_enc_get
public static Encoding encGet(ThreadContext context, IRubyObject obj) {
if (obj instanceof EncodingCapable) {
return ((EncodingCapable)obj).getEncoding();
}
return context.runtime.getDefaultInternalEncoding();
}
// encoding_equal
public static boolean encodingEqual(byte[] enc1, byte[] enc2) {
return ByteList.memcmp(enc1, 0, enc1.length, enc2, 0, enc2.length) == 0;
}
// enc_arg
public static Encoding encArg(ThreadContext context, IRubyObject encval, byte[][] name_p, Encoding[] enc_p) {
Encoding enc;
if ((enc = toEncodingIndex(context, encval)) == null) {
name_p[0] = encval.convertToString().getBytes();
} else {
name_p[0] = enc.getName();
}
return enc_p[0] = enc;
}
// rb_to_encoding_index
public static Encoding toEncodingIndex(ThreadContext context, IRubyObject enc) {
if (enc instanceof RubyEncoding) {
return ((RubyEncoding)enc).getEncoding();
} else if ((enc = enc.checkStringType()).isNil()) {
return null;
}
if (!((RubyString)enc).getEncoding().isAsciiCompatible()) {
return null;
}
return context.runtime.getEncodingService().getEncodingFromObjectNoError(enc);
}
// encoded_dup
public static IRubyObject encodedDup(ThreadContext context, IRubyObject newstr, IRubyObject str, Encoding encindex) {
if (encindex == null) return str.dup();
if (newstr == str) {
newstr = str.dup();
} else {
// set to same superclass
((RubyBasicObject)newstr).setMetaClass(str.getMetaClass());
}
((RubyString)newstr).modify19();
return strEncodeAssociate(context, newstr, encindex);
}
// str_encode_associate
public static IRubyObject strEncodeAssociate(ThreadContext context, IRubyObject str, Encoding encidx) {
encAssociateIndex(str, encidx);
if (encAsciicompat(encidx)) {
((RubyString)str).scanForCodeRange();
} else {
((RubyString)str).setCodeRange(StringSupport.CR_VALID);
}
return str;
}
// rb_enc_associate_index
public static IRubyObject encAssociateIndex(IRubyObject obj, Encoding encidx) {
((RubyBasicObject)obj).checkFrozen();
if (encidx == null) encidx = ASCIIEncoding.INSTANCE;
if (((EncodingCapable)obj).getEncoding() == encidx) {
return obj;
}
if (obj instanceof RubyString && !CodeRangeSupport.isCodeRangeAsciiOnly((RubyString) obj) ||
encAsciicompat(encidx)) {
((RubyString)obj).clearCodeRange();
}
((EncodingCapable)obj).setEncoding(encidx);
return obj;
}
// str_encode
public static IRubyObject strEncode(ThreadContext context, IRubyObject str, IRubyObject... args) {
IRubyObject[] newstr_p = {str};
Encoding dencindex = strTranscode(context, args, newstr_p);
return encodedDup(context, newstr_p[0], str, dencindex);
}
// rb_str_encode
public static IRubyObject rbStrEncode(ThreadContext context, IRubyObject str, IRubyObject to, int ecflags, IRubyObject ecopt) {
IRubyObject[] newstr_p = {str};
Encoding dencindex = strTranscode0(context, 1, new IRubyObject[]{to}, newstr_p, ecflags, ecopt);
return encodedDup(context, newstr_p[0], str, dencindex);
}
// rb_str_encode
public static ByteList rbByteEncode(ThreadContext context, byte[] bytes, int start, int length, Encoding encoding, int cr, Encoding to, int ecflags, IRubyObject ecopt) {
byte[] sname, dname;
sname = encoding.getName();
dname = to.getName();
if (noDecorators(ecflags)) {
if (encoding.isAsciiCompatible() && to.isAsciiCompatible()) {
if (cr == StringSupport.CR_7BIT) {
return null;
}
} else if (encodingEqual(sname, dname)) {
return null;
}
} else if (encodingEqual(sname, dname)) {
sname = NULL_BYTE_ARRAY;
dname = NULL_BYTE_ARRAY;
}
int slen = length;
int blen = slen + 30;
ByteList dest = new ByteList(blen, to);
Ptr fromPos = new Ptr(start);
int destBegin = dest.getBegin();
transcodeLoop(context, bytes, fromPos, dest.unsafeBytes(), new Ptr(destBegin), start + slen, destBegin + blen, dest, strTranscodingResize, sname, dname, ecflags, ecopt);
if (fromPos.p != start + slen) {
throw context.runtime.newArgumentError("not fully converted, " + (slen - fromPos.p) + " bytes left");
}
dest.setEncoding(to);
return dest;
}
protected static boolean noDecorators(int ecflags) {
return (ecflags & (EConvFlags.NEWLINE_DECORATOR_MASK
| EConvFlags.XML_TEXT_DECORATOR
| EConvFlags.XML_ATTR_CONTENT_DECORATOR
| EConvFlags.XML_ATTR_QUOTE_DECORATOR)) == 0;
}
// str_transcode
public static Encoding strTranscode(ThreadContext context, IRubyObject[] args, IRubyObject[] self_p) {
int ecflags = 0;
int argc = args.length;
IRubyObject[] ecopts_p = {context.nil};
if (args.length >= 1) {
IRubyObject tmp = TypeConverter.checkHashType(context.runtime, args[args.length - 1]);
if (!tmp.isNil()) {
argc--;
ecflags = econvPrepareOpts(context, tmp, ecopts_p);
}
}
return strTranscode0(context, argc, args, self_p, ecflags, ecopts_p[0]);
}
// str_transcode0
public static Encoding strTranscode0(ThreadContext context, int argc, IRubyObject[] args, IRubyObject[] self_p, int ecflags, IRubyObject ecopts) {
Ruby runtime = context.runtime;
IRubyObject str = self_p[0];
IRubyObject arg1, arg2;
Encoding[] senc_p = {null}, denc_p = {null};
byte[][] sname_p = {null}, dname_p = {null};
Encoding dencindex;
boolean explicitlyInvalidReplace = true;
if (argc > 2) {
throw context.runtime.newArgumentError(args.length, 2);
}
if (argc == 0) {
arg1 = runtime.getEncodingService().getDefaultInternal();
if (arg1 == null || arg1.isNil()) {
if (ecflags == 0) return null;
arg1 = objEncoding(context, str);
}
if ((ecflags & EConvFlags.INVALID_MASK) == 0) {
explicitlyInvalidReplace = false;
}
ecflags |= EConvFlags.INVALID_REPLACE | EConvFlags.UNDEF_REPLACE;
} else {
arg1 = args[0];
}
arg2 = argc <= 1 ? context.nil : args[1];
dencindex = strTranscodeEncArgs(context, str, arg1, arg2, sname_p, senc_p, dname_p, denc_p);
IRubyObject dest;
if (noDecorators(ecflags)) {
if (senc_p[0] != null && senc_p[0] == denc_p[0]) {
if ((ecflags & EConvFlags.INVALID_MASK) != 0 && explicitlyInvalidReplace) {
IRubyObject rep = context.nil;
if (!ecopts.isNil()) {
rep = ((RubyHash)ecopts).op_aref(context, runtime.newSymbol("replace"));
}
dest = ((RubyString)str).encStrScrub(context, senc_p[0], rep, Block.NULL_BLOCK);
if (dest.isNil()) dest = str;
self_p[0] = dest;
return dencindex;
}
return arg2.isNil() ? null : dencindex;
} else if (senc_p[0] != null && denc_p[0] != null && senc_p[0].isAsciiCompatible() && denc_p[0].isAsciiCompatible()) {
if (((RubyString)str).scanForCodeRange() == StringSupport.CR_7BIT) {
return dencindex;
}
}
if (encodingEqual(sname_p[0], dname_p[0])) {
return arg2.isNil() ? null : dencindex;
}
} else {
if (encodingEqual(sname_p[0], dname_p[0])) {
sname_p[0] = NULL_BYTE_ARRAY;
dname_p[0] = NULL_BYTE_ARRAY;
}
}
ByteList sp = ((RubyString)str).getByteList();
ByteList fromp = sp;
int slen = ((RubyString)str).size();
int blen = slen + 30;
dest = RubyString.newStringLight(runtime, blen);
ByteList destp = ((RubyString)dest).getByteList();
byte[] frompBytes = fromp.unsafeBytes();
byte[] destpBytes = destp.unsafeBytes();
Ptr frompPos = new Ptr(fromp.getBegin());
Ptr destpPos = new Ptr(destp.getBegin());
transcodeLoop(context, frompBytes, frompPos, destpBytes, destpPos, frompPos.p + slen, destpPos.p + blen, destp, strTranscodingResize, sname_p[0], dname_p[0], ecflags, ecopts);
if (frompPos.p != sp.begin() + slen) {
throw runtime.newArgumentError("not fully converted, " + (slen - frompPos.p) + " bytes left");
}
// MRI sets length of dest here, but we've already done it in the inner transcodeLoop
if (denc_p[0] == null) {
dencindex = defineDummyEncoding(context, dname_p[0]);
}
self_p[0] = dest;
return dencindex;
}
// rb_obj_encoding
public static IRubyObject objEncoding(ThreadContext context, IRubyObject obj) {
Encoding enc = encGet(context, obj);
if (enc == null) {
throw context.runtime.newTypeError("unknown encoding");
}
return context.runtime.getEncodingService().convertEncodingToRubyEncoding(enc);
}
public static Encoding strTranscodeEncArgs(ThreadContext context, IRubyObject str, IRubyObject arg1, IRubyObject arg2, byte[][] sname_p, Encoding[] senc_p, byte[][] dname_p, Encoding[] denc_p) {
Encoding dencindex;
dencindex = encArg(context, arg1, dname_p, denc_p);
if (arg2.isNil()) {
senc_p[0] = encGet(context, str);
sname_p[0] = senc_p[0].getName();
} else {
encArg(context, arg2, sname_p, senc_p);
}
return dencindex;
}
public static boolean encRegistered(byte[] name) {
return EncodingDB.getEncodings().get(name) != null;
}
// enc_check_duplication
public static void encCheckDuplication(ThreadContext context, byte[] name) {
if (encRegistered(name)) {
throw context.runtime.newArgumentError("encoding " + new String(name) + " is already registered");
}
}
// rb_enc_replicate
public static Encoding encReplicate(ThreadContext context, byte[] name, Encoding encoding) {
encCheckDuplication(context, name);
EncodingDB.replicate(new String(name), new String(encoding.getName()));
return EncodingDB.getEncodings().get(name).getEncoding();
}
// rb_define_dummy_encoding
public static Encoding defineDummyEncoding(ThreadContext context, byte[] name) {
Encoding dummy = encReplicate(context, name, ascii8bitEncoding(context.runtime));
// TODO: set dummy on encoding; this probably should live in jcodings
return dummy;
}
public static boolean DECORATOR_P(byte[] sname, byte[] dname) {
return sname == null || sname.length == 0 || sname[0] == 0;
}
// TODO: Get rid of this and get consumers calling with existing RubyString
public static ByteList strConvEncOpts(ThreadContext context, ByteList str, Encoding fromEncoding,
Encoding toEncoding, int ecflags, IRubyObject ecopts) {
return strConvEncOpts(
context,
newString(context.runtime, str),
fromEncoding, toEncoding, ecflags, ecopts).getByteList();
}
/**
* This will try and transcode the supplied ByteList to the supplied toEncoding. It will use
* forceEncoding as its encoding if it is supplied; otherwise it will use the encoding it has
* tucked away in the bytelist. This will return a new copy of a ByteList in the request
* encoding or die trying (ConverterNotFound).
*
* c: rb_str_conv_enc_opts
*/
public static RubyString strConvEncOpts(ThreadContext context, RubyString str, Encoding fromEncoding,
Encoding toEncoding, int ecflags, IRubyObject ecopts) {
if (toEncoding == null) return str;
if (fromEncoding == null) fromEncoding = str.getEncoding();
if (fromEncoding == toEncoding) return str;
if ((toEncoding.isAsciiCompatible() && str.isAsciiOnly()) ||
toEncoding == ASCIIEncoding.INSTANCE) {
if (str.getEncoding() != toEncoding) {
str = (RubyString)str.dup();
str.setEncoding(toEncoding);
}
return str;
}
ByteList strByteList = str.getByteList();
int len = strByteList.getRealSize();
ByteList newStr = new ByteList(len);
int olen = len;
EConv ec = econvOpenOpts(context, fromEncoding.getName(), toEncoding.getName(), ecflags, ecopts);
if (ec == null) return str;
byte[] sbytes = strByteList.getUnsafeBytes();
Ptr sp = new Ptr(strByteList.getBegin());
int start = sp.p;
byte[] destbytes;
Ptr dp = new Ptr(0);
EConvResult ret;
int convertedOutput = 0;
// these are in the while clause in MRI
destbytes = newStr.getUnsafeBytes();
int dest = newStr.begin();
dp.p = dest + convertedOutput;
ret = ec.convert(sbytes, sp, start + len, destbytes, dp, dest + olen, 0);
while (ret == EConvResult.DestinationBufferFull) {
int convertedInput = sp.p - start;
int rest = len - convertedInput;
convertedOutput = dp.p - dest;
newStr.setRealSize(convertedOutput);
if (convertedInput != 0 && convertedOutput != 0 &&
rest < (Integer.MAX_VALUE / convertedOutput)) {
rest = (rest * convertedOutput) / convertedInput;
} else {
rest = olen;
}
olen += rest < 2 ? 2 : rest;
newStr.ensure(olen);
// these are the while clause in MRI
destbytes = newStr.getUnsafeBytes();
dest = newStr.begin();
dp.p = dest + convertedOutput;
ret = ec.convert(sbytes, sp, start + len, destbytes, dp, dest + olen, 0);
}
ec.close();
switch (ret) {
case Finished:
len = dp.p;
newStr.setRealSize(len);
newStr.setEncoding(toEncoding);
return newString(context.runtime, newStr);
default:
// some error, return original
return str;
}
}
// rb_str_conv_enc
public static RubyString strConvEnc(ThreadContext context, RubyString value, Encoding fromEncoding, Encoding toEncoding) {
return strConvEncOpts(context, value, fromEncoding, toEncoding, 0, context.nil);
}
public static ByteList strConvEnc(ThreadContext context, ByteList value, Encoding fromEncoding, Encoding toEncoding) {
return strConvEncOpts(context, value, fromEncoding, toEncoding, 0, context.nil);
}
public static RubyString setStrBuf(Ruby runtime, final IRubyObject obj, final int len) {
final RubyString str;
if (obj == null || obj.isNil()) {
str = RubyString.newStringLight(runtime, len);
}
else {
str = obj.convertToString();
int clen = str.size();
if (clen >= len) {
str.modify();
return str;
}
str.modifyExpand(len);
}
return str;
}
public static List encodingNames(byte[] name, int p, int end) {
final List names = new ArrayList();
Encoding enc = ASCIIEncoding.INSTANCE;
int s = p;
int code = name[s] & 0xff;
if (enc.isDigit(code)) return names;
boolean hasUpper = false;
boolean hasLower = false;
if (enc.isUpper(code)) {
hasUpper = true;
while (++s < end && (enc.isAlnum(name[s] & 0xff) || name[s] == (byte)'_')) {
if (enc.isLower(name[s] & 0xff)) hasLower = true;
}
}
boolean isValid = false;
if (s >= end) {
isValid = true;
names.add(new String(name, p, end));
}
if (!isValid || hasLower) {
if (!hasLower || !hasUpper) {
do {
code = name[s] & 0xff;
if (enc.isLower(code)) hasLower = true;
if (enc.isUpper(code)) hasUpper = true;
} while (++s < end && (!hasLower || !hasUpper));
}
byte[]constName = new byte[end - p];
System.arraycopy(name, p, constName, 0, end - p);
s = 0;
code = constName[s] & 0xff;
if (!isValid) {
if (enc.isLower(code)) constName[s] = AsciiTables.ToUpperCaseTable[code];
for (; s < constName.length; ++s) {
if (!enc.isAlnum(constName[s] & 0xff)) constName[s] = (byte)'_';
}
if (hasUpper) {
names.add(new String(constName, 0, constName.length));
}
}
if (hasLower) {
for (s = 0; s < constName.length; ++s) {
code = constName[s] & 0xff;
if (enc.isLower(code)) constName[s] = AsciiTables.ToUpperCaseTable[code];
}
names.add(new String(constName, 0, constName.length));
}
}
return names;
}
public interface ResizeFunction {
/**
* Resize the destination, returning the new begin offset.
*
* @param destination
* @param len
* @param new_len
* @return
*/
int resize(ByteList destination, int len, int new_len);
}
public static final ResizeFunction strTranscodingResize = new ResizeFunction() {
@Override
public int resize(ByteList destination, int len, int new_len) {
destination.setRealSize(len);
destination.ensure(new_len);
return destination.getBegin();
}
};
/**
* Fallback function to provide replacements for characters that fail to transcode.
*
* @param Data needed for the function to execute
*/
public interface TranscodeFallback {
/**
* Return a replacement character for the given byte range and encoding.
*
* @param context runtime state for the function
* @param fallback data for the function
* @param ec the transcoder that stumbled over the character
* @return true if the character was successfully replaced; false otherwise
*/
boolean call(ThreadContext context, Data fallback, EConv ec);
}
private static abstract class AbstractTranscodeFallback implements TranscodeFallback {
@Override
public boolean call(ThreadContext context, IRubyObject fallback, EConv ec) {
Ruby runtime = context.runtime;
IRubyObject rep = RubyString.newStringNoCopy(
runtime,
new ByteList(
ec.lastError.getErrorBytes(),
ec.lastError.getErrorBytesP(),
ec.lastError.getErrorBytesLength(),
runtime.getEncodingService().findEncodingOrAliasEntry(ec.lastError.getSource()).getEncoding(),
false)
);
rep = innerCall(context, fallback, rep);
if (!rep.isNil()) {
rep = rep.convertToString();
Encoding repEnc = ((RubyString) rep).getEncoding();
ByteList repByteList = ((RubyString) rep).getByteList();
ec.insertOutput(repByteList.getUnsafeBytes(), repByteList.begin(), repByteList.getRealSize(), repEnc.getName());
// TODO: check for too-large replacement
return true;
}
return false;
}
protected abstract IRubyObject innerCall(ThreadContext context, IRubyObject fallback, IRubyObject c);
}
private static final AbstractTranscodeFallback HASH_FALLBACK = new AbstractTranscodeFallback() {
@Override
protected IRubyObject innerCall(ThreadContext context, IRubyObject fallback, IRubyObject c) {
return ((RubyHash)fallback).op_aref(context, c);
}
};
private static final AbstractTranscodeFallback PROC_FALLBACK = new AbstractTranscodeFallback() {
@Override
protected IRubyObject innerCall(ThreadContext context, IRubyObject fallback, IRubyObject c) {
return ((RubyProc)fallback).call(context, c);
}
};
private static final AbstractTranscodeFallback METHOD_FALLBACK = new AbstractTranscodeFallback() {
@Override
protected IRubyObject innerCall(ThreadContext context, IRubyObject fallback, IRubyObject c) {
return fallback.callMethod(context, "call", c);
}
};
private static final AbstractTranscodeFallback AREF_FALLBACK = new AbstractTranscodeFallback() {
@Override
protected IRubyObject innerCall(ThreadContext context, IRubyObject fallback, IRubyObject c) {
return fallback.callMethod(context, "[]", c);
}
};
/**
* Perform the inner transcoding loop.
*
* @see #transcodeLoop(EConv, TranscodeFallback, Object, Object, byte[], Ptr, byte[], Ptr, int, int, ByteList, ResizeFunction)
*
* This version will determine fallback function and encoding options from the given options object.
*
* MRI: transcode_loop Ruby-related bits
*/
public static void transcodeLoop(ThreadContext context, byte[] inBytes, Ptr inPos, byte[] outBytes, Ptr outPos, int inStop, int _outStop, ByteList destination, ResizeFunction resizeFunction, byte[] sname, byte[] dname, int ecflags, IRubyObject ecopts) {
Ruby runtime = context.runtime;
EConv ec;
IRubyObject fallback = context.nil;
TranscodeFallback fallbackFunc = null;
ec = econvOpenOpts(context, sname, dname, ecflags, ecopts);
if (ec == null) {
throw econvOpenExc(context, sname, dname, ecflags);
}
if (!ecopts.isNil() && ecopts instanceof RubyHash) {
fallback = ((RubyHash)ecopts).op_aref(context, runtime.newSymbol("fallback"));
if (fallback instanceof RubyHash) {
fallbackFunc = HASH_FALLBACK;
} else if (fallback instanceof RubyProc) { // not quite same check as MRI
fallbackFunc = PROC_FALLBACK;
} else if (fallback instanceof RubyMethod) { // not quite same check as MRI
fallbackFunc = METHOD_FALLBACK;
} else {
fallbackFunc = AREF_FALLBACK;
}
}
boolean success = transcodeLoop(ec, fallbackFunc, context, fallback, inBytes, inPos, outBytes, outPos, inStop, _outStop, destination, resizeFunction);
if (!success) {
RaiseException re = makeEconvException(runtime, ec);
ec.close();
throw re;
}
}
/**
* A version of transcodeLoop for working without any Ruby runtime available.
*
* MRI: transcode_loop with no fallback and java.lang.String input
*/
public static ByteList transcodeString(String string, Encoding toEncoding, int ecflags) {
Encoding encoding;
encoding = getUTF16ForPlatform();
EConv ec = TranscoderDB.open(encoding.getName(), toEncoding.getName(), ecflags);
byte[] inBytes = string.getBytes(EncodingUtils.charsetForEncoding(encoding));
Ptr inPos = new Ptr(0);
int inStop = inBytes.length;
// most encodings will be shorter than UTF-16 for typical input
int outStop = (int)((double) inBytes.length / 1.5 + 1);
byte[] outBytes = new byte[outStop];
Ptr outPos = new Ptr(0);
ByteList destination = new ByteList(outBytes, toEncoding, false);
boolean success = transcodeLoop(ec, null, null, null, inBytes, inPos, outBytes, outPos, inStop, outStop, destination, strTranscodingResize);
if (!success) {
// TODO: anything?
}
return destination;
}
public static Encoding getUTF16ForPlatform() {
Encoding encoding;// This may be inefficient if we aren't matching endianness right
if (Platform.BYTE_ORDER == Platform.LITTLE_ENDIAN) {
encoding = UTF16LEEncoding.INSTANCE;
} else {
encoding = UTF16BEEncoding.INSTANCE;
}
return encoding;
}
/**
* Perform the inner transcoding loop.
*
* The data in inBytes will be transcoded from the source encoding to the destination, eventually
* replacing the contents of the given ByteList. Along the way, invalid characters may be handled by
* calling the fallback function (if non-null) with the given state and data. If the destination
* needs to be resized, use the given function to do so. Upon completion, destination will
* contain the resulting transcoded bytes.
*
* MRI: transcode_loop generified with EConv and fallback function provided
*
* @param ec the encoding converter
* @param fallbackFunc the fallback function for non-transcodable characters, or null if none
* @param context runtime state to pass into the fallback
* @param fallbackData call state to pass into the fallback
* @param inBytes the incoming byte array
* @param inPos the position from which to start in the incoming bytearray
* @param outBytes the initial output byte array
* @param outPos the position from which to start in the initial output byte array
* @param inStop the position at which to stop in the input
* @param outStop the number of bytes at which to stop in the output
* @param destination the ByteList to hold the eventual output
* @param resizeFunction a function to use to grow the destination
* @param type of data for the fallback function
* @return
*/
public static boolean transcodeLoop(EConv ec, TranscodeFallback fallbackFunc, ThreadContext context, Data fallbackData, byte[] inBytes, Ptr inPos, byte[] outBytes, Ptr outPos, int inStop, int outStop, ByteList destination, ResizeFunction resizeFunction) {
Ptr outstopPos = new Ptr(outStop);
Transcoding lastTC = ec.lastTranscoding;
int maxOutput = lastTC != null ? lastTC.transcoder.maxOutput : 1;
Ptr outStart = new Ptr(outPos.p);
// resume:
while (true) {
EConvResult ret = ec.convert(inBytes, inPos, inStop, outBytes, outPos, outstopPos.p, 0);
if (fallbackFunc != null && ret == EConvResult.UndefinedConversion) {
if (fallbackFunc.call(context, fallbackData, ec)) {
continue;
}
}
if (ret == EConvResult.InvalidByteSequence ||
ret == EConvResult.IncompleteInput ||
ret == EConvResult.UndefinedConversion) {
RaiseException exc = makeEconvException(context.runtime, ec);
ec.close();
destination.setRealSize(outPos.p);
throw exc;
}
if (ret == EConvResult.DestinationBufferFull) {
moreOutputBuffer(destination, resizeFunction, maxOutput, outStart, outPos, outstopPos);
outBytes = destination.getUnsafeBytes();
continue;
}
ec.close();
destination.setRealSize(outPos.p);
return true;
}
}
// make_econv_exception
public static RaiseException makeEconvException(Ruby runtime, EConv ec) {
final StringBuilder mesg = new StringBuilder(); RaiseException exc;
final EConvResult result = ec.lastError.getResult();
if (result == EConvResult.InvalidByteSequence || result == EConvResult.IncompleteInput) {
byte[] errBytes = ec.lastError.getErrorBytes();
int errBytesP = ec.lastError.getErrorBytesP();
int errorLen = ec.lastError.getErrorBytesLength();
ByteList _bytes = new ByteList(errBytes, errBytesP, errorLen - errBytesP);
RubyString bytes = newString(runtime, _bytes);
RubyString dumped = (RubyString)bytes.dump();
int readagainLen = ec.lastError.getReadAgainLength();
IRubyObject bytes2 = runtime.getNil();
if (result == EConvResult.IncompleteInput) {
mesg.append("incomplete ").append(dumped).append(" on ").append(new String(ec.lastError.getSource()));
} else if (readagainLen != 0) {
bytes2 = newString(runtime, new ByteList(errBytes, errorLen + errBytesP, ec.lastError.getReadAgainLength()));
IRubyObject dumped2 = ((RubyString) bytes2).dump();
mesg.append(dumped).append(" followed by ").append(dumped2).append(" on ").append( new String(ec.lastError.getSource()) );
} else {
mesg.append(dumped).append(" on ").append( new String(ec.lastError.getSource()) );
}
exc = runtime.newInvalidByteSequenceError(mesg.toString());
exc.getException().setInternalVariable("error_bytes", bytes);
exc.getException().setInternalVariable("readagain_bytes", bytes2);
exc.getException().setInternalVariable("incomplete_input", result == EConvResult.IncompleteInput ? runtime.getTrue() : runtime.getFalse());
return makeEConvExceptionSetEncs(exc, runtime, ec);
}
else if (result == EConvResult.UndefinedConversion) {
byte[] errBytes = ec.lastError.getErrorBytes();
int errBytesP = ec.lastError.getErrorBytesP();
int errorLen = ec.lastError.getErrorBytesLength();
final byte[] errSource = ec.lastError.getSource();
if (Arrays.equals(errSource, "UTF-8".getBytes())) {
// prepare dumped form
}
RubyString bytes = newString(runtime, new ByteList(errBytes, errBytesP, errorLen - errBytesP));
RubyString dumped = (RubyString) bytes.dump();
if (Arrays.equals(errSource, ec.source) && Arrays.equals(ec.lastError.getDestination(), ec.destination)) {
mesg.append(dumped).append(" from ").append( new String(errSource) ).append(" to ").append( new String(ec.lastError.getDestination()) );
} else {
mesg.append(dumped).append(" to ").append( new String(ec.lastError.getDestination()) ).append(" in conversion from ").append( new String(ec.source) );
for (int i = 0; i < ec.numTranscoders; i++) {
mesg.append(" to ").append( new String(ec.elements[i].transcoding.transcoder.getDestination()) );
}
}
exc = runtime.newUndefinedConversionError(mesg.toString());
EncodingDB.Entry entry = runtime.getEncodingService().findEncodingOrAliasEntry(errSource);
if (entry != null) {
bytes.setEncoding(entry.getEncoding());
exc.getException().setInternalVariable("error_char", bytes);
}
return makeEConvExceptionSetEncs(exc, runtime, ec);
}
return null;
}
private static RaiseException makeEConvExceptionSetEncs(RaiseException exc, Ruby runtime, EConv ec) {
exc.getException().setInternalVariable("source_encoding_name", newString(runtime, ec.lastError.getSource()));
exc.getException().setInternalVariable("destination_encoding_name", newString(runtime, ec.lastError.getDestination()));
EncodingDB.Entry entry = runtime.getEncodingService().findEncodingOrAliasEntry(ec.lastError.getSource());
if (entry != null) {
exc.getException().setInternalVariable("source_encoding", runtime.getEncodingService().convertEncodingToRubyEncoding(entry.getEncoding()));
}
entry = runtime.getEncodingService().findEncodingOrAliasEntry(ec.lastError.getDestination());
if (entry != null) {
exc.getException().setInternalVariable("destination_encoding", runtime.getEncodingService().convertEncodingToRubyEncoding(entry.getEncoding()));
}
return exc;
}
// more_output_buffer
static void moreOutputBuffer(ByteList destination, ResizeFunction resizeDestination, int maxOutput, Ptr outStart, Ptr outPos, Ptr outStop) {
int len = outPos.p - outStart.p;
int newLen = (len + maxOutput) * 2;
outStart.p = resizeDestination.resize(destination, len, newLen);
outPos.p = outStart.p + len;
outStop.p = outStart.p + newLen;
}
// MRI: io_set_encoding_by_bom
public static void ioSetEncodingByBOM(ThreadContext context, RubyIO io) {
Ruby runtime = context.runtime;
Encoding bomEncoding = ioStripBOM(context, io);
if (bomEncoding != null) {
// FIXME: Wonky that we acquire RubyEncoding to pass these encodings through
IRubyObject theBom = runtime.getEncodingService().getEncoding(bomEncoding);
IRubyObject theInternal = io.internal_encoding(context);
io.setEncoding(runtime.getCurrentContext(), theBom, theInternal, context.nil);
} else {
io.setEnc2(null);
}
}
// MRI: io_strip_bom
public static Encoding ioStripBOM(ThreadContext context, RubyIO io) {
IRubyObject b1, b2, b3, b4;
if ((io.getOpenFile().getMode() & OpenFile.READABLE) == 0) return null;
if ((b1 = io.getbyte(context)).isNil()) return null;
switch ((int)((RubyFixnum)b1).getLongValue()) {
case 0xEF:
if ((b2 = io.getbyte(context)).isNil()) break;
if (b2 instanceof RubyFixnum && ((RubyFixnum)b2).getLongValue() == 0xBB && !(b3 = io.getbyte(context)).isNil()) {
if (((RubyFixnum)b3).getLongValue() == 0xBF) {
return UTF8Encoding.INSTANCE;
}
io.ungetbyte(context, b3);
}
io.ungetbyte(context, b2);
break;
case 0xFE:
if ((b2 = io.getbyte(context)).isNil()) break;
if (b2 instanceof RubyFixnum && ((RubyFixnum)b2).getLongValue() == 0xFF) {
return UTF16BEEncoding.INSTANCE;
}
io.ungetbyte(context, b2);
break;
case 0xFF:
if ((b2 = io.getbyte(context)).isNil()) break;
if (b2 instanceof RubyFixnum && ((RubyFixnum)b2).getLongValue() == 0xFE) {
b3 = io.getbyte(context);
if (b3 instanceof RubyFixnum && ((RubyFixnum)b3).getLongValue() == 0 && !(b4 = io.getbyte(context)).isNil()) {
if (((RubyFixnum)b4).getLongValue() == 0) {
return UTF32LEEncoding.INSTANCE;
}
io.ungetbyte(context, b4);
} else {
io.ungetbyte(context, b3);
return UTF16LEEncoding.INSTANCE;
}
io.ungetbyte(context, b3);
}
io.ungetbyte(context, b2);
break;
case 0:
if ((b2 = io.getbyte(context)).isNil()) break;
if (b2 instanceof RubyFixnum && ((RubyFixnum)b2).getLongValue() == 0 && !(b3 = io.getbyte(context)).isNil()) {
if (b3 instanceof RubyFixnum && ((RubyFixnum)b3).getLongValue() == 0xFE && !(b4 = io.getbyte(context)).isNil()) {
if (b4 instanceof RubyFixnum && ((RubyFixnum)b4).getLongValue() == 0xFF) {
return UTF32BEEncoding.INSTANCE;
}
io.ungetbyte(context, b4);
}
io.ungetbyte(context, b3);
}
io.ungetbyte(context, b2);
break;
}
io.ungetbyte(context, b1);
return null;
}
// validate_enc_binmode
public static void validateEncodingBinmode(ThreadContext context, int[] fmode_p, int ecflags, IOEncodable ioEncodable) {
Ruby runtime = context.runtime;
int fmode = fmode_p[0];
if ((fmode & OpenFile.READABLE) != 0 &&
ioEncodable.getEnc2() == null &&
(fmode & OpenFile.BINMODE) == 0 &&
!(ioEncodable.getEnc() != null ? ioEncodable.getEnc() : runtime.getDefaultExternalEncoding()).isAsciiCompatible()) {
throw runtime.newArgumentError("ASCII incompatible encoding needs binmode");
}
if ((fmode & OpenFile.BINMODE) != 0 && (ecflags & EConvFlags.NEWLINE_DECORATOR_MASK) != 0) {
throw runtime.newArgumentError("newline decorator with binary mode");
}
if ((fmode & OpenFile.BINMODE) == 0 && (EncodingUtils.DEFAULT_TEXTMODE != 0 || (ecflags & EConvFlags.NEWLINE_DECORATOR_MASK) != 0)) {
fmode |= OpenFile.TEXTMODE;
fmode_p[0] = fmode;
} else if (EncodingUtils.DEFAULT_TEXTMODE == 0 && (ecflags & EConvFlags.NEWLINE_DECORATOR_MASK) == 0) {
fmode &= ~OpenFile.TEXTMODE;
fmode_p[0] = fmode;
}
}
// rb_enc_set_default_external
public static void rbEncSetDefaultExternal(ThreadContext context, IRubyObject encoding) {
if (encoding.isNil()) {
throw context.runtime.newArgumentError("default external can not be nil");
}
Encoding[] enc_p = {context.runtime.getDefaultExternalEncoding()};
encSetDefaultEncoding(context, enc_p, encoding, "external");
context.runtime.setDefaultExternalEncoding(enc_p[0]);
}
// rb_enc_set_default_internal
public static void rbEncSetDefaultInternal(ThreadContext context, IRubyObject encoding) {
Encoding[] enc_p = {context.runtime.getDefaultInternalEncoding()};
encSetDefaultEncoding(context, enc_p, encoding, "internal");
context.runtime.setDefaultInternalEncoding(enc_p[0]);
}
// enc_set_default_encoding
public static boolean encSetDefaultEncoding(ThreadContext context, Encoding[] def_p, IRubyObject encoding, String name) {
boolean overridden = false;
if (def_p != null) {
overridden = true;
}
if (encoding.isNil()) {
def_p[0] = null;
// don't set back into encoding table since it defers to us
} else {
def_p[0] = rbToEncoding(context, encoding);
// don't set back into encoding table since it defers to us
}
if (name.equals("external")) {
// TODO: set filesystem encoding
}
return overridden;
}
// rb_default_external_encoding
public static Encoding defaultExternalEncoding(Ruby runtime) {
if (runtime.getDefaultExternalEncoding() != null) return runtime.getDefaultExternalEncoding();
return runtime.getEncodingService().getLocaleEncoding();
}
// rb_str_buf_cat
public static void rbStrBufCat(Ruby runtime, RubyString str, ByteList ptr) {
if (ptr.length() == 0) return;
// negative length check here, we shouldn't need
strBufCat(runtime, str, ptr);
}
public static void rbStrBufCat(Ruby runtime, ByteListHolder str, byte[] ptrBytes, int ptr, int len) {
if (len == 0) return;
// negative length check here, we shouldn't need
strBufCat(runtime, str, ptrBytes, ptr, len);
}
public static void rbStrBufCat(Ruby runtime, ByteList str, byte[] ptrBytes, int ptr, int len) {
if (len == 0) return;
// negative length check here, we shouldn't need
strBufCat(str, ptrBytes, ptr, len);
}
// str_buf_cat
public static void strBufCat(Ruby runtime, RubyString str, ByteList ptr) {
strBufCat(runtime, str, ptr.getUnsafeBytes(), ptr.getBegin(), ptr.getRealSize());
}
public static void strBufCat(Ruby runtime, ByteListHolder str, byte[] ptrBytes, int ptr, int len) {
str.modify();
strBufCat(str.getByteList(), ptrBytes, ptr, len);
}
public static void strBufCat(ByteList str, byte[] ptrBytes, int ptr, int len) {
int total, off = -1;
// termlen is not relevant since we have no termination sequence
// missing: if ptr string is inside str, off = ptr start minus str start
// str.modify();
if (len == 0) return;
// much logic is missing here, since we don't manually manage the ByteList buffer
total = str.getRealSize() + len;
str.ensure(total);
str.append(ptrBytes, ptr, len);
}
// rb_enc_str_buf_cat
public static void encStrBufCat(Ruby runtime, RubyString str, ByteList ptr, Encoding enc) {
encCrStrBufCat(runtime, str, ptr.getUnsafeBytes(), ptr.getBegin(), ptr.getRealSize(),
enc, StringSupport.CR_UNKNOWN);
}
public static void encStrBufCat(Ruby runtime, RubyString str, ByteList ptr) {
encCrStrBufCat(runtime, str, ptr.getUnsafeBytes(), ptr.getBegin(), ptr.getRealSize(),
ptr.getEncoding(), StringSupport.CR_UNKNOWN);
}
public static void encStrBufCat(Ruby runtime, RubyString str, byte[] ptrBytes) {
encCrStrBufCat(runtime, str, ptrBytes, 0, ptrBytes.length, USASCIIEncoding.INSTANCE, StringSupport.CR_UNKNOWN);
}
public static void encStrBufCat(Ruby runtime, RubyString str, byte[] ptrBytes, Encoding enc) {
encCrStrBufCat(runtime, str, ptrBytes, 0, ptrBytes.length, enc, StringSupport.CR_UNKNOWN);
}
public static void encStrBufCat(Ruby runtime, RubyString str, byte[] ptrBytes, int ptr, int len, Encoding enc) {
encCrStrBufCat(runtime, str, ptrBytes, ptr, len,
enc, StringSupport.CR_UNKNOWN);
}
public static void encStrBufCat(Ruby runtime, RubyString str, CharSequence cseq) {
byte[] utf8 = RubyEncoding.encodeUTF8(cseq.toString());
encCrStrBufCat(runtime, str, utf8, 0, utf8.length, UTF8Encoding.INSTANCE, StringSupport.CR_UNKNOWN);
}
// rb_enc_cr_str_buf_cat
public static int encCrStrBufCat(Ruby runtime, CodeRangeable str, ByteList ptr, Encoding ptrEnc, int ptr_cr) {
return encCrStrBufCat(runtime, str, ptr.getUnsafeBytes(), ptr.getBegin(), ptr.getRealSize(), ptrEnc, ptr_cr);
}
public static int encCrStrBufCat(Ruby runtime, CodeRangeable str, byte[] ptrBytes, int ptr, int len, Encoding ptrEnc, int ptr_cr) {
Encoding strEnc = str.getByteList().getEncoding();
Encoding resEnc;
int str_cr, res_cr;
boolean incompatible = false;
str_cr = str.getByteList().getRealSize() > 0 ? str.getCodeRange() : StringSupport.CR_7BIT;
if (strEnc == ptrEnc) {
if (str_cr == StringSupport.CR_UNKNOWN) {
ptr_cr = StringSupport.CR_UNKNOWN;
} else if (ptr_cr == StringSupport.CR_UNKNOWN) {
ptr_cr = StringSupport.codeRangeScan(ptrEnc, ptrBytes, ptr, len);
}
} else {
if (!EncodingUtils.encAsciicompat(strEnc) || !EncodingUtils.encAsciicompat(ptrEnc)) {
if (len == 0) return ptr_cr;
if (str.getByteList().getRealSize() == 0) {
strBufCat(runtime, str, ptrBytes, ptr, len);
str.getByteList().setEncoding(ptrEnc);
str.setCodeRange(ptr_cr);
return ptr_cr;
}
incompatible = true;
}
if (!incompatible) {
if (ptr_cr == StringSupport.CR_UNKNOWN) {
ptr_cr = StringSupport.codeRangeScan(ptrEnc, ptrBytes, ptr, len);
}
if (str_cr == StringSupport.CR_UNKNOWN) {
if (strEnc == ASCIIEncoding.INSTANCE || ptr_cr != StringSupport.CR_7BIT) {
str_cr = str.scanForCodeRange();
}
}
}
}
if (incompatible ||
(strEnc != ptrEnc &&
str_cr != StringSupport.CR_7BIT &&
ptr_cr != StringSupport.CR_7BIT)) {
throw runtime.newEncodingCompatibilityError("incompatible encodings: " + strEnc + " and " + ptrEnc);
}
if (str_cr == StringSupport.CR_UNKNOWN) {
resEnc = strEnc;
res_cr = StringSupport.CR_UNKNOWN;
} else if (str_cr == StringSupport.CR_7BIT) {
if (ptr_cr == StringSupport.CR_7BIT) {
resEnc = strEnc;
res_cr = StringSupport.CR_7BIT;
} else {
resEnc = ptrEnc;
res_cr = ptr_cr;
}
} else if (str_cr == StringSupport.CR_VALID) {
resEnc = strEnc;
if (ptr_cr == StringSupport.CR_7BIT || ptr_cr == StringSupport.CR_VALID) {
res_cr = str_cr;
} else {
res_cr = ptr_cr;
}
} else { // str_cr must be BROKEN at this point
resEnc = strEnc;
res_cr = str_cr;
if (0 < len) res_cr = StringSupport.CR_UNKNOWN;
}
// MRI checks for len < 0 here, but I don't think that's possible for us
strBufCat(runtime, str, ptrBytes, ptr, len);
str.getByteList().setEncoding(resEnc);
str.setCodeRange(res_cr);
return ptr_cr;
}
// econv_args
public static void econvArgs(ThreadContext context, IRubyObject[] args, byte[][] encNames, Encoding[] encs, int[] ecflags_p, IRubyObject[] ecopts_p) {
Ruby runtime = context.runtime;
IRubyObject snamev = context.nil;
IRubyObject dnamev = context.nil;
IRubyObject flags = context.nil;
IRubyObject opt = context.nil;
// scan args logic
{
switch (args.length) {
case 3:
flags = args[2];
case 2:
dnamev = args[1];
case 1:
snamev = args[0];
}
IRubyObject tmp;
if (!(tmp = TypeConverter.checkHashType(runtime, flags)).isNil()) {
opt = tmp;
flags = context.nil;
}
}
if (!flags.isNil()) {
if (!opt.isNil()) {
throw runtime.newArgumentError(args.length, 3);
}
ecflags_p[0] = (int)flags.convertToInteger().getLongValue();
ecopts_p[0] = context.nil;
} else if (!opt.isNil()) {
ecflags_p[0] = EncodingUtils.econvPrepareOpts(context, opt, ecopts_p);
} else {
ecflags_p[0] = 0;
ecopts_p[0] = context.nil;
}
encs[0] = runtime.getEncodingService().getEncodingFromObjectNoError(snamev);
if (encs[0] == null) {
snamev = snamev.convertToString();
}
encs[1] = runtime.getEncodingService().getEncodingFromObjectNoError(dnamev);
if (encs[1] == null) {
dnamev = dnamev.convertToString();
}
encNames[0] = encs[0] != null ? encs[0].getName() : ((RubyString)snamev).getBytes();
encNames[1] = encs[1] != null ? encs[1].getName() : ((RubyString)dnamev).getBytes();
return;
}
// rb_econv_init_by_convpath
public static EConv econvInitByConvpath(ThreadContext context, IRubyObject convpath, byte[][] encNames, Encoding[] encs) {
final Ruby runtime = context.runtime;
final EConv ec = TranscoderDB.alloc(convpath.convertToArray().size());
IRubyObject[] sname_v = {context.nil};
IRubyObject[] dname_v = {context.nil};
byte[][] sname = {null};
byte[][] dname = {null};
Encoding[] senc = {null};
Encoding[] denc = {null};
boolean first = true;
for (int i = 0; i < ((RubyArray)convpath).size(); i++) {
IRubyObject elt = ((RubyArray)convpath).eltOk(i);
IRubyObject pair;
if (!(pair = elt.checkArrayType()).isNil()) {
if (((RubyArray)pair).size() != 2) {
throw context.runtime.newArgumentError("not a 2-element array in convpath");
}
sname_v[0] = ((RubyArray)pair).eltOk(0);
encArg(context, sname_v[0], sname, senc);
dname_v[0] = ((RubyArray)pair).eltOk(1);
encArg(context, dname_v[0], dname, denc);
} else {
sname[0] = NULL_BYTE_ARRAY;
dname[0] = elt.convertToString().getBytes();
}
if (DECORATOR_P(sname[0], dname[0])) {
boolean ret = ec.addConverter(sname[0], dname[0], ec.numTranscoders);
if (!ret) {
throw runtime.newArgumentError("decoration failed: " + new String(dname[0]));
}
} else {
int j = ec.numTranscoders;
final int[] arg = {j,0};
int ret = TranscoderDB.searchPath(sname[0], dname[0], new TranscoderDB.SearchPathCallback() {
@Override
public void call(byte[] source, byte[] destination, int depth) {
if (arg[1] == -1) return;
arg[1] = ec.addConverter(source, destination, arg[0]) ? 0 : -1;
}
});
if (ret == -1 || arg[1] == -1) {
throw runtime.newArgumentError("adding conversion failed: " + new String(sname[0]) + " to " + new String(dname[0]));
}
if (first) {
first = false;
encs[0] = senc[0];
encNames[0] = ec.elements[j].transcoding.transcoder.getSource();
}
encs[1] = denc[0];
encNames[1] = ec.elements[ec.numTranscoders - 1].transcoding.transcoder.getDestination();
}
}
if (first) {
encs[0] = null;
encs[1] = null;
encNames[0] = NULL_BYTE_ARRAY;
encNames[1] = NULL_BYTE_ARRAY;
}
ec.source = encNames[0];
ec.destination = encNames[0];
return ec;
}
// decorate_convpath
public static int decorateConvpath(ThreadContext context, IRubyObject convpath, int ecflags) {
Ruby runtime = context.runtime;
int num_decorators;
byte[][] decorators = new byte[EConvFlags.MAX_ECFLAGS_DECORATORS][];
int i;
int n, len;
num_decorators = TranscoderDB.decoratorNames(ecflags, decorators);
if (num_decorators == -1)
return -1;
len = n = ((RubyArray)convpath).size();
if (n != 0) {
IRubyObject pair = ((RubyArray)convpath).eltOk(n - 1);
if (pair instanceof RubyArray) {
byte[] sname = runtime.getEncodingService().getEncodingFromObject(((RubyArray)pair).eltOk(0)).getName();
byte[] dname = runtime.getEncodingService().getEncodingFromObject(((RubyArray)pair).eltOk(1)).getName();
TranscoderDB.Entry entry = TranscoderDB.getEntry(sname, dname);
Transcoder tr = entry.getTranscoder();
if (tr == null)
return -1;
if (!DECORATOR_P(tr.getSource(), tr.getDestination()) &&
tr.compatibility.isEncoder()) {
n--;
((RubyArray)convpath).store(len + num_decorators - 1, pair);
}
} else {
((RubyArray)convpath).store(len + num_decorators - 1, pair);
}
}
for (i = 0; i < num_decorators; i++)
((RubyArray)convpath).store(n + i, newString(runtime, decorators[i]));
return 0;
}
// io_enc_str
public static IRubyObject ioEncStr(Ruby runtime, IRubyObject str, OpenFile fptr)
{
str.setTaint(true);
((RubyString)str).setEncoding(fptr.readEncoding(runtime));
return str;
}
// rb_enc_uint_chr
public static RubyString encUintChr(ThreadContext context, int code, Encoding enc) {
Ruby runtime = context.runtime;
long i = code & 0xFFFFFFFFL;
int n;
switch (n = EncodingUtils.encCodelen(context, code, enc)) {
case ErrorCodes.ERR_INVALID_CODE_POINT_VALUE:
throw runtime.newRangeError("invalid codepoint " + Long.toHexString(i) + " in " + enc);
case ErrorCodes.ERR_TOO_BIG_WIDE_CHAR_VALUE:
case 0:
throw runtime.newRangeError(i + " out of char range");
}
ByteList strBytes = new ByteList(n);
strBytes.setEncoding(enc);
strBytes.length(n);
byte[] bytes = strBytes.unsafeBytes();
int begin = strBytes.begin();
int end = strBytes.realSize();
encMbcput(context, code, bytes, begin, enc);
if (StringSupport.preciseLength(enc, bytes, begin, end) != n) {
throw runtime.newRangeError("invalid codepoint " + Long.toHexString(i) + " in " + enc);
}
return newString(runtime, strBytes);
}
// rb_enc_mbcput with Java exception
public static int encMbcput(int c, byte[] buf, int p, Encoding enc) {
int len = enc.codeToMbc(c, buf, p);
if (len < 0) {
throw new EncodingException(EncodingError.fromCode(len));
}
return len;
}
// rb_enc_mbcput with Ruby exception
public static int encMbcput(ThreadContext context, int c, byte[] buf, int p, Encoding enc) {
int len = enc.codeToMbc(c, buf, p);
// in MRI, this check occurs within some of the individual encoding functions, such as the
// US-ASCII check for values >= 0x80. In MRI, unlike in JRuby, we can't throw Ruby errors
// from within encoding logic, so we try to reproduce the expected results via normal
// error codes here.
// See MRI's rb_enc_mbcput and related downstream encoding functions.
if (len < 0) {
switch (len) {
case ErrorCodes.ERR_INVALID_CODE_POINT_VALUE:
throw context.runtime.newRangeError("invalid codepoint " + Long.toHexString(c & 0xFFFFFFFFL) + " in " + enc);
case ErrorCodes.ERR_TOO_BIG_WIDE_CHAR_VALUE:
throw context.runtime.newRangeError("" + (c & 0xFFFFFFFFL) + " out of char range");
}
throw context.runtime.newEncodingError(EncodingError.fromCode(len).getMessage());
}
return len;
}
// rb_enc_codepoint_len
public static int encCodepointLength(byte[] pBytes, int p, int e, int[] len_p, Encoding enc) {
int r;
if (e <= p)
throw new IllegalArgumentException("empty string");
r = StringSupport.preciseLength(enc, pBytes, p, e);
if (!StringSupport.MBCLEN_CHARFOUND_P(r)) {
throw new IllegalArgumentException("invalid byte sequence in " + enc);
}
if (len_p != null) len_p[0] = StringSupport.MBCLEN_CHARFOUND_LEN(r);
return StringSupport.codePoint(enc, pBytes, p, e);
}
public static int encCodepointLength(Ruby runtime, byte[] pBytes, int p, int e, int[] len_p, Encoding enc) {
try {
return encCodepointLength(pBytes, p, e, len_p, enc);
} catch (IllegalArgumentException ex) {
throw runtime.newArgumentError(ex.getMessage());
}
}
// MRI: str_compat_and_valid
public static IRubyObject strCompatAndValid(ThreadContext context, IRubyObject _str, Encoding enc) {
int cr;
RubyString str = _str.convertToString();
cr = str.scanForCodeRange();
if (cr == StringSupport.CR_BROKEN) {
throw context.runtime.newArgumentError("replacement must be valid byte sequence '" + str + "'");
}
else {
Encoding e = STR_ENC_GET(str);
if (cr == StringSupport.CR_7BIT ? enc.minLength() != 1 : enc != e) {
throw context.runtime.newEncodingCompatibilityError("incompatible character encodings: " + enc + " and " + e);
}
}
return str;
}
// MRI: get_encoding
public static Encoding getEncoding(ByteList str) {
return getActualEncoding(str.getEncoding(), str);
}
private static final Encoding UTF16Dummy = EncodingDB.getEncodings().get("UTF-16".getBytes()).getEncoding();
private static final Encoding UTF32Dummy = EncodingDB.getEncodings().get("UTF-32".getBytes()).getEncoding();
// MRI: get_actual_encoding
public static Encoding getActualEncoding(Encoding enc, ByteList byteList) {
return getActualEncoding(enc, byteList.getUnsafeBytes(), byteList.begin(), byteList.begin() + byteList.realSize());
}
public static Encoding getActualEncoding(Encoding enc, byte[] bytes, int p, int end) {
if (enc.isDummy() && enc instanceof UnicodeEncoding) {
// handle dummy UTF-16 and UTF-32 by scanning for BOM, as in MRI
if (enc == UTF16Dummy && end - p >= 2) {
int c0 = bytes[p] & 0xff;
int c1 = bytes[p + 1] & 0xff;
if (c0 == 0xFE && c1 == 0xFF) {
return UTF16BEEncoding.INSTANCE;
} else if (c0 == 0xFF && c1 == 0xFE) {
return UTF16LEEncoding.INSTANCE;
}
return ASCIIEncoding.INSTANCE;
} else if (enc == UTF32Dummy && end - p >= 4) {
int c0 = bytes[p] & 0xff;
int c1 = bytes[p + 1] & 0xff;
int c2 = bytes[p + 2] & 0xff;
int c3 = bytes[p + 3] & 0xff;
if (c0 == 0 && c1 == 0 && c2 == 0xFE && c3 == 0xFF) {
return UTF32BEEncoding.INSTANCE;
} else if (c3 == 0 && c2 == 0 && c1 == 0xFE && c0 == 0xFF) {
return UTF32LEEncoding.INSTANCE;
}
return ASCIIEncoding.INSTANCE;
}
}
return enc;
}
public static Encoding STR_ENC_GET(ByteListHolder str) {
return getEncoding(str.getByteList());
}
public static RubyString rbStrEscape(Ruby runtime, RubyString str) {
return (RubyString) RubyString.rbStrEscape(runtime.getCurrentContext(), str);
}
public static int rbStrBufCatEscapedChar(RubyString result, long c, boolean unicode_p) {
// FIXME: inefficient
byte[] buf;
int l;
c &= 0xffffffff;
if (unicode_p) {
if (c < 0x7F && c > 31 /*ISPRINT(c)*/) {
buf = String.format("%c", (char)c).getBytes();
}
else if (c < 0x10000) {
buf = String.format("\\u%04X", c).getBytes();
}
else {
buf = String.format("\\u{%X}", c).getBytes();
}
}
else {
if (c < 0x100) {
buf = String.format("\\x{%02X}", c).getBytes();
}
else {
buf = String.format("\\x{%X}", c).getBytes();
}
}
result.cat(buf);
return buf.length;
}
/**
* Get an appropriate Java Charset for the given Encoding.
*
* This works around a bug in jcodings where it would return null as the charset for encodings that should have
* a match, like Windows-1252. This method is equivalent to enc.getCharset in jcodings 1.0.25 and higher.
*
* See https://github.com/jruby/jruby/issues/4716 for more information.
*
* @param enc the encoding for which to get a matching charset
* @return the matching charset
*/
public static Charset charsetForEncoding(Encoding enc) {
Charset charset = enc.getCharset();
if (charset == null) {
charset = Charset.forName(enc.toString());
}
return charset;
}
public static int encCodelen(ThreadContext context, int c, Encoding enc) {
int n = enc.codeToMbcLength(c);
if (n == 0) {
throw context.runtime.newArgumentError("invalid codepoint " + Long.toHexString(c & 0xFFFFFFFFL) + " in " + enc);
}
return n;
}
@Deprecated
public static Encoding ioStripBOM(RubyIO io) {
return ioStripBOM(io.getRuntime().getCurrentContext(), io);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy