All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.jruby.ext.iconv.RubyIconv Maven / Gradle / Ivy

There is a newer version: 9.4.9.0
Show newest version
/***** BEGIN LICENSE BLOCK *****
 * Version: EPL 1.0/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Eclipse Public
 * License Version 1.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.eclipse.org/legal/epl-v10.html
 *
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * Copyright (C) 2006 Thomas E Enebo 
 * Copyright (C) 2007-2010 Koichiro Ohba 
 * 
 * Alternatively, the contents of this file may be used under the terms of
 * either of the GNU General Public License Version 2 or later (the "GPL"),
 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the EPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the EPL, the GPL or the LGPL.
 ***** END LICENSE BLOCK *****/
package org.jruby.ext.iconv;

import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.MalformedInputException;
import java.nio.charset.UnmappableCharacterException;
import java.nio.charset.UnsupportedCharsetException;
import org.jcodings.Encoding;
import org.jcodings.EncodingDB;
import org.jruby.Ruby;
import org.jruby.RubyArray;
import org.jruby.RubyClass;
import org.jruby.RubyException;
import org.jruby.RubyHash;
import org.jruby.RubyModule;
import org.jruby.RubyNumeric;
import org.jruby.RubyObject;
import org.jruby.RubyString;

import org.jruby.anno.JRubyMethod;
import org.jruby.anno.JRubyModule;
import org.jruby.anno.JRubyClass;
import org.jruby.runtime.Arity;

import org.jruby.runtime.Block;
import org.jruby.runtime.ObjectAllocator;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.Visibility;
import org.jruby.runtime.builtin.IRubyObject;

import org.jruby.util.ByteList;

@JRubyClass(name="Iconv")
public class RubyIconv extends RubyObject {
    //static private final String TRANSLIT = "//translit";
    static private final String IGNORE = "//ignore";

    private CharsetDecoder fromEncoding;
    private CharsetEncoder toEncoding;
    private int count;
    private String endian = "";

    public RubyIconv(Ruby runtime, RubyClass type) {
        super(runtime, type);
    }
    
    private static final ObjectAllocator ICONV_ALLOCATOR = new ObjectAllocator() {
        public IRubyObject allocate(Ruby runtime, RubyClass klass) {
            return new RubyIconv(runtime, klass);
        }
    };

    @JRubyModule(name="Iconv::Failure")
    public static class Failure {}
    @JRubyClass(name="Iconv::IllegalSequence", parent="ArgumentError", include="Iconv::Failure")
    public static class IllegalSequence {}
    @JRubyClass(name="Iconv::InvalidCharacter", parent="ArgumentError", include="Iconv::Failure")
    public static class InvalidCharacter {}
    @JRubyClass(name="Iconv::InvalidEncoding", parent="ArgumentError", include="Iconv::Failure")
    public static class InvalidEncoding {}
    @JRubyClass(name="Iconv::OutOfRange", parent="ArgumentError", include="Iconv::Failure")
    public static class OutOfRange {}
    @JRubyClass(name="Iconv::BrokenLibrary", parent="ArgumentError", include="Iconv::Failure")
    public static class BrokenLibrary {}

    public static void createIconv(Ruby runtime) {
        RubyClass iconvClass = runtime.defineClass("Iconv", runtime.getObject(), ICONV_ALLOCATOR);
        
        iconvClass.defineAnnotatedMethods(RubyIconv.class);

        RubyModule failure = iconvClass.defineModuleUnder("Failure");
        RubyClass argumentError = runtime.getArgumentError();

        String[] iconvErrors = {"IllegalSequence", "InvalidCharacter", "InvalidEncoding", 
                "OutOfRange", "BrokenLibrary"};
        
        for (int i = 0; i < iconvErrors.length; i++) {
            RubyClass subClass = iconvClass.defineClassUnder(iconvErrors[i], argumentError, RubyFailure.ICONV_FAILURE_ALLOCATOR);
            subClass.defineAnnotatedMethods(RubyFailure.class);
            subClass.includeModule(failure);
        }    
    }
    
    public static class RubyFailure extends RubyException {
        private IRubyObject success;
        private IRubyObject failed;

        public static RubyFailure newInstance(Ruby runtime, RubyClass excptnClass, String msg) {
            return new RubyFailure(runtime, excptnClass, msg);
        }

        protected static final ObjectAllocator ICONV_FAILURE_ALLOCATOR = new ObjectAllocator() {
            public IRubyObject allocate(Ruby runtime, RubyClass klass) {
                return new RubyFailure(runtime, klass);
            }
        };

        protected RubyFailure(Ruby runtime, RubyClass rubyClass) {
            this(runtime, rubyClass, null);
        }

        public RubyFailure(Ruby runtime, RubyClass rubyClass, String message) {
            super(runtime, rubyClass, message);
        }

        @JRubyMethod(required = 1, optional = 2, visibility = Visibility.PRIVATE)
        @Override
        public IRubyObject initialize(IRubyObject[] args, Block block) {
            super.initialize(args, block);
            success = args.length >= 2 ? args[1] : getRuntime().getNil();
            failed = args.length == 3 ? args[2] : getRuntime().getNil();

            return this;
        }

        @JRubyMethod(name = "success")
        public IRubyObject success() {
            return success;
        }

        @JRubyMethod(name = "failed")
        public IRubyObject failed() {
            return failed;
        }

        @JRubyMethod(name = "inspect")
        @Override
        public IRubyObject inspect() {
            RubyModule rubyClass = getMetaClass();
            StringBuilder buffer = new StringBuilder("#<");
            buffer.append(rubyClass.getName()).append(": ").append(success.inspect().toString());
            buffer.append(", ").append(failed.inspect().toString()).append(">");

            return getRuntime().newString(buffer.toString());
        }
    }

    private static String getCharset(String encoding) {
        int index = encoding.indexOf("//");
        if (index == -1) return encoding;
        return encoding.substring(0, index);
    }
    
    /* Currently dead code, but useful when we figure out how to actually perform translit.
    private static boolean isTranslit(String encoding) {
        return encoding.toLowerCase().indexOf(TRANSLIT) != -1 ? true : false;
    }*/
    
    private static boolean isIgnore(String encoding) {
        return encoding.toLowerCase().indexOf(IGNORE) != -1 ? true : false;
    }

    @JRubyMethod(required = 2, meta = true)
    public static IRubyObject open(ThreadContext context, IRubyObject recv, IRubyObject to, IRubyObject from, Block block) {
        Ruby runtime = context.runtime;

        RubyIconv iconv = newIconv(context, recv, to, from);

        if (!block.isGiven()) return iconv;

        IRubyObject result = runtime.getNil();
        try {
            result = block.yield(context, iconv);
        } finally {
            iconv.close();
        }

        return result;
    }
    
    private static RubyIconv newIconv(ThreadContext context, IRubyObject recv,
            IRubyObject to, IRubyObject from) {
        RubyClass klazz = (RubyClass)recv;

        return (RubyIconv) klazz.newInstance(
                context, new IRubyObject[] {to, from}, Block.NULL_BLOCK);
    }

    @JRubyMethod(visibility = Visibility.PRIVATE)
    public IRubyObject initialize(IRubyObject arg1, IRubyObject arg2, Block unusedBlock) {
        Ruby runtime = getRuntime();
        if (!arg1.respondsTo("to_str")) {
            throw runtime.newTypeError("can't convert " + arg1.getMetaClass() + " into String");
        }
        if (!arg2.respondsTo("to_str")) {
            throw runtime.newTypeError("can't convert " + arg2.getMetaClass() + " into String");
        }

        String to = arg1.convertToString().toString();
        String from = arg2.convertToString().toString();

        try {

            fromEncoding = Charset.forName(getCharset(from)).newDecoder();
            toEncoding = Charset.forName(getCharset(to)).newEncoder();
            count = 0;

            if (isIgnore(to)) {
                fromEncoding.onUnmappableCharacter(CodingErrorAction.IGNORE);
                fromEncoding.onMalformedInput(CodingErrorAction.IGNORE);
                toEncoding.onUnmappableCharacter(CodingErrorAction.IGNORE);
                toEncoding.onMalformedInput(CodingErrorAction.IGNORE);
            } else {
                fromEncoding.onUnmappableCharacter(CodingErrorAction.REPORT);
                fromEncoding.onMalformedInput(CodingErrorAction.REPORT);
                toEncoding.onUnmappableCharacter(CodingErrorAction.REPORT);
                toEncoding.onMalformedInput(CodingErrorAction.REPORT);
            }
        } catch (IllegalCharsetNameException e) {
            throw runtime.newInvalidEncoding("invalid encoding");
        } catch (UnsupportedCharsetException e) {
            throw runtime.newInvalidEncoding("invalid encoding");
        } catch (Exception e) {
            throw runtime.newSystemCallError(e.toString());
        }

        return this;
    }

    @JRubyMethod(name = "close")
    public IRubyObject close() {
        if (toEncoding == null && fromEncoding == null) {
            return getRuntime().getNil();
        }
        toEncoding = null;
        fromEncoding = null;
        return RubyString.newEmptyString(getRuntime());
    }

    @JRubyMethod
    public IRubyObject iconv(IRubyObject str) {
        return iconv(str, 0, -1);
    }

    @JRubyMethod
    public IRubyObject iconv(IRubyObject str, IRubyObject startArg) {
        int start = 0;
        if (!startArg.isNil()) start = RubyNumeric.fix2int(startArg);
        return iconv(str, start, -1);
    }

    @JRubyMethod
    public IRubyObject iconv(IRubyObject str, IRubyObject startArg, IRubyObject endArg) {
        int start = 0;
        int end = -1;

        if (!startArg.isNil()) start = RubyNumeric.fix2int(startArg);
        if (!endArg.isNil()) end = RubyNumeric.fix2int(endArg);

        return iconv(str, start, end);
    }
    
    private IRubyObject iconv(IRubyObject str, int start, int end) {
        if (str.isNil()) {
            fromEncoding.reset();
            toEncoding.reset();
            return RubyString.newEmptyString(getRuntime());
        }

        return _iconv(str.convertToString(), start, end);
    }

    /**
     * Variable-arity version for compatibility. Not bound to Ruby.
     * @deprecated Use the versions with one, two or three arguments.
     */
    public IRubyObject iconv(IRubyObject[] args) {
        switch (args.length) {
        case 1:
            return iconv(args[0]);
        case 2:
            return iconv(args[0], args[1]);
        case 3:
            return iconv(args[0], args[1], args[2]);
        default:
            Arity.raiseArgumentError(getRuntime(), args.length, 1, 2);
            return null; // not reached
        }
    }

    // FIXME: We are assuming that original string will be raw bytes.  If -Ku is provided
    // this will not be true, but that is ok for now.  Deal with that when someone needs it.
    private IRubyObject _iconv(RubyString str, int start, int length) {
        if (fromEncoding == null) {
            throw getRuntime().newArgumentError("closed iconv");
        }
        
        ByteList bytes = str.getByteList();
        
        // treat start and end as start...end for end >= 0, start..end for end < 0
        if (start < 0) {
            start += bytes.length();
        }

        if (start < 0 || start > bytes.length()) { // invalid ranges result in an empty string
            return RubyString.newEmptyString(getRuntime());
        }

        if (length < 0 || length > bytes.length() - start) {
            length = bytes.length() - start;
        }
        
        ByteBuffer buf = ByteBuffer.wrap(bytes.getUnsafeBytes(), bytes.begin() + start, length);
        
        try {
            CharBuffer cbuf = fromEncoding.decode(buf);
            buf = toEncoding.encode(cbuf);
        } catch (MalformedInputException e) {
            throw getRuntime().newIllegalSequence(str.toString());
        } catch (UnmappableCharacterException e) {
            throw getRuntime().newIllegalSequence(str.toString());
        } catch (CharacterCodingException e) {
            throw getRuntime().newInvalidEncoding("invalid sequence");
        } catch (IllegalStateException e) {
            throw getRuntime().newIllegalSequence(str.toString());
        }
        byte[] arr = buf.array();

        start = 0;
        String displayName = toEncoding.charset().displayName();

        if (arr.length >= 2) { // minimum Byte Order Mark (BOM) length
            if (displayName.toLowerCase().startsWith("utf-16")) {
                if ((arr[0] == (byte)0xfe && arr[1] == (byte)0xff)) {
                    if (count > 0) start = 2;
                    endian = "BE";
                } else if (arr[0] == (byte)0xff && arr[1] == (byte)0xfe) {
                    if (count > 0) start = 2;
                    endian = "LE";
                }
            } else if (displayName.toLowerCase().startsWith("utf-32") &&
                    arr.length >= 4) {
                if (arr[0] == (byte)0x00 && arr[1] == (byte)0x00 && arr[2] == (byte)0xfe && arr[3] == (byte)0xff) {
                    if (count > 0) start = 4;
                    endian = "BE";
                } else if (arr[0] == (byte)0xff && arr[1] == (byte)0xfe && arr[2] == (byte)0x00 && arr[3] == (byte)0x00) {
                    if (count > 0) start = 4;
                    endian = "LE";
                }
            }
        }

        count++;

        if (displayName.equalsIgnoreCase("utf-16") || displayName.equalsIgnoreCase("utf-32")) {
            displayName += endian;
        }

        ByteList r = new ByteList(arr, start, buf.limit() - start);

        EncodingDB.Entry entry = EncodingDB.getEncodings().get(displayName.getBytes());
        if (entry != null) {
            Encoding charset = entry.getEncoding();
            r.setEncoding(charset);
        }

        return getRuntime().newString(r);
    }

    @JRubyMethod(name = "iconv", required = 2, rest = true, meta = true)
    public static IRubyObject iconv(ThreadContext context, IRubyObject recv, IRubyObject[] args, Block unusedBlock) {
        return convertWithArgs(context, recv, args, "iconv");
    }
    
    @JRubyMethod(name = "conv", required = 3, rest = true, meta = true)
    public static IRubyObject conv(ThreadContext context, IRubyObject recv, IRubyObject[] args, Block unusedBlock) {
        return convertWithArgs(context, recv, args, "conv").join(context, RubyString.newEmptyString(recv.getRuntime()));
    }

    @JRubyMethod(name = "charset_map", meta= true)
    public static IRubyObject charset_map_get(IRubyObject recv) {
        return recv.getRuntime().getCharsetMap();
    }

    private static String mapCharset(ThreadContext context, IRubyObject val) {
        RubyHash charset = val.getRuntime().getCharsetMap();
        if (charset.size() > 0) {
            RubyString key = val.callMethod(context, "downcase").convertToString();
            IRubyObject tryVal = charset.fastARef(key);
            if (tryVal != null) val = tryVal;
        }

        return val.convertToString().toString();
    }

    public static RubyArray convertWithArgs(ThreadContext context, IRubyObject recv, IRubyObject[] args, String function) {
        assert args.length >= 2;

        RubyArray array = context.runtime.newArray(args.length - 2);
        RubyIconv iconv = newIconv(context, recv, args[0], args[1]);

        try {
            for (int i = 2; i < args.length; i++) {
                array.append(iconv.iconv(args[i]));
            }
        } finally {
            iconv.close();
        }

        return array;
    }
    
    /*
    private static IRubyObject convert(String fromEncoding, String toEncoding, RubyString original) 
        throws UnsupportedEncodingException {
        // Get all bytes from PLAIN string pretend they are not encoded in any way.
        byte[] string = original.getBytes();
        // Now create a string pretending it is from fromEncoding
        string = new String(string, fromEncoding).getBytes(toEncoding);
        // Finally recode back to PLAIN
        return RubyString.newString(original.getRuntime(), string);
    }
    */
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy